Home | History | Annotate | Line # | Download | only in string
strlen.S revision 1.5
      1  1.5      ross /*	$NetBSD: strlen.S,v 1.5 2006/08/15 02:23:54 ross Exp $ */
      2  1.1  christos 
      3  1.4      ross /*-
      4  1.4      ross  * Copyright (C) 2001	Martin J. Laubach <mjl (at) NetBSD.org>
      5  1.4      ross  * All rights reserved.
      6  1.4      ross  *
      7  1.4      ross  * Redistribution and use in source and binary forms, with or without
      8  1.4      ross  * modification, are permitted provided that the following conditions
      9  1.4      ross  * are met:
     10  1.4      ross  * 1. Redistributions of source code must retain the above copyright
     11  1.4      ross  *    notice, this list of conditions and the following disclaimer.
     12  1.4      ross  * 2. Redistributions in binary form must reproduce the above copyright
     13  1.4      ross  *    notice, this list of conditions and the following disclaimer in the
     14  1.4      ross  *    documentation and/or other materials provided with the distribution.
     15  1.4      ross  * 3. The name of the author may not be used to endorse or promote products
     16  1.4      ross  *    derived from this software without specific prior written permission.
     17  1.4      ross  *
     18  1.4      ross  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     19  1.4      ross  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     20  1.4      ross  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     21  1.4      ross  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     22  1.4      ross  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     23  1.4      ross  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     24  1.4      ross  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     25  1.4      ross  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     26  1.4      ross  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     27  1.4      ross  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     28  1.4      ross  */
     29  1.4      ross /*----------------------------------------------------------------------*/
     30  1.1  christos 
     31  1.4      ross #define _NOREGNAMES
     32  1.4      ross 
     33  1.4      ross #include <machine/asm.h>
     34  1.4      ross 
     35  1.4      ross /*----------------------------------------------------------------------*/
     36  1.4      ross /* The algorithm here uses the following techniques:
     37  1.4      ross 
     38  1.4      ross    1) Given a word 'x', we can test to see if it contains any 0 bytes
     39  1.4      ross       by subtracting 0x01010101, and seeing if any of the high bits of each
     40  1.4      ross       byte changed from 0 to 1. This works because the least significant
     41  1.4      ross       0 byte must have had no incoming carry (otherwise it's not the least
     42  1.4      ross       significant), so it is 0x00 - 0x01 == 0xff. For all other
     43  1.4      ross       byte values, either they have the high bit set initially, or when
     44  1.4      ross       1 is subtracted you get a value in the range 0x00-0x7f, none of which
     45  1.4      ross       have their high bit set. The expression here is
     46  1.4      ross       (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when
     47  1.4      ross       there were no 0x00 bytes in the word.
     48  1.4      ross 
     49  1.4      ross    2) Given a word 'x', we can test to see _which_ byte was zero by
     50  1.4      ross       calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
     51  1.4      ross       This produces 0x80 in each byte that was zero, and 0x00 in all
     52  1.4      ross       the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each
     53  1.4      ross       byte, and the '| x' part ensures that bytes with the high bit set
     54  1.4      ross       produce 0x00. The addition will carry into the high bit of each byte
     55  1.4      ross       iff that byte had one of its low 7 bits set. We can then just see
     56  1.4      ross       which was the most significant bit set and divide by 8 to find how
     57  1.4      ross       many to add to the index.
     58  1.4      ross       This is from the book 'The PowerPC Compiler Writer's Guide',
     59  1.4      ross       by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
     60  1.4      ross */
     61  1.4      ross /*----------------------------------------------------------------------*/
     62  1.4      ross 
     63  1.4      ross 		.text
     64  1.4      ross 		.align 4
     65  1.4      ross 
     66  1.4      ross ENTRY(strlen)
     67  1.4      ross 
     68  1.4      ross 		/* Setup constants */
     69  1.4      ross 		lis	%r10, 0x7f7f
     70  1.4      ross 		lis	%r9, 0xfefe
     71  1.4      ross 		ori	%r10, %r10, 0x7f7f
     72  1.4      ross 		ori	%r9, %r9, 0xfeff
     73  1.4      ross 
     74  1.4      ross 		/* Mask out leading bytes on non aligned strings */
     75  1.4      ross 		rlwinm.	%r8, %r3, 3, 27, 28	/* leading bits to mask */
     76  1.5      ross #ifdef _LP64
     77  1.5      ross 		clrrdi	%r5, %r3, 2		/*  clear low 2 addr bits */
     78  1.5      ross #else
     79  1.4      ross 		clrrwi	%r5, %r3, 2		/*  clear low 2 addr bits */
     80  1.5      ross #endif
     81  1.4      ross 		li	%r0, -1
     82  1.4      ross 		beq+	3f			/* skip alignment if already */
     83  1.4      ross 						/* aligned */
     84  1.4      ross 
     85  1.4      ross 		srw	%r0, %r0, %r8		/* make 0000...1111 mask */
     86  1.4      ross 
     87  1.4      ross 		lwz	%r7, 0(%r5)
     88  1.4      ross 		nor	%r0, %r0, %r0		/* invert mask */
     89  1.4      ross 		or	%r7, %r7, %r0		/* make leading bytes != 0 */
     90  1.4      ross 		b	2f
     91  1.4      ross 
     92  1.4      ross 3:		subi	%r5, %r5, 4
     93  1.4      ross 
     94  1.4      ross 1:		lwzu	%r7, 4(%r5)		/* fetch data word */
     95  1.4      ross 
     96  1.4      ross 2:		nor	%r0, %r7, %r10		/* do step 1 */
     97  1.4      ross 		add	%r6, %r7, %r9
     98  1.4      ross 		and.	%r0, %r0, %r6
     99  1.4      ross 
    100  1.4      ross 		beq+	1b			/* no NUL bytes here */
    101  1.4      ross 
    102  1.4      ross 		and	%r8, %r7, %r10		/* ok, a NUL is somewhere */
    103  1.4      ross 		or	%r7, %r7, %r10		/* do step 2 to find out */
    104  1.4      ross 		add	%r0, %r8, %r10		/* where */
    105  1.4      ross 		nor	%r8, %r7, %r0
    106  1.4      ross 
    107  1.4      ross 		cntlzw	%r0, %r8		/* offset from this word */
    108  1.4      ross 		srwi	%r4, %r0, 3
    109  1.4      ross 
    110  1.4      ross 		add	%r4, %r5, %r4		/* r4 contains end pointer */
    111  1.4      ross 		/* NOTE: Keep it so this function returns the end pointer
    112  1.4      ross 		   in r4, so we can it use from other str* calls (strcat
    113  1.4      ross 		   comes to mind */
    114  1.4      ross 
    115  1.4      ross 		subf	%r3, %r3, %r4
    116  1.4      ross 		blr
    117  1.4      ross 
    118  1.4      ross /*----------------------------------------------------------------------*/
    119