Home | History | Annotate | Line # | Download | only in string
      1  1.1  matt /*	$NetBSD: strlen.S,v 1.1 2014/09/03 19:34:25 matt Exp $ */
      2  1.1  matt 
      3  1.1  matt /*-
      4  1.1  matt  * Copyright (C) 2001	Martin J. Laubach <mjl (at) NetBSD.org>
      5  1.1  matt  * All rights reserved.
      6  1.1  matt  *
      7  1.1  matt  * Redistribution and use in source and binary forms, with or without
      8  1.1  matt  * modification, are permitted provided that the following conditions
      9  1.1  matt  * are met:
     10  1.1  matt  * 1. Redistributions of source code must retain the above copyright
     11  1.1  matt  *    notice, this list of conditions and the following disclaimer.
     12  1.1  matt  * 2. Redistributions in binary form must reproduce the above copyright
     13  1.1  matt  *    notice, this list of conditions and the following disclaimer in the
     14  1.1  matt  *    documentation and/or other materials provided with the distribution.
     15  1.1  matt  * 3. The name of the author may not be used to endorse or promote products
     16  1.1  matt  *    derived from this software without specific prior written permission.
     17  1.1  matt  *
     18  1.1  matt  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     19  1.1  matt  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     20  1.1  matt  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     21  1.1  matt  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     22  1.1  matt  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     23  1.1  matt  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     24  1.1  matt  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     25  1.1  matt  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     26  1.1  matt  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     27  1.1  matt  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     28  1.1  matt  */
     29  1.1  matt /*----------------------------------------------------------------------*/
     30  1.1  matt 
     31  1.1  matt #include <machine/asm.h>
     32  1.1  matt 
     33  1.1  matt __RCSID("$NetBSD: strlen.S,v 1.1 2014/09/03 19:34:25 matt Exp $");
     34  1.1  matt 
     35  1.1  matt /*----------------------------------------------------------------------*/
     36  1.1  matt /* The algorithm here uses the following techniques:
     37  1.1  matt 
     38  1.1  matt    1) Given a word 'x', we can test to see if it contains any 0 bytes
     39  1.1  matt       by subtracting 0x01010101, and seeing if any of the high bits of each
     40  1.1  matt       byte changed from 0 to 1. This works because the least significant
     41  1.1  matt       0 byte must have had no incoming carry (otherwise it's not the least
     42  1.1  matt       significant), so it is 0x00 - 0x01 == 0xff. For all other
     43  1.1  matt       byte values, either they have the high bit set initially, or when
     44  1.1  matt       1 is subtracted you get a value in the range 0x00-0x7f, none of which
     45  1.1  matt       have their high bit set. The expression here is
     46  1.1  matt       (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when
     47  1.1  matt       there were no 0x00 bytes in the word.
     48  1.1  matt 
     49  1.1  matt    2) Given a word 'x', we can test to see _which_ byte was zero by
     50  1.1  matt       calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
     51  1.1  matt       This produces 0x80 in each byte that was zero, and 0x00 in all
     52  1.1  matt       the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each
     53  1.1  matt       byte, and the '| x' part ensures that bytes with the high bit set
     54  1.1  matt       produce 0x00. The addition will carry into the high bit of each byte
     55  1.1  matt       iff that byte had one of its low 7 bits set. We can then just see
     56  1.1  matt       which was the most significant bit set and divide by 8 to find how
     57  1.1  matt       many to add to the index.
     58  1.1  matt       This is from the book 'The PowerPC Compiler Writer's Guide',
     59  1.1  matt       by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
     60  1.1  matt */
     61  1.1  matt /*----------------------------------------------------------------------*/
     62  1.1  matt 
     63  1.1  matt ENTRY(strlen)
     64  1.1  matt 
     65  1.1  matt 		l.or	r12, r3, r0		/* save start */
     66  1.1  matt 
     67  1.1  matt 		/* Setup constants */
     68  1.1  matt 		l.movhi	r13, 0x7f7f
     69  1.1  matt 		l.movhi	r15, 0xfefe
     70  1.1  matt 		l.ori	r13, r13, 0x7f7f
     71  1.1  matt 		l.ori	r15, r15, 0xfeff
     72  1.1  matt 
     73  1.1  matt 1:		l.andi	r7, r12, 3		/* get low bits of start */
     74  1.1  matt 		l.sfeqi	r7, 0			/* all clear? */
     75  1.1  matt 		l.bf	3f			/*   yes, skip alignment */
     76  1.1  matt 		l.nop				/* -- delay slot -- */
     77  1.1  matt 
     78  1.1  matt 		l.sub	r12, r12, r7		/* word align start */
     79  1.1  matt 		l.lwz	r8, 0(r12)		/* load data */
     80  1.1  matt 		l.addi	r6, r0, -1		/* r6 = 0xffffffff */
     81  1.1  matt 		l.slli	r5, r7, 3		/* bits to bytes */
     82  1.1  matt 		l.srl	r6, r6, r5		/* clear low (MSB) bytes */
     83  1.1  matt 		l.xori	r6, r6, -1		/* complement */
     84  1.1  matt 		l.or	r8, r8, r6		/* merge with loaded word */
     85  1.1  matt 		l.j	4f			/* and process */
     86  1.1  matt 		l.nop				/* -- delay-slot -- */
     87  1.1  matt 
     88  1.1  matt 2:		l.addi	r12, r12, 4		/* advance to next word */
     89  1.1  matt 3:		l.lwz	r8, 0(r12)		/* fetch data word */
     90  1.1  matt 
     91  1.1  matt 		// Step 1: (x + 0xfefefeff) & ~(x | 0x7f7f7f7f)
     92  1.1  matt 4:		l.or	r7, r8, r13		/* t0 = x | 0x7f7f7f7f */
     93  1.1  matt 		l.xori	r6, r7, -1		/* t1 = ~t0 */
     94  1.1  matt 		l.add	r5, r8, r15		/* t2 = x + 0xfefefeff */
     95  1.1  matt 		l.and	r4, r7, r5		/* t3 = t1 & t2 */
     96  1.1  matt 		l.sfeqi	r4, 0
     97  1.1  matt 		l.bf	2b			/* no NUL bytes here */
     98  1.1  matt 		l.nop				/* -- delay slot -- */
     99  1.1  matt 
    100  1.1  matt 		// Step 2: ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f)
    101  1.1  matt 		l.and	r7, r8, r13		/* t0 = x & 0x7f7f7f7f */
    102  1.1  matt 		l.or	r6, r8, r13		/* t1 = x | 0x7f7f7f7f */
    103  1.1  matt 		l.add	r5, r7, r13		/* t2 = t0 + 0x7f7f7f7f */
    104  1.1  matt 		l.or	r4, r5, r6		/* t3 = t2 | t1 */
    105  1.1  matt 		l.xori	r4, r4, -1		/* t3 = ~t3 */
    106  1.1  matt 
    107  1.1  matt 		l.fl1	r5, r4			/* find last bit set */
    108  1.1  matt 		l.ori	r6, r0, 32		/* bits per word */
    109  1.1  matt 		l.sub	r7, r6, r5		/* cvt to leading zeros */
    110  1.1  matt 		l.srli	r8, r7, 3		/* shift to byte count */
    111  1.1  matt 
    112  1.1  matt Ldone:
    113  1.1  matt 		l.add	r12, r12, r8		/* r12 contains end pointer */
    114  1.1  matt 
    115  1.1  matt 		/* NOTE: Keep it so this function returns the end pointer
    116  1.1  matt 		   in r12, so we can it use from other str* calls (strcat
    117  1.1  matt 		   comes to mind */
    118  1.1  matt 
    119  1.1  matt 		l.sub	r11, r12, r3		/* length = end - start */
    120  1.1  matt 		l.jr	lr			/* return */
    121  1.1  matt 		l.nop				/* -- delay slot -- */
    122  1.1  matt END(strlen)
    123  1.1  matt /*----------------------------------------------------------------------*/
    124