strlen.S revision 1.1 1 1.1 matt /* $NetBSD: strlen.S,v 1.1 2014/09/03 19:34:25 matt Exp $ */
2 1.1 matt
3 1.1 matt /*-
4 1.1 matt * Copyright (C) 2001 Martin J. Laubach <mjl (at) NetBSD.org>
5 1.1 matt * All rights reserved.
6 1.1 matt *
7 1.1 matt * Redistribution and use in source and binary forms, with or without
8 1.1 matt * modification, are permitted provided that the following conditions
9 1.1 matt * are met:
10 1.1 matt * 1. Redistributions of source code must retain the above copyright
11 1.1 matt * notice, this list of conditions and the following disclaimer.
12 1.1 matt * 2. Redistributions in binary form must reproduce the above copyright
13 1.1 matt * notice, this list of conditions and the following disclaimer in the
14 1.1 matt * documentation and/or other materials provided with the distribution.
15 1.1 matt * 3. The name of the author may not be used to endorse or promote products
16 1.1 matt * derived from this software without specific prior written permission.
17 1.1 matt *
18 1.1 matt * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 1.1 matt * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 1.1 matt * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 1.1 matt * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 1.1 matt * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 1.1 matt * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 1.1 matt * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 1.1 matt * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 1.1 matt * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 1.1 matt * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 1.1 matt */
29 1.1 matt /*----------------------------------------------------------------------*/
30 1.1 matt
31 1.1 matt #include <machine/asm.h>
32 1.1 matt
33 1.1 matt __RCSID("$NetBSD: strlen.S,v 1.1 2014/09/03 19:34:25 matt Exp $");
34 1.1 matt
35 1.1 matt /*----------------------------------------------------------------------*/
36 1.1 matt /* The algorithm here uses the following techniques:
37 1.1 matt
38 1.1 matt 1) Given a word 'x', we can test to see if it contains any 0 bytes
39 1.1 matt by subtracting 0x01010101, and seeing if any of the high bits of each
40 1.1 matt byte changed from 0 to 1. This works because the least significant
41 1.1 matt 0 byte must have had no incoming carry (otherwise it's not the least
42 1.1 matt significant), so it is 0x00 - 0x01 == 0xff. For all other
43 1.1 matt byte values, either they have the high bit set initially, or when
44 1.1 matt 1 is subtracted you get a value in the range 0x00-0x7f, none of which
45 1.1 matt have their high bit set. The expression here is
46 1.1 matt (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when
47 1.1 matt there were no 0x00 bytes in the word.
48 1.1 matt
49 1.1 matt 2) Given a word 'x', we can test to see _which_ byte was zero by
50 1.1 matt calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
51 1.1 matt This produces 0x80 in each byte that was zero, and 0x00 in all
52 1.1 matt the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each
53 1.1 matt byte, and the '| x' part ensures that bytes with the high bit set
54 1.1 matt produce 0x00. The addition will carry into the high bit of each byte
55 1.1 matt iff that byte had one of its low 7 bits set. We can then just see
56 1.1 matt which was the most significant bit set and divide by 8 to find how
57 1.1 matt many to add to the index.
58 1.1 matt This is from the book 'The PowerPC Compiler Writer's Guide',
59 1.1 matt by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
60 1.1 matt */
61 1.1 matt /*----------------------------------------------------------------------*/
62 1.1 matt
63 1.1 matt ENTRY(strlen)
64 1.1 matt
65 1.1 matt l.or r12, r3, r0 /* save start */
66 1.1 matt
67 1.1 matt /* Setup constants */
68 1.1 matt l.movhi r13, 0x7f7f
69 1.1 matt l.movhi r15, 0xfefe
70 1.1 matt l.ori r13, r13, 0x7f7f
71 1.1 matt l.ori r15, r15, 0xfeff
72 1.1 matt
73 1.1 matt 1: l.andi r7, r12, 3 /* get low bits of start */
74 1.1 matt l.sfeqi r7, 0 /* all clear? */
75 1.1 matt l.bf 3f /* yes, skip alignment */
76 1.1 matt l.nop /* -- delay slot -- */
77 1.1 matt
78 1.1 matt l.sub r12, r12, r7 /* word align start */
79 1.1 matt l.lwz r8, 0(r12) /* load data */
80 1.1 matt l.addi r6, r0, -1 /* r6 = 0xffffffff */
81 1.1 matt l.slli r5, r7, 3 /* bits to bytes */
82 1.1 matt l.srl r6, r6, r5 /* clear low (MSB) bytes */
83 1.1 matt l.xori r6, r6, -1 /* complement */
84 1.1 matt l.or r8, r8, r6 /* merge with loaded word */
85 1.1 matt l.j 4f /* and process */
86 1.1 matt l.nop /* -- delay-slot -- */
87 1.1 matt
88 1.1 matt 2: l.addi r12, r12, 4 /* advance to next word */
89 1.1 matt 3: l.lwz r8, 0(r12) /* fetch data word */
90 1.1 matt
91 1.1 matt // Step 1: (x + 0xfefefeff) & ~(x | 0x7f7f7f7f)
92 1.1 matt 4: l.or r7, r8, r13 /* t0 = x | 0x7f7f7f7f */
93 1.1 matt l.xori r6, r7, -1 /* t1 = ~t0 */
94 1.1 matt l.add r5, r8, r15 /* t2 = x + 0xfefefeff */
95 1.1 matt l.and r4, r7, r5 /* t3 = t1 & t2 */
96 1.1 matt l.sfeqi r4, 0
97 1.1 matt l.bf 2b /* no NUL bytes here */
98 1.1 matt l.nop /* -- delay slot -- */
99 1.1 matt
100 1.1 matt // Step 2: ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f)
101 1.1 matt l.and r7, r8, r13 /* t0 = x & 0x7f7f7f7f */
102 1.1 matt l.or r6, r8, r13 /* t1 = x | 0x7f7f7f7f */
103 1.1 matt l.add r5, r7, r13 /* t2 = t0 + 0x7f7f7f7f */
104 1.1 matt l.or r4, r5, r6 /* t3 = t2 | t1 */
105 1.1 matt l.xori r4, r4, -1 /* t3 = ~t3 */
106 1.1 matt
107 1.1 matt l.fl1 r5, r4 /* find last bit set */
108 1.1 matt l.ori r6, r0, 32 /* bits per word */
109 1.1 matt l.sub r7, r6, r5 /* cvt to leading zeros */
110 1.1 matt l.srli r8, r7, 3 /* shift to byte count */
111 1.1 matt
112 1.1 matt Ldone:
113 1.1 matt l.add r12, r12, r8 /* r12 contains end pointer */
114 1.1 matt
115 1.1 matt /* NOTE: Keep it so this function returns the end pointer
116 1.1 matt in r12, so we can it use from other str* calls (strcat
117 1.1 matt comes to mind */
118 1.1 matt
119 1.1 matt l.sub r11, r12, r3 /* length = end - start */
120 1.1 matt l.jr lr /* return */
121 1.1 matt l.nop /* -- delay slot -- */
122 1.1 matt END(strlen)
123 1.1 matt /*----------------------------------------------------------------------*/
124