/*
 * memchr.S revision 1.2
 * Written by J.T. Conklin <jtc (at) acorntoolworks.com>
 * Public domain.
 */
5
6 #include <machine/asm.h>
7
8 #if defined(LIBC_SCCS)
9 RCSID("$NetBSD: memchr.S,v 1.2 2014/03/22 19:38:46 jakllsch Exp $")
10 #endif
11
/*
 * void *memchr(const void *s, int c, size_t n)
 *
 * Scan the first n bytes starting at s for the byte value of c
 * (only the low 8 bits of c are used).
 *
 * i386, arguments on the stack.  On entry:
 *	 4(%esp)	s
 *	 8(%esp)	c
 *	12(%esp)	n
 * (each is 4 bytes further from %esp once %esi has been pushed).
 *
 * Returns in %eax the address of the first matching byte, or 0 if
 * none of the n bytes match.
 *
 * Register use:
 *	%eax	cursor into the buffer, and the return value
 *	%ecx	c in %cl; later c replicated into all four bytes
 *	%edx	scratch
 *	%esi	bytes still to examine (saved and restored here)
 */
ENTRY(memchr)
	pushl	%esi
	movl	8(%esp),%eax		/* eax = s */
	movzbl	12(%esp),%ecx		/* ecx = c, zero-extended byte */
	movl	16(%esp),%esi		/* esi = n */

	/*
	 * Head: compare one byte at a time until the cursor is a
	 * multiple of four or the count is used up.
	 */
	testl	%esi,%esi
	jz	.Lnotfound		/* n == 0: nothing to scan */
.Lhead:
	testb	$3,%al
	jz	.Lbroadcast		/* cursor is 4-byte aligned */
	cmpb	(%eax),%cl
	je	.Lreturn		/* match: eax already points at it */
	incl	%eax
	decl	%esi
	jnz	.Lhead
	jmp	.Lnotfound		/* count ran out before alignment */

.Lbroadcast:
	/* ecx = c | c << 8 | c << 16 | c << 24 */
	movb	%cl,%ch
	movl	%ecx,%edx
	shll	$16,%ecx
	orl	%edx,%ecx

	/*
	 * Word loop: runs while at least 4 bytes remain.  XOR with the
	 * replicated c turns every byte equal to c into 0.  Subtracting
	 * 1 from each byte then sets bit 7 of any byte that was 0; a
	 * byte that was not 0 can end up with bit 7 set as well, so a
	 * hit here only means "possible match in this word".
	 */
	_ALIGN_TEXT
.Lwords:
	cmpl	$3,%esi
	jbe	.Ltail			/* fewer than 4 bytes left */
	movl	(%eax),%edx
	addl	$4,%eax
	xorl	%ecx,%edx
	subl	$4,%esi
	subl	$0x01010101,%edx
	testl	$0x80808080,%edx
	jz	.Lwords			/* no byte flagged: next word */

	/*
	 * Candidate word.  The cursor has already moved past it, so its
	 * bytes sit at -4 .. -1 from %eax.  Compare them in order and
	 * return the first that equals c; if none does, the hit was a
	 * false alarm and scanning resumes at .Lwords.
	 *
	 * The next byte is fetched into %dl before branching on the
	 * current one, rather than comparing against memory each time,
	 * to hide the high load-use latency of the Athlon.  The
	 * _ALIGN_TEXT directly below is not a branch target; the
	 * original author notes that it avoids a stall on the Athlon.
	 */
	_ALIGN_TEXT
	cmpb	-4(%eax),%cl		/* byte 0 of the word */
	movb	-3(%eax),%dl		/* preload byte 1 */
	jne	.Ltry1
	subl	$4,%eax
	jmp	.Lreturn

	_ALIGN_TEXT
.Ltry1:
	cmpb	%dl,%cl			/* byte 1 */
	movb	-2(%eax),%dl		/* preload byte 2 */
	jne	.Ltry2
	subl	$3,%eax
	jmp	.Lreturn

	_ALIGN_TEXT
.Ltry2:
	cmpb	%dl,%cl			/* byte 2 */
	movb	-1(%eax),%dl		/* preload byte 3 */
	jne	.Ltry3
	subl	$2,%eax
	jmp	.Lreturn

	_ALIGN_TEXT
.Ltry3:
	cmpb	%dl,%cl			/* byte 3 */
	jne	.Lwords			/* false alarm: keep scanning */
	decl	%eax
	jmp	.Lreturn

	/* Tail: 0 to 3 bytes remain, checked one at a time. */
.Ltail:
	testl	%esi,%esi
	jz	.Lnotfound
.Ltail_loop:
	cmpb	(%eax),%cl
	je	.Lreturn
	incl	%eax
	decl	%esi
	jnz	.Ltail_loop

.Lnotfound:
	xorl	%eax,%eax		/* no match: return 0 */

.Lreturn:
	popl	%esi
	ret
END(memchr)
111