strcat.S revision 1.2 1 /*
2 * Written by J.T. Conklin <jtc (at) acorntoolworks.com>
3 * Public domain.
4 */
5
6 #include <machine/asm.h>
7
8 #if defined(LIBC_SCCS)
9 RCSID("$NetBSD: strcat.S,v 1.2 2014/03/22 19:38:46 jakllsch Exp $")
10 #endif
11
/*
 * strcat: append the NUL-terminated string given as the second stack
 * argument to the end of the NUL-terminated string given as the first
 * stack argument, and return the first argument in %eax.
 *
 * Register roles:
 *	%ecx	write cursor into the destination
 *	%eax	read cursor into the source (result register on exit)
 *	%ebx	word/byte currently being examined; saved and restored
 *	%edx	scratch for the zero-byte test
 *
 * Zero-byte test used by both word loops:
 *	(word - 0x01010101) & 0x80808080
 * is non-zero whenever some byte of the word is 0.  It can also be
 * non-zero when no byte is 0 (a byte of 0x81 or above), so every exit
 * from a word loop is followed by an exact per-byte check that resumes
 * the loop on a false alarm.
 */
ENTRY(strcat)
	pushl	%ebx
	/* The push moved the arguments up by 4: 8(%esp) = arg 1, 12(%esp) = arg 2. */
	movl	8(%esp),%ecx		/* destination */
	movl	12(%esp),%eax		/* source */

	/*
	 * Step through the destination one byte at a time until the
	 * cursor is 4-byte aligned, leaving early if the terminator is
	 * met first.
	 */
.Ldst_bytewise:
	testb	$3,%cl
	je	.Ldst_wordwise
	cmpb	$0,(%ecx)
	je	.Lappend
	incl	%ecx
	jmp	.Ldst_bytewise

	/*
	 * Destination cursor is aligned: examine 4 bytes per iteration.
	 * %ecx is advanced before the test, so on exit it points just
	 * past the word held in %ebx.
	 */
	_ALIGN_TEXT
.Ldst_wordwise:
	movl	(%ecx),%ebx
	addl	$4,%ecx
	leal	-0x01010101(%ebx),%edx
	testl	$0x80808080,%edx
	je	.Ldst_wordwise

	/*
	 * The word may contain a NUL.  Check the bytes from lowest
	 * address to highest; the fix-up stubs after the epilogue move
	 * %ecx back onto the NUL.  If no byte is 0 this was a false
	 * alarm and scanning resumes.  This is compact rather than fast,
	 * on the expectation that it does not run often.
	 */
	testb	%bl,%bl			/* byte 0 */
	je	.Lnul_in_byte0
	testb	%bh,%bh			/* byte 1 */
	je	.Lnul_in_byte1
	shrl	$16,%ebx		/* bring bytes 2 and 3 into %bl/%bh */
	testb	%bl,%bl			/* byte 2 */
	je	.Lnul_in_byte2
	testb	%bh,%bh			/* byte 3 */
	jne	.Ldst_wordwise
	subl	$1,%ecx			/* NUL is the last byte of the word */

	/*
	 * %ecx now addresses the destination's terminator.  Copy source
	 * bytes one at a time (terminator included) until the source
	 * cursor is 4-byte aligned.
	 */
.Lappend:
	testb	$3,%al
	je	.Lsrc_wordwise
	movb	(%eax),%bl
	movb	%bl,(%ecx)
	incl	%eax
	incl	%ecx
	testb	%bl,%bl
	jne	.Lappend
	jmp	.Lfinish

	/*
	 * Source cursor is aligned: move 4 bytes per iteration.  The
	 * loop is entered at .Lsrc_wordwise; a word is stored only after
	 * the zero-byte test has passed it.  Only the source cursor is
	 * known to be aligned here, so the word stores through %ecx may
	 * be unaligned.
	 */
	_ALIGN_TEXT
.Lstore_word:
	movl	%ebx,(%ecx)
	addl	$4,%ecx
.Lsrc_wordwise:
	movl	(%eax),%ebx
	addl	$4,%eax
	leal	-0x01010101(%ebx),%edx
	testl	$0x80808080,%edx
	je	.Lstore_word

	/*
	 * The word may contain a NUL.  Store it byte by byte and stop
	 * right after the NUL has been written.  If all four bytes turn
	 * out to be non-zero the whole word has been copied and the word
	 * loop is resumed.
	 */
	movb	%bl,(%ecx)		/* byte 0 */
	incl	%ecx
	testb	%bl,%bl
	je	.Lfinish

	movb	%bh,(%ecx)		/* byte 1 */
	incl	%ecx
	testb	%bh,%bh
	je	.Lfinish

	shrl	$16,%ebx		/* bring bytes 2 and 3 into %bl/%bh */
	movb	%bl,(%ecx)		/* byte 2 */
	incl	%ecx
	testb	%bl,%bl
	je	.Lfinish

	movb	%bh,(%ecx)		/* byte 3 */
	incl	%ecx
	testb	%bh,%bh
	jne	.Lsrc_wordwise

.Lfinish:
	popl	%ebx
	movl	4(%esp),%eax		/* return the first argument */
	ret

	/*
	 * Fix-ups for the destination scan: %ecx is 4 past the start of
	 * the examined word, so step back to the byte that held the NUL.
	 */
.Lnul_in_byte0:
	subl	$4,%ecx
	jmp	.Lappend
.Lnul_in_byte1:
	subl	$3,%ecx
	jmp	.Lappend
.Lnul_in_byte2:
	subl	$2,%ecx
	jmp	.Lappend
END(strcat)
129