/*
 * strcat.S revision 1.1.50.1
 *
 * Written by J.T. Conklin <jtc (at) acorntoolworks.com>
 * Public domain.
 */
5
6 #include <machine/asm.h>
7
8 #if defined(LIBC_SCCS)
9 RCSID("$NetBSD: strcat.S,v 1.1.50.1 2014/05/22 11:26:30 yamt Exp $")
10 #endif
11
/*
 * char *strcat(char *dst, const char *src)
 *
 * Appends the NUL-terminated string at src to the end of the
 * NUL-terminated string at dst and returns dst.
 *
 * In:      %rdi = dst, %rsi = src
 * Out:     %rax = dst (as passed in)
 * Clobber: %rcx, %rdx, %rsi, %rdi, %r8, %r9, flags
 *
 * Register roles inside the routine:
 *	%rdi	cursor into dst (scan position, then write position)
 *	%rsi	read cursor into src
 *	%rdx	word being examined or copied (%dl for single bytes)
 *	%rcx	scratch for the zero-byte test while copying
 *	%r8	0x0101010101010101
 *	%r9	0x8080808080808080
 *
 * Zero-byte test: for a 64-bit word x, (x - %r8) & %r9 is non-zero
 * whenever x contains a zero byte.  It can also be non-zero when x has
 * no zero byte but holds a byte of value 0x81 or above, so every hit is
 * confirmed one byte at a time before it is acted upon.
 */
ENTRY(strcat)
	movq	%rdi,%rax			/* return value = dst */
	movabsq	$0x0101010101010101,%r8
	movabsq	$0x8080808080808080,%r9

	/*
	 * Phase 1a: step through dst one byte at a time until the cursor
	 * is 8-byte aligned, stopping early if the terminator is found.
	 */
.Lfind_end_bytewise:
	testb	$7,%dil
	je	.Lfind_end_wordwise
	cmpb	$0,(%rdi)
	je	.Lappend
	incq	%rdi
	jmp	.Lfind_end_bytewise

	/*
	 * Phase 1b: dst cursor is aligned; test eight bytes per iteration.
	 * %rdi is advanced past the word before the test result is known.
	 */
	_ALIGN_TEXT
.Lfind_end_wordwise:
	movq	(%rdi),%rdx
	addq	$8,%rdi
	subq	%r8,%rdx
	testq	%r9,%rdx
	je	.Lfind_end_wordwise

	/*
	 * The word test fired.  Rewind to the start of that word and look
	 * at each byte in turn; on a zero byte %rdi is left pointing at it.
	 * If all eight bytes are non-zero the hit was spurious: %rdi is
	 * back at the next word, so simply resume the word loop.
	 */
	subq	$8,%rdi
	.rept	8
	cmpb	$0,(%rdi)
	je	.Lappend
	incq	%rdi
	.endr
	jmp	.Lfind_end_wordwise

	/*
	 * Phase 2a: %rdi points at dst's terminator.  Copy single bytes
	 * until the src cursor is 8-byte aligned; return as soon as the
	 * terminator itself has been copied.
	 */
.Lappend:
	testb	$7,%sil
	je	.Lappend_wordwise
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
	incq	%rsi
	incq	%rdi
	testb	%dl,%dl
	jne	.Lappend
	ret

	/*
	 * Phase 2b: src cursor is aligned.  Load a word, and store it whole
	 * only when the zero-byte test says it contains no terminator.
	 * The loop is entered at .Lappend_wordwise (load first).
	 */
	_ALIGN_TEXT
.Lstore_word:
	movq	%rdx,(%rdi)
	addq	$8,%rdi
.Lappend_wordwise:
	movq	(%rsi),%rdx
	movq	%rdx,%rcx			/* keep %rdx intact for the store */
	addq	$8,%rsi
	subq	%r8,%rcx
	testq	%r9,%rcx
	je	.Lstore_word

	/*
	 * The word test fired.  Emit the word one byte at a time, lowest
	 * byte first, and stop right after a zero byte has been written.
	 * If the eighth byte is also non-zero the hit was spurious; the
	 * whole word has been stored by then, so carry on with the next one.
	 */
	.rept	7
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl
	je	.Ldone
	shrq	$8,%rdx				/* next byte into %dl */
	.endr
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl
	jne	.Lappend_wordwise

.Ldone:
	ret
END(strcat)
166