/*-
 * Copyright (c) 2013 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

RCSID("$NetBSD: strcpy_arm.S,v 1.1 2013/01/08 13:17:45 matt Exp $")

/*
 * This one source builds three functions: defining STRLCPY selects
 * strlcpy, defining STRNCPY selects strncpy, and with neither defined
 * it builds strcpy.  FUNCNAME is the symbol handed to ENTRY()/END().
 *
 * NOTE(review): the weak alias is emitted before "namespace.h" is
 * included; presumably that header renames strlcpy, so keep this order
 * unless confirmed otherwise.
 */
#ifdef STRLCPY
#ifdef _LIBC
WEAK_ALIAS(strlcpy, _strlcpy)
#endif
#define	FUNCNAME	strlcpy
#elif defined(STRNCPY)
#define	FUNCNAME	strncpy
#else
#define	FUNCNAME	strcpy
#endif

#ifdef _LIBC
#include "namespace.h"
#endif

/*
 * Endian-neutral helpers for working on a word that was loaded from
 * memory with ldr:
 *	lslo	shift data toward the lower address
 *	lshi	shift data toward the higher address
 *	BYTEn	mask selecting the byte that came from address offset n
 */
#ifdef __ARMEL__
#define	lslo	lsr		/* shift to lower address */
#define	lshi	lsl		/* shift to higher address */
#define	BYTE0	0x000000ff
#define	BYTE1	0x0000ff00
#define	BYTE2	0x00ff0000
#define	BYTE3	0xff000000
#else
#define	lslo	lsl		/* shift to lower address */
#define	lshi	lsr		/* shift to higher address */
#define	BYTE0	0xff000000
#define	BYTE1	0x00ff0000
#define	BYTE2	0x0000ff00
#define	BYTE3	0x000000ff
#endif

/*
 * On armv6 and later, to quickly determine if a word contains a NUL (0) byte,
 * we add 254 to each byte using the UQADD8 (unsigned saturating add 8)
 * instruction.  For every non-NUL byte, the result for that byte will become
 * 255.  For NUL, it will be 254.  When we complement the result of all 4 adds
 * (each non-NUL byte becomes 0x00 and each NUL byte becomes 0x01), if the
 * result is non-0 then we must have encountered a NUL.
 *
 * For earlier architectures, we just use tst on all 4 bytes.  There are other
 * algorithms to detect NULs but they take longer and use more instructions.
 */

/*
 * char *strcpy(char *dst, const char *src);
 * char *strncpy(char *dst, const char *src, size_t len);
 * size_t strlcpy(char *dst, const char *src, size_t len);
 */

/*
 * FUNCNAME entry and prologue.
 *
 * In:	r0 = dst, r1 = src, r2 = len (strncpy and strlcpy only)
 *
 * Register roles set up here for the code that follows:
 *	r2	space left in dst (strlcpy: one byte already set aside
 *		for the trailing NUL)
 *	r6	strcpy/strncpy: the original dst, returned to the caller
 *		strlcpy: src + 1, subtracted from the src pointer at
 *		.Lend_of_string to produce the return value
 *	r7	0xfefefefe, the NUL detection constant (armv6 and later)
 *
 * r4-r9 are pushed here; the strlcpy tail call and the strncpy early
 * return below both happen before the push.
 */
	.text
ENTRY(FUNCNAME)
#if defined(STRLCPY)
	cmp	r2, #1			/* is length 1 or less? */
	bhi	1f			/* no, do normal */
	moveq	r3, #0			/* = 1? load NUL */
	streqb	r3, [r0]		/* = 1? write NUL to dst */
	mov	r0, r1			/* move src to r0 */
	b	PLT_SYM(_C_LABEL(strlen)) /* and tailcall strlen */
1:
	sub	r2, r2, #1		/* leave one byte for NUL */
#endif
#if defined(STRNCPY)
	cmp	r2, #0			/* 0 length? */
	RETc(eq)			/* yes, just return */
#endif
	push	{r4-r9}			/* save some registers */
#ifdef _ARM_ARCH_6
#ifdef _ARM_ARCH_7
	movw	r7, #0xfefe		/* magic constant; 254 in each byte */
#else
	mov	r7, #0xfe		/* put 254 in low byte */
	orr	r7, r7, r7, lsl #8	/* move to next byte */
#endif
	orr	r7, r7, r7, lsl #16	/* move to next halfword */
#endif

#if defined(STRLCPY)
	add	r6, r1, #1		/* save for return (deal with NUL) */
#else
	mov	r6, r0			/* save for return */
#endif

.Ldst_align:
	/*
	 * Copy one byte at a time until dst is word aligned, the string
	 * ends, or (strncpy/strlcpy) dst runs out of room.
	 */
	tst	r0, #3			/* check for dst alignment */
	beq	.Ldst_aligned		/* ok, proceed to next check */
#if defined(STRNCPY)
	/*
	 * Check the count before touching src so that src[len] is never
	 * read when dst fills up before it becomes word aligned.
	 */
	subs	r2, r2, #1		/* subtract one from count */
	bmi	.Ldst_full		/* negative? the dst has no more room */
#endif
	ldrb	r5, [r1], #1		/* load a byte */
	strb	r5, [r0], #1		/* store a byte */
	teq	r5, #0			/* was it a NUL? */
	beq	.Lend_of_string		/* yes, we are done */
#if defined(STRLCPY)
	subs	r2, r2, #1		/* subtract one from count */
	streqb	r2, [r0], #1		/* zero? write trailing NUL */
	beq	.Ldst_full		/* zero? the dst has no more room */
#endif
	b	.Ldst_align		/* loop around for next byte */
.Ldst_aligned:
#if defined(STRNCPY)
	/*
	 * The byte loop above may have consumed the whole count.  Stop
	 * here instead of loading another src word that lies entirely
	 * beyond src[len - 1].
	 */
	teq	r2, #0			/* any room left in dst? */
	beq	.Ldst_full		/* no, all len bytes were copied */
#endif
	tst	r1, #3			/* get the misalignment of src */
	bne	.Lincongruent		/* !=? incongruent (slower) */

	/* =? congruent (faster) */

.Lcongruent:
	/*
	 * dst and src are both word aligned: copy a word at a time until a
	 * word containing a NUL is loaded or dst runs out of room.  On
	 * leaving for .Llast_bytes, r5 holds the word with the NUL and r3
	 * the bit offset of the NUL byte within it (the armv6 path leaves
	 * extra low bits set, which .Llast_bytes truncates).
	 */
#if defined(STRLCPY)
	add	r6, r6, #3		/* compensate for word post-inc */
#endif
	b	.Lcongruent_mainloop_load
.Lcongruent_mainloop:
#if defined(STRLCPY) || defined(STRNCPY)
	subs	r2, r2, #4		/* subtract 4 from the count */
	bmi	.Lno_more_room
#endif
	str	r5, [r0], #4		/* store word into dst */
	/* str leaves the flags alone; beq below still tests the subs */
#if defined(STRLCPY)
	beq	.Lno_more_room		/* count is 0? no room in dst */
#endif
#if defined(STRNCPY)
	beq	.Ldst_full_word_aligned	/* count is 0? no room in dst */
#endif
.Lcongruent_mainloop_load:
	ldr	r5, [r1], #4		/* load word from source */
#if defined(_ARM_ARCH_6)
	uqadd8	r3, r5, r7		/* magic happens here */
	mvns	r3, r3			/* is the complemented result 0? */
	beq	.Lcongruent_mainloop	/* yes, no NULs, do it again */
#else
	tst	r5, #BYTE0		/* does byte 0 contain a NUL? */
	tstne	r5, #BYTE1		/* no, does byte 1 contain a NUL? */
	tstne	r5, #BYTE2		/* no, does byte 2 contain a NUL? */
	tstne	r5, #BYTE3		/* no, does byte 3 contain a NUL? */
	bne	.Lcongruent_mainloop	/* no NUL in any byte, do it again */
#endif
#if defined(STRLCPY) && 0
	/* disabled by the "&& 0" above; kept as in the original */
	sub	r1, r1, #3		/* back up src pointer */
#endif
#if defined(_ARM_ARCH_6)
#ifdef __ARMEL__
	rev	r3, r3			/* CLZ needs BE data */
#endif
	clz	r3, r3			/* count leading zeros */
#else
	mov	r3, #0			/* assume NUL is in byte 0 */
	tst	r5, #BYTE0		/* is NUL in byte 0? */
	beq	.Lcongruent_last_bytes	/* yes, done searching. */
	mov	r3, #8			/* assume NUL is in byte 1 */
	tst	r5, #BYTE1		/* is NUL in byte 1? */
	beq	.Lcongruent_last_bytes	/* yes, done searching. */
	mov	r3, #16			/* assume NUL is in byte 2 */
	tst	r5, #BYTE2		/* is NUL in byte 2? */
#if !defined(STRLCPY)
	beq	.Lcongruent_last_bytes	/* yes, done searching. */
	mov	r3, #24			/* NUL must be in byte 3 */
#else
	movne	r3, #24			/* no, then NUL is in byte 3 */
#endif
#endif /* _ARM_ARCH_6 */
#if defined(STRLCPY)
.Lcongruent_last_bytes:
#endif
#if defined(STRLCPY)
	add	r1, r1, r3, lsr #3	/* position to point at NUL + 4 */
#endif
	b	.Llast_bytes		/* store the last bytes */


.Lincongruent:
	/*
	 * At this point dst is word aligned but src is not.  Load the
	 * aligned word that contains the first src byte and drop the
	 * bytes that precede src.
	 *
	 *	r9 = discard shift   = 8 * (src & 3)
	 *	r8 = insertion shift = 32 - r9
	 *	r4 = src bytes carried over, moved down to byte 0
	 */
	and	r3, r1, #3		/* extract misalignment */
	mov	r9, r3, lsl #3		/* calculate discard shift */
	rsb	r8, r9, #32		/* calculate insertion shift */
#if defined(STRLCPY)
	add	r6, r6, #3		/* compensate for word post-inc */
#endif
	bic	r1, r1, #3		/* word align src */
	ldr	r5, [r1], #4		/* load word from src */
	mov	r4, r5, lslo r9		/* discard lo bytes from src */
	tst	r4, #BYTE0		/* does byte 0 contain a NUL? */
#if defined(STRNCPY)
	beq	.Lend_of_string		/* yes, zero fill rest of string */
#else
	moveq	r3, r9			/* yes, set offset */
	beq	.Lincongruent_end_of_string /* yes, deal with the last bytes */
#endif
	/*
	 * So that our test for NULs below does not generate false
	 * positives, fill the bytes in the word that precede src with
	 * all 1s.
	 */
	mvn	r3, #0			/* create a mask */
	mov	r3, r3, lslo r8		/* zero out bytes being kept */
	orr	r3, r3, r5		/* merge src and mask */
#ifdef _ARM_ARCH_6
	uqadd8	r3, r3, r7		/* NUL detection magic happens */
	mvns	r3, r3			/* is the complemented result 0? */
	beq	.Lincongruent_mainloop_load /* yes, no NUL encountered! */
#ifdef __ARMEL__
	rev	r3, r3			/* CLZ wants BE input */
#endif
	clz	r3, r3			/* count leading zeros */
#else
	/*
	 * Test the masked word, not r5: the bytes of r5 that precede src
	 * are not part of the string and a zero among them must not be
	 * taken for the terminator.  r3 is about to hold the NUL
	 * position, so keep the masked word in ip.
	 *
	 * The first src byte was already tested above, and byte 0 of the
	 * masked word is either that byte or forced to all 1s, so it
	 * does not need to be tested again.
	 *
	 * Once one tst finds a NUL the Z flag stays set, so each later
	 * subeq also executes and each later tstne is skipped; r3 ends
	 * up as 8 * (index of the first NUL byte).
	 */
	mov	ip, r3			/* ip = src word, leading bytes all 1s */
	mov	r3, #24			/* assume NUL is in byte 3 */
	tst	ip, #BYTE1		/* did we find a NUL in byte 1? */
	subeq	r3, r3, #8		/* yes, decrement byte position */
	tstne	ip, #BYTE2		/* no, did we find a NUL in byte 2? */
	subeq	r3, r3, #8		/* yes, decrement byte position */
	tstne	ip, #BYTE3		/* no, did we find a NUL in byte 3? */
	bne	.Lincongruent_mainloop_load /* no, no NUL encountered! */
#endif
	mov	r5, r4			/* discard already dealt with bytes */
.Lincongruent_end_of_string:
#if defined(STRLCPY)
	add	r1, r1, r3, lsr #3	/* then add offset to NUL */
#endif
	sub	r3, r3, r9		/* adjust NUL offset */
	b	.Llast_bytes		/* NUL encountered! finish up */

#if defined(STRLCPY) || defined(STRNCPY)
.Lincongruent_no_more_room:
	mov	r5, r4			/* move data to be stored to r5 */
	b	.Lno_more_room		/* fill remaining space */
#endif /* STRLCPY || STRNCPY */

/*
 * Word loop for the incongruent case.  Each dst word is assembled from
 * the src bytes carried over in r4 plus the leading bytes of the newly
 * loaded src word in r5 (r8 = insertion shift, r9 = discard shift).
 */
	/*
	 * At this point both dst and src are word aligned and r4 contains
	 * partial contents from src.
	 */
.Lincongruent_mainloop:
	orr	r4, r4, r5, lshi r8	/* put new src data into dst word */
#if defined(STRLCPY) || defined(STRNCPY)
	subs	r2, r2, #4		/* subtract 4 from count */
	bmi	.Lincongruent_no_more_room /* count < 0? dst will be full */
#endif
	str	r4, [r0], #4		/* store word in dst */
	/* str leaves the flags alone; beq below still tests the subs */
#if defined(STRLCPY)
	beq	.Lno_more_room		/* space left is 0? stop copy */
#endif
#if defined(STRNCPY)
	beq	.Ldst_full_word_aligned	/* space left is 0? stop copy */
#endif
	mov	r4, r5, lslo r9		/* move rest of src into dst word */
.Lincongruent_mainloop_load:
	ldr	r5, [r1], #4		/* read src */
#ifdef _ARM_ARCH_6
	uqadd8	r3, r5, r7		/* magic happens here */
	mvns	r3, r3			/* is the complemented result 0? */
	beq	.Lincongruent_mainloop	/* yes, no NUL encountered! */
	/*
	 * fall into this since we encountered a NUL.  At this point we have
	 * from 1-5 bytes (excluding trailing NUL) to write.
	 */
#ifdef __ARMEL__
	rev	r3, r3			/* CLZ works on BE data */
#endif
	clz	r3, r3			/* count leading zeroes */
#else
	tst	r5, #BYTE0		/* does byte 0 contain a NUL? */
	tstne	r5, #BYTE1		/* no, does byte 1 contain a NUL? */
	tstne	r5, #BYTE2		/* no, does byte 2 contain a NUL? */
	tstne	r5, #BYTE3		/* no, does byte 3 contain a NUL? */
	bne	.Lincongruent_mainloop	/* no, no NUL encountered! */
	/*
	 * fall into this since we encountered a NUL.  At this point we have
	 * from 1-5 bytes (excluding trailing NUL) to write.
	 */
	mov	r3, #0			/* assume a NUL is in byte 0 */
	tst	r5, #BYTE0		/* is there a NUL in byte 0? */
	beq	1f			/* yes, found a NUL! */
	mov	r3, #8			/* assume a NUL is in byte 1 */
	tst	r5, #BYTE1		/* is there a NUL in byte 1? */
	beq	1f			/* yes, found a NUL! */
	tst	r5, #BYTE2		/* is there a NUL in byte 2? */
	moveq	r3, #16			/* yes, mark its position */
	movne	r3, #24			/* no, it must be in byte 3 */
1:
#endif
	orr	r4, r4, r5, lshi r8	/* merge new and old src words */
#if defined(STRLCPY)
	add	r1, r1, r3, lsr #3	/* adjust src to point to NUL */
#endif
	add	r3, r3, r8		/* add remainder bytes worth */
	cmp	r3, #32			/* do we have at least one word to write? */
	movlt	r5, r4			/* no, move source bytes to expected reg */
	blt	.Llast_bytes		/* no, deal with them */
#if defined(STRLCPY)
	subs	r2, r2, #4		/* subtract 4 from count */
	bpl	1f			/* we have space for at least 4 */
	/*
	 * Since the space just went minus, we don't have enough room to
	 * write all 4 bytes.  In fact, the most we can write is 3 so
	 * just lie and say we have 3 bytes to write and discard the rest.
	 */
	add	r2, r2, #4		/* add 4 back */
	mov	r3, #24			/* say we have 3 bytes */
	mov	r5, r4			/* discard the bytes we can't store */
	b	.Llast_bytes		/* and treat this as our last word */
1:
#elif defined(STRNCPY)
	subs	r2, r2, #4		/* subtract 4 from count */
	bmi	.Lincongruent_no_more_room /* count < 0? dst will be full */
#endif
	str	r4, [r0], #4		/* store dst word */
#if defined(STRNCPY)
	beq	.Ldst_full_word_aligned	/* space left is 0? stop copy */
#endif
#if defined(STRLCPY)
	bne	1f			/* we still have space remaining */
	strb	r2, [r0]		/* write final NUL */
	b	.Lend_of_string		/* we are done */
1:
#endif
	/*
	 * Subtract the 32 bits just written from the number of bits left
	 * to write.  If 0 bits are left and not doing strncpy, just write
	 * the trailing NUL and be done.
	 */
	subs	r3, r3, #32		/* we wrote one word */
#if !defined(STRNCPY)
	bne	1f			/* no more data? */
	strb	r3, [r0]		/* write final NUL */
	b	.Lend_of_string		/* we are done */
1:
#endif
	/*
	 * At this point after writing 4 bytes, we have 0 or 1 bytes left to
	 * write (excluding the trailing NUL).
	 */
	mov	r5, r5, lslo r9		/* get remainder of src */

	/* fall into .Llast_bytes */

#if !defined(STRLCPY)
.Lcongruent_last_bytes:
#endif
.Llast_bytes:
	/*
	 * Store the final, partial word of the string.
	 *
	 * r5 contains the last word and is in host byte order.
	 * r3 contains number of bits left to copy (0..31).
	 * r1 should point to the NUL + 4.
	 *
	 * ip is used as the working bit count, rounded down to whole
	 * bytes.  Unless the whole word may be overwritten, the dst
	 * bytes past the ones being written are read back and kept.
	 */
	bics	ip, r3, #7		/* truncate bits, is result 0? */
#if !defined(STRNCPY)
	bne	1f			/* no, have to write some bytes */
	strb	ip, [r0]		/* yes, write trailing NUL */
	b	.Lend_of_string		/* yes, and we are the end */
1:
#endif
#if defined(STRLCPY) || defined(STRNCPY)
	cmp	r2, ip, lsr #3		/* is there enough room? */
	movlt	ip, r2, lsl #3		/* no, only fill remaining space */
#endif
	mvn	r3, #0			/* create a mask */
	mov	r3, r3, lshi ip		/* clear leading bytes */
	bic	r5, r5, r3		/* clear trailing bytes */
#if defined(STRNCPY)
	cmp	r2, #4			/* room for 4 bytes? */
	movge	ip, #32			/* yes, we will write 4 bytes */
	bge	2f			/* yes, and go do it */
	mvn	r3, #0			/* create a mask (again) */
	mov	ip, r2, lsl #3		/* remaining space bytes -> bits */
	mov	r3, r3, lshi ip		/* clear remaining bytes */
#elif defined(STRLCPY)
	cmp	r2, #3			/* do we have room for 3 bytes & NUL? */
	bge	2f			/* yes, just clear out dst */
	mov	r3, r3, lshi #8		/* mask out trailing NUL */
#else
	cmp	ip, #24			/* are we writing 3 bytes & a NUL? */
	bge	2f			/* yes, just overwrite dst */
	mov	r3, r3, lshi #8		/* mask out trailing NUL */
#endif /* !STRNCPY */
	ldr	r4, [r0]		/* fetch dst word */
	and	r4, r4, r3		/* preserve trailing bytes */
	orr	r5, r5, r4		/* merge dst with src */
2:	str	r5, [r0], #4		/* store last word */
#if defined(STRNCPY)
	subs	r2, r2, ip, lsr #3	/* subtract bytes cleared from count */
	beq	.Ldst_full_word_aligned
#endif
	b	.Lend_of_string

#if defined(STRLCPY) || defined(STRNCPY)
.Lno_more_room:
	/*
	 * dst cannot take the whole word in r5.  On entry r2 is the space
	 * count after 4 was subtracted from it (zero or negative) and r0
	 * points at the dst word to fill.  Only the bytes that fit are
	 * written; the rest of that dst word is read back and preserved.
	 */
#if defined(STRLCPY)
	cmp	r2, #-1			/* tried to write 3 bytes? */
	blt	1f			/* less, partial word write */
	cmp	r2, #0			/* no space left? */
	streqb	r2, [r0]		/* write the final NUL */
	bicne	r5, r5, #BYTE3		/* clear trailing NUL */
	strne	r5, [r0]		/* write last word */
	b	.Ldst_full_word_aligned	/* the dst buffer is full */
1:
#endif /* STRLCPY */
	add	r2, r2, #4		/* restore remaining space */
	ldr	r4, [r0]		/* load dst */
	mvn	r3, #0			/* create a mask */
	mov	r2, r2, lsl #3		/* bytes -> bits */
	mov	r3, r3, lshi r2		/* clear leading bytes */
	bic	r5, r5, r3		/* clear trailing bytes from src */
#if defined(STRLCPY)
	mov	r3, r3, lshi #8		/* mask out trailing NUL */
#endif /* STRLCPY */
	and	r4, r4, r3		/* preserve trailing bytes in dst */
	orr	r4, r4, r5		/* merge src with dst */
	str	r4, [r0], #4		/* write last word */
	b	.Ldst_full_word_aligned
#endif /* STRLCPY || STRNCPY */

#if defined(STRLCPY)
	/*
	 * Destination was filled (and NUL terminated).
	 * All that's left is count the number of bytes left in src.
	 */
.Ldst_full:
1:	tst	r1, #3			/* src word aligned? */
	beq	2f			/* yes, so do it word by word */
	ldrb	r5, [r1], #1		/* load next byte */
	teq	r5, #0			/* is it a NUL? */
	bne	1b			/* no, check alignment */
	b	.Lend_of_string		/* and return */
2:	add	r6, r6, #3		/* compensate for post-inc */
.Ldst_full_word_aligned:
3:	ldr	r5, [r1], #4		/* load word from src */
#ifdef _ARM_ARCH_6
	uqadd8	r5, r5, r7		/* perform NUL magic */
	mvns	r5, r5			/* complement all 0s? */
	beq	3b			/* yes, no NUL so get next word */
#else
	tst	r5, #BYTE0		/* does byte 0 contain a NUL? */
	tstne	r5, #BYTE1		/* no, does byte 1 contain a NUL? */
	tstne	r5, #BYTE2		/* no, does byte 2 contain a NUL? */
	tstne	r5, #BYTE3		/* no, does byte 3 contain a NUL? */
	bne	3b			/* no, no NUL encountered! */
#endif
#ifdef _ARM_ARCH_6
#ifdef __ARMEL__
	rev	r5, r5			/* CLZ needs BE data */
#endif
	clz	r5, r5			/* count leading zeros */
	add	r1, r1, r5, lsr #3	/* add offset to NUL to src pointer */
#else
	tst	r5, #BYTE0		/* is there a NUL in byte 0? */
	beq	4f			/* yes, don't check any further */
	add	r1, r1, #1		/* no, advance src pointer by 1 */
	tst	r5, #BYTE1		/* is there a NUL in byte 1? */
	beq	4f			/* yes, don't check any further */
	add	r1, r1, #1		/* no, advance src pointer by 1 */
	tst	r5, #BYTE2		/* is there a NUL in byte 2? */
	addne	r1, r1, #1		/* no, there must be in byte 3 */
4:
#endif /* _ARM_ARCH_6 */
.Lend_of_string:
	/* return value: src pointer (r1) minus the adjusted start (r6) */
	sub	r0, r1, r6		/* subtract start from finish */
	pop	{r4-r9}			/* restore registers */
	RET
#elif defined(STRNCPY)
.Lend_of_string:
	/*
	 * The string ended with r2 bytes of dst still unwritten, starting
	 * at r0; fill them with zeros.  r2 is a size_t and r0/ip are
	 * addresses, so the comparisons below are unsigned.
	 */
	teq	r2, #0			/* any bytes left to zero? */
	beq	3f			/* no, just return. */
	mov	r1, #0			/* yes, prepare to zero */
	cmp	r2, #16			/* some, but not a lot? */
	bls	1f			/* 16 or fewer, clear them here */
	mov	r4, lr			/* preserve lr */
	bl	PLT_SYM(_C_LABEL(memset)) /* yes, and let memset do it */
	mov	lr, r4			/* restore lr */
	b	3f			/* return */
1:	add	ip, r0, r2		/* calculate stopping point */
2:	strb	r1, [r0], #1		/* clear a byte */
	cmp	r0, ip			/* done? */
	blo	2b			/* no, clear next byte */
3:	mov	r0, r6			/* restore dst pointer */
	pop	{r4-r9}			/* restore registers */
	RET
.Ldst_full:
.Ldst_full_word_aligned:
	/*
	 * Destination was filled (but not NUL terminated).
	 * All that's left is return the start of dst
	 */
	mov	r0, r6			/* restore dst pointer */
	pop	{r4-r9}			/* restore registers */
	RET
#else
.Lend_of_string:
	mov	r0, r6			/* restore dst pointer */
	pop	{r4-r9}			/* restore registers */
	RET
#endif
END(FUNCNAME)
