/*	$NetBSD: memmove.S,v 1.3.26.2 2014/08/19 23:45:12 tls Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

#ifndef _BCOPY
/* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
ENTRY(memmove)
#else
/* bcopy = memcpy/memmove with arguments reversed. */
/* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
ENTRY(bcopy)
	/* switch the source and destination registers */
	eor	r0, r1, r0
	eor	r1, r0, r1
	eor	r0, r1, r0
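	/*
	 * The three exclusive-ORs above exchange r0 and r1 in place
	 * (the classic XOR swap), so no scratch register is needed:
	 * afterwards r0 = dst and r1 = src, matching memmove's
	 * argument order.
	 */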
#endif
	/* Do the buffers overlap? */
	cmp	r0, r1
	RETc(eq)		/* Bail now if src/dst are the same */
	subhs	r3, r0, r1	/* if (dst > src) r3 = dst - src */
	sublo	r3, r1, r0	/* if (src > dst) r3 = src - dst */
	cmp	r3, r2		/* if (r3 < len) we have an overlap */
	bhs	PLT_SYM(_C_LABEL(memcpy))
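	/*
	 * Roughly equivalent C for the dispatch above (an
	 * illustrative sketch only):
	 *
	 *	if (dst == src)
	 *		return dst;
	 *	if ((dst > src ? dst - src : src - dst) >= len)
	 *		return memcpy(dst, src, len);
	 *
	 * When the regions are at least len bytes apart they cannot
	 * overlap, so the plain (faster) memcpy is safe.
	 */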

	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemmove_backwards
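	/*
	 * src < dst with overlap means a forward copy would clobber
	 * source bytes before they are read, so we copy descending.
	 * Otherwise we fall through and copy ascending.  The eq abort
	 * below cannot fire here: src == dst already returned above.
	 */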

	moveq	r0, #0			/* Quick abort for len=0 */
	RETc(eq)

	push	{r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul	/* oh unaligned source addr */
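	/*
	 * Alignment is fixed up in two steps: the destination is
	 * byte-copied to a word boundary first, so that every word
	 * store below is aligned, and only then is the source
	 * checked; a still-unaligned source is handled by the
	 * shift-and-merge code at .Lmemmove_fsrcul below.
	 */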

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	push	{r4}			/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	pop	{r4}			/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14
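	/*
	 * r2 has been running 0x20 low as the loop counter; the adds
	 * above re-biases it so that r2 = bytes remaining - 12, and
	 * the ge condition in the loop below holds exactly while at
	 * least 12 bytes are left (subsge keeps the flags current).
	 */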

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8
	blt	.Lmemmove_fl4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	popeq	{r0, pc}		/* done */

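	/*
	 * 1 to 3 bytes remain in r2.  cmp r2, #2 makes the second
	 * copy conditional on ge (r2 >= 2) and the third on gt
	 * (r2 == 3), so exactly r2 bytes are moved with no branches.
	 */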
	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	pop	{r0, pc}

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4
	cmp	r12, #2
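	/*
	 * r12 was dst & 3; the rsb turns it into 4 - (dst & 3), the
	 * number of bytes needed to reach the next word boundary,
	 * and the cmp primes the ge/gt conditions for the byte
	 * copies below.
	 */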

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_fsrcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
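	/*
	 * The source is r12 (1, 2 or 3) bytes past a word boundary.
	 * bic rounds r1 down and the ldr fetches the word holding
	 * the first bytes; each loop then builds aligned output
	 * words by shifting the held word and OR-ing in bits from
	 * the following ones.  Shift directions flip under __ARMEB__
	 * because the first memory byte is the most significant one
	 * there.  Fall-through handles r12 == 1; fsrcul2/fsrcul3 are
	 * the 2- and 3-byte cases with wider shifts.
	 */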
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	push	{r4, r5}

.Lmemmove_fsrcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	pop	{r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3
	b	.Lmemmove_fl4

.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	push	{r4, r5}

.Lmemmove_fsrcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	pop	{r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2
	b	.Lmemmove_fl4

.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	push	{r4, r5}

.Lmemmove_fsrcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	pop	{r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1
	b	.Lmemmove_fl4

.Lmemmove_backwards:
	add	r1, r1, r2
	add	r0, r0, r2
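	/*
	 * Backwards copy: both pointers now sit one byte past the
	 * end of their buffers and every load/store below descends,
	 * so an overlapping tail is read before it is overwritten.
	 * The structure mirrors the forward path above.
	 */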
	subs	r2, r2, #4
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul	/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	push	{r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10
	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmdbge	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
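	/*
	 * As in the forward path, the adds re-biases the counter to
	 * r2 = bytes remaining - 12; ge below means at least 12
	 * bytes are still to be copied, and one 12-byte block moves.
	 */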
	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmdbge	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	pop	{r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8
	blt	.Lmemmove_bl4
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ldmdbge	r1!, {r3, r12}
	stmdbge	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	RETc(eq)

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2
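	/*
	 * Copying downwards, r12 = dst & 3 is itself the number of
	 * bytes to move before r0 lands on a word boundary (no rsb
	 * needed, unlike the forward path).
	 */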

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_bsrcul:
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
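	/*
	 * Mirror image of .Lmemmove_fsrcul: the word at the
	 * rounded-down source address is preloaded into r3 and
	 * merged, via opposite-direction shifts, with words fetched
	 * below it.  Fall-through is the r12 == 3 case;
	 * bsrcul1/bsrcul2 handle offsets of 1 and 2.
	 */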
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	push	{r4, r5, lr}

.Lmemmove_bsrcul3loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #8
#else
	mov	lr, r3, lsl #8
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	pop	{r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #8
#else
	mov	r12, r3, lsl #8
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #24
#else
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3
	b	.Lmemmove_bl4

.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	push	{r4, r5, lr}

.Lmemmove_bsrcul2loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #16
#else
	mov	lr, r3, lsl #16
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	pop	{r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #16
#else
	mov	r12, r3, lsl #16
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #16
#else
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2
	b	.Lmemmove_bl4

.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	push	{r4, r5, lr}

.Lmemmove_bsrcul1loop32:
#ifdef __ARMEB__
	mov	lr, r3, lsr #24
#else
	mov	lr, r3, lsl #24
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop32
	pop	{r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #24
#else
	mov	r12, r3, lsl #24
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #8
#else
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1
	b	.Lmemmove_bl4
#ifndef _BCOPY
END(memmove)
#else
END(bcopy)
#endif

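/*
 * The AEABI run-time helpers __aeabi_memmove4 and __aeabi_memmove8
 * promise 4- and 8-byte aligned arguments; since this implementation
 * copes with any alignment, all three can simply alias memmove.
 */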
#if defined(__ARM_EABI__) && !defined(_BCOPY)
STRONG_ALIAS(__aeabi_memmove, memmove)
STRONG_ALIAS(__aeabi_memmove4, memmove)
STRONG_ALIAS(__aeabi_memmove8, memmove)
#endif