/*	$NetBSD: memmove.S,v 1.5 2013/08/11 04:56:32 matt Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

#if defined(__ARM_EABI__) && !defined(_BCOPY)
STRONG_ALIAS(__aeabi_memmove, memmove)
#endif

#ifndef _BCOPY
/* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
ENTRY(memmove)
#else
/* bcopy = memcpy/memmove with arguments reversed. */
/* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
ENTRY(bcopy)
	/* switch the source and destination registers */
	eor	r0, r1, r0
	eor	r1, r0, r1
	eor	r0, r1, r0
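	/*
	 * The three exclusive-ORs are the classic XOR swap: afterwards
	 * r0 holds the old r1 and r1 holds the old r0, with no scratch
	 * register needed, turning bcopy(src, dst, len) into
	 * memmove(dst, src, len).
	 */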
#endif
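	/*
	 * Overlap test: with r3 = |dst - src|, the two regions are
	 * disjoint whenever r3 >= len, in which case a plain memcpy()
	 * is safe.  Only genuinely overlapping copies fall through to
	 * the direction-sensitive code below.
	 */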
	/* Do the buffers overlap? */
	cmp	r0, r1
	RETc(eq)		/* Bail now if src/dst are the same */
	subhs	r3, r0, r1	/* if (dst > src) r3 = dst - src */
	sublo	r3, r1, r0	/* if (src > dst) r3 = src - dst */
	cmp	r3, r2		/* if (r3 < len) we have an overlap */
	bhs	PIC_SYM(_C_LABEL(memcpy), PLT)

	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemmove_backwards
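	/*
	 * Here src > dst, so a forward (low address to high) copy never
	 * overwrites a source byte before it has been read; dst > src
	 * is handled by .Lmemmove_backwards, which copies from the end.
	 */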

	moveq	r0, #0			/* Quick abort for len=0 */
	RETc(eq)

	push	{r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul	/* oh unaligned source addr */

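	/*
	 * From here on r2 carries a running negative bias (4 so far,
	 * then 12, then 32 below), so each "enough bytes left?" test
	 * is just a sign check on r2; the adds at the .Lmemmove_fl*
	 * labels peel the bias off again.
	 */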
.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	push	{r4}			/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	pop	{r4}			/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8
	blt	.Lmemmove_fl4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	popeq	{r0, pc}		/* done */

	/* copy the crud byte at a time */
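	/*
	 * r2 is now 1, 2 or 3.  The first byte is copied
	 * unconditionally; the ge and gt forms copy the second and
	 * third bytes only when cmp found r2 >= 2 and r2 > 2.
	 */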
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	pop	{r0, pc}

	/* erg - unaligned destination */
.Lmemmove_fdestul:
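	/* r12 = 4 - (dst & 3): the number of bytes needed to word-align dst */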
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
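	/*
	 * The source pointer is rounded down to a word boundary (bic)
	 * and whole words are fetched; each output word is then merged
	 * from the tail of one input word and the head of the next
	 * with shift/orr pairs.  The shift distances depend on src & 3
	 * (held in r12) and on byte order, hence the three variants
	 * below.
	 */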
.Lmemmove_fsrcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	push	{r4, r5}

.Lmemmove_fsrcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	pop	{r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
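	/*
	 * lr still holds a partly consumed word; step r1 back to the
	 * first source byte that has not been copied yet.
	 */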
	sub	r1, r1, #3
	b	.Lmemmove_fl4

.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	push	{r4, r5}

.Lmemmove_fsrcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	pop	{r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2
	b	.Lmemmove_fl4

.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	push	{r4, r5}

.Lmemmove_fsrcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	pop	{r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1
	b	.Lmemmove_fl4

.Lmemmove_backwards:
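	/*
	 * dst > src with overlap: advance both pointers one byte past
	 * the end of their buffers and copy downwards, so no source
	 * byte is overwritten before it has been read.
	 */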
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul	/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	push	{r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10
	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmdbge	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmdbge	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	pop	{r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8
	blt	.Lmemmove_bl4
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ldmdbge	r1!, {r3, r12}
	stmdbge	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	RETc(eq)

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
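	/*
	 * r12 = dst & 3 of the end pointer: copy that many bytes
	 * downwards to word-align the destination.
	 */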
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
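	/*
	 * Mirror image of the forward unaligned-source code: r3 holds
	 * the word containing the tail of the source, further words
	 * are fetched downwards with ldmdb, and each output word is
	 * merged from two inputs with byte-order-dependent shifts.
	 */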
.Lmemmove_bsrcul:
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	push	{r4, r5, lr}

.Lmemmove_bsrcul3loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #8
#else
	mov	lr, r3, lsl #8
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	pop	{r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #8
#else
	mov	r12, r3, lsl #8
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #24
#else
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3
	b	.Lmemmove_bl4

.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	push	{r4, r5, lr}

.Lmemmove_bsrcul2loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #16
#else
	mov	lr, r3, lsl #16
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	pop	{r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #16
#else
	mov	r12, r3, lsl #16
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #16
#else
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2
	b	.Lmemmove_bl4

.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	push	{r4, r5, lr}

.Lmemmove_bsrcul1loop32:
#ifdef __ARMEB__
	mov	lr, r3, lsr #24
#else
	mov	lr, r3, lsl #24
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop32
	pop	{r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #24
#else
	mov	r12, r3, lsl #24
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #8
#else
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1
	b	.Lmemmove_bl4
#ifndef _BCOPY
END(memmove)
#else
END(bcopy)
#endif