/*	$NetBSD: memmove.S,v 1.1.6.1 2007/06/21 14:25:01 liamjfoy Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

/*
 * void *memmove(void *dst, const void *src, size_t len)
 *	(compiled as bcopy(src, dst, len) when _BCOPY is defined)
 *
 * ABI:	ARM APCS.  In: r0 = dst, r1 = src, r2 = len.
 * Out:	r0 = dst (the memmove return value; the forward path saves it
 *	on the stack, the backward path never clobbers it permanently).
 * Scratch: r3, r12 (ip), lr; r4/r5 are callee-saved and are pushed and
 *	popped around the bulk-copy loops that borrow them.
 *
 * Strategy: if the buffers do not overlap, tail-call memcpy().
 * Otherwise copy forwards when src > dst and backwards when src < dst,
 * so the overlapping region is never overwritten before it is read.
 * Each direction has a word-aligned fast path (32/16/12 bytes per
 * iteration via ldm/stm) and shift-and-merge paths for the three
 * possible source misalignments (1, 2 or 3 bytes).
 */
#ifndef _BCOPY
/* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
ENTRY(memmove)
#else
/* bcopy = memcpy/memmove with arguments reversed. */
/* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
ENTRY(bcopy)
	/* switch the source and destination registers (xor swap) */
	eor	r0, r1, r0
	eor	r1, r0, r1
	eor	r0, r1, r0
#endif
	/* Do the buffers overlap? */
	cmp	r0, r1
	RETc(eq)			/* Bail now if src/dst are the same */
	subhs	r3, r0, r1		/* if (dst > src) r3 = dst - src */
	sublo	r3, r1, r0		/* if (src > dst) r3 = src - dst */
	cmp	r3, r2			/* if (r3 >= len) we have an overlap */
	bhs	PIC_SYM(_C_LABEL(memcpy), PLT)

	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemmove_backwards

	/*
	 * NOTE(review): eq cannot be true here — the src == dst case
	 * already returned above — so this pair appears to be dead code
	 * inherited from _memcpy; kept as-is for fidelity.
	 */
	moveq	r0, #0			/* Quick abort for len=0 */
	RETc(eq)

	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul	/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14		/* undo the bias; ge => >= 12 left */

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8
	blt	.Lmemmove_fl4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4		/* 4..7 bytes left: move one word */
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}		/* 8..11 bytes left: move two words */
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	ldmeqia	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4		/* r12 = bytes to reach alignment */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less the 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_fsrcul:
	bic	r1, r1, #3		/* round src down to a word */
	ldr	lr, [r1], #4		/* prime lr with the first partial word */
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3	/* src is 3 bytes past alignment */
	beq	.Lmemmove_fsrcul2	/* src is 2 bytes past alignment */
	cmp	r2, #0x0c		/* fall through: 1 byte past */
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

	/* shift-and-merge, 16 bytes per iteration, src offset 1 */
.Lmemmove_fsrcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

	/* shift-and-merge, 4 bytes per iteration, src offset 1 */
.Lmemmove_fsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3		/* restore true (byte) src address */
	b	.Lmemmove_fl4

.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

	/* shift-and-merge, 16 bytes per iteration, src offset 2 */
.Lmemmove_fsrcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

	/* shift-and-merge, 4 bytes per iteration, src offset 2 */
.Lmemmove_fsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2		/* restore true (byte) src address */
	b	.Lmemmove_fl4

.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

	/* shift-and-merge, 16 bytes per iteration, src offset 3 */
.Lmemmove_fsrcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

	/* shift-and-merge, 4 bytes per iteration, src offset 3 */
.Lmemmove_fsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1		/* restore true (byte) src address */
	b	.Lmemmove_fl4

.Lmemmove_backwards:
	/* copy high-to-low: point both regs one past the end */
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul	/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10
	ldmgedb	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgedb	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	ldmgedb	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmgedb	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8
	blt	.Lmemmove_bl4
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!		/* 4..7 bytes left: move one word */
	strlt	r3, [r0, #-4]!
	ldmgedb	r1!, {r3, r12}		/* 8..11 bytes left: move two words */
	stmgedb	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	RETc(eq)

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_bsrcul:
	bic	r1, r1, #3		/* round src down to a word */
	ldr	r3, [r1, #0]		/* prime r3 with the last partial word */
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1	/* src is 1 byte past alignment */
	beq	.Lmemmove_bsrcul2	/* src is 2 bytes past alignment */
	cmp	r2, #0x0c		/* fall through: 3 bytes past */
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

	/* shift-and-merge backwards, 16 bytes per iteration, src offset 3 */
.Lmemmove_bsrcul3loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #8
#else
	mov	lr, r3, lsl #8
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

	/* shift-and-merge backwards, 4 bytes per iteration, src offset 3 */
.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #8
#else
	mov	r12, r3, lsl #8
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #24
#else
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3		/* restore true (byte) src address */
	b	.Lmemmove_bl4

.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

	/* shift-and-merge backwards, 16 bytes per iteration, src offset 2 */
.Lmemmove_bsrcul2loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #16
#else
	mov	lr, r3, lsl #16
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

	/* shift-and-merge backwards, 4 bytes per iteration, src offset 2 */
.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #16
#else
	mov	r12, r3, lsl #16
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #16
#else
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2		/* restore true (byte) src address */
	b	.Lmemmove_bl4

.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

	/*
	 * shift-and-merge backwards, src offset 1.
	 * NOTE(review): label says "loop32" but each iteration moves 16
	 * bytes (subs #0x10); name kept as-is since it is purely local.
	 */
.Lmemmove_bsrcul1loop32:
#ifdef __ARMEB__
	mov	lr, r3, lsr #24
#else
	mov	lr, r3, lsl #24
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop32
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

	/* shift-and-merge backwards, 4 bytes per iteration, src offset 1 */
.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #24
#else
	mov	r12, r3, lsl #24
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #8
#else
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1		/* restore true (byte) src address */
	b	.Lmemmove_bl4