/*	$NetBSD: memmove.S,v 1.11 2023/01/19 18:03:03 mlelstv Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

#ifndef _BCOPY
/* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
ENTRY(memmove)
#else
/* bcopy = memcpy/memmove with arguments reversed. */
/* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
ENTRY(bcopy)
	/* switch the source and destination registers */
	eor	r0, r1, r0
	eor	r1, r0, r1
	eor	r0, r1, r0
#endif
	/* Do the buffers overlap? */
	cmp	r0, r1
	RETc(eq)			/* Bail now if src/dst are the same */
	subhs	r3, r0, r1		/* if (dst > src) r3 = dst - src */
	sublo	r3, r1, r0		/* if (src > dst) r3 = src - dst */
	cmp	r3, r2			/* if (r3 >= len) we have an overlap */
	bhs	PLT_SYM(_C_LABEL(memcpy))

	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemmove_backwards
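
	/*
	 * Note: in the bcopy case above, the three eor instructions swap
	 * r0 and r1 in place, so no scratch register is needed.
	 *
	 * The dispatch just performed is, roughly, the following C sketch
	 * (dst, src and len stand in for r0, r1 and r2; the names are
	 * illustrative only):
	 *
	 *	if (dst == src)
	 *		return dst;			// nothing to move
	 *	if ((dst > src ? dst - src : src - dst) >= len)
	 *		return memcpy(dst, src, len);	// no overlap
	 *	if (src < dst)
	 *		goto backwards;			// copy high to low
	 */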
	moveq	r0, #0			/* Quick abort for len=0 */
	RETc(eq)

	push	{r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4
	blo	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul	/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blo	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blo	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	push	{r4}			/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bhs	.Lmemmove_floop32

	cmn	r2, #0x10
	ldmiahs	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiahs	r0!, {r3, r4, r12, lr}
	subhs	r2, r2, #0x10
	pop	{r4}			/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ldmiahs	r1!, {r3, r12, lr}
	stmiahs	r0!, {r3, r12, lr}
	subshs	r2, r2, #0x0c
	bhs	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8
	blo	.Lmemmove_fl4

	subs	r2, r2, #4
	ldrlo	r3, [r1], #4
	strlo	r3, [r0], #4
	ldmiahs	r1!, {r3, r12}
	stmiahs	r0!, {r3, r12}
	subhs	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	popeq	{r0, pc}		/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbhs	r3, [r1], #1
	strbhs	r3, [r0], #1
	ldrbhi	r3, [r1], #1
	strbhi	r3, [r0], #1
	pop	{r0, pc}

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbhs	r3, [r1], #1
	strbhs	r3, [r0], #1
	ldrbhi	r3, [r1], #1
	strbhi	r3, [r0], #1
	subs	r2, r2, r12
	blo	.Lmemmove_fl4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
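	/*
	 * On entry r12 = src & 3 (1, 2 or 3).  The source pointer is
	 * rounded down to a word boundary, whole words are loaded, and
	 * each output word is assembled from two adjacent input words
	 * with a shift/orr pair.  A hedged C sketch of the little-endian
	 * offset-1 case (cur/next are illustrative names for the lower
	 * and upper source words):
	 *
	 *	out = (cur >> 8) | (next << 24);
	 *
	 * The __ARMEB__ (big-endian) builds reverse the shift directions.
	 */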
.Lmemmove_fsrcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bhi	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
	cmp	r2, #0x0c
	blo	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	push	{r4, r5}

.Lmemmove_fsrcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bhs	.Lmemmove_fsrcul1loop16
	pop	{r4, r5}
	adds	r2, r2, #0x0c
	blo	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bhs	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3
	b	.Lmemmove_fl4

.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blo	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	push	{r4, r5}

.Lmemmove_fsrcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bhs	.Lmemmove_fsrcul2loop16
	pop	{r4, r5}
	adds	r2, r2, #0x0c
	blo	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bhs	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2
	b	.Lmemmove_fl4

.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blo	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	push	{r4, r5}

.Lmemmove_fsrcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bhs	.Lmemmove_fsrcul3loop16
	pop	{r4, r5}
	adds	r2, r2, #0x0c
	blo	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bhs	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1
	b	.Lmemmove_fl4
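
	/*
	 * dst overlaps the source from above, so copy from the top down:
	 * each source byte is read before the forward pass would have
	 * overwritten it.  Both pointers are first advanced past the end
	 * of their buffers, then the same alignment strategy as the
	 * forward path is replayed with the descending ldmdb/stmdb forms.
	 */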
.Lmemmove_backwards:
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4
	blo	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul	/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blo	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	push	{r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blo	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bhs	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10
	ldmdbhs	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmdbhs	r0!, {r3, r4, r12, lr}
	subhs	r2, r2, #0x10
	adds	r2, r2, #0x14
	ldmdbhs	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmdbhs	r0!, {r3, r12, lr}
	subhs	r2, r2, #0x0c
	pop	{r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8
	blo	.Lmemmove_bl4
	subs	r2, r2, #4
	ldrlo	r3, [r1, #-4]!
	strlo	r3, [r0, #-4]!
	ldmdbhs	r1!, {r3, r12}
	stmdbhs	r0!, {r3, r12}
	subhs	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	RETc(eq)

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbhs	r3, [r1, #-1]!
	strbhs	r3, [r0, #-1]!
	ldrbhi	r3, [r1, #-1]!
	strbhi	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbhs	r3, [r1, #-1]!
	strbhs	r3, [r0, #-1]!
	ldrbhi	r3, [r1, #-1]!
	strbhi	r3, [r0, #-1]!
	subs	r2, r2, r12
	blo	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
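	/*
	 * Backwards version of the shift-and-merge realignment above.
	 * Since the copy runs from high to low addresses the shift
	 * directions are mirrored; a hedged C sketch of the little-endian
	 * offset-3 case (hi/lo are illustrative names for the upper and
	 * lower source words):
	 *
	 *	out = (hi << 8) | (lo >> 24);
	 */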
.Lmemmove_bsrcul:
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blo	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	cmp	r2, #0x0c
	blo	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	push	{r4, r5, lr}

.Lmemmove_bsrcul3loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #8
#else
	mov	lr, r3, lsl #8
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bhs	.Lmemmove_bsrcul3loop16
	pop	{r4, r5, lr}
	adds	r2, r2, #0x0c
	blo	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #8
#else
	mov	r12, r3, lsl #8
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #24
#else
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bhs	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3
	b	.Lmemmove_bl4

.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blo	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	push	{r4, r5, lr}

.Lmemmove_bsrcul2loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #16
#else
	mov	lr, r3, lsl #16
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bhs	.Lmemmove_bsrcul2loop16
	pop	{r4, r5, lr}
	adds	r2, r2, #0x0c
	blo	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #16
#else
	mov	r12, r3, lsl #16
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #16
#else
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bhs	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2
	b	.Lmemmove_bl4

.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blo	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	push	{r4, r5, lr}

.Lmemmove_bsrcul1loop32:
#ifdef __ARMEB__
	mov	lr, r3, lsr #24
#else
	mov	lr, r3, lsl #24
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bhs	.Lmemmove_bsrcul1loop32
	pop	{r4, r5, lr}
	adds	r2, r2, #0x0c
	blo	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #24
#else
	mov	r12, r3, lsl #24
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #8
#else
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bhs	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1
	b	.Lmemmove_bl4
#ifndef _BCOPY
END(memmove)
#else
END(bcopy)
#endif

#if defined(__ARM_EABI__) && !defined(_BCOPY) && !defined(_RUMPKERNEL)
STRONG_ALIAS(__aeabi_memmove, memmove)
STRONG_ALIAS(__aeabi_memmove4, memmove)
STRONG_ALIAS(__aeabi_memmove8, memmove)
#endif