Home | History | Annotate | Line # | Download | only in string
memmove.S revision 1.5
      1 /*	$NetBSD: memmove.S,v 1.5 2013/08/11 04:56:32 matt Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1997 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Neil A. Carson and Mark Brinicombe
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <machine/asm.h>
     33 
/*
 * The ARM EABI run-time helper __aeabi_memmove has the same contract as
 * memmove(), so export it as a strong alias of the function defined below.
 * NOTE(review): this guard tests BCOPY while the function selector below
 * tests _BCOPY -- presumably the bcopy build defines both; confirm against
 * the bcopy.S wrapper.
 */
     34 #if defined(__ARM_EABI__) && !defined(BCOPY)
     35 STRONG_ALIAS(__aeabi_memmove, memmove)
     36 #endif
     37 
/*
 * void *memmove(void *dst, const void *src, size_t len)
 * (with _BCOPY defined, built instead as:
 *  void bcopy(const void *src, void *dst, size_t len))
 *
 * On entry: r0 = dst, r1 = src, r2 = len (bcopy swaps r0/r1 first).
 * memmove() returns the original dst in r0.
 *
 * Strategy: if the regions do not overlap (distance >= len), tail-call
 * memcpy().  Otherwise copy forwards when dst < src, or backwards (from
 * the high end) when dst > src, so overlapping bytes are read before
 * being overwritten.  Both directions align the destination with byte
 * copies, then use ldm/stm bursts (32/16/12/8/4 bytes) for the bulk,
 * with shift-and-merge loops for the unaligned-source cases.
 * Clobbers r3, r12 (ip) and lr; borrows r4/r5 via the stack for the
 * wide loops.
 */
     38 #ifndef _BCOPY
     39 /* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
     40 ENTRY(memmove)
     41 #else
     42 /* bcopy = memcpy/memmove with arguments reversed. */
     43 /* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
     44 ENTRY(bcopy)
     45 	/* switch the source and destination registers */
     46 	eor     r0, r1, r0
     47 	eor     r1, r0, r1
     48 	eor     r0, r1, r0
     49 #endif
     50 	/* Do the buffers overlap? */
     51 	cmp	r0, r1
     52 	RETc(eq)		/* Bail now if src/dst are the same */
     53 	subhs	r3, r0, r1	/* if (dst > src) r3 = dst - src */
     54 	sublo	r3, r1, r0	/* if (src > dst) r3 = src - dst */
     55 	cmp	r3, r2		/* if (r3 >= len) we have an overlap */
     56 	bhs	PIC_SYM(_C_LABEL(memcpy), PLT)
     57 
     58 	/* Determine copy direction */
     59 	cmp	r1, r0
     60 	bcc	.Lmemmove_backwards
     61 
	/*
	 * NOTE(review): "eq" here is still the result of the cmp r1, r0
	 * above, i.e. src == dst -- a case the RETc(eq) at the top already
	 * handled, so this pair looks unreachable.  The len=0 wording below
	 * is presumably historical; confirm before relying on it.
	 */
     62 	moveq	r0, #0			/* Quick abort for len=0 */
     63 	RETc(eq)
     64 
     65 	push	{r0, lr}		/* memmove() returns dest addr */
     66 	subs	r2, r2, #4
     67 	blt	.Lmemmove_fl4		/* less than 4 bytes */
     68 	ands	r12, r0, #3
     69 	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
     70 	ands	r12, r1, #3
     71 	bne	.Lmemmove_fsrcul		/* oh unaligned source addr */
     72 
     73 .Lmemmove_ft8:
     74 	/* We have aligned source and destination */
     75 	subs	r2, r2, #8
     76 	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
     77 	subs	r2, r2, #0x14
     78 	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
     79 	push	{r4}		/* borrow r4 */
     80 
     81 	/* blat 32 bytes at a time */
     82 	/* XXX for really big copies perhaps we should use more registers */
     83 .Lmemmove_floop32:
     84 	ldmia	r1!, {r3, r4, r12, lr}
     85 	stmia	r0!, {r3, r4, r12, lr}
     86 	ldmia	r1!, {r3, r4, r12, lr}
     87 	stmia	r0!, {r3, r4, r12, lr}
     88 	subs	r2, r2, #0x20
     89 	bge	.Lmemmove_floop32
     90 
     91 	cmn	r2, #0x10
     92 	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
     93 	stmiage	r0!, {r3, r4, r12, lr}
     94 	subge	r2, r2, #0x10
     95 	pop	{r4}		/* return r4 */
     96 
     97 .Lmemmove_fl32:
     98 	adds	r2, r2, #0x14
     99 
    100 	/* blat 12 bytes at a time */
    101 .Lmemmove_floop12:
    102 	ldmiage	r1!, {r3, r12, lr}
    103 	stmiage	r0!, {r3, r12, lr}
    104 	subsge	r2, r2, #0x0c
    105 	bge	.Lmemmove_floop12
    106 
    107 .Lmemmove_fl12:
    108 	adds	r2, r2, #8
    109 	blt	.Lmemmove_fl4
    110 
    111 	subs	r2, r2, #4
    112 	ldrlt	r3, [r1], #4
    113 	strlt	r3, [r0], #4
    114 	ldmiage	r1!, {r3, r12}
    115 	stmiage	r0!, {r3, r12}
    116 	subge	r2, r2, #4
    117 
    118 .Lmemmove_fl4:
    119 	/* less than 4 bytes to go */
    120 	adds	r2, r2, #4
    121 	popeq	{r0, pc}		/* done */
    122 
    123 	/* copy the crud byte at a time */
    124 	cmp	r2, #2
    125 	ldrb	r3, [r1], #1
    126 	strb	r3, [r0], #1
    127 	ldrbge	r3, [r1], #1
    128 	strbge	r3, [r0], #1
    129 	ldrbgt	r3, [r1], #1
    130 	strbgt	r3, [r0], #1
    131 	pop	{r0, pc}
    132 
    133 	/* erg - unaligned destination */
    134 .Lmemmove_fdestul:
    135 	rsb	r12, r12, #4
    136 	cmp	r12, #2
    137 
    138 	/* align destination with byte copies */
    139 	ldrb	r3, [r1], #1
    140 	strb	r3, [r0], #1
    141 	ldrbge	r3, [r1], #1
    142 	strbge	r3, [r0], #1
    143 	ldrbgt	r3, [r1], #1
    144 	strbgt	r3, [r0], #1
    145 	subs	r2, r2, r12
    146 	blt	.Lmemmove_fl4		/* less the 4 bytes */
    147 
    148 	ands	r12, r1, #3
    149 	beq	.Lmemmove_ft8		/* we have an aligned source */
    150 
    151 	/* erg - unaligned source */
    152 	/* This is where it gets nasty ... */
	/*
	 * Forward copy with unaligned source: r12 = src & 3 (1, 2 or 3).
	 * Round r1 down to a word boundary and pre-load the aligned word
	 * holding the first source bytes into lr; each loop then merges
	 * the tail of the previous word with the head of the next using
	 * shifts.  The __ARMEB__/else pairs are the same algorithm with
	 * the shift directions swapped for big vs. little endian.
	 */
    153 .Lmemmove_fsrcul:
    154 	bic	r1, r1, #3
    155 	ldr	lr, [r1], #4
    156 	cmp	r12, #2
    157 	bgt	.Lmemmove_fsrcul3
    158 	beq	.Lmemmove_fsrcul2
    159 	cmp	r2, #0x0c
    160 	blt	.Lmemmove_fsrcul1loop4
    161 	sub	r2, r2, #0x0c
    162 	push	{r4, r5}
    163 
    164 .Lmemmove_fsrcul1loop16:
    165 #ifdef __ARMEB__
    166 	mov	r3, lr, lsl #8
    167 #else
    168 	mov	r3, lr, lsr #8
    169 #endif
    170 	ldmia	r1!, {r4, r5, r12, lr}
    171 #ifdef __ARMEB__
    172 	orr	r3, r3, r4, lsr #24
    173 	mov	r4, r4, lsl #8
    174 	orr	r4, r4, r5, lsr #24
    175 	mov	r5, r5, lsl #8
    176 	orr	r5, r5, r12, lsr #24
    177 	mov	r12, r12, lsl #8
    178 	orr	r12, r12, lr, lsr #24
    179 #else
    180 	orr	r3, r3, r4, lsl #24
    181 	mov	r4, r4, lsr #8
    182 	orr	r4, r4, r5, lsl #24
    183 	mov	r5, r5, lsr #8
    184 	orr	r5, r5, r12, lsl #24
    185 	mov	r12, r12, lsr #8
    186 	orr	r12, r12, lr, lsl #24
    187 #endif
    188 	stmia	r0!, {r3-r5, r12}
    189 	subs	r2, r2, #0x10
    190 	bge	.Lmemmove_fsrcul1loop16
    191 	pop	{r4, r5}
    192 	adds	r2, r2, #0x0c
    193 	blt	.Lmemmove_fsrcul1l4
    194 
    195 .Lmemmove_fsrcul1loop4:
    196 #ifdef __ARMEB__
    197 	mov	r12, lr, lsl #8
    198 #else
    199 	mov	r12, lr, lsr #8
    200 #endif
    201 	ldr	lr, [r1], #4
    202 #ifdef __ARMEB__
    203 	orr	r12, r12, lr, lsr #24
    204 #else
    205 	orr	r12, r12, lr, lsl #24
    206 #endif
    207 	str	r12, [r0], #4
    208 	subs	r2, r2, #4
    209 	bge	.Lmemmove_fsrcul1loop4
    210 
    211 .Lmemmove_fsrcul1l4:
	/* back r1 up to the true (unaligned) source position */
    212 	sub	r1, r1, #3
    213 	b	.Lmemmove_fl4
    214 
    215 .Lmemmove_fsrcul2:
    216 	cmp	r2, #0x0c
    217 	blt	.Lmemmove_fsrcul2loop4
    218 	sub	r2, r2, #0x0c
    219 	push	{r4, r5}
    220 
    221 .Lmemmove_fsrcul2loop16:
    222 #ifdef __ARMEB__
    223 	mov	r3, lr, lsl #16
    224 #else
    225 	mov	r3, lr, lsr #16
    226 #endif
    227 	ldmia	r1!, {r4, r5, r12, lr}
    228 #ifdef __ARMEB__
    229 	orr	r3, r3, r4, lsr #16
    230 	mov	r4, r4, lsl #16
    231 	orr	r4, r4, r5, lsr #16
    232 	mov	r5, r5, lsl #16
    233 	orr	r5, r5, r12, lsr #16
    234 	mov	r12, r12, lsl #16
    235 	orr	r12, r12, lr, lsr #16
    236 #else
    237 	orr	r3, r3, r4, lsl #16
    238 	mov	r4, r4, lsr #16
    239 	orr	r4, r4, r5, lsl #16
    240 	mov	r5, r5, lsr #16
    241 	orr	r5, r5, r12, lsl #16
    242 	mov	r12, r12, lsr #16
    243 	orr	r12, r12, lr, lsl #16
    244 #endif
    245 	stmia	r0!, {r3-r5, r12}
    246 	subs	r2, r2, #0x10
    247 	bge	.Lmemmove_fsrcul2loop16
    248 	pop	{r4, r5}
    249 	adds	r2, r2, #0x0c
    250 	blt	.Lmemmove_fsrcul2l4
    251 
    252 .Lmemmove_fsrcul2loop4:
    253 #ifdef __ARMEB__
    254 	mov	r12, lr, lsl #16
    255 #else
    256 	mov	r12, lr, lsr #16
    257 #endif
    258 	ldr	lr, [r1], #4
    259 #ifdef __ARMEB__
    260 	orr	r12, r12, lr, lsr #16
    261 #else
    262 	orr	r12, r12, lr, lsl #16
    263 #endif
    264 	str	r12, [r0], #4
    265 	subs	r2, r2, #4
    266 	bge	.Lmemmove_fsrcul2loop4
    267 
    268 .Lmemmove_fsrcul2l4:
	/* back r1 up to the true (unaligned) source position */
    269 	sub	r1, r1, #2
    270 	b	.Lmemmove_fl4
    271 
    272 .Lmemmove_fsrcul3:
    273 	cmp	r2, #0x0c
    274 	blt	.Lmemmove_fsrcul3loop4
    275 	sub	r2, r2, #0x0c
    276 	push	{r4, r5}
    277 
    278 .Lmemmove_fsrcul3loop16:
    279 #ifdef __ARMEB__
    280 	mov	r3, lr, lsl #24
    281 #else
    282 	mov	r3, lr, lsr #24
    283 #endif
    284 	ldmia	r1!, {r4, r5, r12, lr}
    285 #ifdef __ARMEB__
    286 	orr	r3, r3, r4, lsr #8
    287 	mov	r4, r4, lsl #24
    288 	orr	r4, r4, r5, lsr #8
    289 	mov	r5, r5, lsl #24
    290 	orr	r5, r5, r12, lsr #8
    291 	mov	r12, r12, lsl #24
    292 	orr	r12, r12, lr, lsr #8
    293 #else
    294 	orr	r3, r3, r4, lsl #8
    295 	mov	r4, r4, lsr #24
    296 	orr	r4, r4, r5, lsl #8
    297 	mov	r5, r5, lsr #24
    298 	orr	r5, r5, r12, lsl #8
    299 	mov	r12, r12, lsr #24
    300 	orr	r12, r12, lr, lsl #8
    301 #endif
    302 	stmia	r0!, {r3-r5, r12}
    303 	subs	r2, r2, #0x10
    304 	bge	.Lmemmove_fsrcul3loop16
    305 	pop	{r4, r5}
    306 	adds	r2, r2, #0x0c
    307 	blt	.Lmemmove_fsrcul3l4
    308 
    309 .Lmemmove_fsrcul3loop4:
    310 #ifdef __ARMEB__
    311 	mov	r12, lr, lsl #24
    312 #else
    313 	mov	r12, lr, lsr #24
    314 #endif
    315 	ldr	lr, [r1], #4
    316 #ifdef __ARMEB__
    317 	orr	r12, r12, lr, lsr #8
    318 #else
    319 	orr	r12, r12, lr, lsl #8
    320 #endif
    321 	str	r12, [r0], #4
    322 	subs	r2, r2, #4
    323 	bge	.Lmemmove_fsrcul3loop4
    324 
    325 .Lmemmove_fsrcul3l4:
	/* back r1 up to the true (unaligned) source position */
    326 	sub	r1, r1, #1
    327 	b	.Lmemmove_fl4
    328 
	/*
	 * Overlapping copy with dst > src: advance both pointers one past
	 * the end of their buffers and copy descending, so each byte is
	 * read before the forward-overlapping store would clobber it.
	 * Mirror image of the forward path (ldmdb/stmdb, pre-decrement).
	 */
    329 .Lmemmove_backwards:
    330 	add	r1, r1, r2
    331 	add	r0, r0, r2
    332 	subs	r2, r2, #4
    333 	blt	.Lmemmove_bl4		/* less than 4 bytes */
    334 	ands	r12, r0, #3
    335 	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
    336 	ands	r12, r1, #3
    337 	bne	.Lmemmove_bsrcul		/* oh unaligned source addr */
    338 
    339 .Lmemmove_bt8:
    340 	/* We have aligned source and destination */
    341 	subs	r2, r2, #8
    342 	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
    343 	push	{r4, lr}
    344 	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
    345 	blt	.Lmemmove_bl32
    346 
    347 	/* blat 32 bytes at a time */
    348 	/* XXX for really big copies perhaps we should use more registers */
    349 .Lmemmove_bloop32:
    350 	ldmdb	r1!, {r3, r4, r12, lr}
    351 	stmdb	r0!, {r3, r4, r12, lr}
    352 	ldmdb	r1!, {r3, r4, r12, lr}
    353 	stmdb	r0!, {r3, r4, r12, lr}
    354 	subs	r2, r2, #0x20
    355 	bge	.Lmemmove_bloop32
    356 
    357 .Lmemmove_bl32:
    358 	cmn	r2, #0x10
    359 	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
    360 	stmdbge	r0!, {r3, r4, r12, lr}
    361 	subge	r2, r2, #0x10
    362 	adds	r2, r2, #0x14
    363 	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
    364 	stmdbge	r0!, {r3, r12, lr}
    365 	subge	r2, r2, #0x0c
    366 	pop	{r4, lr}
    367 
    368 .Lmemmove_bl12:
    369 	adds	r2, r2, #8
    370 	blt	.Lmemmove_bl4
    371 	subs	r2, r2, #4
    372 	ldrlt	r3, [r1, #-4]!
    373 	strlt	r3, [r0, #-4]!
    374 	ldmdbge	r1!, {r3, r12}
    375 	stmdbge	r0!, {r3, r12}
    376 	subge	r2, r2, #4
    377 
    378 .Lmemmove_bl4:
    379 	/* less than 4 bytes to go */
    380 	adds	r2, r2, #4
    381 	RETc(eq)
    382 
    383 	/* copy the crud byte at a time */
    384 	cmp	r2, #2
    385 	ldrb	r3, [r1, #-1]!
    386 	strb	r3, [r0, #-1]!
    387 	ldrbge	r3, [r1, #-1]!
    388 	strbge	r3, [r0, #-1]!
    389 	ldrbgt	r3, [r1, #-1]!
    390 	strbgt	r3, [r0, #-1]!
    391 	RET
    392 
    393 	/* erg - unaligned destination */
    394 .Lmemmove_bdestul:
    395 	cmp	r12, #2
    396 
    397 	/* align destination with byte copies */
    398 	ldrb	r3, [r1, #-1]!
    399 	strb	r3, [r0, #-1]!
    400 	ldrbge	r3, [r1, #-1]!
    401 	strbge	r3, [r0, #-1]!
    402 	ldrbgt	r3, [r1, #-1]!
    403 	strbgt	r3, [r0, #-1]!
    404 	subs	r2, r2, r12
    405 	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
    406 	ands	r12, r1, #3
    407 	beq	.Lmemmove_bt8		/* we have an aligned source */
    408 
    409 	/* erg - unaligned source */
    410 	/* This is where it gets nasty ... */
	/*
	 * Backward copy with unaligned source: r12 = src & 3 (1, 2 or 3).
	 * Round r1 down to a word boundary and pre-load the aligned word
	 * holding the last source bytes into r3, then merge words while
	 * walking down with ldmdb.  Shift directions are the reverse of
	 * the forward case, again paired for __ARMEB__ / little endian.
	 */
    411 .Lmemmove_bsrcul:
    412 	bic	r1, r1, #3
    413 	ldr	r3, [r1, #0]
    414 	cmp	r12, #2
    415 	blt	.Lmemmove_bsrcul1
    416 	beq	.Lmemmove_bsrcul2
    417 	cmp	r2, #0x0c
    418 	blt	.Lmemmove_bsrcul3loop4
    419 	sub	r2, r2, #0x0c
    420 	push	{r4, r5, lr}
    421 
    422 .Lmemmove_bsrcul3loop16:
    423 #ifdef __ARMEB__
    424 	mov	lr, r3, lsr #8
    425 #else
    426 	mov	lr, r3, lsl #8
    427 #endif
    428 	ldmdb	r1!, {r3-r5, r12}
    429 #ifdef __ARMEB__
    430 	orr	lr, lr, r12, lsl #24
    431 	mov	r12, r12, lsr #8
    432 	orr	r12, r12, r5, lsl #24
    433 	mov	r5, r5, lsr #8
    434 	orr	r5, r5, r4, lsl #24
    435 	mov	r4, r4, lsr #8
    436 	orr	r4, r4, r3, lsl #24
    437 #else
    438 	orr	lr, lr, r12, lsr #24
    439 	mov	r12, r12, lsl #8
    440 	orr	r12, r12, r5, lsr #24
    441 	mov	r5, r5, lsl #8
    442 	orr	r5, r5, r4, lsr #24
    443 	mov	r4, r4, lsl #8
    444 	orr	r4, r4, r3, lsr #24
    445 #endif
    446 	stmdb	r0!, {r4, r5, r12, lr}
    447 	subs	r2, r2, #0x10
    448 	bge	.Lmemmove_bsrcul3loop16
    449 	pop	{r4, r5, lr}
    450 	adds	r2, r2, #0x0c
    451 	blt	.Lmemmove_bsrcul3l4
    452 
    453 .Lmemmove_bsrcul3loop4:
    454 #ifdef __ARMEB__
    455 	mov	r12, r3, lsr #8
    456 #else
    457 	mov	r12, r3, lsl #8
    458 #endif
    459 	ldr	r3, [r1, #-4]!
    460 #ifdef __ARMEB__
    461 	orr	r12, r12, r3, lsl #24
    462 #else
    463 	orr	r12, r12, r3, lsr #24
    464 #endif
    465 	str	r12, [r0, #-4]!
    466 	subs	r2, r2, #4
    467 	bge	.Lmemmove_bsrcul3loop4
    468 
    469 .Lmemmove_bsrcul3l4:
	/* advance r1 back to the true (unaligned) source position */
    470 	add	r1, r1, #3
    471 	b	.Lmemmove_bl4
    472 
    473 .Lmemmove_bsrcul2:
    474 	cmp	r2, #0x0c
    475 	blt	.Lmemmove_bsrcul2loop4
    476 	sub	r2, r2, #0x0c
    477 	push	{r4, r5, lr}
    478 
    479 .Lmemmove_bsrcul2loop16:
    480 #ifdef __ARMEB__
    481 	mov	lr, r3, lsr #16
    482 #else
    483 	mov	lr, r3, lsl #16
    484 #endif
    485 	ldmdb	r1!, {r3-r5, r12}
    486 #ifdef __ARMEB__
    487 	orr	lr, lr, r12, lsl #16
    488 	mov	r12, r12, lsr #16
    489 	orr	r12, r12, r5, lsl #16
    490 	mov	r5, r5, lsr #16
    491 	orr	r5, r5, r4, lsl #16
    492 	mov	r4, r4, lsr #16
    493 	orr	r4, r4, r3, lsl #16
    494 #else
    495 	orr	lr, lr, r12, lsr #16
    496 	mov	r12, r12, lsl #16
    497 	orr	r12, r12, r5, lsr #16
    498 	mov	r5, r5, lsl #16
    499 	orr	r5, r5, r4, lsr #16
    500 	mov	r4, r4, lsl #16
    501 	orr	r4, r4, r3, lsr #16
    502 #endif
    503 	stmdb	r0!, {r4, r5, r12, lr}
    504 	subs	r2, r2, #0x10
    505 	bge	.Lmemmove_bsrcul2loop16
    506 	pop	{r4, r5, lr}
    507 	adds	r2, r2, #0x0c
    508 	blt	.Lmemmove_bsrcul2l4
    509 
    510 .Lmemmove_bsrcul2loop4:
    511 #ifdef __ARMEB__
    512 	mov	r12, r3, lsr #16
    513 #else
    514 	mov	r12, r3, lsl #16
    515 #endif
    516 	ldr	r3, [r1, #-4]!
    517 #ifdef __ARMEB__
    518 	orr	r12, r12, r3, lsl #16
    519 #else
    520 	orr	r12, r12, r3, lsr #16
    521 #endif
    522 	str	r12, [r0, #-4]!
    523 	subs	r2, r2, #4
    524 	bge	.Lmemmove_bsrcul2loop4
    525 
    526 .Lmemmove_bsrcul2l4:
	/* advance r1 back to the true (unaligned) source position */
    527 	add	r1, r1, #2
    528 	b	.Lmemmove_bl4
    529 
    530 .Lmemmove_bsrcul1:
    531 	cmp	r2, #0x0c
    532 	blt	.Lmemmove_bsrcul1loop4
    533 	sub	r2, r2, #0x0c
    534 	push	{r4, r5, lr}
    535 
    536 .Lmemmove_bsrcul1loop32:
    537 #ifdef __ARMEB__
    538 	mov	lr, r3, lsr #24
    539 #else
    540 	mov	lr, r3, lsl #24
    541 #endif
    542 	ldmdb	r1!, {r3-r5, r12}
    543 #ifdef __ARMEB__
    544 	orr	lr, lr, r12, lsl #8
    545 	mov	r12, r12, lsr #24
    546 	orr	r12, r12, r5, lsl #8
    547 	mov	r5, r5, lsr #24
    548 	orr	r5, r5, r4, lsl #8
    549 	mov	r4, r4, lsr #24
    550 	orr	r4, r4, r3, lsl #8
    551 #else
    552 	orr	lr, lr, r12, lsr #8
    553 	mov	r12, r12, lsl #24
    554 	orr	r12, r12, r5, lsr #8
    555 	mov	r5, r5, lsl #24
    556 	orr	r5, r5, r4, lsr #8
    557 	mov	r4, r4, lsl #24
    558 	orr	r4, r4, r3, lsr #8
    559 #endif
    560 	stmdb	r0!, {r4, r5, r12, lr}
    561 	subs	r2, r2, #0x10
    562 	bge	.Lmemmove_bsrcul1loop32
    563 	pop	{r4, r5, lr}
    564 	adds	r2, r2, #0x0c
    565 	blt	.Lmemmove_bsrcul1l4
    566 
    567 .Lmemmove_bsrcul1loop4:
    568 #ifdef __ARMEB__
    569 	mov	r12, r3, lsr #24
    570 #else
    571 	mov	r12, r3, lsl #24
    572 #endif
    573 	ldr	r3, [r1, #-4]!
    574 #ifdef __ARMEB__
    575 	orr	r12, r12, r3, lsl #8
    576 #else
    577 	orr	r12, r12, r3, lsr #8
    578 #endif
    579 	str	r12, [r0, #-4]!
    580 	subs	r2, r2, #4
    581 	bge	.Lmemmove_bsrcul1loop4
    582 
    583 .Lmemmove_bsrcul1l4:
	/* advance r1 back to the true (unaligned) source position */
    584 	add	r1, r1, #1
    585 	b	.Lmemmove_bl4
    586 
    586