Home | History | Annotate | Line # | Download | only in string
memmove.S revision 1.1.6.1
      1 /*	$NetBSD: memmove.S,v 1.1.6.1 2007/06/21 14:25:01 liamjfoy Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1997 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Neil A. Carson and Mark Brinicombe
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <machine/asm.h>
     40 
#ifndef _BCOPY
      42 /* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
      43 ENTRY(memmove)
      44 #else
      45 /* bcopy = memcpy/memmove with arguments reversed. */
      46 /* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
      47 ENTRY(bcopy)
      48 	/* switch the source and destination registers */
      49 	eor     r0, r1, r0
      50 	eor     r1, r0, r1
      51 	eor     r0, r1, r0
      52 #endif
	/*
	 * On entry: r0 = dst, r1 = src, r2 = len (bcopy's arguments were
	 * swapped by the eor sequence above).  memmove() returns the
	 * original dst in r0.  Overlap is handled by picking the copy
	 * direction (forward vs. backwards) below.
	 */
      53 	/* Do the buffers overlap? */
      54 	cmp	r0, r1
      55 	RETc(eq)		/* Bail now if src/dst are the same */
      56 	subhs	r3, r0, r1	/* if (dst > src) r3 = dst - src */
      57 	sublo	r3, r1, r0	/* if (src > dst) r3 = src - dst */
      58 	cmp	r3, r2		/* if (r3 < len) the buffers overlap */
      59 	bhs	PIC_SYM(_C_LABEL(memcpy), PLT)	/* r3 >= len: no overlap, memcpy is safe */
      60 
      61 	/* Determine copy direction */
      62 	cmp	r1, r0
      63 	bcc	.Lmemmove_backwards	/* src < dst: copy down from the end */
      64 
	/*
	 * NOTE(review): the flags here are from the cmp just above, and the
	 * eq case (src == dst) already returned via RETc(eq) earlier, so the
	 * next two instructions appear to be unreachable dead code.
	 */
      65 	moveq	r0, #0			/* Quick abort for len=0 */
      66 	RETc(eq)
      67 
      68 	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
      69 	subs	r2, r2, #4
      70 	blt	.Lmemmove_fl4		/* less than 4 bytes */
      71 	ands	r12, r0, #3
      72 	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
      73 	ands	r12, r1, #3
      74 	bne	.Lmemmove_fsrcul		/* oh unaligned source addr */
      75 
      76 .Lmemmove_ft8:
      77 	/* We have aligned source and destination */
      78 	subs	r2, r2, #8
      79 	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
      80 	subs	r2, r2, #0x14
      81 	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
      82 	stmdb	sp!, {r4}		/* borrow r4 */
      83 
      84 	/* blat 32 bytes at a time */
      85 	/* XXX for really big copies perhaps we should use more registers */
      86 .Lmemmove_floop32:
      87 	ldmia	r1!, {r3, r4, r12, lr}
      88 	stmia	r0!, {r3, r4, r12, lr}
      89 	ldmia	r1!, {r3, r4, r12, lr}
      90 	stmia	r0!, {r3, r4, r12, lr}
      91 	subs	r2, r2, #0x20
      92 	bge	.Lmemmove_floop32
      93 
      94 	cmn	r2, #0x10
      95 	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
      96 	stmgeia	r0!, {r3, r4, r12, lr}
      97 	subge	r2, r2, #0x10
      98 	ldmia	sp!, {r4}		/* return r4 */
      99 
     100 .Lmemmove_fl32:
     101 	adds	r2, r2, #0x14
     102 
     103 	/* blat 12 bytes at a time */
     104 .Lmemmove_floop12:
     105 	ldmgeia	r1!, {r3, r12, lr}
     106 	stmgeia	r0!, {r3, r12, lr}
     107 	subges	r2, r2, #0x0c
     108 	bge	.Lmemmove_floop12
     109 
     110 .Lmemmove_fl12:
     111 	adds	r2, r2, #8
     112 	blt	.Lmemmove_fl4
     113 
     114 	subs	r2, r2, #4
     115 	ldrlt	r3, [r1], #4
     116 	strlt	r3, [r0], #4
     117 	ldmgeia	r1!, {r3, r12}
     118 	stmgeia	r0!, {r3, r12}
     119 	subge	r2, r2, #4
     120 
     121 .Lmemmove_fl4:
     122 	/* less than 4 bytes to go */
     123 	adds	r2, r2, #4
     124 	ldmeqia	sp!, {r0, pc}		/* done */
     125 
     126 	/* copy the crud byte at a time */
     127 	cmp	r2, #2
     128 	ldrb	r3, [r1], #1
     129 	strb	r3, [r0], #1
     130 	ldrgeb	r3, [r1], #1
     131 	strgeb	r3, [r0], #1
     132 	ldrgtb	r3, [r1], #1
     133 	strgtb	r3, [r0], #1
     134 	ldmia	sp!, {r0, pc}
     135 
     136 	/* erg - unaligned destination */
     137 .Lmemmove_fdestul:
     138 	rsb	r12, r12, #4
     139 	cmp	r12, #2
     140 
     141 	/* align destination with byte copies */
     142 	ldrb	r3, [r1], #1
     143 	strb	r3, [r0], #1
     144 	ldrgeb	r3, [r1], #1
     145 	strgeb	r3, [r0], #1
     146 	ldrgtb	r3, [r1], #1
     147 	strgtb	r3, [r0], #1
     148 	subs	r2, r2, r12
     149 	blt	.Lmemmove_fl4		/* less the 4 bytes */
     150 
     151 	ands	r12, r1, #3
     152 	beq	.Lmemmove_ft8		/* we have an aligned source */
     153 
     154 	/* erg - unaligned source */
     155 	/* This is where it gets nasty ... */
	/*
	 * Unaligned source: round r1 down to a word boundary and splice
	 * bytes from consecutive aligned words together with shifts.
	 * lr carries the partially consumed word between iterations;
	 * r12 = (src & 3) selects which of the three splice variants runs.
	 */
     156 .Lmemmove_fsrcul:
     157 	bic	r1, r1, #3
     158 	ldr	lr, [r1], #4
     159 	cmp	r12, #2
     160 	bgt	.Lmemmove_fsrcul3
     161 	beq	.Lmemmove_fsrcul2
     162 	cmp	r2, #0x0c
     163 	blt	.Lmemmove_fsrcul1loop4
     164 	sub	r2, r2, #0x0c
     165 	stmdb	sp!, {r4, r5}
     166 
     167 .Lmemmove_fsrcul1loop16:
     168 #ifdef __ARMEB__
     169 	mov	r3, lr, lsl #8
     170 #else
     171 	mov	r3, lr, lsr #8
     172 #endif
     173 	ldmia	r1!, {r4, r5, r12, lr}
     174 #ifdef __ARMEB__
     175 	orr	r3, r3, r4, lsr #24
     176 	mov	r4, r4, lsl #8
     177 	orr	r4, r4, r5, lsr #24
     178 	mov	r5, r5, lsl #8
     179 	orr	r5, r5, r12, lsr #24
     180 	mov	r12, r12, lsl #8
     181 	orr	r12, r12, lr, lsr #24
     182 #else
     183 	orr	r3, r3, r4, lsl #24
     184 	mov	r4, r4, lsr #8
     185 	orr	r4, r4, r5, lsl #24
     186 	mov	r5, r5, lsr #8
     187 	orr	r5, r5, r12, lsl #24
     188 	mov	r12, r12, lsr #8
     189 	orr	r12, r12, lr, lsl #24
     190 #endif
     191 	stmia	r0!, {r3-r5, r12}
     192 	subs	r2, r2, #0x10
     193 	bge	.Lmemmove_fsrcul1loop16
     194 	ldmia	sp!, {r4, r5}
     195 	adds	r2, r2, #0x0c
     196 	blt	.Lmemmove_fsrcul1l4
     197 
     198 .Lmemmove_fsrcul1loop4:
     199 #ifdef __ARMEB__
     200 	mov	r12, lr, lsl #8
     201 #else
     202 	mov	r12, lr, lsr #8
     203 #endif
     204 	ldr	lr, [r1], #4
     205 #ifdef __ARMEB__
     206 	orr	r12, r12, lr, lsr #24
     207 #else
     208 	orr	r12, r12, lr, lsl #24
     209 #endif
     210 	str	r12, [r0], #4
     211 	subs	r2, r2, #4
     212 	bge	.Lmemmove_fsrcul1loop4
     213 
     214 .Lmemmove_fsrcul1l4:
     215 	sub	r1, r1, #3
     216 	b	.Lmemmove_fl4
     217 
     218 .Lmemmove_fsrcul2:
     219 	cmp	r2, #0x0c
     220 	blt	.Lmemmove_fsrcul2loop4
     221 	sub	r2, r2, #0x0c
     222 	stmdb	sp!, {r4, r5}
     223 
     224 .Lmemmove_fsrcul2loop16:
     225 #ifdef __ARMEB__
     226 	mov	r3, lr, lsl #16
     227 #else
     228 	mov	r3, lr, lsr #16
     229 #endif
     230 	ldmia	r1!, {r4, r5, r12, lr}
     231 #ifdef __ARMEB__
     232 	orr	r3, r3, r4, lsr #16
     233 	mov	r4, r4, lsl #16
     234 	orr	r4, r4, r5, lsr #16
     235 	mov	r5, r5, lsl #16
     236 	orr	r5, r5, r12, lsr #16
     237 	mov	r12, r12, lsl #16
     238 	orr	r12, r12, lr, lsr #16
     239 #else
     240 	orr	r3, r3, r4, lsl #16
     241 	mov	r4, r4, lsr #16
     242 	orr	r4, r4, r5, lsl #16
     243 	mov	r5, r5, lsr #16
     244 	orr	r5, r5, r12, lsl #16
     245 	mov	r12, r12, lsr #16
     246 	orr	r12, r12, lr, lsl #16
     247 #endif
     248 	stmia	r0!, {r3-r5, r12}
     249 	subs	r2, r2, #0x10
     250 	bge	.Lmemmove_fsrcul2loop16
     251 	ldmia	sp!, {r4, r5}
     252 	adds	r2, r2, #0x0c
     253 	blt	.Lmemmove_fsrcul2l4
     254 
     255 .Lmemmove_fsrcul2loop4:
     256 #ifdef __ARMEB__
     257 	mov	r12, lr, lsl #16
     258 #else
     259 	mov	r12, lr, lsr #16
     260 #endif
     261 	ldr	lr, [r1], #4
     262 #ifdef __ARMEB__
     263 	orr	r12, r12, lr, lsr #16
     264 #else
     265 	orr	r12, r12, lr, lsl #16
     266 #endif
     267 	str	r12, [r0], #4
     268 	subs	r2, r2, #4
     269 	bge	.Lmemmove_fsrcul2loop4
     270 
     271 .Lmemmove_fsrcul2l4:
     272 	sub	r1, r1, #2
     273 	b	.Lmemmove_fl4
     274 
     275 .Lmemmove_fsrcul3:
     276 	cmp	r2, #0x0c
     277 	blt	.Lmemmove_fsrcul3loop4
     278 	sub	r2, r2, #0x0c
     279 	stmdb	sp!, {r4, r5}
     280 
     281 .Lmemmove_fsrcul3loop16:
     282 #ifdef __ARMEB__
     283 	mov	r3, lr, lsl #24
     284 #else
     285 	mov	r3, lr, lsr #24
     286 #endif
     287 	ldmia	r1!, {r4, r5, r12, lr}
     288 #ifdef __ARMEB__
     289 	orr	r3, r3, r4, lsr #8
     290 	mov	r4, r4, lsl #24
     291 	orr	r4, r4, r5, lsr #8
     292 	mov	r5, r5, lsl #24
     293 	orr	r5, r5, r12, lsr #8
     294 	mov	r12, r12, lsl #24
     295 	orr	r12, r12, lr, lsr #8
     296 #else
     297 	orr	r3, r3, r4, lsl #8
     298 	mov	r4, r4, lsr #24
     299 	orr	r4, r4, r5, lsl #8
     300 	mov	r5, r5, lsr #24
     301 	orr	r5, r5, r12, lsl #8
     302 	mov	r12, r12, lsr #24
     303 	orr	r12, r12, lr, lsl #8
     304 #endif
     305 	stmia	r0!, {r3-r5, r12}
     306 	subs	r2, r2, #0x10
     307 	bge	.Lmemmove_fsrcul3loop16
     308 	ldmia	sp!, {r4, r5}
     309 	adds	r2, r2, #0x0c
     310 	blt	.Lmemmove_fsrcul3l4
     311 
     312 .Lmemmove_fsrcul3loop4:
     313 #ifdef __ARMEB__
     314 	mov	r12, lr, lsl #24
     315 #else
     316 	mov	r12, lr, lsr #24
     317 #endif
     318 	ldr	lr, [r1], #4
     319 #ifdef __ARMEB__
     320 	orr	r12, r12, lr, lsr #8
     321 #else
     322 	orr	r12, r12, lr, lsl #8
     323 #endif
     324 	str	r12, [r0], #4
     325 	subs	r2, r2, #4
     326 	bge	.Lmemmove_fsrcul3loop4
     327 
     328 .Lmemmove_fsrcul3l4:
     329 	sub	r1, r1, #1
     330 	b	.Lmemmove_fl4
     331 
	/*
	 * dst > src: copy from the ends of the buffers downwards so the
	 * overlapping tail of src is read before it is overwritten.
	 * Mirrors the forward path (alignment fix-up, 32-byte blats,
	 * unaligned-source splicing) using ldmdb/stmdb instead of ldmia/stmia.
	 */
     332 .Lmemmove_backwards:
     333 	add	r1, r1, r2
     334 	add	r0, r0, r2
     335 	subs	r2, r2, #4
     336 	blt	.Lmemmove_bl4		/* less than 4 bytes */
     337 	ands	r12, r0, #3
     338 	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
     339 	ands	r12, r1, #3
     340 	bne	.Lmemmove_bsrcul		/* oh unaligned source addr */
     341 
     342 .Lmemmove_bt8:
     343 	/* We have aligned source and destination */
     344 	subs	r2, r2, #8
     345 	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
     346 	stmdb	sp!, {r4, lr}
     347 	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
     348 	blt	.Lmemmove_bl32
     349 
     350 	/* blat 32 bytes at a time */
     351 	/* XXX for really big copies perhaps we should use more registers */
     352 .Lmemmove_bloop32:
     353 	ldmdb	r1!, {r3, r4, r12, lr}
     354 	stmdb	r0!, {r3, r4, r12, lr}
     355 	ldmdb	r1!, {r3, r4, r12, lr}
     356 	stmdb	r0!, {r3, r4, r12, lr}
     357 	subs	r2, r2, #0x20
     358 	bge	.Lmemmove_bloop32
     359 
     360 .Lmemmove_bl32:
     361 	cmn	r2, #0x10
     362 	ldmgedb	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
     363 	stmgedb	r0!, {r3, r4, r12, lr}
     364 	subge	r2, r2, #0x10
     365 	adds	r2, r2, #0x14
     366 	ldmgedb	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
     367 	stmgedb	r0!, {r3, r12, lr}
     368 	subge	r2, r2, #0x0c
     369 	ldmia	sp!, {r4, lr}
     370 
     371 .Lmemmove_bl12:
     372 	adds	r2, r2, #8
     373 	blt	.Lmemmove_bl4
     374 	subs	r2, r2, #4
     375 	ldrlt	r3, [r1, #-4]!
     376 	strlt	r3, [r0, #-4]!
     377 	ldmgedb	r1!, {r3, r12}
     378 	stmgedb	r0!, {r3, r12}
     379 	subge	r2, r2, #4
     380 
     381 .Lmemmove_bl4:
     382 	/* less than 4 bytes to go */
     383 	adds	r2, r2, #4
     384 	RETc(eq)
     385 
     386 	/* copy the crud byte at a time */
     387 	cmp	r2, #2
     388 	ldrb	r3, [r1, #-1]!
     389 	strb	r3, [r0, #-1]!
     390 	ldrgeb	r3, [r1, #-1]!
     391 	strgeb	r3, [r0, #-1]!
     392 	ldrgtb	r3, [r1, #-1]!
     393 	strgtb	r3, [r0, #-1]!
     394 	RET
     395 
     396 	/* erg - unaligned destination */
     397 .Lmemmove_bdestul:
     398 	cmp	r12, #2
     399 
     400 	/* align destination with byte copies */
     401 	ldrb	r3, [r1, #-1]!
     402 	strb	r3, [r0, #-1]!
     403 	ldrgeb	r3, [r1, #-1]!
     404 	strgeb	r3, [r0, #-1]!
     405 	ldrgtb	r3, [r1, #-1]!
     406 	strgtb	r3, [r0, #-1]!
     407 	subs	r2, r2, r12
     408 	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
     409 	ands	r12, r1, #3
     410 	beq	.Lmemmove_bt8		/* we have an aligned source */
     411 
     412 	/* erg - unaligned source */
     413 	/* This is where it gets nasty ... */
	/*
	 * Backwards variant of the unaligned-source splice: r3 carries the
	 * partially consumed aligned word between iterations; r12 = (src & 3)
	 * selects the shift amounts used to merge adjacent words.
	 */
     414 .Lmemmove_bsrcul:
     415 	bic	r1, r1, #3
     416 	ldr	r3, [r1, #0]
     417 	cmp	r12, #2
     418 	blt	.Lmemmove_bsrcul1
     419 	beq	.Lmemmove_bsrcul2
     420 	cmp	r2, #0x0c
     421 	blt	.Lmemmove_bsrcul3loop4
     422 	sub	r2, r2, #0x0c
     423 	stmdb	sp!, {r4, r5, lr}
     424 
     425 .Lmemmove_bsrcul3loop16:
     426 #ifdef __ARMEB__
     427 	mov	lr, r3, lsr #8
     428 #else
     429 	mov	lr, r3, lsl #8
     430 #endif
     431 	ldmdb	r1!, {r3-r5, r12}
     432 #ifdef __ARMEB__
     433 	orr	lr, lr, r12, lsl #24
     434 	mov	r12, r12, lsr #8
     435 	orr	r12, r12, r5, lsl #24
     436 	mov	r5, r5, lsr #8
     437 	orr	r5, r5, r4, lsl #24
     438 	mov	r4, r4, lsr #8
     439 	orr	r4, r4, r3, lsl #24
     440 #else
     441 	orr	lr, lr, r12, lsr #24
     442 	mov	r12, r12, lsl #8
     443 	orr	r12, r12, r5, lsr #24
     444 	mov	r5, r5, lsl #8
     445 	orr	r5, r5, r4, lsr #24
     446 	mov	r4, r4, lsl #8
     447 	orr	r4, r4, r3, lsr #24
     448 #endif
     449 	stmdb	r0!, {r4, r5, r12, lr}
     450 	subs	r2, r2, #0x10
     451 	bge	.Lmemmove_bsrcul3loop16
     452 	ldmia	sp!, {r4, r5, lr}
     453 	adds	r2, r2, #0x0c
     454 	blt	.Lmemmove_bsrcul3l4
     455 
     456 .Lmemmove_bsrcul3loop4:
     457 #ifdef __ARMEB__
     458 	mov	r12, r3, lsr #8
     459 #else
     460 	mov	r12, r3, lsl #8
     461 #endif
     462 	ldr	r3, [r1, #-4]!
     463 #ifdef __ARMEB__
     464 	orr	r12, r12, r3, lsl #24
     465 #else
     466 	orr	r12, r12, r3, lsr #24
     467 #endif
     468 	str	r12, [r0, #-4]!
     469 	subs	r2, r2, #4
     470 	bge	.Lmemmove_bsrcul3loop4
     471 
     472 .Lmemmove_bsrcul3l4:
     473 	add	r1, r1, #3
     474 	b	.Lmemmove_bl4
     475 
     476 .Lmemmove_bsrcul2:
     477 	cmp	r2, #0x0c
     478 	blt	.Lmemmove_bsrcul2loop4
     479 	sub	r2, r2, #0x0c
     480 	stmdb	sp!, {r4, r5, lr}
     481 
     482 .Lmemmove_bsrcul2loop16:
     483 #ifdef __ARMEB__
     484 	mov	lr, r3, lsr #16
     485 #else
     486 	mov	lr, r3, lsl #16
     487 #endif
     488 	ldmdb	r1!, {r3-r5, r12}
     489 #ifdef __ARMEB__
     490 	orr	lr, lr, r12, lsl #16
     491 	mov	r12, r12, lsr #16
     492 	orr	r12, r12, r5, lsl #16
     493 	mov	r5, r5, lsr #16
     494 	orr	r5, r5, r4, lsl #16
     495 	mov	r4, r4, lsr #16
     496 	orr	r4, r4, r3, lsl #16
     497 #else
     498 	orr	lr, lr, r12, lsr #16
     499 	mov	r12, r12, lsl #16
     500 	orr	r12, r12, r5, lsr #16
     501 	mov	r5, r5, lsl #16
     502 	orr	r5, r5, r4, lsr #16
     503 	mov	r4, r4, lsl #16
     504 	orr	r4, r4, r3, lsr #16
     505 #endif
     506 	stmdb	r0!, {r4, r5, r12, lr}
     507 	subs	r2, r2, #0x10
     508 	bge	.Lmemmove_bsrcul2loop16
     509 	ldmia	sp!, {r4, r5, lr}
     510 	adds	r2, r2, #0x0c
     511 	blt	.Lmemmove_bsrcul2l4
     512 
     513 .Lmemmove_bsrcul2loop4:
     514 #ifdef __ARMEB__
     515 	mov	r12, r3, lsr #16
     516 #else
     517 	mov	r12, r3, lsl #16
     518 #endif
     519 	ldr	r3, [r1, #-4]!
     520 #ifdef __ARMEB__
     521 	orr	r12, r12, r3, lsl #16
     522 #else
     523 	orr	r12, r12, r3, lsr #16
     524 #endif
     525 	str	r12, [r0, #-4]!
     526 	subs	r2, r2, #4
     527 	bge	.Lmemmove_bsrcul2loop4
     528 
     529 .Lmemmove_bsrcul2l4:
     530 	add	r1, r1, #2
     531 	b	.Lmemmove_bl4
     532 
     533 .Lmemmove_bsrcul1:
     534 	cmp	r2, #0x0c
     535 	blt	.Lmemmove_bsrcul1loop4
     536 	sub	r2, r2, #0x0c
     537 	stmdb	sp!, {r4, r5, lr}
     538 
     539 .Lmemmove_bsrcul1loop32:
     540 #ifdef __ARMEB__
     541 	mov	lr, r3, lsr #24
     542 #else
     543 	mov	lr, r3, lsl #24
     544 #endif
     545 	ldmdb	r1!, {r3-r5, r12}
     546 #ifdef __ARMEB__
     547 	orr	lr, lr, r12, lsl #8
     548 	mov	r12, r12, lsr #24
     549 	orr	r12, r12, r5, lsl #8
     550 	mov	r5, r5, lsr #24
     551 	orr	r5, r5, r4, lsl #8
     552 	mov	r4, r4, lsr #24
     553 	orr	r4, r4, r3, lsl #8
     554 #else
     555 	orr	lr, lr, r12, lsr #8
     556 	mov	r12, r12, lsl #24
     557 	orr	r12, r12, r5, lsr #8
     558 	mov	r5, r5, lsl #24
     559 	orr	r5, r5, r4, lsr #8
     560 	mov	r4, r4, lsl #24
     561 	orr	r4, r4, r3, lsr #8
     562 #endif
     563 	stmdb	r0!, {r4, r5, r12, lr}
     564 	subs	r2, r2, #0x10
     565 	bge	.Lmemmove_bsrcul1loop32
     566 	ldmia	sp!, {r4, r5, lr}
     567 	adds	r2, r2, #0x0c
     568 	blt	.Lmemmove_bsrcul1l4
     569 
     570 .Lmemmove_bsrcul1loop4:
     571 #ifdef __ARMEB__
     572 	mov	r12, r3, lsr #24
     573 #else
     574 	mov	r12, r3, lsl #24
     575 #endif
     576 	ldr	r3, [r1, #-4]!
     577 #ifdef __ARMEB__
     578 	orr	r12, r12, r3, lsl #8
     579 #else
     580 	orr	r12, r12, r3, lsr #8
     581 #endif
     582 	str	r12, [r0, #-4]!
     583 	subs	r2, r2, #4
     584 	bge	.Lmemmove_bsrcul1loop4
     585 
     586 .Lmemmove_bsrcul1l4:
     587 	add	r1, r1, #1
     588 	b	.Lmemmove_bl4
     589