/*	$NetBSD: memcpy_xscale.S,v 1.3 2013/08/11 04:56:32 matt Exp $	*/

/*
 * Copyright 2003 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Steve C. Woodford for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
37
38 #include <machine/asm.h>
39
40 /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
41 ENTRY(memcpy)
42 pld [r1]
43 cmp r2, #0x0c
44 ble .Lmemcpy_short /* <= 12 bytes */
45 mov r3, r0 /* We must not clobber r0 */
46
47 /* Word-align the destination buffer */
48 ands ip, r3, #0x03 /* Already word aligned? */
49 beq .Lmemcpy_wordaligned /* Yup */
50 cmp ip, #0x02
51 ldrb ip, [r1], #0x01
52 sub r2, r2, #0x01
53 strb ip, [r3], #0x01
54 ldrble ip, [r1], #0x01
55 suble r2, r2, #0x01
56 strble ip, [r3], #0x01
57 ldrblt ip, [r1], #0x01
58 sublt r2, r2, #0x01
59 strblt ip, [r3], #0x01
60
61 /* Destination buffer is now word aligned */
62 .Lmemcpy_wordaligned:
63 ands ip, r1, #0x03 /* Is src also word-aligned? */
64 bne .Lmemcpy_bad_align /* Nope. Things just got bad */
65
66 /* Quad-align the destination buffer */
67 tst r3, #0x07 /* Already quad aligned? */
68 ldrne ip, [r1], #0x04
69 push {r4-r9} /* Free up some registers */
70 subne r2, r2, #0x04
71 strne ip, [r3], #0x04
72
73 /* Destination buffer quad aligned, source is at least word aligned */
74 subs r2, r2, #0x80
75 blt .Lmemcpy_w_lessthan128
76
77 /* Copy 128 bytes at a time */
78 .Lmemcpy_w_loop128:
79 ldr r4, [r1], #0x04 /* LD:00-03 */
80 ldr r5, [r1], #0x04 /* LD:04-07 */
81 pld [r1, #0x18] /* Prefetch 0x20 */
82 ldr r6, [r1], #0x04 /* LD:08-0b */
83 ldr r7, [r1], #0x04 /* LD:0c-0f */
84 ldr r8, [r1], #0x04 /* LD:10-13 */
85 ldr r9, [r1], #0x04 /* LD:14-17 */
86 strd r4, [r3], #0x08 /* ST:00-07 */
87 ldr r4, [r1], #0x04 /* LD:18-1b */
88 ldr r5, [r1], #0x04 /* LD:1c-1f */
89 strd r6, [r3], #0x08 /* ST:08-0f */
90 ldr r6, [r1], #0x04 /* LD:20-23 */
91 ldr r7, [r1], #0x04 /* LD:24-27 */
92 pld [r1, #0x18] /* Prefetch 0x40 */
93 strd r8, [r3], #0x08 /* ST:10-17 */
94 ldr r8, [r1], #0x04 /* LD:28-2b */
95 ldr r9, [r1], #0x04 /* LD:2c-2f */
96 strd r4, [r3], #0x08 /* ST:18-1f */
97 ldr r4, [r1], #0x04 /* LD:30-33 */
98 ldr r5, [r1], #0x04 /* LD:34-37 */
99 strd r6, [r3], #0x08 /* ST:20-27 */
100 ldr r6, [r1], #0x04 /* LD:38-3b */
101 ldr r7, [r1], #0x04 /* LD:3c-3f */
102 strd r8, [r3], #0x08 /* ST:28-2f */
103 ldr r8, [r1], #0x04 /* LD:40-43 */
104 ldr r9, [r1], #0x04 /* LD:44-47 */
105 pld [r1, #0x18] /* Prefetch 0x60 */
106 strd r4, [r3], #0x08 /* ST:30-37 */
107 ldr r4, [r1], #0x04 /* LD:48-4b */
108 ldr r5, [r1], #0x04 /* LD:4c-4f */
109 strd r6, [r3], #0x08 /* ST:38-3f */
110 ldr r6, [r1], #0x04 /* LD:50-53 */
111 ldr r7, [r1], #0x04 /* LD:54-57 */
112 strd r8, [r3], #0x08 /* ST:40-47 */
113 ldr r8, [r1], #0x04 /* LD:58-5b */
114 ldr r9, [r1], #0x04 /* LD:5c-5f */
115 strd r4, [r3], #0x08 /* ST:48-4f */
116 ldr r4, [r1], #0x04 /* LD:60-63 */
117 ldr r5, [r1], #0x04 /* LD:64-67 */
118 pld [r1, #0x18] /* Prefetch 0x80 */
119 strd r6, [r3], #0x08 /* ST:50-57 */
120 ldr r6, [r1], #0x04 /* LD:68-6b */
121 ldr r7, [r1], #0x04 /* LD:6c-6f */
122 strd r8, [r3], #0x08 /* ST:58-5f */
123 ldr r8, [r1], #0x04 /* LD:70-73 */
124 ldr r9, [r1], #0x04 /* LD:74-77 */
125 strd r4, [r3], #0x08 /* ST:60-67 */
126 ldr r4, [r1], #0x04 /* LD:78-7b */
127 ldr r5, [r1], #0x04 /* LD:7c-7f */
128 strd r6, [r3], #0x08 /* ST:68-6f */
129 strd r8, [r3], #0x08 /* ST:70-77 */
130 subs r2, r2, #0x80
131 strd r4, [r3], #0x08 /* ST:78-7f */
132 bge .Lmemcpy_w_loop128
133
134 .Lmemcpy_w_lessthan128:
135 adds r2, r2, #0x80 /* Adjust for extra sub */
136 popeq {r4-r9}
137 RETc(eq) /* Return now if done */
138 subs r2, r2, #0x20
139 blt .Lmemcpy_w_lessthan32
140
141 /* Copy 32 bytes at a time */
142 .Lmemcpy_w_loop32:
143 ldr r4, [r1], #0x04
144 ldr r5, [r1], #0x04
145 pld [r1, #0x18]
146 ldr r6, [r1], #0x04
147 ldr r7, [r1], #0x04
148 ldr r8, [r1], #0x04
149 ldr r9, [r1], #0x04
150 strd r4, [r3], #0x08
151 ldr r4, [r1], #0x04
152 ldr r5, [r1], #0x04
153 strd r6, [r3], #0x08
154 strd r8, [r3], #0x08
155 subs r2, r2, #0x20
156 strd r4, [r3], #0x08
157 bge .Lmemcpy_w_loop32
158
159 .Lmemcpy_w_lessthan32:
160 adds r2, r2, #0x20 /* Adjust for extra sub */
161 popeq {r4-r9}
162 RETc(eq) /* Return now if done */
163
164 and r4, r2, #0x18
165 rsbs r4, r4, #0x18
166 addne pc, pc, r4, lsl #1
167 nop
168
169 /* At least 24 bytes remaining */
170 ldr r4, [r1], #0x04
171 ldr r5, [r1], #0x04
172 sub r2, r2, #0x08
173 strd r4, [r3], #0x08
174
175 /* At least 16 bytes remaining */
176 ldr r4, [r1], #0x04
177 ldr r5, [r1], #0x04
178 sub r2, r2, #0x08
179 strd r4, [r3], #0x08
180
181 /* At least 8 bytes remaining */
182 ldr r4, [r1], #0x04
183 ldr r5, [r1], #0x04
184 subs r2, r2, #0x08
185 strd r4, [r3], #0x08
186
187 /* Less than 8 bytes remaining */
188 pop {r4-r9}
189 RETc(eq) /* Return now if done */
190 subs r2, r2, #0x04
191 ldrge ip, [r1], #0x04
192 strge ip, [r3], #0x04
193 RETc(eq) /* Return now if done */
194 addlt r2, r2, #0x04
195 ldrb ip, [r1], #0x01
196 cmp r2, #0x02
197 ldrbge r2, [r1], #0x01
198 strb ip, [r3], #0x01
199 ldrbgt ip, [r1]
200 strbge r2, [r3], #0x01
201 strbgt ip, [r3]
202 RET
203
204
205 /*
206 * At this point, it has not been possible to word align both buffers.
207 * The destination buffer is word aligned, but the source buffer is not.
208 */
209 .Lmemcpy_bad_align:
210 push {r4-r7}
211 bic r1, r1, #0x03
212 cmp ip, #2
213 ldr ip, [r1], #0x04
214 bgt .Lmemcpy_bad3
215 beq .Lmemcpy_bad2
216 b .Lmemcpy_bad1
217
218 .Lmemcpy_bad1_loop16:
219 #ifdef __ARMEB__
220 mov r4, ip, lsl #8
221 #else
222 mov r4, ip, lsr #8
223 #endif
224 ldr r5, [r1], #0x04
225 pld [r1, #0x018]
226 ldr r6, [r1], #0x04
227 ldr r7, [r1], #0x04
228 ldr ip, [r1], #0x04
229 #ifdef __ARMEB__
230 orr r4, r4, r5, lsr #24
231 mov r5, r5, lsl #8
232 orr r5, r5, r6, lsr #24
233 mov r6, r6, lsl #8
234 orr r6, r6, r7, lsr #24
235 mov r7, r7, lsl #8
236 orr r7, r7, ip, lsr #24
237 #else
238 orr r4, r4, r5, lsl #24
239 mov r5, r5, lsr #8
240 orr r5, r5, r6, lsl #24
241 mov r6, r6, lsr #8
242 orr r6, r6, r7, lsl #24
243 mov r7, r7, lsr #8
244 orr r7, r7, ip, lsl #24
245 #endif
246 str r4, [r3], #0x04
247 str r5, [r3], #0x04
248 str r6, [r3], #0x04
249 str r7, [r3], #0x04
250 sub r2, r2, #0x10
251
252 .Lmemcpy_bad1:
253 cmp r2, #0x20
254 bge .Lmemcpy_bad1_loop16
255 cmp r2, #0x10
256 blt .Lmemcpy_bad1_loop16_short
257
258 /* copy last 16 bytes (without preload) */
259 #ifdef __ARMEB__
260 mov r4, ip, lsl #8
261 #else
262 mov r4, ip, lsr #8
263 #endif
264 ldr r5, [r1], #0x04
265 ldr r6, [r1], #0x04
266 ldr r7, [r1], #0x04
267 ldr ip, [r1], #0x04
268 #ifdef __ARMEB__
269 orr r4, r4, r5, lsr #24
270 mov r5, r5, lsl #8
271 orr r5, r5, r6, lsr #24
272 mov r6, r6, lsl #8
273 orr r6, r6, r7, lsr #24
274 mov r7, r7, lsl #8
275 orr r7, r7, ip, lsr #24
276 #else
277 orr r4, r4, r5, lsl #24
278 mov r5, r5, lsr #8
279 orr r5, r5, r6, lsl #24
280 mov r6, r6, lsr #8
281 orr r6, r6, r7, lsl #24
282 mov r7, r7, lsr #8
283 orr r7, r7, ip, lsl #24
284 #endif
285 str r4, [r3], #0x04
286 str r5, [r3], #0x04
287 str r6, [r3], #0x04
288 str r7, [r3], #0x04
289 subs r2, r2, #0x10
290 popeq {r4-r7}
291 RETc(eq) /* Return now if done */
292
293 .Lmemcpy_bad1_loop16_short:
294 subs r2, r2, #0x04
295 sublt r1, r1, #0x03
296 blt .Lmemcpy_bad_done
297
298 .Lmemcpy_bad1_loop4:
299 #ifdef __ARMEB__
300 mov r4, ip, lsl #8
301 #else
302 mov r4, ip, lsr #8
303 #endif
304 ldr ip, [r1], #0x04
305 subs r2, r2, #0x04
306 #ifdef __ARMEB__
307 orr r4, r4, ip, lsr #24
308 #else
309 orr r4, r4, ip, lsl #24
310 #endif
311 str r4, [r3], #0x04
312 bge .Lmemcpy_bad1_loop4
313 sub r1, r1, #0x03
314 b .Lmemcpy_bad_done
315
316 .Lmemcpy_bad2_loop16:
317 #ifdef __ARMEB__
318 mov r4, ip, lsl #16
319 #else
320 mov r4, ip, lsr #16
321 #endif
322 ldr r5, [r1], #0x04
323 pld [r1, #0x018]
324 ldr r6, [r1], #0x04
325 ldr r7, [r1], #0x04
326 ldr ip, [r1], #0x04
327 #ifdef __ARMEB__
328 orr r4, r4, r5, lsr #16
329 mov r5, r5, lsl #16
330 orr r5, r5, r6, lsr #16
331 mov r6, r6, lsl #16
332 orr r6, r6, r7, lsr #16
333 mov r7, r7, lsl #16
334 orr r7, r7, ip, lsr #16
335 #else
336 orr r4, r4, r5, lsl #16
337 mov r5, r5, lsr #16
338 orr r5, r5, r6, lsl #16
339 mov r6, r6, lsr #16
340 orr r6, r6, r7, lsl #16
341 mov r7, r7, lsr #16
342 orr r7, r7, ip, lsl #16
343 #endif
344 str r4, [r3], #0x04
345 str r5, [r3], #0x04
346 str r6, [r3], #0x04
347 str r7, [r3], #0x04
348 sub r2, r2, #0x10
349
350 .Lmemcpy_bad2:
351 cmp r2, #0x20
352 bge .Lmemcpy_bad2_loop16
353 cmp r2, #0x10
354 blt .Lmemcpy_bad2_loop16_short
355
356 /* copy last 16 bytes (without preload) */
357 #ifdef __ARMEB__
358 mov r4, ip, lsl #16
359 #else
360 mov r4, ip, lsr #16
361 #endif
362 ldr r5, [r1], #0x04
363 ldr r6, [r1], #0x04
364 ldr r7, [r1], #0x04
365 ldr ip, [r1], #0x04
366 #ifdef __ARMEB__
367 orr r4, r4, r5, lsr #16
368 mov r5, r5, lsl #16
369 orr r5, r5, r6, lsr #16
370 mov r6, r6, lsl #16
371 orr r6, r6, r7, lsr #16
372 mov r7, r7, lsl #16
373 orr r7, r7, ip, lsr #16
374 #else
375 orr r4, r4, r5, lsl #16
376 mov r5, r5, lsr #16
377 orr r5, r5, r6, lsl #16
378 mov r6, r6, lsr #16
379 orr r6, r6, r7, lsl #16
380 mov r7, r7, lsr #16
381 orr r7, r7, ip, lsl #16
382 #endif
383 str r4, [r3], #0x04
384 str r5, [r3], #0x04
385 str r6, [r3], #0x04
386 str r7, [r3], #0x04
387 subs r2, r2, #0x10
388 popeq {r4-r7}
389 RETc(eq) /* Return now if done */
390
391 .Lmemcpy_bad2_loop16_short:
392 subs r2, r2, #0x04
393 sublt r1, r1, #0x02
394 blt .Lmemcpy_bad_done
395
396 .Lmemcpy_bad2_loop4:
397 #ifdef __ARMEB__
398 mov r4, ip, lsl #16
399 #else
400 mov r4, ip, lsr #16
401 #endif
402 ldr ip, [r1], #0x04
403 subs r2, r2, #0x04
404 #ifdef __ARMEB__
405 orr r4, r4, ip, lsr #16
406 #else
407 orr r4, r4, ip, lsl #16
408 #endif
409 str r4, [r3], #0x04
410 bge .Lmemcpy_bad2_loop4
411 sub r1, r1, #0x02
412 b .Lmemcpy_bad_done
413
414 .Lmemcpy_bad3_loop16:
415 #ifdef __ARMEB__
416 mov r4, ip, lsl #24
417 #else
418 mov r4, ip, lsr #24
419 #endif
420 ldr r5, [r1], #0x04
421 pld [r1, #0x018]
422 ldr r6, [r1], #0x04
423 ldr r7, [r1], #0x04
424 ldr ip, [r1], #0x04
425 #ifdef __ARMEB__
426 orr r4, r4, r5, lsr #8
427 mov r5, r5, lsl #24
428 orr r5, r5, r6, lsr #8
429 mov r6, r6, lsl #24
430 orr r6, r6, r7, lsr #8
431 mov r7, r7, lsl #24
432 orr r7, r7, ip, lsr #8
433 #else
434 orr r4, r4, r5, lsl #8
435 mov r5, r5, lsr #24
436 orr r5, r5, r6, lsl #8
437 mov r6, r6, lsr #24
438 orr r6, r6, r7, lsl #8
439 mov r7, r7, lsr #24
440 orr r7, r7, ip, lsl #8
441 #endif
442 str r4, [r3], #0x04
443 str r5, [r3], #0x04
444 str r6, [r3], #0x04
445 str r7, [r3], #0x04
446 sub r2, r2, #0x10
447
448 .Lmemcpy_bad3:
449 cmp r2, #0x20
450 bge .Lmemcpy_bad3_loop16
451 cmp r2, #0x10
452 blt .Lmemcpy_bad3_loop16_short
453
454 /* copy last 16 bytes (without preload) */
455 #ifdef __ARMEB__
456 mov r4, ip, lsl #24
457 #else
458 mov r4, ip, lsr #24
459 #endif
460 ldr r5, [r1], #0x04
461 ldr r6, [r1], #0x04
462 ldr r7, [r1], #0x04
463 ldr ip, [r1], #0x04
464 #ifdef __ARMEB__
465 orr r4, r4, r5, lsr #8
466 mov r5, r5, lsl #24
467 orr r5, r5, r6, lsr #8
468 mov r6, r6, lsl #24
469 orr r6, r6, r7, lsr #8
470 mov r7, r7, lsl #24
471 orr r7, r7, ip, lsr #8
472 #else
473 orr r4, r4, r5, lsl #8
474 mov r5, r5, lsr #24
475 orr r5, r5, r6, lsl #8
476 mov r6, r6, lsr #24
477 orr r6, r6, r7, lsl #8
478 mov r7, r7, lsr #24
479 orr r7, r7, ip, lsl #8
480 #endif
481 str r4, [r3], #0x04
482 str r5, [r3], #0x04
483 str r6, [r3], #0x04
484 str r7, [r3], #0x04
485 subs r2, r2, #0x10
486 popeq {r4-r7}
487 RETc(eq) /* Return now if done */
488
489 .Lmemcpy_bad3_loop16_short:
490 subs r2, r2, #0x04
491 sublt r1, r1, #0x01
492 blt .Lmemcpy_bad_done
493
494 .Lmemcpy_bad3_loop4:
495 #ifdef __ARMEB__
496 mov r4, ip, lsl #24
497 #else
498 mov r4, ip, lsr #24
499 #endif
500 ldr ip, [r1], #0x04
501 subs r2, r2, #0x04
502 #ifdef __ARMEB__
503 orr r4, r4, ip, lsr #8
504 #else
505 orr r4, r4, ip, lsl #8
506 #endif
507 str r4, [r3], #0x04
508 bge .Lmemcpy_bad3_loop4
509 sub r1, r1, #0x01
510
511 .Lmemcpy_bad_done:
512 pop {r4-r7}
513 adds r2, r2, #0x04
514 RETc(eq)
515 ldrb ip, [r1], #0x01
516 cmp r2, #0x02
517 ldrbge r2, [r1], #0x01
518 strb ip, [r3], #0x01
519 ldrbgt ip, [r1]
520 strbge r2, [r3], #0x01
521 strbgt ip, [r3]
522 RET
523
524
525 /*
526 * Handle short copies (less than 16 bytes), possibly misaligned.
527 * Some of these are *very* common, thanks to the network stack,
528 * and so are handled specially.
529 */
530 .Lmemcpy_short:
531 #ifndef _STANDALONE
532 add pc, pc, r2, lsl #2
533 nop
534 RET /* 0x00 */
535 b .Lmemcpy_bytewise /* 0x01 */
536 b .Lmemcpy_bytewise /* 0x02 */
537 b .Lmemcpy_bytewise /* 0x03 */
538 b .Lmemcpy_4 /* 0x04 */
539 b .Lmemcpy_bytewise /* 0x05 */
540 b .Lmemcpy_6 /* 0x06 */
541 b .Lmemcpy_bytewise /* 0x07 */
542 b .Lmemcpy_8 /* 0x08 */
543 b .Lmemcpy_bytewise /* 0x09 */
544 b .Lmemcpy_bytewise /* 0x0a */
545 b .Lmemcpy_bytewise /* 0x0b */
546 b .Lmemcpy_c /* 0x0c */
547 #endif
548 .Lmemcpy_bytewise:
549 mov r3, r0 /* We must not clobber r0 */
550 ldrb ip, [r1], #0x01
551 1: subs r2, r2, #0x01
552 strb ip, [r3], #0x01
553 ldrbne ip, [r1], #0x01
554 bne 1b
555 RET
556
557 #ifndef _STANDALONE
558 /******************************************************************************
559 * Special case for 4 byte copies
560 */
561 #define LMEMCPY_4_LOG2 6 /* 64 bytes */
562 #define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2
563 LMEMCPY_4_PAD
564 .Lmemcpy_4:
565 and r2, r1, #0x03
566 orr r2, r2, r0, lsl #2
567 ands r2, r2, #0x0f
568 sub r3, pc, #0x14
569 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2
570
571 /*
572 * 0000: dst is 32-bit aligned, src is 32-bit aligned
573 */
574 ldr r2, [r1]
575 str r2, [r0]
576 RET
577 LMEMCPY_4_PAD
578
579 /*
580 * 0001: dst is 32-bit aligned, src is 8-bit aligned
581 */
582 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
583 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */
584 #ifdef __ARMEB__
585 mov r3, r3, lsl #8 /* r3 = 012. */
586 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
587 #else
588 mov r3, r3, lsr #8 /* r3 = .210 */
589 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
590 #endif
591 str r3, [r0]
592 RET
593 LMEMCPY_4_PAD
594
595 /*
596 * 0010: dst is 32-bit aligned, src is 16-bit aligned
597 */
598 #ifdef __ARMEB__
599 ldrh r3, [r1]
600 ldrh r2, [r1, #0x02]
601 #else
602 ldrh r3, [r1, #0x02]
603 ldrh r2, [r1]
604 #endif
605 orr r3, r2, r3, lsl #16
606 str r3, [r0]
607 RET
608 LMEMCPY_4_PAD
609
610 /*
611 * 0011: dst is 32-bit aligned, src is 8-bit aligned
612 */
613 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */
614 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */
615 #ifdef __ARMEB__
616 mov r3, r3, lsl #24 /* r3 = 0... */
617 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
618 #else
619 mov r3, r3, lsr #24 /* r3 = ...0 */
620 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
621 #endif
622 str r3, [r0]
623 RET
624 LMEMCPY_4_PAD
625
626 /*
627 * 0100: dst is 8-bit aligned, src is 32-bit aligned
628 */
629 ldr r2, [r1]
630 #ifdef __ARMEB__
631 strb r2, [r0, #0x03]
632 mov r3, r2, lsr #8
633 mov r1, r2, lsr #24
634 strb r1, [r0]
635 #else
636 strb r2, [r0]
637 mov r3, r2, lsr #8
638 mov r1, r2, lsr #24
639 strb r1, [r0, #0x03]
640 #endif
641 strh r3, [r0, #0x01]
642 RET
643 LMEMCPY_4_PAD
644
645 /*
646 * 0101: dst is 8-bit aligned, src is 8-bit aligned
647 */
648 ldrb r2, [r1]
649 ldrh r3, [r1, #0x01]
650 ldrb r1, [r1, #0x03]
651 strb r2, [r0]
652 strh r3, [r0, #0x01]
653 strb r1, [r0, #0x03]
654 RET
655 LMEMCPY_4_PAD
656
657 /*
658 * 0110: dst is 8-bit aligned, src is 16-bit aligned
659 */
660 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
661 ldrh r3, [r1, #0x02] /* LE:r3 = ..23 LE:r3 = ..32 */
662 #ifdef __ARMEB__
663 mov r1, r2, lsr #8 /* r1 = ...0 */
664 strb r1, [r0]
665 mov r2, r2, lsl #8 /* r2 = .01. */
666 orr r2, r2, r3, lsr #8 /* r2 = .012 */
667 #else
668 strb r2, [r0]
669 mov r2, r2, lsr #8 /* r2 = ...1 */
670 orr r2, r2, r3, lsl #8 /* r2 = .321 */
671 mov r3, r3, lsr #8 /* r3 = ...3 */
672 #endif
673 strh r2, [r0, #0x01]
674 strb r3, [r0, #0x03]
675 RET
676 LMEMCPY_4_PAD
677
678 /*
679 * 0111: dst is 8-bit aligned, src is 8-bit aligned
680 */
681 ldrb r2, [r1]
682 ldrh r3, [r1, #0x01]
683 ldrb r1, [r1, #0x03]
684 strb r2, [r0]
685 strh r3, [r0, #0x01]
686 strb r1, [r0, #0x03]
687 RET
688 LMEMCPY_4_PAD
689
690 /*
691 * 1000: dst is 16-bit aligned, src is 32-bit aligned
692 */
693 ldr r2, [r1]
694 #ifdef __ARMEB__
695 strh r2, [r0, #0x02]
696 mov r3, r2, lsr #16
697 strh r3, [r0]
698 #else
699 strh r2, [r0]
700 mov r3, r2, lsr #16
701 strh r3, [r0, #0x02]
702 #endif
703 RET
704 LMEMCPY_4_PAD
705
706 /*
707 * 1001: dst is 16-bit aligned, src is 8-bit aligned
708 */
709 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
710 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */
711 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
712 strh r1, [r0]
713 #ifdef __ARMEB__
714 mov r2, r2, lsl #8 /* r2 = 012. */
715 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
716 #else
717 mov r2, r2, lsr #24 /* r2 = ...2 */
718 orr r2, r2, r3, lsl #8 /* r2 = xx32 */
719 #endif
720 strh r2, [r0, #0x02]
721 RET
722 LMEMCPY_4_PAD
723
724 /*
725 * 1010: dst is 16-bit aligned, src is 16-bit aligned
726 */
727 ldrh r2, [r1]
728 ldrh r3, [r1, #0x02]
729 strh r2, [r0]
730 strh r3, [r0, #0x02]
731 RET
732 LMEMCPY_4_PAD
733
734 /*
735 * 1011: dst is 16-bit aligned, src is 8-bit aligned
736 */
737 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */
738 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
739 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */
740 strh r1, [r0, #0x02]
741 #ifdef __ARMEB__
742 mov r3, r3, lsr #24 /* r3 = ...1 */
743 orr r3, r3, r2, lsl #8 /* r3 = xx01 */
744 #else
745 mov r3, r3, lsl #8 /* r3 = 321. */
746 orr r3, r3, r2, lsr #24 /* r3 = 3210 */
747 #endif
748 strh r3, [r0]
749 RET
750 LMEMCPY_4_PAD
751
752 /*
753 * 1100: dst is 8-bit aligned, src is 32-bit aligned
754 */
755 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
756 #ifdef __ARMEB__
757 strb r2, [r0, #0x03]
758 mov r3, r2, lsr #8
759 mov r1, r2, lsr #24
760 strh r3, [r0, #0x01]
761 strb r1, [r0]
762 #else
763 strb r2, [r0]
764 mov r3, r2, lsr #8
765 mov r1, r2, lsr #24
766 strh r3, [r0, #0x01]
767 strb r1, [r0, #0x03]
768 #endif
769 RET
770 LMEMCPY_4_PAD
771
772 /*
773 * 1101: dst is 8-bit aligned, src is 8-bit aligned
774 */
775 ldrb r2, [r1]
776 ldrh r3, [r1, #0x01]
777 ldrb r1, [r1, #0x03]
778 strb r2, [r0]
779 strh r3, [r0, #0x01]
780 strb r1, [r0, #0x03]
781 RET
782 LMEMCPY_4_PAD
783
784 /*
785 * 1110: dst is 8-bit aligned, src is 16-bit aligned
786 */
787 #ifdef __ARMEB__
788 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
789 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
790 strb r3, [r0, #0x03]
791 mov r3, r3, lsr #8 /* r3 = ...2 */
792 orr r3, r3, r2, lsl #8 /* r3 = ..12 */
793 strh r3, [r0, #0x01]
794 mov r2, r2, lsr #8 /* r2 = ...0 */
795 strb r2, [r0]
796 #else
797 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
798 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
799 strb r2, [r0]
800 mov r2, r2, lsr #8 /* r2 = ...1 */
801 orr r2, r2, r3, lsl #8 /* r2 = .321 */
802 strh r2, [r0, #0x01]
803 mov r3, r3, lsr #8 /* r3 = ...3 */
804 strb r3, [r0, #0x03]
805 #endif
806 RET
807 LMEMCPY_4_PAD
808
809 /*
810 * 1111: dst is 8-bit aligned, src is 8-bit aligned
811 */
812 ldrb r2, [r1]
813 ldrh r3, [r1, #0x01]
814 ldrb r1, [r1, #0x03]
815 strb r2, [r0]
816 strh r3, [r0, #0x01]
817 strb r1, [r0, #0x03]
818 RET
819 LMEMCPY_4_PAD
820
821
822 /******************************************************************************
823 * Special case for 6 byte copies
824 */
825 #define LMEMCPY_6_LOG2 6 /* 64 bytes */
826 #define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2
827 LMEMCPY_6_PAD
828 .Lmemcpy_6:
829 and r2, r1, #0x03
830 orr r2, r2, r0, lsl #2
831 ands r2, r2, #0x0f
832 sub r3, pc, #0x14
833 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2
834
835 /*
836 * 0000: dst is 32-bit aligned, src is 32-bit aligned
837 */
838 ldr r2, [r1]
839 ldrh r3, [r1, #0x04]
840 str r2, [r0]
841 strh r3, [r0, #0x04]
842 RET
843 LMEMCPY_6_PAD
844
845 /*
846 * 0001: dst is 32-bit aligned, src is 8-bit aligned
847 */
848 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
849 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */
850 #ifdef __ARMEB__
851 mov r2, r2, lsl #8 /* r2 = 012. */
852 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
853 #else
854 mov r2, r2, lsr #8 /* r2 = .210 */
855 orr r2, r2, r3, lsl #24 /* r2 = 3210 */
856 #endif
857 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */
858 str r2, [r0]
859 strh r3, [r0, #0x04]
860 RET
861 LMEMCPY_6_PAD
862
863 /*
864 * 0010: dst is 32-bit aligned, src is 16-bit aligned
865 */
866 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
867 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
868 #ifdef __ARMEB__
869 mov r1, r3, lsr #16 /* r1 = ..23 */
870 orr r1, r1, r2, lsl #16 /* r1 = 0123 */
871 str r1, [r0]
872 strh r3, [r0, #0x04]
873 #else
874 mov r1, r3, lsr #16 /* r1 = ..54 */
875 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
876 str r2, [r0]
877 strh r1, [r0, #0x04]
878 #endif
879 RET
880 LMEMCPY_6_PAD
881
882 /*
883 * 0011: dst is 32-bit aligned, src is 8-bit aligned
884 */
885 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
886 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */
887 ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r3 = xxx5 */
888 #ifdef __ARMEB__
889 mov r2, r2, lsl #24 /* r2 = 0... */
890 orr r2, r2, r3, lsr #8 /* r2 = 0123 */
891 mov r3, r3, lsl #8 /* r3 = 234. */
892 orr r1, r3, r1, lsr #24 /* r1 = 2345 */
893 #else
894 mov r2, r2, lsr #24 /* r2 = ...0 */
895 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
896 mov r1, r1, lsl #8 /* r1 = xx5. */
897 orr r1, r1, r3, lsr #24 /* r1 = xx54 */
898 #endif
899 str r2, [r0]
900 strh r1, [r0, #0x04]
901 RET
902 LMEMCPY_6_PAD
903
904 /*
905 * 0100: dst is 8-bit aligned, src is 32-bit aligned
906 */
907 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
908 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */
909 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
910 strh r1, [r0, #0x01]
911 #ifdef __ARMEB__
912 mov r1, r3, lsr #24 /* r1 = ...0 */
913 strb r1, [r0]
914 mov r3, r3, lsl #8 /* r3 = 123. */
915 orr r3, r3, r2, lsr #8 /* r3 = 1234 */
916 #else
917 strb r3, [r0]
918 mov r3, r3, lsr #24 /* r3 = ...3 */
919 orr r3, r3, r2, lsl #8 /* r3 = .543 */
920 mov r2, r2, lsr #8 /* r2 = ...5 */
921 #endif
922 strh r3, [r0, #0x03]
923 strb r2, [r0, #0x05]
924 RET
925 LMEMCPY_6_PAD
926
927 /*
928 * 0101: dst is 8-bit aligned, src is 8-bit aligned
929 */
930 ldrb r2, [r1]
931 ldrh r3, [r1, #0x01]
932 ldrh ip, [r1, #0x03]
933 ldrb r1, [r1, #0x05]
934 strb r2, [r0]
935 strh r3, [r0, #0x01]
936 strh ip, [r0, #0x03]
937 strb r1, [r0, #0x05]
938 RET
939 LMEMCPY_6_PAD
940
941 /*
942 * 0110: dst is 8-bit aligned, src is 16-bit aligned
943 */
944 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
945 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
946 #ifdef __ARMEB__
947 mov r3, r2, lsr #8 /* r3 = ...0 */
948 strb r3, [r0]
949 strb r1, [r0, #0x05]
950 mov r3, r1, lsr #8 /* r3 = .234 */
951 strh r3, [r0, #0x03]
952 mov r3, r2, lsl #8 /* r3 = .01. */
953 orr r3, r3, r1, lsr #24 /* r3 = .012 */
954 strh r3, [r0, #0x01]
955 #else
956 strb r2, [r0]
957 mov r3, r1, lsr #24
958 strb r3, [r0, #0x05]
959 mov r3, r1, lsr #8 /* r3 = .543 */
960 strh r3, [r0, #0x03]
961 mov r3, r2, lsr #8 /* r3 = ...1 */
962 orr r3, r3, r1, lsl #8 /* r3 = 4321 */
963 strh r3, [r0, #0x01]
964 #endif
965 RET
966 LMEMCPY_6_PAD
967
968 /*
969 * 0111: dst is 8-bit aligned, src is 8-bit aligned
970 */
971 ldrb r2, [r1]
972 ldrh r3, [r1, #0x01]
973 ldrh ip, [r1, #0x03]
974 ldrb r1, [r1, #0x05]
975 strb r2, [r0]
976 strh r3, [r0, #0x01]
977 strh ip, [r0, #0x03]
978 strb r1, [r0, #0x05]
979 RET
980 LMEMCPY_6_PAD
981
982 /*
983 * 1000: dst is 16-bit aligned, src is 32-bit aligned
984 */
985 #ifdef __ARMEB__
986 ldr r2, [r1] /* r2 = 0123 */
987 ldrh r3, [r1, #0x04] /* r3 = ..45 */
988 mov r1, r2, lsr #16 /* r1 = ..01 */
989 orr r3, r3, r2, lsl#16 /* r3 = 2345 */
990 strh r1, [r0]
991 str r3, [r0, #0x02]
992 #else
993 ldrh r2, [r1, #0x04] /* r2 = ..54 */
994 ldr r3, [r1] /* r3 = 3210 */
995 mov r2, r2, lsl #16 /* r2 = 54.. */
996 orr r2, r2, r3, lsr #16 /* r2 = 5432 */
997 strh r3, [r0]
998 str r2, [r0, #0x02]
999 #endif
1000 RET
1001 LMEMCPY_6_PAD
1002
1003 /*
1004 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1005 */
1006 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1007 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */
1008 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1009 #ifdef __ARMEB__
1010 mov r2, r2, lsr #8 /* r2 = .345 */
1011 orr r2, r2, r3, lsl #24 /* r2 = 2345 */
1012 #else
1013 mov r2, r2, lsl #8 /* r2 = 543. */
1014 orr r2, r2, r3, lsr #24 /* r2 = 5432 */
1015 #endif
1016 strh r1, [r0]
1017 str r2, [r0, #0x02]
1018 RET
1019 LMEMCPY_6_PAD
1020
1021 /*
1022 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1023 */
1024 ldrh r2, [r1]
1025 ldr r3, [r1, #0x02]
1026 strh r2, [r0]
1027 str r3, [r0, #0x02]
1028 RET
1029 LMEMCPY_6_PAD
1030
1031 /*
1032 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1033 */
1034 ldrb r3, [r1] /* r3 = ...0 */
1035 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
1036 ldrb r1, [r1, #0x05] /* r1 = ...5 */
1037 #ifdef __ARMEB__
1038 mov r3, r3, lsl #8 /* r3 = ..0. */
1039 orr r3, r3, r2, lsr #24 /* r3 = ..01 */
1040 orr r1, r1, r2, lsl #8 /* r1 = 2345 */
1041 #else
1042 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1043 mov r1, r1, lsl #24 /* r1 = 5... */
1044 orr r1, r1, r2, lsr #8 /* r1 = 5432 */
1045 #endif
1046 strh r3, [r0]
1047 str r1, [r0, #0x02]
1048 RET
1049 LMEMCPY_6_PAD
1050
1051 /*
1052 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1053 */
1054 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1055 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */
1056 #ifdef __ARMEB__
1057 mov r3, r2, lsr #24 /* r3 = ...0 */
1058 strb r3, [r0]
1059 mov r2, r2, lsl #8 /* r2 = 123. */
1060 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
1061 #else
1062 strb r2, [r0]
1063 mov r2, r2, lsr #8 /* r2 = .321 */
1064 orr r2, r2, r1, lsl #24 /* r2 = 4321 */
1065 mov r1, r1, lsr #8 /* r1 = ...5 */
1066 #endif
1067 str r2, [r0, #0x01]
1068 strb r1, [r0, #0x05]
1069 RET
1070 LMEMCPY_6_PAD
1071
1072 /*
1073 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1074 */
1075 ldrb r2, [r1]
1076 ldrh r3, [r1, #0x01]
1077 ldrh ip, [r1, #0x03]
1078 ldrb r1, [r1, #0x05]
1079 strb r2, [r0]
1080 strh r3, [r0, #0x01]
1081 strh ip, [r0, #0x03]
1082 strb r1, [r0, #0x05]
1083 RET
1084 LMEMCPY_6_PAD
1085
1086 /*
1087 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1088 */
1089 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1090 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
1091 #ifdef __ARMEB__
1092 mov r3, r2, lsr #8 /* r3 = ...0 */
1093 strb r3, [r0]
1094 mov r2, r2, lsl #24 /* r2 = 1... */
1095 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
1096 #else
1097 strb r2, [r0]
1098 mov r2, r2, lsr #8 /* r2 = ...1 */
1099 orr r2, r2, r1, lsl #8 /* r2 = 4321 */
1100 mov r1, r1, lsr #24 /* r1 = ...5 */
1101 #endif
1102 str r2, [r0, #0x01]
1103 strb r1, [r0, #0x05]
1104 RET
1105 LMEMCPY_6_PAD
1106
1107 /*
1108 * 1111: dst is 8-bit aligned, src is 8-bit aligned
1109 */
1110 ldrb r2, [r1]
1111 ldr r3, [r1, #0x01]
1112 ldrb r1, [r1, #0x05]
1113 strb r2, [r0]
1114 str r3, [r0, #0x01]
1115 strb r1, [r0, #0x05]
1116 RET
1117 LMEMCPY_6_PAD
1118
1119
1120 /******************************************************************************
1121 * Special case for 8 byte copies
1122 */
1123 #define LMEMCPY_8_LOG2 6 /* 64 bytes */
1124 #define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2
1125 LMEMCPY_8_PAD
1126 .Lmemcpy_8:
1127 and r2, r1, #0x03
1128 orr r2, r2, r0, lsl #2
1129 ands r2, r2, #0x0f
1130 sub r3, pc, #0x14
1131 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2
1132
1133 /*
1134 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1135 */
1136 ldr r2, [r1]
1137 ldr r3, [r1, #0x04]
1138 str r2, [r0]
1139 str r3, [r0, #0x04]
1140 RET
1141 LMEMCPY_8_PAD
1142
1143 /*
1144 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1145 */
1146 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1147 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */
1148 ldrb r1, [r1, #0x07] /* r1 = ...7 */
1149 #ifdef __ARMEB__
1150 mov r3, r3, lsl #8 /* r3 = 012. */
1151 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
1152 orr r2, r1, r2, lsl #8 /* r2 = 4567 */
1153 #else
1154 mov r3, r3, lsr #8 /* r3 = .210 */
1155 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
1156 mov r1, r1, lsl #24 /* r1 = 7... */
1157 orr r2, r1, r2, lsr #8 /* r2 = 7654 */
1158 #endif
1159 str r3, [r0]
1160 str r2, [r0, #0x04]
1161 RET
1162 LMEMCPY_8_PAD
1163
1164 /*
1165 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1166 */
1167 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1168 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1169 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
1170 #ifdef __ARMEB__
1171 mov r2, r2, lsl #16 /* r2 = 01.. */
1172 orr r2, r2, r3, lsr #16 /* r2 = 0123 */
1173 orr r3, r1, r3, lsl #16 /* r3 = 4567 */
1174 #else
1175 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1176 mov r3, r3, lsr #16 /* r3 = ..54 */
1177 orr r3, r3, r1, lsl #16 /* r3 = 7654 */
1178 #endif
1179 str r2, [r0]
1180 str r3, [r0, #0x04]
1181 RET
1182 LMEMCPY_8_PAD
1183
1184 /*
1185 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1186 */
1187 ldrb r3, [r1] /* r3 = ...0 */
1188 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
1189 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */
1190 #ifdef __ARMEB__
1191 mov r3, r3, lsl #24 /* r3 = 0... */
1192 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
1193 mov r2, r2, lsl #24 /* r2 = 4... */
1194 orr r2, r2, r1, lsr #8 /* r2 = 4567 */
1195 #else
1196 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1197 mov r2, r2, lsr #24 /* r2 = ...4 */
1198 orr r2, r2, r1, lsl #8 /* r2 = 7654 */
1199 #endif
1200 str r3, [r0]
1201 str r2, [r0, #0x04]
1202 RET
1203 LMEMCPY_8_PAD
1204
1205 /*
1206 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1207 */
	/* Strategy: src gives two aligned words; scatter them to dst as
	 * byte (offset 0), halfword (offset 1), word (offset 3) and a
	 * trailing byte (offset 7), each naturally aligned for dst. */
1208 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
1209 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */
1210 #ifdef __ARMEB__
1211 mov r1, r3, lsr #24 /* r1 = ...0 */
1212 strb r1, [r0]
1213 mov r1, r3, lsr #8 /* r1 = .012 */
1214 strb r2, [r0, #0x07] /* low byte of r2 is byte 7 */
1215 mov r3, r3, lsl #24 /* r3 = 3... */
1216 orr r3, r3, r2, lsr #8 /* r3 = 3456 */
1217 #else
1218 strb r3, [r0]
1219 mov r1, r2, lsr #24 /* r1 = ...7 */
1220 strb r1, [r0, #0x07]
1221 mov r1, r3, lsr #8 /* r1 = .321 */
1222 mov r3, r3, lsr #24 /* r3 = ...3 */
1223 orr r3, r3, r2, lsl #8 /* r3 = 6543 */
1224 #endif
	/* strh stores the low halfword of r1: bytes 1-2 in either
	 * endianness given the per-branch setup above */
1225 strh r1, [r0, #0x01]
1226 str r3, [r0, #0x03]
1227 RET
1228 LMEMCPY_8_PAD
1229
1230 /*
1231 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1232 */
	/* src and dst share the same byte offset, so every access below
	 * is naturally aligned on both sides: 1+2+4+1 bytes, no
	 * shifting or endian-specific code needed. */
1233 ldrb r2, [r1]
1234 ldrh r3, [r1, #0x01]
1235 ldr ip, [r1, #0x03]
1236 ldrb r1, [r1, #0x07]
1237 strb r2, [r0]
1238 strh r3, [r0, #0x01]
1239 str ip, [r0, #0x03]
1240 strb r1, [r0, #0x07]
1241 RET
1242 LMEMCPY_8_PAD
1243
1244 /*
1245 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1246 */
	/* Strategy: 2+4+2 aligned loads from src, scattered to dst as
	 * byte/halfword/word/byte.  ip is used as scratch so r0/r1 stay
	 * live as the dst/src pointers until no longer needed. */
1247 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1248 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1249 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
1250 #ifdef __ARMEB__
1251 mov ip, r2, lsr #8 /* ip = ...0 */
1252 strb ip, [r0]
1253 mov ip, r2, lsl #8 /* ip = .01. */
1254 orr ip, ip, r3, lsr #24 /* ip = .012 */
1255 strb r1, [r0, #0x07] /* low byte of r1 is byte 7 */
1256 mov r3, r3, lsl #8 /* r3 = 345. */
1257 orr r3, r3, r1, lsr #8 /* r3 = 3456 */
1258 #else
1259 strb r2, [r0] /* 0 */
1260 mov ip, r1, lsr #8 /* ip = ...7 */
1261 strb ip, [r0, #0x07] /* 7 */
1262 mov ip, r2, lsr #8 /* ip = ...1 */
1263 orr ip, ip, r3, lsl #8 /* ip = 4321 */
1264 mov r3, r3, lsr #8 /* r3 = .543 */
1265 orr r3, r3, r1, lsl #24 /* r3 = 6543 */
1266 #endif
1267 strh ip, [r0, #0x01] /* bytes 1-2 */
1268 str r3, [r0, #0x03] /* bytes 3-6 */
1269 RET
1270 LMEMCPY_8_PAD
1271
1272 /*
1273 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1274 */
	/* src offset 3, dst offset 1: lead byte + unaligned-word worth
	 * of data gathered as word/halfword/byte, re-packed to match
	 * dst's byte-1 alignment (byte, halfword, word, byte). */
1275 ldrb r3, [r1] /* r3 = ...0 */
1276 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
1277 ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */
1278 ldrb r1, [r1, #0x07] /* r1 = ...7 */
1279 strb r3, [r0]
1280 mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */
1281 #ifdef __ARMEB__
1282 strh r3, [r0, #0x01]
1283 orr r2, r2, ip, lsl #16 /* r2 = 3456 */
1284 #else
1285 strh ip, [r0, #0x01] /* low half of ip = bytes 1-2 */
1286 orr r2, r3, r2, lsl #16 /* r2 = 6543 */
1287 #endif
1288 str r2, [r0, #0x03]
1289 strb r1, [r0, #0x07]
1290 RET
1291 LMEMCPY_8_PAD
1292
1293 /*
1294 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1295 */
	/* Strategy: two aligned word loads, re-split for dst as
	 * halfword / word / halfword (offsets 0, 2, 6). */
1296 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1297 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1298 mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
1299 #ifdef __ARMEB__
1300 strh r1, [r0]
1301 mov r1, r3, lsr #16 /* r1 = ..45 */
1302 orr r2, r1 ,r2, lsl #16 /* r2 = 2345 */
1303 #else
1304 strh r2, [r0] /* low half of r2 = bytes 0-1 */
1305 orr r2, r1, r3, lsl #16 /* r2 = 5432 */
1306 mov r3, r3, lsr #16 /* r3 = ..76 */
1307 #endif
1308 str r2, [r0, #0x02]
1309 strh r3, [r0, #0x06]
1310 RET
1311 LMEMCPY_8_PAD
1312
1313 /*
1314 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1315 */
	/* NOTE: the [r1, #-1] load reads one byte BEFORE src; that
	 * stray byte ('x') is shifted away before use.  This relies on
	 * the word containing src-1 being readable, which holds because
	 * src is not word-aligned here (same word as byte 0). */
1316 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1317 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
1318 ldrb ip, [r1, #0x07] /* ip = ...7 */
1319 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1320 strh r1, [r0] /* bytes 0-1 */
1321 #ifdef __ARMEB__
1322 mov r1, r2, lsl #24 /* r1 = 2... */
1323 orr r1, r1, r3, lsr #8 /* r1 = 2345 */
1324 orr r3, ip, r3, lsl #8 /* r3 = 4567 */
1325 #else
1326 mov r1, r2, lsr #24 /* r1 = ...2 */
1327 orr r1, r1, r3, lsl #8 /* r1 = 5432 */
1328 mov r3, r3, lsr #24 /* r3 = ...6 */
1329 orr r3, r3, ip, lsl #8 /* r3 = ..76 */
1330 #endif
1331 str r1, [r0, #0x02]
1332 strh r3, [r0, #0x06]
1333 RET
1334 LMEMCPY_8_PAD
1335
1336 /*
1337 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1338 */
	/* Matching 16-bit alignment on both sides: plain 2+4+2 copy,
	 * every access naturally aligned, endian-independent. */
1339 ldrh r2, [r1]
1340 ldr ip, [r1, #0x02]
1341 ldrh r3, [r1, #0x06]
1342 strh r2, [r0]
1343 str ip, [r0, #0x02]
1344 strh r3, [r0, #0x06]
1345 RET
1346 LMEMCPY_8_PAD
1347
1348 /*
1349 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1350 */
	/* Works back-to-front: the tail halfword is emitted first, then
	 * the middle word and the leading halfword are assembled.  The
	 * [r1, #0x05] word load picks up one byte past the copy ('x'),
	 * which is discarded by the shifts. */
1351 ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */
1352 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
1353 ldrb ip, [r1] /* ip = ...0 */
1354 mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */
1355 strh r1, [r0, #0x06] /* bytes 6-7 */
1356 #ifdef __ARMEB__
1357 mov r3, r3, lsr #24 /* r3 = ...5 */
1358 orr r3, r3, r2, lsl #8 /* r3 = 2345 */
1359 mov r2, r2, lsr #24 /* r2 = ...1 */
1360 orr r2, r2, ip, lsl #8 /* r2 = ..01 */
1361 #else
1362 mov r3, r3, lsl #24 /* r3 = 5... */
1363 orr r3, r3, r2, lsr #8 /* r3 = 5432 */
1364 orr r2, ip, r2, lsl #8 /* r2 = 3210 */
1365 #endif
1366 str r3, [r0, #0x02]
1367 strh r2, [r0]
1368 RET
1369 LMEMCPY_8_PAD
1370
1371 /*
1372 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1373 */
	/* dst is at byte offset 3: store byte (0), word (1), halfword
	 * (5), byte (7).  Two aligned word loads supply all data. */
1374 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1375 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1376 mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */
1377 strh r1, [r0, #0x05] /* bytes 5-6 */
1378 #ifdef __ARMEB__
1379 strb r3, [r0, #0x07] /* low byte of r3 is byte 7 */
1380 mov r1, r2, lsr #24 /* r1 = ...0 */
1381 strb r1, [r0]
1382 mov r2, r2, lsl #8 /* r2 = 123. */
1383 orr r2, r2, r3, lsr #24 /* r2 = 1234 */
1384 str r2, [r0, #0x01]
1385 #else
1386 strb r2, [r0] /* low byte of r2 is byte 0 */
1387 mov r1, r3, lsr #24 /* r1 = ...7 */
1388 strb r1, [r0, #0x07]
1389 mov r2, r2, lsr #8 /* r2 = .321 */
1390 orr r2, r2, r3, lsl #24 /* r2 = 4321 */
1391 str r2, [r0, #0x01]
1392 #endif
1393 RET
1394 LMEMCPY_8_PAD
1395
1396 /*
1397 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1398 */
	/* src offset 1, dst offset 3: gather as 1+2+4+1 from src,
	 * re-pack into byte / word / halfword / byte for dst. */
1399 ldrb r3, [r1] /* r3 = ...0 */
1400 ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */
1401 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
1402 ldrb r1, [r1, #0x07] /* r1 = ...7 */
1403 strb r3, [r0]
1404 mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */
1405 #ifdef __ARMEB__
1406 strh ip, [r0, #0x05] /* low half of ip = bytes 5-6 */
1407 orr r2, r3, r2, lsl #16 /* r2 = 1234 */
1408 #else
1409 strh r3, [r0, #0x05]
1410 orr r2, r2, ip, lsl #16 /* r2 = 4321 */
1411 #endif
1412 str r2, [r0, #0x01]
1413 strb r1, [r0, #0x07]
1414 RET
1415 LMEMCPY_8_PAD
1416
1417 /*
1418 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1419 */
	/* 2+4+2 aligned loads from src, stored to dst at byte offset 3
	 * as byte (0), word (1), halfword (5), byte (7). */
1420 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1421 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1422 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
1423 #ifdef __ARMEB__
1424 mov ip, r2, lsr #8 /* ip = ...0 */
1425 strb ip, [r0]
1426 mov ip, r2, lsl #24 /* ip = 1... */
1427 orr ip, ip, r3, lsr #8 /* ip = 1234 */
1428 strb r1, [r0, #0x07] /* low byte of r1 is byte 7 */
1429 mov r1, r1, lsr #8 /* r1 = ...6 */
1430 orr r1, r1, r3, lsl #8 /* r1 = 3456 */
1431 #else
1432 strb r2, [r0] /* low byte of r2 is byte 0 */
1433 mov ip, r2, lsr #8 /* ip = ...1 */
1434 orr ip, ip, r3, lsl #8 /* ip = 4321 */
1435 mov r2, r1, lsr #8 /* r2 = ...7 */
1436 strb r2, [r0, #0x07]
1437 mov r1, r1, lsl #8 /* r1 = .76. */
1438 orr r1, r1, r3, lsr #24 /* r1 = .765 */
1439 #endif
1440 str ip, [r0, #0x01] /* bytes 1-4 */
1441 strh r1, [r0, #0x05] /* bytes 5-6 */
1442 RET
1443 LMEMCPY_8_PAD
1444
1445 /*
1446 * 1111: dst is 8-bit aligned, src is 8-bit aligned
1447 */
	/* Both pointers at byte offset 3: every access below is
	 * naturally aligned on both sides (1+4+2+1), so the copy is
	 * direct and endian-independent. */
1448 ldrb r2, [r1]
1449 ldr ip, [r1, #0x01]
1450 ldrh r3, [r1, #0x05]
1451 ldrb r1, [r1, #0x07]
1452 strb r2, [r0]
1453 str ip, [r0, #0x01]
1454 strh r3, [r0, #0x05]
1455 strb r1, [r0, #0x07]
1456 RET
1457 LMEMCPY_8_PAD
1458
1459 /******************************************************************************
1460 * Special case for 12 byte copies
1461 */
1462 #define LMEMCPY_C_LOG2 7 /* 128 bytes */
1463 #define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2
1464 LMEMCPY_C_PAD
1465 .Lmemcpy_c:
	/* Build a 4-bit case index from the alignment of both pointers:
	 * index = ((dst & 3) << 2) | (src & 3).  r0 = dst, r1 = src. */
1466 and r2, r1, #0x03
1467 orr r2, r2, r0, lsl #2
1468 ands r2, r2, #0x0f
	/* Each case body below is padded to a 128-byte slot, so case N
	 * lives at .Lmemcpy_c + (N << LMEMCPY_C_LOG2).  pc reads 8
	 * bytes ahead on ARM; subtracting 0x14 backs up over that plus
	 * the three preceding instructions, leaving r3 = .Lmemcpy_c.
	 * Case 0000 (both 32-bit aligned) is the fall-through when the
	 * ands above set Z. */
1469 sub r3, pc, #0x14
1470 addne pc, r3, r2, lsl #LMEMCPY_C_LOG2
1471
1472 /*
1473 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1474 */
	/* Best case: three aligned word loads, three aligned word
	 * stores; no shifting, endian-independent. */
1475 ldr r2, [r1]
1476 ldr r3, [r1, #0x04]
1477 ldr r1, [r1, #0x08]
1478 str r2, [r0]
1479 str r3, [r0, #0x04]
1480 str r1, [r0, #0x08]
1481 RET
1482 LMEMCPY_C_PAD
1483
1484 /*
1485 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1486 */
	/* Hex digits 0-B in the comments name the 12 source bytes.  The
	 * [r1, #-1] load reads one byte before src ('x') and the shifts
	 * discard it; safe because src sits at byte offset 1, so src-1
	 * is in the same readable word.  Bytes merge into three whole
	 * words for aligned dst stores. */
1487 ldrb r2, [r1, #0xb] /* r2 = ...B */
1488 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
1489 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
1490 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
1491 #ifdef __ARMEB__
1492 orr r2, r2, ip, lsl #8 /* r2 = 89AB */
1493 str r2, [r0, #0x08]
1494 mov r2, ip, lsr #24 /* r2 = ...7 */
1495 orr r2, r2, r3, lsl #8 /* r2 = 4567 */
1496 mov r1, r1, lsl #8 /* r1 = 012. */
1497 orr r1, r1, r3, lsr #24 /* r1 = 0123 */
1498 #else
1499 mov r2, r2, lsl #24 /* r2 = B... */
1500 orr r2, r2, ip, lsr #8 /* r2 = BA98 */
1501 str r2, [r0, #0x08]
1502 mov r2, ip, lsl #24 /* r2 = 7... */
1503 orr r2, r2, r3, lsr #8 /* r2 = 7654 */
1504 mov r1, r1, lsr #8 /* r1 = .210 */
1505 orr r1, r1, r3, lsl #24 /* r1 = 3210 */
1506 #endif
1507 str r2, [r0, #0x04]
1508 str r1, [r0]
1509 RET
1510 LMEMCPY_C_PAD
1511
1512 /*
1513 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1514 */
	/* Strategy: 2+4+4+2 aligned loads from src, merged with 16-bit
	 * shifts into three words for aligned dst stores. */
1515 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1516 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1517 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
1518 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
1519 #ifdef __ARMEB__
1520 mov r2, r2, lsl #16 /* r2 = 01.. */
1521 orr r2, r2, r3, lsr #16 /* r2 = 0123 */
1522 str r2, [r0]
1523 mov r3, r3, lsl #16 /* r3 = 45.. */
1524 orr r3, r3, ip, lsr #16 /* r3 = 4567 */
1525 orr r1, r1, ip, lsl #16 /* r1 = 89AB */
1526 #else
1527 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1528 str r2, [r0]
1529 mov r3, r3, lsr #16 /* r3 = ..54 */
1530 orr r3, r3, ip, lsl #16 /* r3 = 7654 */
1531 mov r1, r1, lsl #16 /* r1 = BA.. */
1532 orr r1, r1, ip, lsr #16 /* r1 = BA98 */
1533 #endif
1534 str r3, [r0, #0x04]
1535 str r1, [r0, #0x08]
1536 RET
1537 LMEMCPY_C_PAD
1538
1539 /*
1540 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1541 */
	/* Lead byte then three word loads; the last load at [r1, #0x09]
	 * reads one byte past the copy ('x'), discarded by the shifts.
	 * Bytes merge into three words for aligned dst stores. */
1542 ldrb r2, [r1] /* r2 = ...0 */
1543 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
1544 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
1545 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
1546 #ifdef __ARMEB__
1547 mov r2, r2, lsl #24 /* r2 = 0... */
1548 orr r2, r2, r3, lsr #8 /* r2 = 0123 */
1549 str r2, [r0]
1550 mov r3, r3, lsl #24 /* r3 = 4... */
1551 orr r3, r3, ip, lsr #8 /* r3 = 4567 */
1552 mov r1, r1, lsr #8 /* r1 = .9AB */
1553 orr r1, r1, ip, lsl #24 /* r1 = 89AB */
1554 #else
1555 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
1556 str r2, [r0]
1557 mov r3, r3, lsr #24 /* r3 = ...4 */
1558 orr r3, r3, ip, lsl #8 /* r3 = 7654 */
1559 mov r1, r1, lsl #8 /* r1 = BA9. */
1560 orr r1, r1, ip, lsr #24 /* r1 = BA98 */
1561 #endif
1562 str r3, [r0, #0x04]
1563 str r1, [r0, #0x08]
1564 RET
1565 LMEMCPY_C_PAD
1566
1567 /*
1568 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
1569 */
	/* Three aligned word loads; dst at byte offset 1 is written as
	 * byte (0), halfword (1), word (3), word (7), byte (11). */
1570 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1571 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1572 ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */
1573 mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
1574 strh r1, [r0, #0x01] /* bytes 1-2 */
1575 #ifdef __ARMEB__
1576 mov r1, r2, lsr #24 /* r1 = ...0 */
1577 strb r1, [r0]
1578 mov r1, r2, lsl #24 /* r1 = 3... */
1579 orr r2, r1, r3, lsr #8 /* r2 = 3456 */
1580 mov r1, r3, lsl #24 /* r1 = 7... */
1581 orr r1, r1, ip, lsr #8 /* r1 = 789A */
	/* low byte of ip is byte B in BE; stored by the strb below */
1582 #else
1583 strb r2, [r0] /* low byte of r2 is byte 0 */
1584 mov r1, r2, lsr #24 /* r1 = ...3 */
1585 orr r2, r1, r3, lsl #8 /* r2 = 6543 */
1586 mov r1, r3, lsr #24 /* r1 = ...7 */
1587 orr r1, r1, ip, lsl #8 /* r1 = A987 */
1588 mov ip, ip, lsr #24 /* ip = ...B */
1589 #endif
1590 str r2, [r0, #0x03]
1591 str r1, [r0, #0x07]
1592 strb ip, [r0, #0x0b]
1593 RET
1594 LMEMCPY_C_PAD
1595
1596 /*
1597 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
1598 */
	/* Matching byte-1 alignment on both sides: 1+2+4+4+1 copy with
	 * every access naturally aligned, endian-independent.  The
	 * strb is interleaved early to free r2 for reuse. */
1599 ldrb r2, [r1]
1600 ldrh r3, [r1, #0x01]
1601 ldr ip, [r1, #0x03]
1602 strb r2, [r0]
1603 ldr r2, [r1, #0x07]
1604 ldrb r1, [r1, #0x0b]
1605 strh r3, [r0, #0x01]
1606 str ip, [r0, #0x03]
1607 str r2, [r0, #0x07]
1608 strb r1, [r0, #0x0b]
1609 RET
1610 LMEMCPY_C_PAD
1611
1612 /*
1613 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
1614 */
	/* 2+4+4+2 aligned loads from src; dst at byte offset 1 takes
	 * byte (0), halfword (1), word (3), word (7), byte (11). */
1615 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1616 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1617 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
1618 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
1619 #ifdef __ARMEB__
1620 mov r2, r2, ror #8 /* r2 = 1..0 */
1621 strb r2, [r0] /* low byte of r2 is byte 0 */
1622 mov r2, r2, lsr #16 /* r2 = ..1. */
1623 orr r2, r2, r3, lsr #24 /* r2 = ..12 */
1624 strh r2, [r0, #0x01]
1625 mov r2, r3, lsl #8 /* r2 = 345. */
1626 orr r3, r2, ip, lsr #24 /* r3 = 3456 */
1627 mov r2, ip, lsl #8 /* r2 = 789. */
1628 orr r2, r2, r1, lsr #8 /* r2 = 789A */
	/* low byte of r1 is byte B in BE; stored by the strb below */
1629 #else
1630 strb r2, [r0] /* low byte of r2 is byte 0 */
1631 mov r2, r2, lsr #8 /* r2 = ...1 */
1632 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
1633 strh r2, [r0, #0x01]
1634 mov r2, r3, lsr #8 /* r2 = .543 */
1635 orr r3, r2, ip, lsl #24 /* r3 = 6543 */
1636 mov r2, ip, lsr #8 /* r2 = .987 */
1637 orr r2, r2, r1, lsl #24 /* r2 = A987 */
1638 mov r1, r1, lsr #8 /* r1 = ...B */
1639 #endif
1640 str r3, [r0, #0x03]
1641 str r2, [r0, #0x07]
1642 strb r1, [r0, #0x0b]
1643 RET
1644 LMEMCPY_C_PAD
1645
1646 /*
1647 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
1648 */
	/* Lead byte then three word loads; the [r1, #0x09] load picks
	 * up one byte past the copy ('x'), discarded below.  Pieces are
	 * re-packed with 16-bit shifts for dst's byte-1 layout. */
1649 ldrb r2, [r1]
1650 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
1651 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
1652 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
1653 strb r2, [r0]
1654 #ifdef __ARMEB__
1655 mov r2, r3, lsr #16 /* r2 = ..12 */
1656 strh r2, [r0, #0x01]
1657 mov r3, r3, lsl #16 /* r3 = 34.. */
1658 orr r3, r3, ip, lsr #16 /* r3 = 3456 */
1659 mov ip, ip, lsl #16 /* ip = 78.. */
1660 orr ip, ip, r1, lsr #16 /* ip = 789A */
1661 mov r1, r1, lsr #8 /* r1 = .9AB */
	/* low byte of r1 is now byte B; stored by the strb below */
1662 #else
1663 strh r3, [r0, #0x01] /* low half of r3 = bytes 1-2 */
1664 mov r3, r3, lsr #16 /* r3 = ..43 */
1665 orr r3, r3, ip, lsl #16 /* r3 = 6543 */
1666 mov ip, ip, lsr #16 /* ip = ..87 */
1667 orr ip, ip, r1, lsl #16 /* ip = A987 */
1668 mov r1, r1, lsr #16 /* r1 = ..xB */
1669 #endif
1670 str r3, [r0, #0x03]
1671 str ip, [r0, #0x07]
1672 strb r1, [r0, #0x0b]
1673 RET
1674 LMEMCPY_C_PAD
1675
1676 /*
1677 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1678 */
	/* Three aligned word loads, re-split with 16-bit shifts for dst
	 * as halfword (0), word (2), word (6), halfword (10). */
1679 ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */
1680 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1681 ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */
1682 mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
1683 #ifdef __ARMEB__
1684 strh r1, [r0]
1685 mov r1, ip, lsl #16 /* r1 = 23.. */
1686 orr r1, r1, r3, lsr #16 /* r1 = 2345 */
1687 mov r3, r3, lsl #16 /* r3 = 67.. */
1688 orr r3, r3, r2, lsr #16 /* r3 = 6789 */
	/* low half of r2 = bytes A-B in BE; stored by the strh below */
1689 #else
1690 strh ip, [r0] /* low half of ip = bytes 0-1 */
1691 orr r1, r1, r3, lsl #16 /* r1 = 5432 */
1692 mov r3, r3, lsr #16 /* r3 = ..76 */
1693 orr r3, r3, r2, lsl #16 /* r3 = 9876 */
1694 mov r2, r2, lsr #16 /* r2 = ..BA */
1695 #endif
1696 str r1, [r0, #0x02]
1697 str r3, [r0, #0x06]
1698 strh r2, [r0, #0x0a]
1699 RET
1700 LMEMCPY_C_PAD
1701
1702 /*
1703 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
1704 */
	/* The [r1, #-1] load reads one byte before src ('x'), shifted
	 * away before use; safe because src is at byte offset 1, so
	 * src-1 is in the same readable word.  dst takes halfword (0),
	 * word (2), word (6), halfword (10). */
1705 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1706 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
1707 mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */
1708 strh ip, [r0] /* bytes 0-1 */
1709 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
1710 ldrb r1, [r1, #0x0b] /* r1 = ...B */
1711 #ifdef __ARMEB__
1712 mov r2, r2, lsl #24 /* r2 = 2... */
1713 orr r2, r2, r3, lsr #8 /* r2 = 2345 */
1714 mov r3, r3, lsl #24 /* r3 = 6... */
1715 orr r3, r3, ip, lsr #8 /* r3 = 6789 */
1716 orr r1, r1, ip, lsl #8 /* r1 = 89AB */
	/* low half of r1 = bytes A-B in BE; stored by the strh below */
1717 #else
1718 mov r2, r2, lsr #24 /* r2 = ...2 */
1719 orr r2, r2, r3, lsl #8 /* r2 = 5432 */
1720 mov r3, r3, lsr #24 /* r3 = ...6 */
1721 orr r3, r3, ip, lsl #8 /* r3 = 9876 */
1722 mov r1, r1, lsl #8 /* r1 = ..B. */
1723 orr r1, r1, ip, lsr #24 /* r1 = ..BA */
1724 #endif
1725 str r2, [r0, #0x02]
1726 str r3, [r0, #0x06]
1727 strh r1, [r0, #0x0a]
1728 RET
1729 LMEMCPY_C_PAD
1730
1731 /*
1732 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1733 */
	/* Matching 16-bit alignment on both sides: plain 2+4+4+2 copy,
	 * every access naturally aligned, endian-independent. */
1734 ldrh r2, [r1]
1735 ldr r3, [r1, #0x02]
1736 ldr ip, [r1, #0x06]
1737 ldrh r1, [r1, #0x0a]
1738 strh r2, [r0]
1739 str r3, [r0, #0x02]
1740 str ip, [r0, #0x06]
1741 strh r1, [r0, #0x0a]
1742 RET
1743 LMEMCPY_C_PAD
1744
1745 /*
1746 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
1747 */
	/* Works back-to-front: tail halfword first, then the middle
	 * words and the leading halfword.  The [r1, #0x09] load reads
	 * one byte past the copy ('x'), discarded by the shifts. */
1748 ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */
1749 ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */
1750 mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */
1751 strh ip, [r0, #0x0a] /* bytes 10-11 */
1752 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
1753 ldrb r1, [r1] /* r1 = ...0 */
1754 #ifdef __ARMEB__
1755 mov r2, r2, lsr #24 /* r2 = ...9 */
1756 orr r2, r2, r3, lsl #8 /* r2 = 6789 */
1757 mov r3, r3, lsr #24 /* r3 = ...5 */
1758 orr r3, r3, ip, lsl #8 /* r3 = 2345 */
1759 mov r1, r1, lsl #8 /* r1 = ..0. */
1760 orr r1, r1, ip, lsr #24 /* r1 = ..01 */
1761 #else
1762 mov r2, r2, lsl #24 /* r2 = 9... */
1763 orr r2, r2, r3, lsr #8 /* r2 = 9876 */
1764 mov r3, r3, lsl #24 /* r3 = 5... */
1765 orr r3, r3, ip, lsr #8 /* r3 = 5432 */
1766 orr r1, r1, ip, lsl #8 /* r1 = 3210 */
1767 #endif
1768 str r2, [r0, #0x06]
1769 str r3, [r0, #0x02]
1770 strh r1, [r0]
1771 RET
1772 LMEMCPY_C_PAD
1773
1774 /*
1775 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
1776 */
	/* Three aligned word loads; dst at byte offset 3 is written as
	 * byte (0), word (1), word (5), halfword (9), byte (11).  The
	 * endian branches differ enough that each carries its own full
	 * store sequence. */
1777 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1778 ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */
1779 ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */
1780 #ifdef __ARMEB__
1781 mov r3, r2, lsr #24 /* r3 = ...0 */
1782 strb r3, [r0]
1783 mov r2, r2, lsl #8 /* r2 = 123. */
1784 orr r2, r2, ip, lsr #24 /* r2 = 1234 */
1785 str r2, [r0, #0x01]
1786 mov r2, ip, lsl #8 /* r2 = 567. */
1787 orr r2, r2, r1, lsr #24 /* r2 = 5678 */
1788 str r2, [r0, #0x05]
1789 mov r2, r1, lsr #8 /* r2 = ..9A */
1790 strh r2, [r0, #0x09]
1791 strb r1, [r0, #0x0b] /* low byte of r1 is byte B */
1792 #else
1793 strb r2, [r0] /* low byte of r2 is byte 0 */
1794 mov r3, r2, lsr #8 /* r3 = .321 */
1795 orr r3, r3, ip, lsl #24 /* r3 = 4321 */
1796 str r3, [r0, #0x01]
1797 mov r3, ip, lsr #8 /* r3 = .765 */
1798 orr r3, r3, r1, lsl #24 /* r3 = 8765 */
1799 str r3, [r0, #0x05]
1800 mov r1, r1, lsr #8 /* r1 = .BA9 */
1801 strh r1, [r0, #0x09]
1802 mov r1, r1, lsr #16 /* r1 = ...B */
1803 strb r1, [r0, #0x0b]
1804 #endif
1805 RET
1806 LMEMCPY_C_PAD
1807
1808 /*
1809 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
1810 */
	/* Works back-to-front: tail byte and halfword first, then the
	 * middle words, finishing with the lead byte.  The [r1, #-1]
	 * load reads one byte before src ('x'), shifted away; safe
	 * because src is at byte offset 1 (same readable word). */
1811 ldrb r2, [r1, #0x0b] /* r2 = ...B */
1812 ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */
1813 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
1814 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
1815 strb r2, [r0, #0x0b]
1816 #ifdef __ARMEB__
1817 strh r3, [r0, #0x09] /* low half of r3 = bytes 9-A */
1818 mov r3, r3, lsr #16 /* r3 = ..78 */
1819 orr r3, r3, ip, lsl #16 /* r3 = 5678 */
1820 mov ip, ip, lsr #16 /* ip = ..34 */
1821 orr ip, ip, r1, lsl #16 /* ip = 1234 */
1822 mov r1, r1, lsr #16 /* r1 = ..x0 */
	/* low byte of r1 is now byte 0; stored by the strb below */
1823 #else
1824 mov r2, r3, lsr #16 /* r2 = ..A9 */
1825 strh r2, [r0, #0x09]
1826 mov r3, r3, lsl #16 /* r3 = 87.. */
1827 orr r3, r3, ip, lsr #16 /* r3 = 8765 */
1828 mov ip, ip, lsl #16 /* ip = 43.. */
1829 orr ip, ip, r1, lsr #16 /* ip = 4321 */
1830 mov r1, r1, lsr #8 /* r1 = .210 */
1831 #endif
1832 str r3, [r0, #0x05]
1833 str ip, [r0, #0x01]
1834 strb r1, [r0]
1835 RET
1836 LMEMCPY_C_PAD
1837
1838 /*
1839 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
1840 */
	/* The two endian variants are written as entirely separate
	 * sequences (BE walks the buffer high-to-low, LE low-to-high);
	 * both do 2+4+4+2 aligned loads and store to dst as byte,
	 * word, word, halfword, byte. */
1841 #ifdef __ARMEB__
1842 ldrh r2, [r1, #0x0a] /* r2 = ..AB */
1843 ldr ip, [r1, #0x06] /* ip = 6789 */
1844 ldr r3, [r1, #0x02] /* r3 = 2345 */
1845 ldrh r1, [r1] /* r1 = ..01 */
1846 strb r2, [r0, #0x0b] /* low byte of r2 is byte B */
1847 mov r2, r2, lsr #8 /* r2 = ...A */
1848 orr r2, r2, ip, lsl #8 /* r2 = 789A */
1849 mov ip, ip, lsr #8 /* ip = .678 */
1850 orr ip, ip, r3, lsl #24 /* ip = 5678 */
1851 mov r3, r3, lsr #8 /* r3 = .234 */
1852 orr r3, r3, r1, lsl #24 /* r3 = 1234 */
1853 mov r1, r1, lsr #8 /* r1 = ...0 */
1854 strb r1, [r0]
1855 str r3, [r0, #0x01]
1856 str ip, [r0, #0x05]
1857 strh r2, [r0, #0x09]
1858 #else
1859 ldrh r2, [r1] /* r2 = ..10 */
1860 ldr r3, [r1, #0x02] /* r3 = 5432 */
1861 ldr ip, [r1, #0x06] /* ip = 9876 */
1862 ldrh r1, [r1, #0x0a] /* r1 = ..BA */
1863 strb r2, [r0] /* low byte of r2 is byte 0 */
1864 mov r2, r2, lsr #8 /* r2 = ...1 */
1865 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
1866 mov r3, r3, lsr #24 /* r3 = ...5 */
1867 orr r3, r3, ip, lsl #8 /* r3 = 8765 */
1868 mov ip, ip, lsr #24 /* ip = ...9 */
1869 orr ip, ip, r1, lsl #8 /* ip = .BA9 */
1870 mov r1, r1, lsr #8 /* r1 = ...B */
1871 str r2, [r0, #0x01]
1872 str r3, [r0, #0x05]
1873 strh ip, [r0, #0x09]
1874 strb r1, [r0, #0x0b]
1875 #endif
1876 RET
1877 LMEMCPY_C_PAD
1878
1879 /*
1880 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
1881 */
	/* Both pointers at byte offset 3: every access below is
	 * naturally aligned on both sides (1+4+4+2+1), endian-
	 * independent.  strb interleaved early to free r2 for reuse. */
1882 ldrb r2, [r1]
1883 ldr r3, [r1, #0x01]
1884 ldr ip, [r1, #0x05]
1885 strb r2, [r0]
1886 ldrh r2, [r1, #0x09]
1887 ldrb r1, [r1, #0x0b]
1888 str r3, [r0, #0x01]
1889 str ip, [r0, #0x05]
1890 strh r2, [r0, #0x09]
1891 strb r1, [r0, #0x0b]
1892 RET
1893 #endif /* !_STANDALONE */
1894