/*	$NetBSD: bcopyinout.S,v 1.23 2022/10/20 06:58:38 skrll Exp $	*/

/*
 * Copyright (c) 2002 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Allen Briggs for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_multiprocessor.h"
#include "opt_cpuoptions.h"

#include "assym.h"

#include <machine/asm.h>

#include <arm/locore.h>

#if defined(__XSCALE__) || defined(_ARM_ARCH_6)
/*
 * ARMv6 and ARMv7 have pld and strd, so they can use the XScale
 * bcopyinout as well.
 */
#include "bcopyinout_xscale.S"
#else

RCSID("$NetBSD: bcopyinout.S,v 1.23 2022/10/20 06:58:38 skrll Exp $")

	.text
	.align	0

#define SAVE_REGS	stmfd	sp!, {r4-r11}
#define RESTORE_REGS	ldmfd	sp!, {r4-r11}
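
/*
 * In the ARM ABI r4-r11 are callee-saved, so the copy routines below
 * stack them on entry and restore them on every exit path.
 */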

#if defined(__XSCALE__) || defined(_ARM_ARCH_6)
#define HELLOCPP #
#define PREFETCH(rx,o)	pld	[ rx , HELLOCPP (o) ]
#else
#define PREFETCH(rx,o)
#endif
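
/*
 * HELLOCPP expands to a literal '#' so that cpp can splice an immediate
 * prefix into the pld operand.  Note that because the __XSCALE__ and
 * _ARM_ARCH_6 cases were already diverted to bcopyinout_xscale.S above,
 * this branch of the #if is never taken here, and PREFETCH expands to
 * nothing on the CPUs that actually assemble this file.
 */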

/*
 * r0 = user space address
 * r1 = kernel space address
 * r2 = length
 *
 * Copies bytes from user space to kernel space.
 *
 * We save and restore r4-r11, then use them as scratch registers
 * for the copy loops.
 */
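/*
 * At the C level this implements (a sketch of the usual NetBSD
 * prototype; see <sys/systm.h> for the authoritative declaration):
 *
 *	int copyin(const void *uaddr, void *kaddr, size_t len);
 *
 * returning 0 on success, or an error number (EFAULT) delivered
 * through the PCB_ONFAULT handler on a faulting user access.
 */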
ENTRY(copyin)
	/* Quick exit if length is zero */
	teq	r2, #0
	moveq	r0, #0
	RETc(eq)

	SAVE_REGS
	GET_CURPCB(r4)

	ldr	r5, [r4, #PCB_ONFAULT]
	adr	r3, .Lcopyfault
	str	r3, [r4, #PCB_ONFAULT]

	PREFETCH(r0, 0)
	PREFETCH(r1, 0)

	/*
	 * If not too many bytes, take the slow path.
	 */
	cmp	r2, #0x08
	blt	.Licleanup

	/*
	 * Align destination to word boundary.
	 */
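	/*
	 * Computed goto: reading the pc yields the address of the ldr
	 * plus 8, which is the first .word below, so [pc, r6, lsl #2]
	 * indexes the branch table by the low two bits of the
	 * destination address.  The "b .Lialend" only pads the slot
	 * between the ldr and the table; the ldr always rewrites pc.
	 */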
	and	r6, r1, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lialend
	.word	.Lialend
	.word	.Lial3
	.word	.Lial2
	.word	.Lial1
.Lial3:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lial2:	ldrbt	r7, [r0], #1
	sub	r2, r2, #1
	strb	r7, [r1], #1
.Lial1:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lialend:

	/*
	 * If few bytes left, finish slow.
	 */
	cmp	r2, #0x08
	blt	.Licleanup

	/*
	 * If source is not aligned, finish slow.
	 */
	ands	r3, r0, #0x03
	bne	.Licleanup

	cmp	r2, #0x60	/* Must be > 0x5f for unrolled cacheline */
	blt	.Licleanup8

	/*
	 * Align destination to cacheline boundary.
	 * If source and destination are nicely aligned, this can be a big
	 * win.  If not, it's still cheaper to copy in groups of 32 even if
	 * we don't get the nice cacheline alignment.
	 */
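	/*
	 * Same branch-table trick as above, but r1 is now word aligned,
	 * so (r1 & 0x1f) is already a multiple of 4 and indexes the
	 * table without the "lsl #2".
	 */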
	and	r6, r1, #0x1f
	ldr	pc, [pc, r6]
	b	.Licaligned
	.word	.Licaligned
	.word	.Lical28
	.word	.Lical24
	.word	.Lical20
	.word	.Lical16
	.word	.Lical12
	.word	.Lical8
	.word	.Lical4
.Lical28:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical24:ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical20:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical16:ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical12:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical8:ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical4:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4

	/*
	 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
	 * part of the code, and we may have knocked that down by as much
	 * as 0x1c getting aligned).
	 *
	 * This loop basically works out to:
	 * do {
	 * 	prefetch-next-cacheline(s)
	 *	bytes -= 0x20;
	 *	copy cacheline
	 * } while (bytes >= 0x40);
	 * bytes -= 0x20;
	 * copy cacheline
	 */
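	/*
	 * ldrt has no multiple-register form (the T suffix is what
	 * forces the user-permission check on each access), so the
	 * cacheline is fetched with eight ldrt's, while the kernel-side
	 * stores can be batched into stmia bursts of 2 and 6 words.
	 */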
.Licaligned:
	PREFETCH(r0, 32)
	PREFETCH(r1, 32)

	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	ldrt	r6, [r0], #4
	ldrt	r7, [r0], #4
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	stmia	r1!, {r10-r11}
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	stmia	r1!, {r6-r11}

	cmp	r2, #0x40
	bge	.Licaligned

	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	ldrt	r6, [r0], #4
	ldrt	r7, [r0], #4
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	stmia	r1!, {r10-r11}
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	stmia	r1!, {r6-r11}

	cmp	r2, #0x08
	blt	.Liprecleanup

.Licleanup8:
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	sub	r2, r2, #8
	stmia	r1!, {r8, r9}
	cmp	r2, #8
	bge	.Licleanup8

.Liprecleanup:
	/*
	 * If we're done, bail.
	 */
	cmp	r2, #0
	beq	.Liout

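	/*
	 * Byte-at-a-time tail: each pass through the table below copies
	 * (r2 & 3) bytes, or 4 when r2 is already a multiple of 4; the
	 * subs/bne pair keeps it looping until r2 reaches zero.
	 */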
.Licleanup:
	and	r6, r2, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Licend
	.word	.Lic4
	.word	.Lic1
	.word	.Lic2
	.word	.Lic3
.Lic4:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lic3:	ldrbt	r7, [r0], #1
	sub	r2, r2, #1
	strb	r7, [r1], #1
.Lic2:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lic1:	ldrbt	r7, [r0], #1
	subs	r2, r2, #1
	strb	r7, [r1], #1
.Licend:
	bne	.Licleanup

.Liout:
	mov	r0, #0

	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET

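/*
 * Fault landing pad shared by copyin, copyout and kcopy: on an abort
 * the trap handler resumes execution at the PCB_ONFAULT address with
 * r0 already set to the error number (EFAULT), so all that is left to
 * do is restore the previous onfault handler and the saved registers.
 */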
.Lcopyfault:
	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET
END(copyin)

/*
 * r0 = kernel space address
 * r1 = user space address
 * r2 = length
 *
 * Copies bytes from kernel space to user space.
 *
 * We save and restore r4-r11, then use them as scratch registers
 * for the copy loops.
 */
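/*
 * A sketch of the corresponding C prototype (see <sys/systm.h> for
 * the authoritative declaration):
 *
 *	int copyout(const void *kaddr, void *uaddr, size_t len);
 *
 * returning 0 on success, or EFAULT via the PCB_ONFAULT handler.
 */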

ENTRY(copyout)
	/* Quick exit if length is zero */
	teq	r2, #0
	moveq	r0, #0
	RETc(eq)

	SAVE_REGS
	GET_CURPCB(r4)

	ldr	r5, [r4, #PCB_ONFAULT]
	adr	r3, .Lcopyfault
	str	r3, [r4, #PCB_ONFAULT]

	PREFETCH(r0, 0)
	PREFETCH(r1, 0)

	/*
	 * If not too many bytes, take the slow path.
	 */
	cmp	r2, #0x08
	blt	.Lcleanup

	/*
	 * Align destination to word boundary.
	 */
	and	r6, r1, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lalend
	.word	.Lalend
	.word	.Lal3
	.word	.Lal2
	.word	.Lal1
.Lal3:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lal2:	ldrb	r7, [r0], #1
	sub	r2, r2, #1
	strbt	r7, [r1], #1
.Lal1:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lalend:

	/*
	 * If few bytes left, finish slow.
	 */
	cmp	r2, #0x08
	blt	.Lcleanup

	/*
	 * If source is not aligned, finish slow.
	 */
	ands	r3, r0, #0x03
	bne	.Lcleanup

	cmp	r2, #0x60	/* Must be > 0x5f for unrolled cacheline */
	blt	.Lcleanup8

	/*
	 * Align destination to cacheline boundary.
	 */
	and	r6, r1, #0x1f
	ldr	pc, [pc, r6]
	b	.Lcaligned
	.word	.Lcaligned
	.word	.Lcal28
	.word	.Lcal24
	.word	.Lcal20
	.word	.Lcal16
	.word	.Lcal12
	.word	.Lcal8
	.word	.Lcal4
.Lcal28:ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal24:ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal20:ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal16:ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal12:ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal8:	ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal4:	ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4

	/*
	 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
	 * part of the code, and we may have knocked that down by as much
	 * as 0x1c getting aligned).
	 *
	 * This loop basically works out to:
	 * do {
	 * 	prefetch-next-cacheline(s)
	 *	bytes -= 0x20;
	 *	copy cacheline
	 * } while (bytes >= 0x40);
	 * bytes -= 0x20;
	 * copy cacheline
	 */
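	/*
	 * The mirror image of copyin: strt has no multiple-register
	 * form, so here the kernel-side loads use ldmia and the
	 * user-side stores are issued as individual strt's.
	 */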
.Lcaligned:
	PREFETCH(r0, 32)
	PREFETCH(r1, 32)

	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldmia	r0!, {r6-r11}
	strt	r6, [r1], #4
	strt	r7, [r1], #4
	ldmia	r0!, {r6-r7}
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	strt	r10, [r1], #4
	strt	r11, [r1], #4
	strt	r6, [r1], #4
	strt	r7, [r1], #4

	cmp	r2, #0x40
	bge	.Lcaligned

	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldmia	r0!, {r6-r11}
	strt	r6, [r1], #4
	strt	r7, [r1], #4
	ldmia	r0!, {r6-r7}
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	strt	r10, [r1], #4
	strt	r11, [r1], #4
	strt	r6, [r1], #4
	strt	r7, [r1], #4

	cmp	r2, #0x08
	blt	.Lprecleanup

.Lcleanup8:
	ldmia	r0!, {r8-r9}
	sub	r2, r2, #8
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	cmp	r2, #8
	bge	.Lcleanup8

.Lprecleanup:
	/*
	 * If we're done, bail.
	 */
	cmp	r2, #0
	beq	.Lout

.Lcleanup:
	and	r6, r2, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lcend
	.word	.Lc4
	.word	.Lc1
	.word	.Lc2
	.word	.Lc3
.Lc4:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lc3:	ldrb	r7, [r0], #1
	sub	r2, r2, #1
	strbt	r7, [r1], #1
.Lc2:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lc1:	ldrb	r7, [r0], #1
	subs	r2, r2, #1
	strbt	r7, [r1], #1
.Lcend:
	bne	.Lcleanup

.Lout:
	mov	r0, #0

	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET
END(copyout)

/*
 * r0 = kernel space source address
 * r1 = kernel space destination address
 * r2 = length
 *
 * Copies bytes from kernel space to kernel space, aborting on page fault
 *
 * Copy of copyout, but without the ldrt/strt instructions.
 */
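/*
 * A sketch of the corresponding C prototype (see <sys/systm.h> for
 * the authoritative declaration):
 *
 *	int kcopy(const void *src, void *dst, size_t len);
 *
 * returning 0 on success, or EFAULT if the copy aborted on a fault.
 */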

ENTRY(kcopy)
	/* Quick exit if length is zero */
	teq	r2, #0
	moveq	r0, #0
	RETc(eq)

	SAVE_REGS
	GET_CURPCB(r4)

	ldr	r5, [r4, #PCB_ONFAULT]
	adr	r3, .Lcopyfault
	str	r3, [r4, #PCB_ONFAULT]

	PREFETCH(r0, 0)
	PREFETCH(r1, 0)

	/*
	 * If not too many bytes, take the slow path.
	 */
	cmp	r2, #0x08
	blt	.Lkcleanup

	/*
	 * Align destination to word boundary.
	 */
	and	r6, r1, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lkalend
	.word	.Lkalend
	.word	.Lkal3
	.word	.Lkal2
	.word	.Lkal1
.Lkal3:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lkal2:	ldrb	r7, [r0], #1
	sub	r2, r2, #1
	strb	r7, [r1], #1
.Lkal1:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lkalend:

	/*
	 * If few bytes left, finish slow.
	 */
	cmp	r2, #0x08
	blt	.Lkcleanup

	/*
	 * If source is not aligned, finish slow.
	 */
	ands	r3, r0, #0x03
	bne	.Lkcleanup

	cmp	r2, #0x60	/* Must be > 0x5f for unrolled cacheline */
	blt	.Lkcleanup8

	/*
	 * Align destination to cacheline boundary.
	 */
	and	r6, r1, #0x1f
	ldr	pc, [pc, r6]
	b	.Lkcaligned
	.word	.Lkcaligned
	.word	.Lkcal28
	.word	.Lkcal24
	.word	.Lkcal20
	.word	.Lkcal16
	.word	.Lkcal12
	.word	.Lkcal8
	.word	.Lkcal4
.Lkcal28:ldr	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lkcal24:ldr	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lkcal20:ldr	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lkcal16:ldr	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lkcal12:ldr	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lkcal8:ldr	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lkcal4:ldr	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4

	/*
	 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
	 * part of the code, and we may have knocked that down by as much
	 * as 0x1c getting aligned).
	 *
	 * This loop basically works out to:
	 * do {
	 * 	prefetch-next-cacheline(s)
	 *	bytes -= 0x20;
	 *	copy cacheline
	 * } while (bytes >= 0x40);
	 * bytes -= 0x20;
	 * copy cacheline
	 */
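	/*
	 * Both addresses are kernel addresses here, so both sides of
	 * the copy can use the multiple-register ldmia/stmia forms.
	 */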
.Lkcaligned:
	PREFETCH(r0, 32)
	PREFETCH(r1, 32)

	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldmia	r0!, {r6-r11}
	stmia	r1!, {r6, r7}
	ldmia	r0!, {r6, r7}
	stmia	r1!, {r8-r11}
	stmia	r1!, {r6, r7}

	cmp	r2, #0x40
	bge	.Lkcaligned

	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldmia	r0!, {r6-r11}
	stmia	r1!, {r6-r7}
	ldmia	r0!, {r6-r7}
	stmia	r1!, {r8-r11}
	stmia	r1!, {r6-r7}

	cmp	r2, #0x08
	blt	.Lkprecleanup

.Lkcleanup8:
	ldmia	r0!, {r8-r9}
	sub	r2, r2, #8
	stmia	r1!, {r8-r9}
	cmp	r2, #8
	bge	.Lkcleanup8

.Lkprecleanup:
	/*
	 * If we're done, bail.
	 */
	cmp	r2, #0
	beq	.Lkout

.Lkcleanup:
	and	r6, r2, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lkcend
	.word	.Lkc4
	.word	.Lkc1
	.word	.Lkc2
	.word	.Lkc3
.Lkc4:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lkc3:	ldrb	r7, [r0], #1
	sub	r2, r2, #1
	strb	r7, [r1], #1
.Lkc2:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lkc1:	ldrb	r7, [r0], #1
	subs	r2, r2, #1
	strb	r7, [r1], #1
.Lkcend:
	bne	.Lkcleanup

.Lkout:
	mov	r0, #0

	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET
END(kcopy)
#endif	/* !(__XSCALE__ || _ARM_ARCH_6) */

/*
 * int badaddr_read_1(const uint8_t *src, uint8_t *dest)
 *
 * Copies a single 8-bit value from src to dest, returning 0 on success,
 * else EFAULT if a page fault occurred.
 */
ENTRY(badaddr_read_1)
	GET_CURPCB(r2)
	ldr	ip, [r2, #PCB_ONFAULT]
	adr	r3, 1f
	str	r3, [r2, #PCB_ONFAULT]
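	/*
	 * The nops pad the probing access away from the PCB_ONFAULT
	 * update and restore; presumably this keeps a fault on the
	 * probing load inside the guarded window on CPUs with late or
	 * imprecise aborts.  badaddr_read_2 and badaddr_read_4 below
	 * follow the same pattern.
	 */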
	nop
	nop
	nop
	ldrb	r3, [r0]
	nop
	nop
	nop
	strb	r3, [r1]
	mov	r0, #0		/* No fault */
1:	str	ip, [r2, #PCB_ONFAULT]
	RET
END(badaddr_read_1)

/*
 * int badaddr_read_2(const uint16_t *src, uint16_t *dest)
 *
 * Copies a single 16-bit value from src to dest, returning 0 on success,
 * else EFAULT if a page fault occurred.
 */
ENTRY(badaddr_read_2)
	GET_CURPCB(r2)
	ldr	ip, [r2, #PCB_ONFAULT]
	adr	r3, 1f
	str	r3, [r2, #PCB_ONFAULT]
	nop
	nop
	nop
	ldrh	r3, [r0]
	nop
	nop
	nop
	strh	r3, [r1]
	mov	r0, #0		/* No fault */
1:	str	ip, [r2, #PCB_ONFAULT]
	RET
END(badaddr_read_2)

/*
 * int badaddr_read_4(const uint32_t *src, uint32_t *dest)
 *
 * Copies a single 32-bit value from src to dest, returning 0 on success,
 * else EFAULT if a page fault occurred.
 */
ENTRY(badaddr_read_4)
	GET_CURPCB(r2)
	ldr	ip, [r2, #PCB_ONFAULT]
	adr	r3, 1f
	str	r3, [r2, #PCB_ONFAULT]
	nop
	nop
	nop
	ldr	r3, [r0]
	nop
	nop
	nop
	str	r3, [r1]
	mov	r0, #0		/* No fault */
1:	str	ip, [r2, #PCB_ONFAULT]
	RET
END(badaddr_read_4)
    750