Home | History | Annotate | Line # | Download | only in arm
      1 /*	$NetBSD: blockio.S,v 1.9 2022/10/20 06:58:38 skrll Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2001 Ben Harris.
      5  * Copyright (c) 1994 Mark Brinicombe.
      6  * Copyright (c) 1994 Brini.
      7  * All rights reserved.
      8  *
      9  * This code is derived from software written for Brini by Mark Brinicombe
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  * 3. All advertising materials mentioning features or use of this software
     20  *    must display the following acknowledgement:
     21  *	This product includes software developed by Brini.
     22  * 4. The name of the company nor the name of the author may be used to
     23  *    endorse or promote products derived from this software without specific
     24  *    prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
     27  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
     28  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     29  * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
     30  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     31  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     32  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     36  * SUCH DAMAGE.
     37  *
     38  * RiscBSD kernel project
     39  *
     40  * blockio.S
     41  *
     42  * optimised block read/write from/to IO routines.
     43  *
     44  * Created      : 08/10/94
     45  * Modified	: 22/01/99  -- R.Earnshaw
     46  *			       Faster, and small tweaks for StrongARM
     47  */
     48 
     49 #include <machine/asm.h>
     50 
     51 RCSID("$NetBSD: blockio.S,v 1.9 2022/10/20 06:58:38 skrll Exp $")
     52 
/*
 * read_multi_1: read bytes from one I/O address into a block of memory.
 *
 * r0 = address to read from (IO); never advanced, every byte is read
 *      from this same location
 * r1 = address to write to (memory); advanced as data is stored
 * r2 = length in bytes
 *
 * Scratch: r3, ip.  A stack frame {fp, ip, lr, pc} is built on entry and
 * every exit unwinds it with "ldmdb fp, {fp, sp, pc}".
 *
 * Outline:
 *   1. Fewer than 4 bytes requested: go straight to the byte tail.
 *   2. Copy 1-3 single bytes until the destination is word aligned.
 *   3. Main loop: four byte reads are packed into one word store; the
 *      first byte read ends up in bits 7:0 of the stored word.
 *   4. Tail: copy the remaining 0-3 bytes one at a time.
 */

/* This code will look very familiar if you've read _memcpy(). */
ENTRY(read_multi_1)
	mov	ip, sp
	push	{fp, ip, lr, pc}
	sub	fp, ip, #4		/* fp -> saved pc slot of the frame */
	subs	r2, r2, #4		/* r2 = length - 4 */
	blt	.Lrm1_l4		/* less than 4 bytes */
	ands	ip, r1, #3
	beq	.Lrm1_main		/* aligned destination */
	rsb	ip, ip, #4		/* ip = bytes needed to align (1-3) */
	cmp	ip, #2
	ldrb	r3, [r0]		/* always at least one byte */
	strb	r3, [r1], #1
	ldrbge	r3, [r0]		/* second byte when ip >= 2 */
	strbge	r3, [r1], #1
	ldrbgt	r3, [r0]		/* third byte when ip == 3 */
	strbgt	r3, [r1], #1
	subs	r2, r2, ip		/* account for the alignment bytes */
	blt	.Lrm1_l4		/* fewer than 4 bytes left */
.Lrm1_main:
.Lrm1loop:
	ldrb	r3, [r0]		/* byte 0 -> bits  7:0  */
	ldrb	ip, [r0]
	orr	r3, r3, ip, lsl #8	/* byte 1 -> bits 15:8  */
	ldrb	ip, [r0]
	orr	r3, r3, ip, lsl #16	/* byte 2 -> bits 23:16 */
	ldrb	ip, [r0]
	orr	r3, r3, ip, lsl #24	/* byte 3 -> bits 31:24 */
	str	r3, [r1], #4
	subs	r2, r2, #4
	bge	.Lrm1loop
.Lrm1_l4:
	adds	r2, r2, #4		/* r2 = length again */
	/*
	 * Nothing left: unwind the frame and return.  No separate
	 * conditional return is needed after this; when Z is set the
	 * ldmdbeq has already loaded pc.
	 */
	ldmdbeq	fp, {fp, sp, pc}
	cmp	r2, #2
	ldrb	r3, [r0]		/* 1-3 bytes remain: first one */
	strb	r3, [r1], #1
	ldrbge	r3, [r0]		/* second byte when r2 >= 2 */
	strbge	r3, [r1], #1
	ldrbgt	r3, [r0]		/* third byte when r2 == 3 */
	strbgt	r3, [r1], #1
	ldmdb	fp, {fp, sp, pc}
END(read_multi_1)
    105 
/*
 * write_multi_1: write bytes from a block of memory to one I/O address.
 *
 * r0 = address to write to (IO); never advanced, every byte is written
 *      to this same location
 * r1 = address to read from (memory); advanced as data is consumed
 * r2 = length in bytes
 *
 * Scratch: r3, ip.  A stack frame {fp, ip, lr, pc} is built on entry and
 * every exit unwinds it with "ldmdb fp, {fp, sp, pc}".
 *
 * Single bytes are copied until the source is word aligned, then each
 * loaded word is written out as four bytes starting with bits 7:0, and
 * finally the 0-3 leftover bytes are copied singly.
 */

/*
 * Copy bytes from [r1]+ to [r0]: one byte always, a second when
 * \count >= 2, a third when \count > 2.  Clobbers r3 and the flags.
 */
	.macro	wm1_upto3 count
	cmp	\count, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0]
	ldrbge	r3, [r1], #1
	strbge	r3, [r0]
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0]
	.endm

/* This code will look very familiar if you've read _memcpy(). */
ENTRY(write_multi_1)
	mov	ip, sp
	push	{fp, ip, lr, pc}
	sub	fp, ip, #4		/* fp -> saved pc slot of the frame */

	subs	r2, r2, #4		/* bias: r2 = length - 4 */
	blt	.Lwm1_tail		/* under 4 bytes in total */

	ands	ip, r1, #3		/* source misalignment */
	beq	.Lwm1_words		/* already word aligned */
	rsb	ip, ip, #4		/* ip = bytes up to the boundary */
	wm1_upto3 ip
	subs	r2, r2, ip		/* those bytes are done */
	blt	.Lwm1_tail		/* under 4 bytes left */

.Lwm1_words:
	ldr	r3, [r1], #4		/* fetch four source bytes */
	strb	r3, [r0]		/* bits  7:0  */
	lsr	r3, r3, #8
	strb	r3, [r0]		/* bits 15:8  */
	lsr	r3, r3, #8
	strb	r3, [r0]		/* bits 23:16 */
	lsr	r3, r3, #8
	strb	r3, [r0]		/* bits 31:24 */
	subs	r2, r2, #4
	bge	.Lwm1_words

.Lwm1_tail:
	adds	r2, r2, #4		/* undo the bias: r2 = bytes left */
	ldmdbeq	fp, {fp, sp, pc}	/* none left: unwind and return */
	wm1_upto3 r2
	ldmdb	fp, {fp, sp, pc}
END(write_multi_1)
    157 
/*
 * insw: read 16-bit values from one I/O address into a block of memory.
 *
 * r0 = address to read from (IO); never advanced
 * r1 = address to write to (memory); advanced as data is stored
 * r2 = length, counted in 16-bit items (each loop pass below subtracts
 *      one per halfword stored)
 *
 * Scratch: r3, ip.  Returns immediately when the length is <= 0.
 */

ENTRY(insw)
	cmp	r2, #0			/* positive length required */
	RETc(le)

	/*
	 * Even item count and word-aligned destination: use the loop
	 * that stores whole words.
	 */
	tst	r2, #1
	tsteq	r1, #3
	beq	.Linsw_words

	/* General case: one item per pass, stored as two bytes. */
.Linsw_bytes:
	ldr	r3, [r0]
	subs	r2, r2, #1		/* loop test in load delay slot */
	strb	r3, [r1], #1		/* bits  7:0 */
	lsr	r3, r3, #8
	strb	r3, [r1], #1		/* bits 15:8 */
	bgt	.Linsw_bytes

	RET

	/* Aligned case: two items per pass, stored as one word. */
.Linsw_words:
	ldr	r3, [r0, #2]		/* take advantage of nonaligned
					 * word accesses */
	ldr	ip, [r0]
	lsr	r3, r3, #16		/* first load  -> low halfword  */
	orr	r3, r3, ip, lsl #16	/* second load -> high halfword */
	str	r3, [r1], #4
	subs	r2, r2, #2
	bgt	.Linsw_words

	RET
END(insw)
    205 
    206 
/*
 * outsw: write 16-bit values from a block of memory to one I/O address.
 *
 * r0 = address to write to (IO); never advanced
 * r1 = address to read from (memory); advanced as data is consumed
 * r2 = length, counted in 16-bit items
 *
 * Scratch: r3, ip.  Returns immediately when the length is <= 0.
 * Every item is written as a full word holding the same 16-bit value
 * in both halves.
 */

ENTRY(outsw)
	cmp	r2, #0			/* positive length required */
	RETc(le)

	/*
	 * Even item count and word-aligned source: use the loop that
	 * loads whole words.
	 */
	tst	r2, #1
	tsteq	r1, #3
	beq	.Loutsw_words

	/* General case: build each item from two byte loads. */
.Loutsw_bytes:
	ldrb	r3, [r1], #1		/* low byte */
	ldrb	ip, [r1], #1		/* high byte */
	subs	r2, r2, #1		/* loop test in load delay slot */
	orr	r3, r3, ip, lsl #8	/* r3 = 16-bit item */
	orr	r3, r3, r3, lsl #16	/* copy it into the top half too */
	str	r3, [r0]
	bgt	.Loutsw_bytes

	RET

	/* Aligned case: one word load yields two items. */
.Loutsw_words:
	ldr	r3, [r1], #4		/* r3 = (H)(L) */
	subs	r2, r2, #2		/* loop test in load delay slot */

	/*
	 * Three exclusive-ors turn (H)(L) into the pair (L)(L), (H)(H);
	 * a shift-and-orr per item would give the same words using one
	 * more instruction.
	 */
	eor	ip, r3, r3, lsr #16	/* ip = (H)(H^L) */
	eor	r3, r3, ip, lsl #16	/* r3 = (H^H^L)(L) = (L)(L) */
	eor	ip, ip, r3, lsr #16	/* ip = (H)(H^L^L) = (H)(H) */

	str	r3, [r0]		/* low item first */
	str	ip, [r0]		/* then the high item */

	bgt	.Loutsw_words

	RET
END(outsw)
    267 
/*
 * insw16: read 16-bit values from one I/O address into a block of
 * memory, 16 bytes (eight items) per loop pass.
 *
 * Intended for a length that is guaranteed to be a multiple of 16 bytes
 * and a word-aligned destination; when either condition does not hold
 * the call is handed over to insw.
 *
 * r0 = address to read from (IO); never advanced
 * r1 = address to write to (memory); advanced as data is stored
 * r2 = length, counted in 16-bit items
 *
 * Scratch: r3, ip.  r4, r5 and lr are saved on the stack and restored
 * on return.
 */

/*
 * Read two items from [r0] and pack them into \dst: the first load
 * supplies the low halfword, the second the high halfword.
 * Clobbers lr.
 */
	.macro	insw16_pair dst
	ldr	\dst, [r0, #2]		/* take advantage of nonaligned
					 * word accesses */
	ldr	lr, [r0]
	lsr	\dst, \dst, #16
	orr	\dst, \dst, lr, lsl #16
	.endm

ENTRY(insw16)
	cmp	r2, #0			/* positive length required */
	RETc(le)

	/*
	 * Item count not a multiple of eight, or destination not word
	 * aligned: let insw do the work.
	 */
	tst	r2, #7
	tsteq	r1, #3
	bne	_C_LABEL(insw)

	push	{r4,r5,lr}

.Linsw16_next:
	insw16_pair r3
	insw16_pair r4
	insw16_pair r5
	insw16_pair ip

	stmia	r1!, {r3-r5,ip}		/* four words out in one go */
	subs	r2, r2, #8
	bgt	.Linsw16_next

	pop	{r4,r5,pc}		/* restore regs and return */
END(insw16)
    326 
    327 
/*
 * outsw16: write 16-bit values from a block of memory to one I/O
 * address, 16 bytes (eight items) per loop pass.
 *
 * When the item count is not a multiple of eight, or the source is not
 * word aligned, the call is handed over to outsw.
 *
 * r0 = address to write to (IO); never advanced
 * r1 = address to read from (memory); advanced as data is consumed
 * r2 = length, counted in 16-bit items
 *
 * Scratch: r3, ip.  r4, r5 and lr are saved on the stack and restored
 * on return.
 */

/*
 * Write the two items held in \src = (A)(B) to [r0]: first the word
 * (B)(B), then the word (A)(A).  Clobbers r3 and \src.
 */
	.macro	outsw16_pair src
	eor	r3, \src, \src, lsl #16	/* r3   = (A^B)(B) */
	eor	\src, \src, r3, lsr #16	/* \src = (A)(B^A^B) = (A)(A) */
	eor	r3, r3, \src, lsl #16	/* r3   = (A^B^A)(B) = (B)(B) */
	str	r3, [r0]
	str	\src, [r0]
	.endm

ENTRY(outsw16)
	cmp	r2, #0			/* positive length required */
	RETc(le)

	/*
	 * Item count not a multiple of eight, or source not word
	 * aligned: let outsw do the work.
	 */
	tst	r2, #7
	tsteq	r1, #3
	bne	_C_LABEL(outsw)

	push	{r4,r5,lr}

.Loutsw16_next:
	ldmia	r1!, {r4,r5,ip,lr}	/* four words = eight items */

	outsw16_pair r4
	outsw16_pair r5
	outsw16_pair ip
	outsw16_pair lr

	subs	r2, r2, #8
	bgt	.Loutsw16_next

	pop	{r4,r5,pc}		/* restore regs and return */
END(outsw16)
    394 
/*
 * inswm8: read 16-bit values from an I/O address into a block of memory.
 * The I/O address is assumed to be mapped multiple times in a block of
 * 8 words, so that a single ldmia of up to eight registers from r0
 * fetches up to eight successive items.
 * The destination address should be word aligned; when it is not, the
 * call is handed over to insw.
 *
 * r0 = address to read from (IO); never advanced
 * r1 = address to write to (memory); advanced as data is stored
 * r2 = length, counted in 16-bit items
 *
 * Scratch: r3, ip.  r4-r9 and lr are saved on the stack and restored on
 * return; lr holds the mask 0xffff0000 throughout.
 *
 * Each word read contributes its low 16 bits.  Items are packed two per
 * memory word, the earlier item in the low halfword.  Eight items are
 * handled per loop pass, then a remainder of 4, 2 and 1 items.
 */

ENTRY(inswm8)
/* Make sure that we have a positive length */
	cmp	r2, #0x00000000
	RETc(le)

/* Only the destination alignment is checked; any length is accepted */

	tst	r1, #0x00000003

	bne	_C_LABEL(insw)

/* Word aligned insw */

	push	{r4-r9,lr}

	mov	lr, #0xff000000
	orr	lr, lr, #0x00ff0000	/* lr = 0xffff0000 */

.Linswm8_loop8:
	cmp	r2, #8
	bcc	.Linswm8_l8		/* fewer than 8 items left */

	ldmia	r0, {r3-r9,ip}		/* eight items, one per register */

	bic	r3, r3, lr		/* keep low 16 bits of the even item */
	orr	r3, r3, r4, lsl #16	/* odd item goes into the top half */
	bic	r5, r5, lr
	orr	r4, r5, r6, lsl #16
	bic	r7, r7, lr
	orr	r5, r7, r8, lsl #16
	bic	r9, r9, lr
	orr	r6, r9, ip, lsl #16

	stmia	r1!, {r3-r6}

	subs	r2, r2, #0x00000008	/* Next */
	bne	.Linswm8_loop8
	b	.Linswm8_l1		/* count reached zero: done */

.Linswm8_l8:
	cmp	r2, #4
	bcc	.Linswm8_l4

	ldmia	r0, {r3-r6}		/* four items */

	bic	r3, r3, lr
	orr	r3, r3, r4, lsl #16
	bic	r5, r5, lr
	orr	r4, r5, r6, lsl #16

	stmia	r1!, {r3-r4}

	subs	r2, r2, #0x00000004
	beq	.Linswm8_l1

.Linswm8_l4:
	cmp	r2, #2
	bcc	.Linswm8_l2

	ldmia	r0, {r3-r4}		/* two items */

	bic	r3, r3, lr
	orr	r3, r3, r4, lsl #16
	str	r3, [r1], #0x0004

	subs	r2, r2, #0x00000002
	beq	.Linswm8_l1

.Linswm8_l2:
	cmp	r2, #1
	bcc	.Linswm8_l1

	/*
	 * Last single item, stored as two bytes.  The count is not
	 * decremented here: neither r2 nor the flags are read again
	 * before returning.
	 */
	ldr	r3, [r0]

	strb	r3, [r1], #0x0001
	mov	r3, r3, lsr #8
	strb	r3, [r1], #0x0001


.Linswm8_l1:
	pop	{r4-r9,pc}		/* And go home */
END(inswm8)
    491 
/*
 * outswm8: write 16-bit values from a block of memory to an I/O address.
 * The I/O address is assumed to be mapped multiple times in a block of
 * 8 words, so that a single stmia of up to eight registers to r0
 * delivers up to eight successive items.
 * The source address should be word aligned; when it is not, the call
 * is handed over to outsw.
 *
 * r0 = address to write to (IO); never advanced
 * r1 = address to read from (memory); advanced as data is consumed
 * r2 = length, counted in 16-bit items
 *
 * Scratch: r3, ip.  r4-r8 and lr are saved on the stack and restored on
 * return.
 *
 * Every item is written as a full word holding the same 16-bit value in
 * both halves; a memory word (A)(B) is emitted as (B)(B) followed by
 * (A)(A).  Eight items are handled per loop pass, then a remainder of
 * 4, 2 and 1 items.
 */

ENTRY(outswm8)
/* Make sure that we have a positive length */
	cmp	r2, #0x00000000
	RETc(le)

/* Only the source alignment is checked; any length is accepted */

	tst	r1, #0x00000003

	bne	_C_LABEL(outsw)

/* Word aligned outsw */

	push	{r4-r8,lr}

.Loutswm8_loop8:
	cmp	r2, #8
	bcc	.Loutswm8_l8		/* fewer than 8 items left */

	ldmia	r1!, {r3,r5,r7,ip}	/* four words = eight items */

	eor	r4, r3, r3, lsr #16	/* r4 = (A)(A^B) */
	eor	r3, r3, r4, lsl #16	/* r3 = (A^A^B)(B) = (B)(B) */
	eor	r4, r4, r3, lsr #16	/* r4 = (A)(B^A^B) = (A)(A) */

	eor	r6, r5, r5, lsr #16	/* r6 = (A)(A^B) */
	eor	r5, r5, r6, lsl #16	/* r5 = (A^A^B)(B) = (B)(B) */
	eor	r6, r6, r5, lsr #16	/* r6 = (A)(B^A^B) = (A)(A) */

	eor	r8, r7, r7, lsr #16	/* r8 = (A)(A^B) */
	eor	r7, r7, r8, lsl #16	/* r7 = (A^A^B)(B) = (B)(B) */
	eor	r8, r8, r7, lsr #16	/* r8 = (A)(B^A^B) = (A)(A) */

	eor	lr, ip, ip, lsr #16	/* lr = (A)(A^B) */
	eor	ip, ip, lr, lsl #16	/* ip = (A^A^B)(B) = (B)(B) */
	eor	lr, lr, ip, lsr #16	/* lr = (A)(B^A^B) = (A)(A) */

	stmia	r0, {r3-r8,ip,lr}

	subs	r2, r2, #0x00000008	/* Next */
	bne	.Loutswm8_loop8
	b	.Loutswm8_l1		/* count reached zero: done */

.Loutswm8_l8:
	cmp	r2, #4
	bcc	.Loutswm8_l4

	ldmia	r1!, {r3-r4}		/* two words = four items */

	eor	r6, r3, r3, lsr #16	/* r6 = (A)(A^B) */
	eor	r5, r3, r6, lsl #16	/* r5 = (A^A^B)(B) = (B)(B) */
	eor	r6, r6, r5, lsr #16	/* r6 = (A)(B^A^B) = (A)(A) */

	eor	r8, r4, r4, lsr #16	/* r8 = (A)(A^B) */
	eor	r7, r4, r8, lsl #16	/* r7 = (A^A^B)(B) = (B)(B) */
	eor	r8, r8, r7, lsr #16	/* r8 = (A)(B^A^B) = (A)(A) */

	stmia	r0, {r5-r8}

	subs	r2, r2, #0x00000004
	beq	.Loutswm8_l1

.Loutswm8_l4:
	cmp	r2, #2
	bcc	.Loutswm8_l2

	ldr	r3, [r1], #0x0004	/* r3 = (A)(B) */
	subs	r2, r2, #0x00000002	/* Done test in Load delay slot */

	eor	r5, r3, r3, lsr #16	/* r5 = (A)(A^B) */
	eor	r4, r3, r5, lsl #16	/* r4 = (A^A^B)(B) = (B)(B) */
	eor	r5, r5, r4, lsr #16	/* r5 = (A)(B^A^B) = (A)(A) */

	stmia	r0, {r4, r5}

	beq	.Loutswm8_l1		/* flags still from the subs above */

.Loutswm8_l2:
	cmp	r2, #1
	bcc	.Loutswm8_l1

	/*
	 * Last single item, built from two byte loads.  The count is
	 * not decremented here: neither r2 nor the flags are read again
	 * before returning.
	 */
	ldrb	r3, [r1], #0x0001
	ldrb	r4, [r1], #0x0001
	orr	r3, r3, r4, lsl #8	/* r3 = 16-bit item */
	orr	r3, r3, r3, lsl #16	/* copy it into the top half too */
	str	r3, [r0]

.Loutswm8_l1:
	pop	{r4-r8,pc}		/* And go home */
END(outswm8)
    596