Home | History | Annotate | Line # | Download | only in arm32
      1 /*	$NetBSD: bcopy_page.S,v 1.10 2013/12/17 01:27:21 joerg Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1995 Scott Stevens
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. All advertising materials mentioning features or use of this software
     16  *    must display the following acknowledgement:
     17  *	This product includes software developed by Scott Stevens.
     18  * 4. The name of the author may not be used to endorse or promote products
     19  *    derived from this software without specific prior written permission.
     20  *
     21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     31  *
     32  * RiscBSD kernel project
     33  *
     34  * bcopy_page.S
     35  *
     36  * page optimised bcopy and bzero routines
     37  *
     38  * Created      : 08/04/95
     39  */
     40 
     41 #include <machine/asm.h>
     42 
     43 #include "assym.h"
     44 
     45 #ifndef __XSCALE__
     46 
     47 /* #define BIG_LOOPS */
     48 
     49 /*
     50  * bcopy_page(src, dest)
     51  *
     52  * Optimised copy page routine.
     53  *
     54  * On entry:
     55  *   r0 - src address
     56  *   r1 - dest address
     57  *
     58  * Requires:
     59  *   number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS), 128
     60  *   otherwise.
     61  */
     62 
/*
 * Building blocks for the generic bcopy_page loop.
 *
 * Every hook below is only given its default definition when it has not
 * been defined already, so a definition supplied ahead of this point
 * (for example a real prefetch instruction) is honoured instead of being
 * redefined to nothing.
 */

/* Bytes moved by one default COPY_CHUNK: eight 4-byte registers. */
#define	CHUNK_SIZE	32

/* Hook expanded once on entry to bcopy_page, before registers are saved. */
#ifndef PREFETCH_FIRST_CHUNK
#define	PREFETCH_FIRST_CHUNK	/* nothing */
#endif

/* Hook expanded at the start of every COPY_CHUNK. */
#ifndef PREFETCH_NEXT_CHUNK
#define	PREFETCH_NEXT_CHUNK	/* nothing */
#endif

/*
 * Copy one chunk from [r0] to [r1], advancing both pointers.
 * Uses r3-r8, ip and lr as the transfer registers.
 */
#ifndef COPY_CHUNK
#define	COPY_CHUNK \
	PREFETCH_NEXT_CHUNK ; \
	ldmia	r0!, {r3-r8,ip,lr} ; \
	stmia	r1!, {r3-r8,ip,lr}
#endif /* ! COPY_CHUNK */

/*
 * Register save/restore around the copy loop.  The two are defined as a
 * pair: RESTORE_REGS returns by popping the saved lr straight into pc.
 */
#ifndef SAVE_REGS
#define	SAVE_REGS	push	{r4-r8, lr}
#define	RESTORE_REGS	pop	{r4-r8, pc}
#endif
     79 
ENTRY(bcopy_page)
	PREFETCH_FIRST_CHUNK
	SAVE_REGS

	/* r2 = number of loop passes needed to cover one page. */
#ifdef BIG_LOOPS
	mov	r2, #(PAGE_SIZE >> 9)
#else
	mov	r2, #(PAGE_SIZE >> 7)
#endif

1:
	/*
	 * Unrolled copy body: 4 chunks per pass normally, 16 with
	 * BIG_LOOPS.  There is little point making the loop any larger;
	 * unless we are running with the cache off, the load/store
	 * overheads will completely dominate this loop.
	 */
#ifdef BIG_LOOPS
	.rept	16
#else
	.rept	4
#endif
	COPY_CHUNK
	.endr

	subs	r2, r2, #1		/* one pass done; Z set on the last */
	bne	1b

	RESTORE_REGS			/* ...and return. */
END(bcopy_page)
    119 
    120 /*
    121  * bzero_page(dest)
    122  *
    123  * Optimised zero page routine.
    124  *
    125  * On entry:
    126  *   r0 - dest address
    127  *
    128  * Requires:
    129  *   number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS), 128
    130  *   otherwise
    131  */
    132 
ENTRY(bzero_page)
	/* r4-r8 and lr are used as zero sources below, so save them. */
	push	{r4-r8, lr}

	/* r2 = number of loop passes needed to cover one page. */
#ifdef BIG_LOOPS
	mov	r2, #(PAGE_SIZE >> 9)
#else
	mov	r2, #(PAGE_SIZE >> 7)
#endif

	/* Eight zeroed registers: each stmia below clears 32 bytes. */
	mov	r3, #0
	mov	r4, #0
	mov	r5, #0
	mov	r6, #0
	mov	r7, #0
	mov	r8, #0
	mov	ip, #0
	mov	lr, #0

1:
	/*
	 * Unrolled store body: 4 stores per pass normally, 16 with
	 * BIG_LOOPS.  There is little point making the loop any larger;
	 * unless we are running with the cache off, the load/store
	 * overheads will completely dominate this loop.
	 */
#ifdef BIG_LOOPS
	.rept	16
#else
	.rept	4
#endif
	stmia	r0!, {r3-r8,ip,lr}
	.endr

	subs	r2, r2, #1		/* one pass done; Z set on the last */
	bne	1b

	pop	{r4-r8, pc}		/* restore and return via saved lr */
END(bzero_page)
    181 
    182 #else	/* __XSCALE__ */
    183 
/*
 * XSCALE version of bcopy_page
 *
 * On entry:
 *   r0 - src address
 *   r1 - dest address
 *
 * Requires:
 *   number of bytes per page (PAGE_SIZE) is a multiple of 128
 *
 * Each pass of the loop copies 128 bytes.  Loads and stores are
 * interleaved so that one register pair (r2/r3 or r4/r5) is being filled
 * while the other is written out with strd; the first pair of the next
 * pass is loaded at the tail of the current one.  The offsets in the
 * comments are relative to the source address at the start of the pass.
 */
ENTRY(bcopy_page)
	pld	[r0]
	push	{r4, r5}
	mov	ip, #(PAGE_SIZE >> 7)	/* passes per page, 128 bytes each */
	ldr	r2, [r0], #0x04		/* 0x00 */
	ldr	r3, [r0], #0x04		/* 0x04 */
1:	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldr	r4, [r0], #0x04		/* 0x08 */
	ldr	r5, [r0], #0x04		/* 0x0c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x10 */
	ldr	r3, [r0], #0x04		/* 0x14 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x18 */
	ldr	r5, [r0], #0x04		/* 0x1c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x20 */
	ldr	r3, [r0], #0x04		/* 0x24 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x28 */
	ldr	r5, [r0], #0x04		/* 0x2c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x30 */
	ldr	r3, [r0], #0x04		/* 0x34 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x38 */
	ldr	r5, [r0], #0x04		/* 0x3c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x40 */
	ldr	r3, [r0], #0x04		/* 0x44 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x48 */
	ldr	r5, [r0], #0x04		/* 0x4c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x50 */
	ldr	r3, [r0], #0x04		/* 0x54 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x58 */
	ldr	r5, [r0], #0x04		/* 0x5c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x60 */
	ldr	r3, [r0], #0x04		/* 0x64 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x68 */
	ldr	r5, [r0], #0x04		/* 0x6c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x70 */
	ldr	r3, [r0], #0x04		/* 0x74 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x78 */
	ldr	r5, [r0], #0x04		/* 0x7c */
	strd	r2, r3, [r1], #0x08
	subs	ip, ip, #0x01
	/*
	 * Only start the next pass's first pair if there is a next pass,
	 * so nothing is loaded from beyond the source page.  The strd
	 * below does not alter the flags set by subs.
	 */
	ldrgt	r2, [r0], #0x04		/* 0x80 */
	ldrgt	r3, [r0], #0x04		/* 0x84 */
	strd	r4, r5, [r1], #0x08
	bgt	1b
	pop	{r4, r5}
	RET
END(bcopy_page)
    250 
/*
 * XSCALE version of bzero_page
 *
 * On entry:
 *   r0 - dest address
 *
 * Requires:
 *   number of bytes per page (PAGE_SIZE) is a multiple of 128
 */
ENTRY(bzero_page)
	mov	r1, #PAGE_SIZE		/* r1 = bytes still to clear */
	mov	r2, #0
	mov	r3, #0			/* r2/r3 = 8 bytes of zero for strd */
1:
	/* Sixteen 8-byte stores clear 128 bytes per pass. */
	.rept	16
	strd	r2, r3, [r0], #8
	.endr
	subs	r1, r1, #128
	bne	1b
	RET
END(bzero_page)
    278 #endif	/* __XSCALE__ */
    279