Home | History | Annotate | Line # | Download | only in arm32
bcopy_page.S revision 1.6.2.3
      1 /*	$NetBSD: bcopy_page.S,v 1.6.2.3 2004/09/21 13:13:08 skrll Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1995 Scott Stevens
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. All advertising materials mentioning features or use of this software
     16  *    must display the following acknowledgement:
     17  *	This product includes software developed by Scott Stevens.
     18  * 4. The name of the author may not be used to endorse or promote products
     19  *    derived from this software without specific prior written permission.
     20  *
     21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     31  *
     32  * RiscBSD kernel project
     33  *
     34  * bcopy_page.S
     35  *
     36  * page optimised bcopy and bzero routines
     37  *
     38  * Created      : 08/04/95
     39  */
     40 
     41 #include <machine/asm.h>
     42 
     43 #include "assym.h"
     44 
     45 #ifndef __XSCALE__
     46 
     47 /* #define BIG_LOOPS */
     48 
     49 /*
     50  * bcopy_page(src, dest)
     51  *
     52  * Optimised copy page routine.
     53  *
     54  * On entry:
     55  *   r0 - src address
     56  *   r1 - dest address
     57  *
     58  * Requires:
     59  *   PAGE_SIZE (the number of bytes per page) is a multiple of 512 when
     60  *   BIG_LOOPS is defined, or a multiple of 128 otherwise.
     61  */
     62 
     63 #define	CHUNK_SIZE	32	/* equals the 8 words (32 bytes) the default COPY_CHUNK moves; not referenced elsewhere in this listing */
     64 
     65 #define	PREFETCH_FIRST_CHUNK	/* nothing: expands to no code here */
     66 #define	PREFETCH_NEXT_CHUNK	/* nothing: expands to no code here */
     67 
        /*
         * COPY_CHUNK: load eight words (r3-r8, ip, lr) from the address in r0
         * and store them to the address in r1.  Both pointers are written back,
         * so each advances by 32 bytes.  lr is used as a data register here, so
         * the code that uses this macro must have saved it beforehand.
         * The ifndef guard means an earlier definition replaces this default.
         */
     68 #ifndef COPY_CHUNK
     69 #define	COPY_CHUNK \
     70 	PREFETCH_NEXT_CHUNK ; \
     71 	ldmia	r0!, {r3-r8,ip,lr} ; \
     72 	stmia	r1!, {r3-r8,ip,lr}
     73 #endif /* ! COPY_CHUNK */
     74 
        /*
         * SAVE_REGS pushes r4-r8 and lr.  RESTORE_REGS pops r4-r8 and loads the
         * saved lr value directly into pc, which returns to the caller.
         * Both are defined together, under the SAVE_REGS guard only.
         */
     75 #ifndef SAVE_REGS
     76 #define	SAVE_REGS	stmfd	sp!, {r4-r8, lr}
     77 #define	RESTORE_REGS	ldmfd	sp!, {r4-r8, pc}
     78 #endif
     79 
     80 ENTRY(bcopy_page)
        /*
         * Copies one page from the address in r0 to the address in r1.
         *
         * Register use (with the default macro definitions in this file):
         *   r0            - source pointer, advanced 32 bytes per COPY_CHUNK
         *   r1            - destination pointer, advanced 32 bytes per COPY_CHUNK
         *   r2            - loop iterations remaining
         *   r3-r8, ip, lr - the eight data words moved by each COPY_CHUNK
         *
         * r4-r8 and lr are saved by SAVE_REGS and restored by RESTORE_REGS.
         * r2, r3 and ip are overwritten without being saved.
         */
     81 	PREFETCH_FIRST_CHUNK
     82 	SAVE_REGS
     83 #ifdef BIG_LOOPS
     84 	mov	r2, #(PAGE_SIZE >> 9)	/* 16 chunks = 512 bytes per iteration */
     85 #else
     86 	mov	r2, #(PAGE_SIZE >> 7)	/* 4 chunks = 128 bytes per iteration */
     87 #endif
     88 
     89 1:
     90 	COPY_CHUNK
     91 	COPY_CHUNK
     92 	COPY_CHUNK
     93 	COPY_CHUNK
     94 
     95 #ifdef BIG_LOOPS
     96 	/* There is little point making the loop any larger; unless we are
     97 	   running with the cache off, the load/store overheads will
     98 	   completely dominate this loop.  */
     99 	COPY_CHUNK
    100 	COPY_CHUNK
    101 	COPY_CHUNK
    102 	COPY_CHUNK
    103 
    104 	COPY_CHUNK
    105 	COPY_CHUNK
    106 	COPY_CHUNK
    107 	COPY_CHUNK
    108 
    109 	COPY_CHUNK
    110 	COPY_CHUNK
    111 	COPY_CHUNK
    112 	COPY_CHUNK
    113 #endif
    114 	subs	r2, r2, #1		/* one iteration done; sets Z when none remain */
    115 	bne	1b			/* loop until the count reaches zero */
    116 
    117 	RESTORE_REGS		/* ...and return (the saved lr is popped into pc). */
    118 
    119 /*
    120  * bzero_page(dest)
    121  *
    122  * Optimised zero page routine.
    123  *
    124  * On entry:
    125  *   r0 - dest address
    126  *
    127  * Requires:
    128  *   PAGE_SIZE (the number of bytes per page) is a multiple of 512 when
    129  *   BIG_LOOPS is defined, or a multiple of 128 otherwise.
    130  */
    131 
    132 ENTRY(bzero_page)
        /*
         * Fills one page, starting at the address in r0, with zero bytes.
         *
         * Register use:
         *   r0            - destination pointer, advanced 32 bytes per stmia
         *   r2            - loop iterations remaining
         *   r3-r8, ip, lr - eight registers all holding zero, stored as a group
         *
         * r4-r8 and lr are pushed on entry and restored on exit.  r2, r3 and ip
         * are overwritten without being saved.
         */
    133 	stmfd	sp!, {r4-r8, lr}	/* lr is saved because it is reused as a zero source below */
    134 #ifdef BIG_LOOPS
    135 	mov	r2, #(PAGE_SIZE >> 9)	/* 16 stores = 512 bytes per iteration */
    136 #else
    137 	mov	r2, #(PAGE_SIZE >> 7)	/* 4 stores = 128 bytes per iteration */
    138 #endif
    139 	mov	r3, #0			/* zero all eight registers that stmia will store */
    140 	mov	r4, #0
    141 	mov	r5, #0
    142 	mov	r6, #0
    143 	mov	r7, #0
    144 	mov	r8, #0
    145 	mov	ip, #0
    146 	mov	lr, #0
    147 
    148 1:
    149 	stmia	r0!, {r3-r8,ip,lr}	/* write 32 zero bytes and advance r0 */
    150 	stmia	r0!, {r3-r8,ip,lr}
    151 	stmia	r0!, {r3-r8,ip,lr}
    152 	stmia	r0!, {r3-r8,ip,lr}
    153 
    154 #ifdef BIG_LOOPS
    155 	/* There is little point making the loop any larger; unless we are
    156 	   running with the cache off, the load/store overheads will
    157 	   completely dominate this loop.  */
    158 	stmia	r0!, {r3-r8,ip,lr}
    159 	stmia	r0!, {r3-r8,ip,lr}
    160 	stmia	r0!, {r3-r8,ip,lr}
    161 	stmia	r0!, {r3-r8,ip,lr}
    162 
    163 	stmia	r0!, {r3-r8,ip,lr}
    164 	stmia	r0!, {r3-r8,ip,lr}
    165 	stmia	r0!, {r3-r8,ip,lr}
    166 	stmia	r0!, {r3-r8,ip,lr}
    167 
    168 	stmia	r0!, {r3-r8,ip,lr}
    169 	stmia	r0!, {r3-r8,ip,lr}
    170 	stmia	r0!, {r3-r8,ip,lr}
    171 	stmia	r0!, {r3-r8,ip,lr}
    172 
    173 #endif
    174 
    175 	subs	r2, r2, #1		/* one iteration done; sets Z when none remain */
    176 	bne	1b			/* loop until the count reaches zero */
    177 
    178 	ldmfd	sp!, {r4-r8, pc}	/* restore r4-r8; loading the saved lr into pc returns */
    179 
    180 #else	/* __XSCALE__ */
    181 
    182 /*
    183  * XSCALE version of bcopy_page
    184  */
    185 ENTRY(bcopy_page)
    186 	pld	[r0]
    187 	stmfd	sp!, {r4, r5}
    188 	mov	ip, #32
    189 	ldr	r2, [r0], #0x04		/* 0x00 */
    190 	ldr	r3, [r0], #0x04		/* 0x04 */
    191 1:	pld	[r0, #0x18]		/* Prefetch 0x20 */
    192 	ldr	r4, [r0], #0x04		/* 0x08 */
    193 	ldr	r5, [r0], #0x04		/* 0x0c */
    194 	strd	r2, [r1], #0x08
    195 	ldr	r2, [r0], #0x04		/* 0x10 */
    196 	ldr	r3, [r0], #0x04		/* 0x14 */
    197 	strd	r4, [r1], #0x08
    198 	ldr	r4, [r0], #0x04		/* 0x18 */
    199 	ldr	r5, [r0], #0x04		/* 0x1c */
    200 	strd	r2, [r1], #0x08
    201 	ldr	r2, [r0], #0x04		/* 0x20 */
    202 	ldr	r3, [r0], #0x04		/* 0x24 */
    203 	pld	[r0, #0x18]		/* Prefetch 0x40 */
    204 	strd	r4, [r1], #0x08
    205 	ldr	r4, [r0], #0x04		/* 0x28 */
    206 	ldr	r5, [r0], #0x04		/* 0x2c */
    207 	strd	r2, [r1], #0x08
    208 	ldr	r2, [r0], #0x04		/* 0x30 */
    209 	ldr	r3, [r0], #0x04		/* 0x34 */
    210 	strd	r4, [r1], #0x08
    211 	ldr	r4, [r0], #0x04		/* 0x38 */
    212 	ldr	r5, [r0], #0x04		/* 0x3c */
    213 	strd	r2, [r1], #0x08
    214 	ldr	r2, [r0], #0x04		/* 0x40 */
    215 	ldr	r3, [r0], #0x04		/* 0x44 */
    216 	pld	[r0, #0x18]		/* Prefetch 0x60 */
    217 	strd	r4, [r1], #0x08
    218 	ldr	r4, [r0], #0x04		/* 0x48 */
    219 	ldr	r5, [r0], #0x04		/* 0x4c */
    220 	strd	r2, [r1], #0x08
    221 	ldr	r2, [r0], #0x04		/* 0x50 */
    222 	ldr	r3, [r0], #0x04		/* 0x54 */
    223 	strd	r4, [r1], #0x08
    224 	ldr	r4, [r0], #0x04		/* 0x58 */
    225 	ldr	r5, [r0], #0x04		/* 0x5c */
    226 	strd	r2, [r1], #0x08
    227 	ldr	r2, [r0], #0x04		/* 0x60 */
    228 	ldr	r3, [r0], #0x04		/* 0x64 */
    229 	pld	[r0, #0x18]		/* Prefetch 0x80 */
    230 	strd	r4, [r1], #0x08
    231 	ldr	r4, [r0], #0x04		/* 0x68 */
    232 	ldr	r5, [r0], #0x04		/* 0x6c */
    233 	strd	r2, [r1], #0x08
    234 	ldr	r2, [r0], #0x04		/* 0x70 */
    235 	ldr	r3, [r0], #0x04		/* 0x74 */
    236 	strd	r4, [r1], #0x08
    237 	ldr	r4, [r0], #0x04		/* 0x78 */
    238 	ldr	r5, [r0], #0x04		/* 0x7c */
    239 	strd	r2, [r1], #0x08
    240 	subs	ip, ip, #0x01
    241 	ldrgt	r2, [r0], #0x04		/* 0x80 */
    242 	ldrgt	r3, [r0], #0x04		/* 0x84 */
    243 	strd	r4, [r1], #0x08
    244 	bgt	1b
    245 	ldmfd	sp!, {r4, r5}
    246 	mov	pc, lr
    247 
    248 /*
    249  * XSCALE version of bzero_page
         *
         * bzero_page(dest)
         *
         * On entry:
         *   r0 - dest address
         *
         * Requires:
         *   PAGE_SIZE is a multiple of 128: the loop subtracts 128 per pass and
         *   only exits when the remaining count is exactly zero.
         *
         * Register use:
         *   r0     - destination pointer, advanced 8 bytes by every strd
         *   r1     - bytes remaining, starting at PAGE_SIZE
         *   r2/r3  - zero pair written by every strd
         *
         * Nothing is pushed on the stack; r0-r3 are overwritten.
         * The 32/64/96/128 comments presumably mark the running byte count
         * reached by each group of four stores within one pass.
         * NOTE(review): strd presumably needs an 8-byte-aligned address - a
         * page-aligned dest would satisfy that; confirm against the caller.
         */
    250  */
    251 ENTRY(bzero_page)
    252 	mov	r1, #PAGE_SIZE		/* bytes left to clear */
    253 	mov	r2, #0			/* r2 and r3 form the zero pair stored below */
    254 	mov	r3, #0
    255 1:	strd	r2, [r0], #8		/* 32 */
    256 	strd	r2, [r0], #8
    257 	strd	r2, [r0], #8
    258 	strd	r2, [r0], #8
    259 	strd	r2, [r0], #8		/* 64 */
    260 	strd	r2, [r0], #8
    261 	strd	r2, [r0], #8
    262 	strd	r2, [r0], #8
    263 	strd	r2, [r0], #8		/* 96 */
    264 	strd	r2, [r0], #8
    265 	strd	r2, [r0], #8
    266 	strd	r2, [r0], #8
    267 	strd	r2, [r0], #8		/* 128 */
    268 	strd	r2, [r0], #8
    269 	strd	r2, [r0], #8
    270 	strd	r2, [r0], #8
    271 	subs	r1, r1, #128		/* 16 stores x 8 bytes were written this pass */
    272 	bne	1b			/* loop until no bytes remain */
    273 	mov	pc, lr			/* return */
    274 #endif	/* __XSCALE__ */
    275