Home | History | Annotate | Line # | Download | only in arm32
bcopy_page.S revision 1.6.2.3
      1  1.6.2.3    skrll /*	$NetBSD: bcopy_page.S,v 1.6.2.3 2004/09/21 13:13:08 skrll Exp $	*/
      2      1.1     matt 
      3      1.1     matt /*
      4      1.1     matt  * Copyright (c) 1995 Scott Stevens
      5      1.1     matt  * All rights reserved.
      6      1.1     matt  *
      7      1.1     matt  * Redistribution and use in source and binary forms, with or without
      8      1.1     matt  * modification, are permitted provided that the following conditions
      9      1.1     matt  * are met:
     10      1.1     matt  * 1. Redistributions of source code must retain the above copyright
     11      1.1     matt  *    notice, this list of conditions and the following disclaimer.
     12      1.1     matt  * 2. Redistributions in binary form must reproduce the above copyright
     13      1.1     matt  *    notice, this list of conditions and the following disclaimer in the
     14      1.1     matt  *    documentation and/or other materials provided with the distribution.
     15      1.1     matt  * 3. All advertising materials mentioning features or use of this software
     16      1.1     matt  *    must display the following acknowledgement:
     17      1.1     matt  *	This product includes software developed by Scott Stevens.
     18      1.1     matt  * 4. The name of the author may not be used to endorse or promote products
     19      1.1     matt  *    derived from this software without specific prior written permission.
     20      1.1     matt  *
     21      1.1     matt  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     22      1.1     matt  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     23      1.1     matt  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     24      1.1     matt  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     25      1.1     matt  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     26      1.1     matt  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     27      1.1     matt  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     28      1.1     matt  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     29      1.1     matt  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     30      1.1     matt  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     31      1.1     matt  *
     32      1.1     matt  * RiscBSD kernel project
     33      1.1     matt  *
     34      1.1     matt  * bcopy_page.S
     35      1.1     matt  *
     36      1.1     matt  * page optimised bcopy and bzero routines
     37      1.1     matt  *
     38      1.1     matt  * Created      : 08/04/95
     39      1.1     matt  */
     40      1.1     matt 
     41      1.1     matt #include <machine/asm.h>
     42      1.1     matt 
     43      1.6  thorpej #include "assym.h"
     44      1.6  thorpej 
     45  1.6.2.1    skrll #ifndef __XSCALE__
     46  1.6.2.1    skrll 
     47      1.2    chris /* #define BIG_LOOPS */
     48      1.2    chris 
     49      1.1     matt /*
     50      1.1     matt  * bcopy_page(src, dest)
     51      1.1     matt  *
     52      1.1     matt  * Optimised copy page routine.
     53      1.1     matt  *
     54      1.1     matt  * On entry:
     55      1.1     matt  *   r0 - src address
     56      1.1     matt  *   r1 - dest address
     57      1.1     matt  *
     58      1.1     matt  * Requires:
     59      1.6  thorpej  *   number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS), 128
     60      1.2    chris  *   otherwise.
     61      1.1     matt  */
     62      1.1     matt 
/*
 * Building blocks for the generic bcopy_page loop.
 *
 * CHUNK_SIZE is the number of bytes one COPY_CHUNK moves: eight 32-bit
 * registers per ldmia/stmia pair.
 */
#define	CHUNK_SIZE		32

/* Prefetch hooks; both expand to nothing in this generic version. */
#define	PREFETCH_FIRST_CHUNK
#define	PREFETCH_NEXT_CHUNK

/*
 * COPY_CHUNK: load eight words from [r0] and store them to [r1],
 * post-incrementing both pointers.  Uses r3-r8, ip and lr as scratch.
 * May be pre-defined before this point to substitute another sequence.
 */
#ifndef COPY_CHUNK
#define	COPY_CHUNK						\
	PREFETCH_NEXT_CHUNK				;	\
	ldmia	r0!, {r3-r8,ip,lr}			;	\
	stmia	r1!, {r3-r8,ip,lr}
#endif	/* ! COPY_CHUNK */

/*
 * SAVE_REGS / RESTORE_REGS: push r4-r8 and lr on entry; on exit pop
 * r4-r8 and load the saved lr straight into pc, which also returns.
 * Both may be pre-defined together before this point.
 */
#ifndef SAVE_REGS
#define	SAVE_REGS		stmfd	sp!, {r4-r8, lr}
#define	RESTORE_REGS		ldmfd	sp!, {r4-r8, pc}
#endif	/* ! SAVE_REGS */
     79      1.3  thorpej 
/*
 * bcopy_page: copy one page from [r0] to [r1].
 *
 * r2 holds the number of loop passes still to run.  Each pass moves
 * 4 chunks (16 with BIG_LOOPS) via COPY_CHUNK, so with the default
 * 32-byte chunk PAGE_SIZE must be a multiple of 128 (512 with
 * BIG_LOOPS) for the count to reach exactly zero.
 */
ENTRY(bcopy_page)
	PREFETCH_FIRST_CHUNK
	SAVE_REGS			/* default COPY_CHUNK scribbles on r4-r8, lr */
#ifdef BIG_LOOPS
	mov	r2, #(PAGE_SIZE >> 9)	/* passes of 512 bytes */
#else
	mov	r2, #(PAGE_SIZE >> 7)	/* passes of 128 bytes */
#endif

.Lbcopy_page_loop:
	.rept	4
	COPY_CHUNK
	.endr

#ifdef BIG_LOOPS
	/*
	 * There is little point making the loop any larger; unless we are
	 * running with the cache off, the load/store overheads will
	 * completely dominate this loop.
	 */
	.rept	12
	COPY_CHUNK
	.endr
#endif
	subs	r2, r2, #1		/* one pass done; Z set on the last */
	bne	.Lbcopy_page_loop

	RESTORE_REGS			/* pops saved registers and returns */
    118      1.1     matt 
/*
 * bzero_page(dest)
 *
 * Fill one page with zeroes.
 *
 * On entry:
 *   r0 - dest address (advanced by every store as the routine runs)
 *
 * Requires:
 *   PAGE_SIZE is a multiple of 512 when BIG_LOOPS is defined, and a
 *   multiple of 128 otherwise.
 *
 * Register use:
 *   r2              - loop passes remaining
 *   r3-r8, ip, lr   - all zero; written out eight at a time, i.e.
 *                     32 bytes per stmia
 */

ENTRY(bzero_page)
	stmfd	sp!, {r4-r8, lr}	/* r4-r8 and lr are overwritten below */

	/* Zero the eight registers that feed every store-multiple. */
	mov	lr, #0
	mov	ip, #0
	mov	r8, #0
	mov	r7, #0
	mov	r6, #0
	mov	r5, #0
	mov	r4, #0
	mov	r3, #0

#ifdef BIG_LOOPS
	mov	r2, #(PAGE_SIZE >> 9)	/* passes of 512 bytes */
#else
	mov	r2, #(PAGE_SIZE >> 7)	/* passes of 128 bytes */
#endif

.Lbzero_page_loop:
	.rept	4
	stmia	r0!, {r3-r8,ip,lr}
	.endr

#ifdef BIG_LOOPS
	/*
	 * There is little point making the loop any larger; unless we are
	 * running with the cache off, the load/store overheads will
	 * completely dominate this loop.
	 */
	.rept	12
	stmia	r0!, {r3-r8,ip,lr}
	.endr
#endif

	subs	r2, r2, #1		/* one pass done; Z set on the last */
	bne	.Lbzero_page_loop

	ldmfd	sp!, {r4-r8, pc}	/* restore r4-r8; saved lr -> pc returns */
    179  1.6.2.1    skrll 
    180  1.6.2.1    skrll #else	/* __XSCALE__ */
    181  1.6.2.1    skrll 
/*
 * XSCALE version of bcopy_page
 *
 * On entry:
 *   r0 - src address
 *   r1 - dest address
 *
 * Requires:
 *   PAGE_SIZE is a non-zero multiple of 128; each loop pass moves
 *   0x80 bytes.
 *
 * Register use:
 *   ip      - loop passes remaining
 *   r2/r3   - even/odd pair written by "strd r2"
 *   r4/r5   - even/odd pair written by "strd r4" (saved on the stack)
 *
 * The loop is software pipelined: while one register pair is being
 * stored, the next eight source bytes are already being loaded into the
 * other pair.  The first pair is loaded before the loop, and the pair
 * for the next pass is loaded conditionally at the bottom of the loop.
 * The offsets in the trailing comments are relative to the start of the
 * current 0x80-byte block of the source.
 *
 * Do not reorder instructions here: every strd must follow the two
 * loads that fill its pair and precede the loads that refill it.
 *
 * NOTE(review): on the final pass the last pld targets the first
 * address beyond the source page.  This relies on PLD being a hint that
 * does not raise data aborts (see the ARM Architecture Reference Manual).
 */
ENTRY(bcopy_page)
	pld	[r0]
	stmfd	sp!, {r4, r5}
	/* Derive the pass count from PAGE_SIZE rather than assuming 4KB. */
	mov	ip, #(PAGE_SIZE >> 7)
	ldr	r2, [r0], #0x04		/* 0x00 */
	ldr	r3, [r0], #0x04		/* 0x04 */
1:	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldr	r4, [r0], #0x04		/* 0x08 */
	ldr	r5, [r0], #0x04		/* 0x0c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x10 */
	ldr	r3, [r0], #0x04		/* 0x14 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x18 */
	ldr	r5, [r0], #0x04		/* 0x1c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x20 */
	ldr	r3, [r0], #0x04		/* 0x24 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x28 */
	ldr	r5, [r0], #0x04		/* 0x2c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x30 */
	ldr	r3, [r0], #0x04		/* 0x34 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x38 */
	ldr	r5, [r0], #0x04		/* 0x3c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x40 */
	ldr	r3, [r0], #0x04		/* 0x44 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x48 */
	ldr	r5, [r0], #0x04		/* 0x4c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x50 */
	ldr	r3, [r0], #0x04		/* 0x54 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x58 */
	ldr	r5, [r0], #0x04		/* 0x5c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x60 */
	ldr	r3, [r0], #0x04		/* 0x64 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x68 */
	ldr	r5, [r0], #0x04		/* 0x6c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x70 */
	ldr	r3, [r0], #0x04		/* 0x74 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x78 */
	ldr	r5, [r0], #0x04		/* 0x7c */
	strd	r2, [r1], #0x08
	subs	ip, ip, #0x01
	/* Only start the next block's first pair if another pass follows. */
	ldrgt	r2, [r0], #0x04		/* 0x80 */
	ldrgt	r3, [r0], #0x04		/* 0x84 */
	strd	r4, [r1], #0x08		/* strd leaves the flags alone for bgt */
	bgt	1b
	ldmfd	sp!, {r4, r5}
	mov	pc, lr
    247  1.6.2.1    skrll 
/*
 * XSCALE version of bzero_page
 *
 * On entry:
 *   r0 - dest address (advanced by every store)
 *
 * Requires:
 *   PAGE_SIZE is a non-zero multiple of 128; the loop only exits when
 *   the byte count reaches exactly zero.
 *
 * Register use:
 *   r1      - bytes still to clear
 *   r2/r3   - zeroed even/odd pair written by each strd
 *
 * Nothing is pushed on the stack; only r0-r3 are modified.
 */
ENTRY(bzero_page)
	mov	r3, #0
	mov	r2, #0
	mov	r1, #PAGE_SIZE
.Lxscale_bzero_page_loop:
	/* 16 doubleword stores = 128 bytes per pass. */
	.rept	16
	strd	r2, [r0], #8
	.endr
	subs	r1, r1, #128
	bne	.Lxscale_bzero_page_loop
	mov	pc, lr
    274  1.6.2.1    skrll #endif	/* __XSCALE__ */
    275