Home | History | Annotate | Line # | Download | only in arm32
bcopy_page.S revision 1.8.124.1
      1  1.8.124.1      tls /*	$NetBSD: bcopy_page.S,v 1.8.124.1 2014/08/20 00:02:45 tls Exp $	*/
      2        1.1     matt 
      3        1.1     matt /*
      4        1.1     matt  * Copyright (c) 1995 Scott Stevens
      5        1.1     matt  * All rights reserved.
      6        1.1     matt  *
      7        1.1     matt  * Redistribution and use in source and binary forms, with or without
      8        1.1     matt  * modification, are permitted provided that the following conditions
      9        1.1     matt  * are met:
     10        1.1     matt  * 1. Redistributions of source code must retain the above copyright
     11        1.1     matt  *    notice, this list of conditions and the following disclaimer.
     12        1.1     matt  * 2. Redistributions in binary form must reproduce the above copyright
     13        1.1     matt  *    notice, this list of conditions and the following disclaimer in the
     14        1.1     matt  *    documentation and/or other materials provided with the distribution.
     15        1.1     matt  * 3. All advertising materials mentioning features or use of this software
     16        1.1     matt  *    must display the following acknowledgement:
     17        1.1     matt  *	This product includes software developed by Scott Stevens.
     18        1.1     matt  * 4. The name of the author may not be used to endorse or promote products
     19        1.1     matt  *    derived from this software without specific prior written permission.
     20        1.1     matt  *
     21        1.1     matt  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     22        1.1     matt  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     23        1.1     matt  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     24        1.1     matt  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     25        1.1     matt  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     26        1.1     matt  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     27        1.1     matt  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     28        1.1     matt  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     29        1.1     matt  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     30        1.1     matt  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     31        1.1     matt  *
     32        1.1     matt  * RiscBSD kernel project
     33        1.1     matt  *
     34        1.1     matt  * bcopy_page.S
     35        1.1     matt  *
     36        1.1     matt  * page optimised bcopy and bzero routines
     37        1.1     matt  *
     38        1.1     matt  * Created      : 08/04/95
     39        1.1     matt  */
     40        1.1     matt 
     41        1.1     matt #include <machine/asm.h>
     42        1.1     matt 
     43        1.6  thorpej #include "assym.h"
     44        1.6  thorpej 
     45        1.7      scw #ifndef __XSCALE__
     46        1.7      scw 
     47        1.2    chris /* #define BIG_LOOPS */
     48        1.2    chris 
     49        1.1     matt /*
     50        1.1     matt  * bcopy_page(src, dest)
     51        1.1     matt  *
     52        1.1     matt  * Optimised copy page routine.
     53        1.1     matt  *
     54        1.1     matt  * On entry:
     55        1.1     matt  *   r0 - src address
     56        1.1     matt  *   r1 - dest address
     57        1.1     matt  *
     58        1.1     matt  * Requires:
     59        1.6  thorpej  *   number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS), 128
     60        1.2    chris  *   otherwise.
     61        1.1     matt  */
     62        1.1     matt 
     63        1.3  thorpej #define	CHUNK_SIZE	32
     64        1.3  thorpej 
     65        1.3  thorpej #define	PREFETCH_FIRST_CHUNK	/* nothing */
     66        1.3  thorpej #define	PREFETCH_NEXT_CHUNK	/* nothing */
     67        1.3  thorpej 
     68        1.3  thorpej #ifndef COPY_CHUNK
     69        1.3  thorpej #define	COPY_CHUNK \
     70        1.3  thorpej 	PREFETCH_NEXT_CHUNK ; \
     71        1.3  thorpej 	ldmia	r0!, {r3-r8,ip,lr} ; \
     72        1.3  thorpej 	stmia	r1!, {r3-r8,ip,lr}
     73        1.3  thorpej #endif /* ! COPY_CHUNK */
     74        1.3  thorpej 
     75        1.3  thorpej #ifndef SAVE_REGS
     76  1.8.124.1      tls #define	SAVE_REGS	push	{r4-r8, lr}
     77  1.8.124.1      tls #define	RESTORE_REGS	pop	{r4-r8, pc}
     78        1.3  thorpej #endif
     79        1.3  thorpej 
     80        1.1     matt ENTRY(bcopy_page)
     81        1.3  thorpej 	PREFETCH_FIRST_CHUNK
     82        1.3  thorpej 	SAVE_REGS
     83        1.2    chris #ifdef BIG_LOOPS
     84        1.6  thorpej 	mov	r2, #(PAGE_SIZE >> 9)
     85        1.2    chris #else
     86        1.6  thorpej 	mov	r2, #(PAGE_SIZE >> 7)
     87        1.2    chris #endif
     88        1.1     matt 
     89        1.5  thorpej 1:
     90        1.3  thorpej 	COPY_CHUNK
     91        1.3  thorpej 	COPY_CHUNK
     92        1.3  thorpej 	COPY_CHUNK
     93        1.3  thorpej 	COPY_CHUNK
     94        1.2    chris 
     95        1.2    chris #ifdef BIG_LOOPS
     96        1.2    chris 	/* There is little point making the loop any larger; unless we are
     97        1.2    chris 	   running with the cache off, the load/store overheads will
     98        1.2    chris 	   completely dominate this loop.  */
     99        1.3  thorpej 	COPY_CHUNK
    100        1.3  thorpej 	COPY_CHUNK
    101        1.3  thorpej 	COPY_CHUNK
    102        1.3  thorpej 	COPY_CHUNK
    103        1.3  thorpej 
    104        1.3  thorpej 	COPY_CHUNK
    105        1.3  thorpej 	COPY_CHUNK
    106        1.3  thorpej 	COPY_CHUNK
    107        1.3  thorpej 	COPY_CHUNK
    108        1.3  thorpej 
    109        1.3  thorpej 	COPY_CHUNK
    110        1.3  thorpej 	COPY_CHUNK
    111        1.3  thorpej 	COPY_CHUNK
    112        1.3  thorpej 	COPY_CHUNK
    113        1.2    chris #endif
    114        1.1     matt 	subs	r2, r2, #1
    115        1.5  thorpej 	bne	1b
    116        1.1     matt 
    117        1.3  thorpej 	RESTORE_REGS		/* ...and return. */
    118  1.8.124.1      tls END(bcopy_page)
    119        1.1     matt 
    120        1.1     matt /*
    121        1.1     matt  * bzero_page(dest)
    122        1.1     matt  *
    123        1.1     matt  * Optimised zero page routine.
    124        1.1     matt  *
    125        1.1     matt  * On entry:
    126        1.1     matt  *   r0 - dest address
    127        1.1     matt  *
    128        1.1     matt  * Requires:
    129        1.6  thorpej  *   number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS), 128
    130        1.2    chris  *   otherwise
    131        1.1     matt  */
    132        1.1     matt 
    133        1.1     matt ENTRY(bzero_page)
    134  1.8.124.1      tls 	push	{r4-r8, lr}
    135        1.2    chris #ifdef BIG_LOOPS
    136        1.6  thorpej 	mov	r2, #(PAGE_SIZE >> 9)
    137        1.2    chris #else
    138        1.6  thorpej 	mov	r2, #(PAGE_SIZE >> 7)
    139        1.2    chris #endif
    140        1.1     matt 	mov	r3, #0
    141        1.1     matt 	mov	r4, #0
    142        1.1     matt 	mov	r5, #0
    143        1.1     matt 	mov	r6, #0
    144        1.1     matt 	mov	r7, #0
    145        1.1     matt 	mov	r8, #0
    146        1.2    chris 	mov	ip, #0
    147        1.2    chris 	mov	lr, #0
    148        1.1     matt 
    149        1.5  thorpej 1:
    150        1.2    chris 	stmia	r0!, {r3-r8,ip,lr}
    151        1.2    chris 	stmia	r0!, {r3-r8,ip,lr}
    152        1.2    chris 	stmia	r0!, {r3-r8,ip,lr}
    153        1.2    chris 	stmia	r0!, {r3-r8,ip,lr}
    154        1.2    chris 
    155        1.2    chris #ifdef BIG_LOOPS
    156        1.2    chris 	/* There is little point making the loop any larger; unless we are
    157        1.2    chris 	   running with the cache off, the load/store overheads will
    158        1.2    chris 	   completely dominate this loop.  */
    159        1.2    chris 	stmia	r0!, {r3-r8,ip,lr}
    160        1.2    chris 	stmia	r0!, {r3-r8,ip,lr}
    161        1.2    chris 	stmia	r0!, {r3-r8,ip,lr}
    162        1.2    chris 	stmia	r0!, {r3-r8,ip,lr}
    163        1.2    chris 
    164        1.2    chris 	stmia	r0!, {r3-r8,ip,lr}
    165        1.2    chris 	stmia	r0!, {r3-r8,ip,lr}
    166        1.2    chris 	stmia	r0!, {r3-r8,ip,lr}
    167        1.2    chris 	stmia	r0!, {r3-r8,ip,lr}
    168        1.2    chris 
    169        1.2    chris 	stmia	r0!, {r3-r8,ip,lr}
    170        1.2    chris 	stmia	r0!, {r3-r8,ip,lr}
    171        1.2    chris 	stmia	r0!, {r3-r8,ip,lr}
    172        1.2    chris 	stmia	r0!, {r3-r8,ip,lr}
    173        1.2    chris 
    174        1.2    chris #endif
    175        1.1     matt 
    176        1.1     matt 	subs	r2, r2, #1
    177        1.5  thorpej 	bne	1b
    178        1.1     matt 
    179  1.8.124.1      tls 	pop	{r4-r8, pc}
    180  1.8.124.1      tls END(bzero_page)
    181        1.7      scw 
    182        1.7      scw #else	/* __XSCALE__ */
    183        1.7      scw 
    184        1.7      scw /*
    185        1.7      scw  * XSCALE version of bcopy_page
    186        1.7      scw  */
    187        1.7      scw ENTRY(bcopy_page)
    188        1.7      scw 	pld	[r0]
    189  1.8.124.1      tls 	push	{r4, r5}
    190        1.7      scw 	mov	ip, #32
    191        1.7      scw 	ldr	r2, [r0], #0x04		/* 0x00 */
    192        1.7      scw 	ldr	r3, [r0], #0x04		/* 0x04 */
    193        1.7      scw 1:	pld	[r0, #0x18]		/* Prefetch 0x20 */
    194        1.7      scw 	ldr	r4, [r0], #0x04		/* 0x08 */
    195        1.7      scw 	ldr	r5, [r0], #0x04		/* 0x0c */
    196  1.8.124.1      tls 	strd	r2, r3, [r1], #0x08
    197        1.7      scw 	ldr	r2, [r0], #0x04		/* 0x10 */
    198        1.7      scw 	ldr	r3, [r0], #0x04		/* 0x14 */
    199  1.8.124.1      tls 	strd	r4, r5, [r1], #0x08
    200        1.7      scw 	ldr	r4, [r0], #0x04		/* 0x18 */
    201        1.7      scw 	ldr	r5, [r0], #0x04		/* 0x1c */
    202  1.8.124.1      tls 	strd	r2, r3, [r1], #0x08
    203        1.7      scw 	ldr	r2, [r0], #0x04		/* 0x20 */
    204        1.7      scw 	ldr	r3, [r0], #0x04		/* 0x24 */
    205        1.7      scw 	pld	[r0, #0x18]		/* Prefetch 0x40 */
    206  1.8.124.1      tls 	strd	r4, r5, [r1], #0x08
    207        1.7      scw 	ldr	r4, [r0], #0x04		/* 0x28 */
    208        1.7      scw 	ldr	r5, [r0], #0x04		/* 0x2c */
    209  1.8.124.1      tls 	strd	r2, r3, [r1], #0x08
    210        1.7      scw 	ldr	r2, [r0], #0x04		/* 0x30 */
    211        1.7      scw 	ldr	r3, [r0], #0x04		/* 0x34 */
    212  1.8.124.1      tls 	strd	r4, r5, [r1], #0x08
    213        1.7      scw 	ldr	r4, [r0], #0x04		/* 0x38 */
    214        1.7      scw 	ldr	r5, [r0], #0x04		/* 0x3c */
    215  1.8.124.1      tls 	strd	r2, r3, [r1], #0x08
    216        1.7      scw 	ldr	r2, [r0], #0x04		/* 0x40 */
    217        1.7      scw 	ldr	r3, [r0], #0x04		/* 0x44 */
    218        1.7      scw 	pld	[r0, #0x18]		/* Prefetch 0x60 */
    219  1.8.124.1      tls 	strd	r4, r5, [r1], #0x08
    220        1.7      scw 	ldr	r4, [r0], #0x04		/* 0x48 */
    221        1.7      scw 	ldr	r5, [r0], #0x04		/* 0x4c */
    222  1.8.124.1      tls 	strd	r2, r3, [r1], #0x08
    223        1.7      scw 	ldr	r2, [r0], #0x04		/* 0x50 */
    224        1.7      scw 	ldr	r3, [r0], #0x04		/* 0x54 */
    225  1.8.124.1      tls 	strd	r4, r5, [r1], #0x08
    226        1.7      scw 	ldr	r4, [r0], #0x04		/* 0x58 */
    227        1.7      scw 	ldr	r5, [r0], #0x04		/* 0x5c */
    228  1.8.124.1      tls 	strd	r2, r3, [r1], #0x08
    229        1.7      scw 	ldr	r2, [r0], #0x04		/* 0x60 */
    230        1.7      scw 	ldr	r3, [r0], #0x04		/* 0x64 */
    231        1.7      scw 	pld	[r0, #0x18]		/* Prefetch 0x80 */
    232  1.8.124.1      tls 	strd	r4, r5, [r1], #0x08
    233        1.7      scw 	ldr	r4, [r0], #0x04		/* 0x68 */
    234        1.7      scw 	ldr	r5, [r0], #0x04		/* 0x6c */
    235  1.8.124.1      tls 	strd	r2, r3, [r1], #0x08
    236        1.7      scw 	ldr	r2, [r0], #0x04		/* 0x70 */
    237        1.7      scw 	ldr	r3, [r0], #0x04		/* 0x74 */
    238  1.8.124.1      tls 	strd	r4, r5, [r1], #0x08
    239        1.7      scw 	ldr	r4, [r0], #0x04		/* 0x78 */
    240        1.7      scw 	ldr	r5, [r0], #0x04		/* 0x7c */
    241  1.8.124.1      tls 	strd	r2, r3, [r1], #0x08
    242        1.7      scw 	subs	ip, ip, #0x01
    243        1.7      scw 	ldrgt	r2, [r0], #0x04		/* 0x80 */
    244        1.7      scw 	ldrgt	r3, [r0], #0x04		/* 0x84 */
    245  1.8.124.1      tls 	strd	r4, r5, [r1], #0x08
    246        1.7      scw 	bgt	1b
    247  1.8.124.1      tls 	pop	{r4, r5}
    248  1.8.124.1      tls 	RET
    249  1.8.124.1      tls END(bcopy_page)
    250        1.7      scw 
    251        1.7      scw /*
    252        1.7      scw  * XSCALE version of bzero_page
    253        1.7      scw  */
    254        1.7      scw ENTRY(bzero_page)
    255        1.7      scw 	mov	r1, #PAGE_SIZE
    256        1.7      scw 	mov	r2, #0
    257        1.7      scw 	mov	r3, #0
    258  1.8.124.1      tls 1:	strd	r2, r3, [r0], #8	/* 32 */
    259  1.8.124.1      tls 	strd	r2, r3, [r0], #8
    260  1.8.124.1      tls 	strd	r2, r3, [r0], #8
    261  1.8.124.1      tls 	strd	r2, r3, [r0], #8
    262  1.8.124.1      tls 	strd	r2, r3, [r0], #8	/* 64 */
    263  1.8.124.1      tls 	strd	r2, r3, [r0], #8
    264  1.8.124.1      tls 	strd	r2, r3, [r0], #8
    265  1.8.124.1      tls 	strd	r2, r3, [r0], #8
    266  1.8.124.1      tls 	strd	r2, r3, [r0], #8	/* 96 */
    267  1.8.124.1      tls 	strd	r2, r3, [r0], #8
    268  1.8.124.1      tls 	strd	r2, r3, [r0], #8
    269  1.8.124.1      tls 	strd	r2, r3, [r0], #8
    270  1.8.124.1      tls 	strd	r2, r3, [r0], #8	/* 128 */
    271  1.8.124.1      tls 	strd	r2, r3, [r0], #8
    272  1.8.124.1      tls 	strd	r2, r3, [r0], #8
    273  1.8.124.1      tls 	strd	r2, r3, [r0], #8
    274        1.7      scw 	subs	r1, r1, #128
    275        1.7      scw 	bne	1b
    276  1.8.124.1      tls 	RET
    277  1.8.124.1      tls END(bzero_page)
    278        1.7      scw #endif	/* __XSCALE__ */
    279