Home | History | Annotate | Line # | Download | only in arm32
      1  1.10    joerg /*	$NetBSD: bcopy_page.S,v 1.10 2013/12/17 01:27:21 joerg Exp $	*/
      2   1.1     matt 
      3   1.1     matt /*
      4   1.1     matt  * Copyright (c) 1995 Scott Stevens
      5   1.1     matt  * All rights reserved.
      6   1.1     matt  *
      7   1.1     matt  * Redistribution and use in source and binary forms, with or without
      8   1.1     matt  * modification, are permitted provided that the following conditions
      9   1.1     matt  * are met:
     10   1.1     matt  * 1. Redistributions of source code must retain the above copyright
     11   1.1     matt  *    notice, this list of conditions and the following disclaimer.
     12   1.1     matt  * 2. Redistributions in binary form must reproduce the above copyright
     13   1.1     matt  *    notice, this list of conditions and the following disclaimer in the
     14   1.1     matt  *    documentation and/or other materials provided with the distribution.
     15   1.1     matt  * 3. All advertising materials mentioning features or use of this software
     16   1.1     matt  *    must display the following acknowledgement:
     17   1.1     matt  *	This product includes software developed by Scott Stevens.
     18   1.1     matt  * 4. The name of the author may not be used to endorse or promote products
     19   1.1     matt  *    derived from this software without specific prior written permission.
     20   1.1     matt  *
     21   1.1     matt  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     22   1.1     matt  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     23   1.1     matt  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     24   1.1     matt  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     25   1.1     matt  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     26   1.1     matt  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     27   1.1     matt  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     28   1.1     matt  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     29   1.1     matt  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     30   1.1     matt  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     31   1.1     matt  *
     32   1.1     matt  * RiscBSD kernel project
     33   1.1     matt  *
     34   1.1     matt  * bcopy_page.S
     35   1.1     matt  *
     36   1.1     matt  * page optimised bcopy and bzero routines
     37   1.1     matt  *
     38   1.1     matt  * Created      : 08/04/95
     39   1.1     matt  */
     40   1.1     matt 
     41   1.1     matt #include <machine/asm.h>
     42   1.1     matt 
     43   1.6  thorpej #include "assym.h"
     44   1.6  thorpej 
     45   1.7      scw #ifndef __XSCALE__
     46   1.7      scw 
     47   1.2    chris /* #define BIG_LOOPS */
     48   1.2    chris 
/*
 * bcopy_page(src, dest)
 *
 * Copy one page from src to dest with load/store-multiple bursts.
 *
 * Arguments:
 *   r0 - src address
 *   r1 - dest address
 *
 * Register use:
 *   r2            - loop iterations still to run
 *   r3-r8, ip, lr - the eight words moved by each COPY_CHUNK
 *   r0 and r1 are post-incremented past the data as it is copied.
 *
 * Constraint:
 *   PAGE_SIZE must be a multiple of the number of bytes moved per loop
 *   iteration: 512 when BIG_LOOPS is defined, 128 otherwise.
 */

/* Bytes moved by one COPY_CHUNK: eight 32-bit registers. */
#define	CHUNK_SIZE	32

/* Prefetch hooks; both expand to nothing here. */
#define	PREFETCH_FIRST_CHUNK	/* nothing */
#define	PREFETCH_NEXT_CHUNK	/* nothing */

/*
 * Move eight words from [r0] to [r1], advancing both pointers.
 * Only defined here if nothing earlier has supplied its own version.
 */
#ifndef COPY_CHUNK
#define	COPY_CHUNK \
	PREFETCH_NEXT_CHUNK ; \
	ldmia	r0!, {r3-r8,ip,lr} ; \
	stmia	r1!, {r3-r8,ip,lr}
#endif /* ! COPY_CHUNK */

/*
 * COPY_CHUNK overwrites r4-r8 and lr, so they are stacked on entry.
 * RESTORE_REGS pops the stacked lr straight into pc, which also
 * performs the return.
 */
#ifndef SAVE_REGS
#define	SAVE_REGS	push	{r4-r8, lr}
#define	RESTORE_REGS	pop	{r4-r8, pc}
#endif

ENTRY(bcopy_page)
	PREFETCH_FIRST_CHUNK
	SAVE_REGS
#ifdef BIG_LOOPS
	mov	r2, #(PAGE_SIZE >> 9)	/* 16 chunks = 512 bytes per pass */
#else
	mov	r2, #(PAGE_SIZE >> 7)	/* 4 chunks = 128 bytes per pass */
#endif

1:
	.rept	4
	COPY_CHUNK
	.endr

#ifdef BIG_LOOPS
	/* There is little point making the loop any larger; unless we are
	   running with the cache off, the load/store overheads will
	   completely dominate this loop.  */
	.rept	12
	COPY_CHUNK
	.endr
#endif
	subs	r2, r2, #1		/* one pass done */
	bne	1b			/* more passes remaining */

	RESTORE_REGS		/* ...and return. */
END(bcopy_page)
    119   1.1     matt 
/*
 * bzero_page(dest)
 *
 * Fill one page with zeroes using store-multiple bursts.
 *
 * Arguments:
 *   r0 - dest address
 *
 * Register use:
 *   r2            - loop iterations still to run
 *   r3-r8, ip, lr - eight zero words written by each stmia (32 bytes)
 *   r0 is post-incremented past the data as it is written.
 *   r4-r8 and lr are stacked on entry; the final pop loads the stacked
 *   lr into pc, which also performs the return.
 *
 * Constraint:
 *   PAGE_SIZE must be a multiple of the number of bytes written per
 *   loop iteration: 512 when BIG_LOOPS is defined, 128 otherwise.
 */

ENTRY(bzero_page)
	push	{r4-r8, lr}

	/* Build the block of eight zero words that every store writes. */
	mov	r3, #0
	mov	r4, #0
	mov	r5, #0
	mov	r6, #0
	mov	r7, #0
	mov	r8, #0
	mov	ip, #0
	mov	lr, #0

#ifdef BIG_LOOPS
	mov	r2, #(PAGE_SIZE >> 9)	/* 16 stores = 512 bytes per pass */
#else
	mov	r2, #(PAGE_SIZE >> 7)	/* 4 stores = 128 bytes per pass */
#endif

1:
	.rept	4
	stmia	r0!, {r3-r8,ip,lr}
	.endr

#ifdef BIG_LOOPS
	/* There is little point making the loop any larger; unless we are
	   running with the cache off, the load/store overheads will
	   completely dominate this loop.  */
	.rept	12
	stmia	r0!, {r3-r8,ip,lr}
	.endr
#endif

	subs	r2, r2, #1		/* one pass done */
	bne	1b			/* more passes remaining */

	pop	{r4-r8, pc}		/* restore and return */
END(bzero_page)
    181   1.7      scw 
    182   1.7      scw #else	/* __XSCALE__ */
    183   1.7      scw 
/*
 * XSCALE version of bcopy_page
 *
 * bcopy_page(src, dest)
 *
 * Arguments:
 *   r0 - src address
 *   r1 - dest address
 *
 * Register use:
 *   ip     - loop iterations still to run; each pass moves 0x80 bytes
 *   r2/r3  - register pair A
 *   r4/r5  - register pair B
 *   r0 and r1 are post-incremented past the data as it is copied.
 *   r4 and r5 are stacked on entry and restored before returning.
 *
 * The loop is software-pipelined: while one register pair is being
 * stored with strd, the other pair is already being loaded with the
 * following doubleword.  The first doubleword is therefore loaded
 * before the loop, and the loads for the *next* pass sit at the bottom
 * of the loop.  Those are conditional (ldrgt) so that the final pass
 * does not read past the end of the source page.  The instruction
 * order is deliberate; do not reorder it.
 *
 * Each pld inside the loop executes with r0 0x18 bytes short of the
 * next 0x20-byte boundary, so it prefetches the next 32-byte block of
 * the source.  The offset comments give the position within the
 * current 0x80-byte pass.
 * NOTE(review): the last pld of the final pass addresses the first
 * byte after the source page; presumably harmless because pld is only
 * a hint -- confirm against the ARM ARM.
 *
 * Constraint:
 *   PAGE_SIZE must be a multiple of 128 and (PAGE_SIZE >> 7) must be
 *   encodable as an ARM immediate (true for any power-of-two page size).
 *   The count is derived from PAGE_SIZE instead of being a literal so
 *   this routine agrees with the other page routines in this file.
 *   NOTE(review): strd presumably needs dest to be 8-byte aligned;
 *   page-aligned callers satisfy that -- confirm.
 */
ENTRY(bcopy_page)
	pld	[r0]
	push	{r4, r5}
	mov	ip, #(PAGE_SIZE >> 7)	/* passes of 0x80 bytes each */
	ldr	r2, [r0], #0x04		/* 0x00 */
	ldr	r3, [r0], #0x04		/* 0x04 */
1:	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldr	r4, [r0], #0x04		/* 0x08 */
	ldr	r5, [r0], #0x04		/* 0x0c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x10 */
	ldr	r3, [r0], #0x04		/* 0x14 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x18 */
	ldr	r5, [r0], #0x04		/* 0x1c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x20 */
	ldr	r3, [r0], #0x04		/* 0x24 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x28 */
	ldr	r5, [r0], #0x04		/* 0x2c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x30 */
	ldr	r3, [r0], #0x04		/* 0x34 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x38 */
	ldr	r5, [r0], #0x04		/* 0x3c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x40 */
	ldr	r3, [r0], #0x04		/* 0x44 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x48 */
	ldr	r5, [r0], #0x04		/* 0x4c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x50 */
	ldr	r3, [r0], #0x04		/* 0x54 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x58 */
	ldr	r5, [r0], #0x04		/* 0x5c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x60 */
	ldr	r3, [r0], #0x04		/* 0x64 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x68 */
	ldr	r5, [r0], #0x04		/* 0x6c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x70 */
	ldr	r3, [r0], #0x04		/* 0x74 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x78 */
	ldr	r5, [r0], #0x04		/* 0x7c */
	strd	r2, r3, [r1], #0x08
	subs	ip, ip, #0x01		/* flags steer the ldrgt/bgt below */
	ldrgt	r2, [r0], #0x04		/* 0x80 - next pass, skipped on the last */
	ldrgt	r3, [r0], #0x04		/* 0x84 - next pass, skipped on the last */
	strd	r4, r5, [r1], #0x08	/* final doubleword of this pass */
	bgt	1b
	pop	{r4, r5}
	RET
END(bcopy_page)
    250   1.7      scw 
/*
 * XSCALE version of bzero_page
 *
 * bzero_page(dest)
 *
 * Arguments:
 *   r0 - dest address
 *
 * Register use:
 *   r1    - bytes still to clear, starting at PAGE_SIZE
 *   r2/r3 - zero doubleword written by every strd
 *   r0 is post-incremented past the data as it is written.
 *
 * Each pass issues sixteen 8-byte stores, i.e. 128 bytes.  The loop
 * leaves only when the remaining count reaches exactly zero, so
 * PAGE_SIZE must be a non-zero multiple of 128.
 */
ENTRY(bzero_page)
	mov	r2, #0
	mov	r3, #0
	mov	r1, #PAGE_SIZE
1:
	.rept	16			/* 16 * 8 = 128 bytes */
	strd	r2, r3, [r0], #8
	.endr
	subs	r1, r1, #128
	bne	1b
	RET
END(bzero_page)
    278   1.7      scw #endif	/* __XSCALE__ */
    279