/*	$NetBSD: bcopy_page.S,v 1.5 2002/08/17 16:36:33 thorpej Exp $	*/
2
3 /*
4 * Copyright (c) 1995 Scott Stevens
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by Scott Stevens.
18 * 4. The name of the author may not be used to endorse or promote products
19 * derived from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 *
32 * RiscBSD kernel project
33 *
34 * bcopy_page.S
35 *
36 * page optimised bcopy and bzero routines
37 *
38 * Created : 08/04/95
39 */
40
41 #include <machine/param.h>
42 #include <machine/asm.h>
43
44 /* #define BIG_LOOPS */
45
46 /*
47 * bcopy_page(src, dest)
48 *
49 * Optimised copy page routine.
50 *
51 * On entry:
52 * r0 - src address
53 * r1 - dest address
54 *
 * Requires:
 *	NBPG (the number of bytes per page) must be a multiple of 512 when
 *	BIG_LOOPS is defined, or a multiple of 128 otherwise.
58 */
59
/* Offset, in bytes, used to prefetch one chunk ahead of the source pointer. */
#define	CHUNK_SIZE	32

/*
 * Prefetch hooks.  XScale builds issue a pld on the source pointer (r0);
 * every other build expands them to nothing.
 */
#if defined(__XSCALE__)
/* Conveniently, the chunk size is the XScale cache line size. */
#define	PREFETCH_FIRST_CHUNK	pld	[r0]
#define	PREFETCH_NEXT_CHUNK	pld	[r0, #(CHUNK_SIZE)]
#else
#define	PREFETCH_FIRST_CHUNK	/* nothing */
#define	PREFETCH_NEXT_CHUNK	/* nothing */
#endif	/* __XSCALE__ */

/*
 * COPY_CHUNK: optionally prefetch at r0 + CHUNK_SIZE, then load eight
 * registers from [r0] and store them to [r1], writing both pointers
 * back.  Only defined here when no definition is already in effect.
 */
#if !defined(COPY_CHUNK)
#define	COPY_CHUNK \
	PREFETCH_NEXT_CHUNK ; \
	ldmia	r0!, {r3-r8,ip,lr} ; \
	stmia	r1!, {r3-r8,ip,lr}
#endif	/* !COPY_CHUNK */

/*
 * SAVE_REGS / RESTORE_REGS: push r4-r8 and lr on entry; pop r4-r8 on
 * exit, loading the saved lr straight into pc to return.  Both are
 * supplied together, and only when SAVE_REGS is not already defined.
 */
#if !defined(SAVE_REGS)
#define	SAVE_REGS	stmfd	sp!, {r4-r8, lr}
#define	RESTORE_REGS	ldmfd	sp!, {r4-r8, pc}
#endif	/* !SAVE_REGS */
82
ENTRY(bcopy_page)
	PREFETCH_FIRST_CHUNK
	SAVE_REGS

	/*
	 * Each pass of the loop below runs COPY_CHUNK BCOPY_PASS_CHUNKS
	 * times.  With the default 32-byte COPY_CHUNK that moves
	 * (1 << BCOPY_PASS_SHIFT) bytes per pass: 512 with BIG_LOOPS,
	 * 128 without.
	 */
#ifdef BIG_LOOPS
#define	BCOPY_PASS_CHUNKS	16
#define	BCOPY_PASS_SHIFT	9
#else
#define	BCOPY_PASS_CHUNKS	4
#define	BCOPY_PASS_SHIFT	7
#endif

	mov	r2, #(NBPG >> BCOPY_PASS_SHIFT)	/* r2 = passes remaining */

1:
	/*
	 * There is little point making a pass any larger than the
	 * BIG_LOOPS size; unless we are running with the cache off, the
	 * load/store overheads will completely dominate this loop.
	 */
	.rept	BCOPY_PASS_CHUNKS
	COPY_CHUNK
	.endr

	subs	r2, r2, #1
	bne	1b

	RESTORE_REGS		/* ...and return. */
121
122 /*
123 * bzero_page(dest)
124 *
125 * Optimised zero page routine.
126 *
127 * On entry:
128 * r0 - dest address
129 *
 * Requires:
 *	NBPG (the number of bytes per page) must be a multiple of 512 when
 *	BIG_LOOPS is defined, or a multiple of 128 otherwise.
133 */
134
ENTRY(bzero_page)
	/* r4-r8 and lr are overwritten with zero below, so save them. */
	stmfd	sp!, {r4-r8, lr}

	/*
	 * Each pass of the loop below issues BZERO_PASS_STORES stores of
	 * eight registers (32 bytes) each, i.e. (1 << BZERO_PASS_SHIFT)
	 * bytes per pass: 512 with BIG_LOOPS, 128 without.
	 */
#ifdef BIG_LOOPS
#define	BZERO_PASS_STORES	16
#define	BZERO_PASS_SHIFT	9
#else
#define	BZERO_PASS_STORES	4
#define	BZERO_PASS_SHIFT	7
#endif

	mov	r2, #(NBPG >> BZERO_PASS_SHIFT)	/* r2 = passes remaining */

	/* Eight zeroed registers serve as the store source. */
	mov	r3, #0
	mov	r4, #0
	mov	r5, #0
	mov	r6, #0
	mov	r7, #0
	mov	r8, #0
	mov	ip, #0
	mov	lr, #0

1:
	/*
	 * There is little point making a pass any larger than the
	 * BIG_LOOPS size; unless we are running with the cache off, the
	 * load/store overheads will completely dominate this loop.
	 */
	.rept	BZERO_PASS_STORES
	stmia	r0!, {r3-r8,ip,lr}
	.endr

	subs	r2, r2, #1
	bne	1b

	ldmfd	sp!, {r4-r8, pc}	/* restore r4-r8; saved lr goes into pc to return */
182