bcopy_page.S revision 1.6.2.3 1 /* $NetBSD: bcopy_page.S,v 1.6.2.3 2004/09/21 13:13:08 skrll Exp $ */
2
3 /*
4 * Copyright (c) 1995 Scott Stevens
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by Scott Stevens.
18 * 4. The name of the author may not be used to endorse or promote products
19 * derived from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 *
32 * RiscBSD kernel project
33 *
34 * bcopy_page.S
35 *
36 * page optimised bcopy and bzero routines
37 *
38 * Created : 08/04/95
39 */
40
41 #include <machine/asm.h>
42
43 #include "assym.h"
44
45 #ifndef __XSCALE__
46
47 /* #define BIG_LOOPS */
48
49 /*
50 * bcopy_page(src, dest)
51 *
52 * Optimised copy page routine.
53 *
54 * On entry:
55 * r0 - src address
56 * r1 - dest address
57 *
58 * Requires:
59 * number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS), 128
60 * otherwise.
61 */
62
/*
 * Building blocks for the bcopy_page loop.  COPY_CHUNK and the
 * SAVE_REGS/RESTORE_REGS pair are only defined here when they have not
 * already been defined earlier in preprocessing.
 */

/*
 * Equals the eight words (8 x 4 bytes) moved by the default COPY_CHUNK.
 * Not referenced by the code in this section.
 */
#define	CHUNK_SIZE		32

/* Prefetch hooks; both expand to nothing here. */
#define	PREFETCH_FIRST_CHUNK	/* nothing */
#define	PREFETCH_NEXT_CHUNK	/* nothing */

/*
 * Move one chunk: load eight words from [r0], store them to [r1], and
 * post-increment both pointers.  Overwrites r3-r8, ip and lr.
 */
#if !defined(COPY_CHUNK)
#define	COPY_CHUNK							\
	PREFETCH_NEXT_CHUNK					;	\
	ldmia	r0!, {r3, r4, r5, r6, r7, r8, ip, lr}		;	\
	stmia	r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
#endif	/* !defined(COPY_CHUNK) */

/*
 * Prologue/epilogue pair.  r4-r8 and lr are pushed because the default
 * COPY_CHUNK overwrites them; the epilogue pops the saved lr value
 * straight into pc, which also performs the return.
 */
#if !defined(SAVE_REGS)
#define	SAVE_REGS	stmfd	sp!, {r4, r5, r6, r7, r8, lr}
#define	RESTORE_REGS	ldmfd	sp!, {r4, r5, r6, r7, r8, pc}
#endif	/* !defined(SAVE_REGS) */
79
ENTRY(bcopy_page)
	/*
	 * Register use:
	 *	r0	source pointer, advanced by each COPY_CHUNK
	 *	r1	destination pointer, advanced by each COPY_CHUNK
	 *	r2	number of loop passes still to run
	 */
	PREFETCH_FIRST_CHUNK
	SAVE_REGS

	/*
	 * With the default 32-byte COPY_CHUNK, one pass of the loop moves
	 * 512 bytes when BIG_LOOPS is defined and 128 bytes otherwise.
	 */
#ifdef BIG_LOOPS
	mov	r2, #(PAGE_SIZE >> 9)
#else
	mov	r2, #(PAGE_SIZE >> 7)
#endif

.Lbcopy_page_loop:
	/*
	 * The loop body is COPY_CHUNK repeated 4 times (16 times with
	 * BIG_LOOPS).  There is little point making the loop any larger;
	 * unless we are running with the cache off, the load/store
	 * overheads will completely dominate this loop.
	 */
#ifdef BIG_LOOPS
	.rept	16
#else
	.rept	4
#endif
	COPY_CHUNK
	.endr

	subs	r2, r2, #1
	bne	.Lbcopy_page_loop

	RESTORE_REGS		/* ...and return. */
118
119 /*
120 * bzero_page(dest)
121 *
122 * Optimised zero page routine.
123 *
124 * On entry:
125 * r0 - dest address
126 *
127 * Requires:
128 * number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS), 128
129 * otherwise
130 */
131
ENTRY(bzero_page)
	/*
	 * Register use:
	 *	r0		destination pointer, post-incremented by
	 *			every store
	 *	r2		number of loop passes still to run
	 *	r3-r8, ip, lr	all zero; written out eight words at a time
	 */
	stmdb	sp!, {r4, r5, r6, r7, r8, lr}

	/* One pass clears 512 bytes with BIG_LOOPS, 128 bytes without. */
#ifdef BIG_LOOPS
	mov	r2, #(PAGE_SIZE >> 9)
#else
	mov	r2, #(PAGE_SIZE >> 7)
#endif

	/* Clear the eight registers that feed each store-multiple. */
	mov	r3, #0
	mov	r4, #0
	mov	r5, #0
	mov	r6, #0
	mov	r7, #0
	mov	r8, #0
	mov	ip, #0
	mov	lr, #0

.Lbzero_page_loop:
	/*
	 * The loop body is a 32-byte store repeated 4 times (16 times with
	 * BIG_LOOPS).  There is little point making the loop any larger;
	 * unless we are running with the cache off, the load/store
	 * overheads will completely dominate this loop.
	 */
#ifdef BIG_LOOPS
	.rept	16
#else
	.rept	4
#endif
	stmia	r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
	.endr

	subs	r2, r2, #1
	bne	.Lbzero_page_loop

	/* Restore r4-r8 and return by popping the saved lr into pc. */
	ldmia	sp!, {r4, r5, r6, r7, r8, pc}
179
180 #else /* __XSCALE__ */
181
182 /*
183 * XSCALE version of bcopy_page
184 */
ENTRY(bcopy_page)
	/*
	 * On entry:
	 *	r0 - src address
	 *	r1 - dest address
	 *
	 * Register use:
	 *	r2/r3, r4/r5	alternating word pairs: one pair is written
	 *			out with strd while the other is reloaded
	 *	ip		number of 128-byte loop passes still to run
	 *
	 * Requires:
	 *	PAGE_SIZE is a multiple of 128 and (PAGE_SIZE >> 7) can be
	 *	encoded as an immediate operand.
	 *
	 * The offset in the comment after each load is the position of that
	 * word relative to the start of the current 128-byte block.
	 */
	pld	[r0]
	stmfd	sp!, {r4, r5}
	/*
	 * Derive the pass count from PAGE_SIZE, as bzero_page does, instead
	 * of hard-coding 32 (which is only right for 4096-byte pages).
	 */
	mov	ip, #(PAGE_SIZE >> 7)
	/* Prime the pipeline with the first word pair before the loop. */
	ldr	r2, [r0], #0x04		/* 0x00 */
	ldr	r3, [r0], #0x04		/* 0x04 */
1:	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldr	r4, [r0], #0x04		/* 0x08 */
	ldr	r5, [r0], #0x04		/* 0x0c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x10 */
	ldr	r3, [r0], #0x04		/* 0x14 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x18 */
	ldr	r5, [r0], #0x04		/* 0x1c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x20 */
	ldr	r3, [r0], #0x04		/* 0x24 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x28 */
	ldr	r5, [r0], #0x04		/* 0x2c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x30 */
	ldr	r3, [r0], #0x04		/* 0x34 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x38 */
	ldr	r5, [r0], #0x04		/* 0x3c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x40 */
	ldr	r3, [r0], #0x04		/* 0x44 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x48 */
	ldr	r5, [r0], #0x04		/* 0x4c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x50 */
	ldr	r3, [r0], #0x04		/* 0x54 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x58 */
	ldr	r5, [r0], #0x04		/* 0x5c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x60 */
	ldr	r3, [r0], #0x04		/* 0x64 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x68 */
	ldr	r5, [r0], #0x04		/* 0x6c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x70 */
	ldr	r3, [r0], #0x04		/* 0x74 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x78 */
	ldr	r5, [r0], #0x04		/* 0x7c */
	strd	r2, [r1], #0x08
	subs	ip, ip, #0x01
	/*
	 * Load the first pair of the next block only when another pass
	 * follows, so the final pass does not load beyond the words copied.
	 */
	ldrgt	r2, [r0], #0x04		/* 0x80 */
	ldrgt	r3, [r0], #0x04		/* 0x84 */
	strd	r4, [r1], #0x08
	bgt	1b
	ldmfd	sp!, {r4, r5}
	mov	pc, lr
247
248 /*
249 * XSCALE version of bzero_page
250 */
ENTRY(bzero_page)
	/*
	 * Register use:
	 *	r0	destination pointer, advanced 8 bytes by every strd
	 *	r1	bytes still to clear; starts at PAGE_SIZE
	 *	r2/r3	zero pair written by each strd
	 */
	mov	r1, #PAGE_SIZE
	mov	r2, #0
	mov	r3, #0

.Lbzero_page_loop:
	/* Sixteen doubleword stores clear 128 bytes per pass. */
	.rept	16
	strd	r2, [r0], #8
	.endr

	subs	r1, r1, #128
	bne	.Lbzero_page_loop

	mov	pc, lr
274 #endif /* __XSCALE__ */
275