1 1.9 matt /* $NetBSD: bcopy_page.S,v 1.9 2013/08/18 06:29:29 matt Exp $ */
2 1.1 matt
3 1.1 matt /*
4 1.1 matt * Copyright (c) 1995 Scott Stevens
5 1.1 matt * All rights reserved.
6 1.1 matt *
7 1.1 matt * Redistribution and use in source and binary forms, with or without
8 1.1 matt * modification, are permitted provided that the following conditions
9 1.1 matt * are met:
10 1.1 matt * 1. Redistributions of source code must retain the above copyright
11 1.1 matt * notice, this list of conditions and the following disclaimer.
12 1.1 matt * 2. Redistributions in binary form must reproduce the above copyright
13 1.1 matt * notice, this list of conditions and the following disclaimer in the
14 1.1 matt * documentation and/or other materials provided with the distribution.
15 1.1 matt * 3. All advertising materials mentioning features or use of this software
16 1.1 matt * must display the following acknowledgement:
17 1.1 matt * This product includes software developed by Scott Stevens.
18 1.1 matt * 4. The name of the author may not be used to endorse or promote products
19 1.1 matt * derived from this software without specific prior written permission.
20 1.1 matt *
21 1.1 matt * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 1.1 matt * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 1.1 matt * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 1.1 matt * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 1.1 matt * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 1.1 matt * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 1.1 matt * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 1.1 matt * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 1.1 matt * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 1.1 matt * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 1.1 matt *
32 1.1 matt * RiscBSD kernel project
33 1.1 matt *
34 1.1 matt * bcopy_page.S
35 1.1 matt *
36 1.1 matt * page optimised bcopy and bzero routines
37 1.1 matt *
38 1.1 matt * Created : 08/04/95
39 1.1 matt */
40 1.1 matt
41 1.1 matt #include <machine/asm.h>
42 1.1 matt
43 1.6 thorpej #include "assym.h"
44 1.6 thorpej
45 1.7 scw #ifndef __XSCALE__
46 1.7 scw
47 1.2 chris /* #define BIG_LOOPS */
48 1.2 chris
49 1.1 matt /*
50 1.1 matt * bcopy_page(src, dest)
51 1.1 matt *
52 1.1 matt * Optimised copy page routine.
53 1.1 matt *
54 1.1 matt * On entry:
55 1.1 matt * r0 - src address
56 1.1 matt * r1 - dest address
57 1.1 matt *
58 1.1 matt * Requires:
59 1.6 thorpej * number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS), 128
60 1.2 chris * otherwise.
61 1.1 matt */
62 1.1 matt
/* One COPY_CHUNK moves CHUNK_SIZE (32) bytes: 8 registers x 4 bytes. */
63 1.3 thorpej #define CHUNK_SIZE 32
64 1.3 thorpej
/*
 * Prefetch hooks are no-ops in the generic build; a CPU-specific port may
 * pre-define these (and COPY_CHUNK / SAVE_REGS) before this point to insert
 * cache-prefetch instructions or a different copy sequence.
 */
65 1.3 thorpej #define PREFETCH_FIRST_CHUNK /* nothing */
66 1.3 thorpej #define PREFETCH_NEXT_CHUNK /* nothing */
67 1.3 thorpej
68 1.3 thorpej #ifndef COPY_CHUNK
/*
 * Load 32 bytes from the source (r0) and store them to the destination (r1),
 * post-incrementing both pointers.  Clobbers r3-r8, ip and lr.
 */
69 1.3 thorpej #define COPY_CHUNK \
70 1.3 thorpej PREFETCH_NEXT_CHUNK ; \
71 1.3 thorpej ldmia r0!, {r3-r8,ip,lr} ; \
72 1.3 thorpej stmia r1!, {r3-r8,ip,lr}
73 1.3 thorpej #endif /* ! COPY_CHUNK */
74 1.3 thorpej
75 1.3 thorpej #ifndef SAVE_REGS
/*
 * r4-r8 are callee-saved under the ARM ABI; lr must be saved as well because
 * COPY_CHUNK uses it as a data carrier.  RESTORE_REGS pops the saved lr
 * straight into pc, i.e. it also performs the function return.
 */
76 1.9 matt #define SAVE_REGS push {r4-r8, lr}
77 1.9 matt #define RESTORE_REGS pop {r4-r8, pc}
78 1.3 thorpej #endif
79 1.3 thorpej
/*
 * bcopy_page(src, dest): copy one page.
 * r2 counts remaining loop iterations; each iteration moves 4 COPY_CHUNKs
 * (128 bytes), or 16 COPY_CHUNKs (512 bytes) when BIG_LOOPS is defined.
 */
80 1.1 matt ENTRY(bcopy_page)
81 1.3 thorpej PREFETCH_FIRST_CHUNK
82 1.3 thorpej SAVE_REGS
83 1.2 chris #ifdef BIG_LOOPS
84 1.6 thorpej mov r2, #(PAGE_SIZE >> 9) /* iterations = PAGE_SIZE / 512 */
85 1.2 chris #else
86 1.6 thorpej mov r2, #(PAGE_SIZE >> 7) /* iterations = PAGE_SIZE / 128 */
87 1.2 chris #endif
88 1.1 matt
89 1.5 thorpej 1:
90 1.3 thorpej COPY_CHUNK
91 1.3 thorpej COPY_CHUNK
92 1.3 thorpej COPY_CHUNK
93 1.3 thorpej COPY_CHUNK
94 1.2 chris
95 1.2 chris #ifdef BIG_LOOPS
96 1.2 chris /* There is little point making the loop any larger; unless we are
97 1.2 chris running with the cache off, the load/store overheads will
98 1.2 chris completely dominate this loop. */
99 1.3 thorpej COPY_CHUNK
100 1.3 thorpej COPY_CHUNK
101 1.3 thorpej COPY_CHUNK
102 1.3 thorpej COPY_CHUNK
103 1.3 thorpej
104 1.3 thorpej COPY_CHUNK
105 1.3 thorpej COPY_CHUNK
106 1.3 thorpej COPY_CHUNK
107 1.3 thorpej COPY_CHUNK
108 1.3 thorpej
109 1.3 thorpej COPY_CHUNK
110 1.3 thorpej COPY_CHUNK
111 1.3 thorpej COPY_CHUNK
112 1.3 thorpej COPY_CHUNK
113 1.2 chris #endif
114 1.1 matt subs r2, r2, #1 /* loop until the iteration count hits zero */
115 1.5 thorpej bne 1b
116 1.1 matt
117 1.3 thorpej RESTORE_REGS /* ...and return. */
118 1.9 matt END(bcopy_page)
119 1.1 matt
120 1.1 matt /*
121 1.1 matt * bzero_page(dest)
122 1.1 matt *
123 1.1 matt * Optimised zero page routine.
124 1.1 matt *
125 1.1 matt * On entry:
126 1.1 matt * r0 - dest address
127 1.1 matt *
128 1.1 matt * Requires:
129 1.6 thorpej * number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS), 128
130 1.2 chris * otherwise
131 1.1 matt */
132 1.1 matt
/*
 * Zero eight registers once, then blast them to memory with stmia:
 * each stmia writes 32 bytes, four per iteration = 128 bytes
 * (16 per iteration = 512 bytes with BIG_LOOPS).  r2 is the loop counter.
 */
133 1.1 matt ENTRY(bzero_page)
134 1.9 matt push {r4-r8, lr} /* r4-r8 callee-saved; lr used as a zero source */
135 1.2 chris #ifdef BIG_LOOPS
136 1.6 thorpej mov r2, #(PAGE_SIZE >> 9) /* iterations = PAGE_SIZE / 512 */
137 1.2 chris #else
138 1.6 thorpej mov r2, #(PAGE_SIZE >> 7) /* iterations = PAGE_SIZE / 128 */
139 1.2 chris #endif
/* Fill all eight store-source registers with zero. */
140 1.1 matt mov r3, #0
141 1.1 matt mov r4, #0
142 1.1 matt mov r5, #0
143 1.1 matt mov r6, #0
144 1.1 matt mov r7, #0
145 1.1 matt mov r8, #0
146 1.2 chris mov ip, #0
147 1.2 chris mov lr, #0
148 1.1 matt
149 1.5 thorpej 1:
150 1.2 chris stmia r0!, {r3-r8,ip,lr}
151 1.2 chris stmia r0!, {r3-r8,ip,lr}
152 1.2 chris stmia r0!, {r3-r8,ip,lr}
153 1.2 chris stmia r0!, {r3-r8,ip,lr}
154 1.2 chris
155 1.2 chris #ifdef BIG_LOOPS
156 1.2 chris /* There is little point making the loop any larger; unless we are
157 1.2 chris running with the cache off, the load/store overheads will
158 1.2 chris completely dominate this loop. */
159 1.2 chris stmia r0!, {r3-r8,ip,lr}
160 1.2 chris stmia r0!, {r3-r8,ip,lr}
161 1.2 chris stmia r0!, {r3-r8,ip,lr}
162 1.2 chris stmia r0!, {r3-r8,ip,lr}
163 1.2 chris
164 1.2 chris stmia r0!, {r3-r8,ip,lr}
165 1.2 chris stmia r0!, {r3-r8,ip,lr}
166 1.2 chris stmia r0!, {r3-r8,ip,lr}
167 1.2 chris stmia r0!, {r3-r8,ip,lr}
168 1.2 chris
169 1.2 chris stmia r0!, {r3-r8,ip,lr}
170 1.2 chris stmia r0!, {r3-r8,ip,lr}
171 1.2 chris stmia r0!, {r3-r8,ip,lr}
172 1.2 chris stmia r0!, {r3-r8,ip,lr}
173 1.2 chris
174 1.2 chris #endif
175 1.1 matt
176 1.1 matt subs r2, r2, #1 /* loop until the iteration count hits zero */
177 1.5 thorpej bne 1b
178 1.1 matt
179 1.9 matt pop {r4-r8, pc} /* restore and return */
180 1.9 matt END(bzero_page)
181 1.7 scw
182 1.7 scw #else /* __XSCALE__ */
183 1.7 scw
184 1.7 scw /*
185 1.7 scw * XSCALE version of bcopy_page
186 1.7 scw */
/*
 * Software-pipelined copy: loads (ldr) and paired stores (strd) are
 * interleaved so each strd consumes data loaded two instructions earlier,
 * hiding XScale load-use latency.  Each strd rN writes the even/odd pair
 * rN:rN+1 (8 bytes), so only r2-r5 carry data; ip is the loop counter.
 * NOTE(review): strd requires an 8-byte-aligned destination — holds for
 * page-aligned buffers, which is this routine's intended use.
 */
187 1.7 scw ENTRY(bcopy_page)
188 1.7 scw pld [r0]
189 1.9 matt push {r4, r5} /* only r4/r5 are callee-saved here */
/* 32 iterations x 128 bytes = 4096; NOTE(review): assumes PAGE_SIZE == 4096 */
190 1.7 scw mov ip, #32
/* Prime the pipeline: first two words are loaded before entering the loop. */
191 1.7 scw ldr r2, [r0], #0x04 /* 0x00 */
192 1.7 scw ldr r3, [r0], #0x04 /* 0x04 */
/* r0 is 8 bytes past the chunk start here, so +0x18 prefetches chunk+0x20. */
193 1.7 scw 1: pld [r0, #0x18] /* Prefetch 0x20 */
194 1.7 scw ldr r4, [r0], #0x04 /* 0x08 */
195 1.7 scw ldr r5, [r0], #0x04 /* 0x0c */
196 1.7 scw strd r2, [r1], #0x08
197 1.7 scw ldr r2, [r0], #0x04 /* 0x10 */
198 1.7 scw ldr r3, [r0], #0x04 /* 0x14 */
199 1.7 scw strd r4, [r1], #0x08
200 1.7 scw ldr r4, [r0], #0x04 /* 0x18 */
201 1.7 scw ldr r5, [r0], #0x04 /* 0x1c */
202 1.7 scw strd r2, [r1], #0x08
203 1.7 scw ldr r2, [r0], #0x04 /* 0x20 */
204 1.7 scw ldr r3, [r0], #0x04 /* 0x24 */
205 1.7 scw pld [r0, #0x18] /* Prefetch 0x40 */
206 1.7 scw strd r4, [r1], #0x08
207 1.7 scw ldr r4, [r0], #0x04 /* 0x28 */
208 1.7 scw ldr r5, [r0], #0x04 /* 0x2c */
209 1.7 scw strd r2, [r1], #0x08
210 1.7 scw ldr r2, [r0], #0x04 /* 0x30 */
211 1.7 scw ldr r3, [r0], #0x04 /* 0x34 */
212 1.7 scw strd r4, [r1], #0x08
213 1.7 scw ldr r4, [r0], #0x04 /* 0x38 */
214 1.7 scw ldr r5, [r0], #0x04 /* 0x3c */
215 1.7 scw strd r2, [r1], #0x08
216 1.7 scw ldr r2, [r0], #0x04 /* 0x40 */
217 1.7 scw ldr r3, [r0], #0x04 /* 0x44 */
218 1.7 scw pld [r0, #0x18] /* Prefetch 0x60 */
219 1.7 scw strd r4, [r1], #0x08
220 1.7 scw ldr r4, [r0], #0x04 /* 0x48 */
221 1.7 scw ldr r5, [r0], #0x04 /* 0x4c */
222 1.7 scw strd r2, [r1], #0x08
223 1.7 scw ldr r2, [r0], #0x04 /* 0x50 */
224 1.7 scw ldr r3, [r0], #0x04 /* 0x54 */
225 1.7 scw strd r4, [r1], #0x08
226 1.7 scw ldr r4, [r0], #0x04 /* 0x58 */
227 1.7 scw ldr r5, [r0], #0x04 /* 0x5c */
228 1.7 scw strd r2, [r1], #0x08
229 1.7 scw ldr r2, [r0], #0x04 /* 0x60 */
230 1.7 scw ldr r3, [r0], #0x04 /* 0x64 */
231 1.7 scw pld [r0, #0x18] /* Prefetch 0x80 */
232 1.7 scw strd r4, [r1], #0x08
233 1.7 scw ldr r4, [r0], #0x04 /* 0x68 */
234 1.7 scw ldr r5, [r0], #0x04 /* 0x6c */
235 1.7 scw strd r2, [r1], #0x08
236 1.7 scw ldr r2, [r0], #0x04 /* 0x70 */
237 1.7 scw ldr r3, [r0], #0x04 /* 0x74 */
238 1.7 scw strd r4, [r1], #0x08
239 1.7 scw ldr r4, [r0], #0x04 /* 0x78 */
240 1.7 scw ldr r5, [r0], #0x04 /* 0x7c */
241 1.7 scw strd r2, [r1], #0x08
/*
 * Loop tail: the ldrgt pair pre-loads the first two words of the NEXT
 * 128-byte chunk only when iterations remain (GT after subs), so the
 * final iteration never reads past the end of the source page.
 */
242 1.7 scw subs ip, ip, #0x01
243 1.7 scw ldrgt r2, [r0], #0x04 /* 0x80 */
244 1.7 scw ldrgt r3, [r0], #0x04 /* 0x84 */
245 1.7 scw strd r4, [r1], #0x08
246 1.7 scw bgt 1b
247 1.9 matt pop {r4, r5}
248 1.9 matt RET
249 1.9 matt END(bcopy_page)
250 1.7 scw
251 1.7 scw /*
252 1.7 scw * XSCALE version of bzero_page
253 1.7 scw */
/*
 * r1 counts bytes remaining; each strd writes the zeroed r2:r3 pair
 * (8 bytes), 16 stores per iteration = 128 bytes.  Requires PAGE_SIZE to
 * be a multiple of 128 and r0 to be 8-byte aligned (page-aligned in use).
 */
254 1.7 scw ENTRY(bzero_page)
255 1.7 scw mov r1, #PAGE_SIZE
256 1.7 scw mov r2, #0
257 1.7 scw mov r3, #0
258 1.7 scw 1: strd r2, [r0], #8 /* 32 */
259 1.7 scw strd r2, [r0], #8
260 1.7 scw strd r2, [r0], #8
261 1.7 scw strd r2, [r0], #8
262 1.7 scw strd r2, [r0], #8 /* 64 */
263 1.7 scw strd r2, [r0], #8
264 1.7 scw strd r2, [r0], #8
265 1.7 scw strd r2, [r0], #8
266 1.7 scw strd r2, [r0], #8 /* 96 */
267 1.7 scw strd r2, [r0], #8
268 1.7 scw strd r2, [r0], #8
269 1.7 scw strd r2, [r0], #8
270 1.7 scw strd r2, [r0], #8 /* 128 */
271 1.7 scw strd r2, [r0], #8
272 1.7 scw strd r2, [r0], #8
273 1.7 scw strd r2, [r0], #8
274 1.7 scw subs r1, r1, #128 /* 128 bytes zeroed; loop until count reaches 0 */
275 1.7 scw bne 1b
276 1.9 matt RET
277 1.9 matt END(bzero_page)
278 1.7 scw #endif /* __XSCALE__ */
279