/*      $NetBSD: bzero.S,v 1.3 2001/11/30 02:25:50 mjl Exp $ */

/*-
 * Copyright (C) 2001 Martin J. Laubach <mjl@netbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/

#include <machine/asm.h>
#ifdef _KERNEL
#include <assym.h>
#endif

#define USE_STSWX 0     /* don't. slower than trivial copy loop */

/*----------------------------------------------------------------------*/
/*
        void bzero(void *b r3, size_t len r4);
        void * memset(void *b r3, int c r4, size_t len r5);
*/
/*----------------------------------------------------------------------*/
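/*
 * Overall strategy, as a hedged C sketch (illustrative only; CL and
 * dcbz() are assumed names, not part of this source): nonzero or short
 * fills go through a simple byte/word loop; large zero fills align the
 * destination to a cache line and clear whole lines with dcbz.
 *
 *      void *memset_sketch(void *b, int c, size_t len)
 *      {
 *              unsigned char *p = b;
 *              if (c != 0 || len <= 2 * CL) {          // simple_fill path
 *                      while (len--)                   // (wordwise in the
 *                              *p++ = c;               //  real code)
 *                      return b;
 *              }
 *              while ((uintptr_t)p & (CL - 1))         // align to line
 *                      *p++ = 0, len--;
 *              for (; len >= CL; len -= CL, p += CL)
 *                      dcbz(p);                        // zero whole line
 *              while (len--)                           // tail
 *                      *p++ = 0;
 *              return b;
 *      }
 */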

#define r_dst   r3
#define r_len   r4
#define r_val   r0

        .text
        .align 4
ENTRY(bzero)
        li      r_val, 0                /* Value to stuff in */
        b       cb_memset

ENTRY(memset)
        cmplwi  cr1, r5, 0
        mr.     r0, r4
        mr      r8, r3
        beqlr-  cr1                     /* Nothing to do */

        rlwimi  r0, r4, 8, 16, 23       /* word extend fill value */
        rlwimi  r0, r0, 16, 0, 15
        mr      r4, r5
        bne-    simple_fill             /* != 0, use trivial fill */
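/*
 * The two rlwimi instructions above smear the low byte of the fill
 * value c across all four bytes of r0.  A hedged C equivalent of that
 * byte replication (a sketch, not part of the original source):
 *
 *      uint32_t v = c & 0xff;  // low byte only
 *      v |= v << 8;            // two bytes filled
 *      v |= v << 16;           // all four bytes filled
 *
 * The first rlwimi inserts the byte into bits 16..23, the second
 * doubles the low halfword into the high halfword.
 */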
cb_memset:

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
        /* First find out cache line size */
#ifdef PIC
        mflr    r9
        bl      _GLOBAL_OFFSET_TABLE_@local-4
        mflr    r10
        mtlr    r9
        lwz     r5,cache_size@got(r10)
#else
        lis     r5,cache_size@h
        ori     r5,r5,cache_size@l
#endif
        lwz     r6, 0(r5)
        cmpwi   r6, -1
        bne+    cb_cacheline_known

/*----------------------------------------------------------------------*/
#define CTL_MACHDEP     7
#define CPU_CACHELINE   1

#define STKFRAME_SZ     48
#define MIB             8
#define OLDPLEN         16
#define R3_SAVE         20
#define R4_SAVE         24
#define R0_SAVE         28
#define R8_SAVE         32

        mflr    r6
        stw     r6, 4(r1)
        stwu    r1, -STKFRAME_SZ(r1)

        stw     r8, R8_SAVE(r1)
        stw     r3, R3_SAVE(r1)
        stw     r4, R4_SAVE(r1)
        stw     r0, R0_SAVE(r1)

        li      r0, CTL_MACHDEP         /* Construct MIB */
        stw     r0, MIB(r1)
        li      r0, CPU_CACHELINE
        stw     r0, MIB+4(r1)

        li      r0, 4                   /* Oldlenp := 4 */
        stw     r0, OLDPLEN(r1)

        addi    r3, r1, MIB
        li      r4, 2                   /* namelen */
        /* r5 already contains &cache_size */
        addi    r6, r1, OLDPLEN
        li      r7, 0
        li      r8, 0
        bl      PIC_PLT(_C_LABEL(sysctl))

        lwz     r8, R8_SAVE(r1)
        lwz     r3, R3_SAVE(r1)
        lwz     r4, R4_SAVE(r1)
        lwz     r0, R0_SAVE(r1)
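/*
 * The call above queries the CTL_MACHDEP/CPU_CACHELINE value (the CPU
 * cache line size) once and caches it in the cache_size global.  A
 * hedged C sketch of the same sysctl(3) query (illustrative only, not
 * part of the original source):
 *
 *      #include <sys/param.h>
 *      #include <sys/sysctl.h>
 *
 *      static int cache_size = -1;
 *
 *      if (cache_size == -1) {
 *              int mib[2] = { CTL_MACHDEP, CPU_CACHELINE };
 *              size_t len = sizeof(cache_size);
 *              sysctl(mib, 2, &cache_size, &len, NULL, 0);
 *      }
 */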

#ifdef PIC
        bl      _GLOBAL_OFFSET_TABLE_@local-4
        mflr    r10
        lwz     r9, cache_size@got(r10)
        lwz     r9, 0(r9)
#else
        lis     r5, cache_size@ha
        lwz     r9, cache_size@l(r5)
#endif
        la      r1, STKFRAME_SZ(r1)
        lwz     r5, 4(r1)
        mtlr    r5

        cntlzw  r6, r9                  /* compute shift value */
        li      r5, 31
        subf    r5, r6, r5

#ifdef PIC
        lwz     r6, cache_sh@got(r10)
        stw     r5, 0(r6)
#else
        lis     r6, cache_sh@ha
        stw     r5, cache_sh@l(r6)
#endif
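/*
 * cntlzw converts the power-of-two cache line size into its log2:
 * shift = 31 - clz(size).  A hedged C sketch using a GCC builtin (an
 * assumption, not original code):
 *
 *      int cache_sh = 31 - __builtin_clz(cache_size);
 *      // e.g. cache_size == 32:  31 - 26 == 5
 */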
/*----------------------------------------------------------------------*/
/* Okay, we know the cache line size (r9) and shift value (r10) */
cb_cacheline_known:
#ifdef PIC
        lwz     r5, cache_size@got(r10)
        lwz     r9, 0(r5)
        lwz     r5, cache_sh@got(r10)
        lwz     r10, 0(r5)
#else
        lis     r9, cache_size@ha
        lwz     r9, cache_size@l(r9)
        lis     r10, cache_sh@ha
        lwz     r10, cache_sh@l(r10)
#endif

#else /* _KERNEL */
        li      r9, CACHELINESIZE
#if CACHELINESIZE == 32
#define CACHELINESHIFT 5
#else
#error Define CACHELINESHIFT for your CACHELINESIZE
#endif
        li      r10, CACHELINESHIFT
#endif /* _KERNEL */
        /* Back in memory filling business */

        cmplwi  cr1, r_len, 0           /* Nothing to do? */
        add     r5, r9, r9
        cmplw   r_len, r5               /* <= 2*CL bytes to move? */
        beqlr-  cr1                     /* then do nothing */

        blt+    simple_fill             /* a trivial fill routine */

        /* Word align the block, fill bytewise until dst even */

        andi.   r5, r_dst, 0x03
        li      r6, 4
        beq+    cb_aligned_w            /* already aligned to word? */

        subf    r5, r5, r6              /* bytes to fill to align4 */
#if USE_STSWX
        mtxer   r5
        stswx   r0, 0, r_dst
        add     r_dst, r5, r_dst
#else
        mtctr   r5

        subi    r_dst, r_dst, 1
1:      stbu    r_val, 1(r_dst)         /* Fill bytewise */
        bdnz    1b

        addi    r_dst, r_dst, 1
#endif
        subf    r_len, r5, r_len

cb_aligned_w:   /* Cache block align, fill wordwise until dst aligned */

        /* I know I have something to do since we had > 2*CL initially */
        /* so no need to check for r_len = 0 */

        rlwinm. r5, r_dst, 30, 29, 31
        srwi    r6, r9, 2
        beq     cb_aligned_cb           /* already on CL boundary? */

        subf    r5, r5, r6              /* words to fill to alignment */
        mtctr   r5
        slwi    r5, r5, 2
        subf    r_len, r5, r_len

        subi    r_dst, r_dst, 4
1:      stwu    r_val, 4(r_dst)         /* Fill wordwise */
        bdnz    1b
        addi    r_dst, r_dst, 4
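/*
 * The rlwinm above extracts (dst >> 2) & 7, the word index within a
 * 32-byte cache line; note the 3-bit mask bakes an 8-word line into
 * this path even though the size is otherwise loaded at run time.  A
 * hedged C sketch of the "words to fill to alignment" computation:
 *
 *      size_t words_in   = (dst >> 2) & (CL/4 - 1);  // index in line
 *      size_t words_fill = CL/4 - words_in;          // to boundary
 */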

cb_aligned_cb:  /* no need to check r_len, see above */

        srw.    r5, r_len, r10          /* Number of cache blocks */
        mtctr   r5
        beq     cblocks_done

        slw     r5, r5, r10
        subf    r_len, r5, r_len

1:      dcbz    0, r_dst                /* Clear blockwise */
        add     r_dst, r_dst, r9
        bdnz    1b

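/*
 * dcbz zeroes a full cache block in one instruction without fetching
 * the old line from memory, which is why the code aligned to a line
 * boundary first.  It can trap on cache-inhibited (e.g. device)
 * memory, so this path assumes cacheable RAM.  A hedged C sketch of
 * the loop using GCC inline assembly (illustrative only):
 *
 *      unsigned char *p = dst;              // line-aligned
 *      for (size_t i = 0; i < nblocks; i++) {
 *              __asm__ volatile ("dcbz 0,%0" :: "r"(p) : "memory");
 *              p += cache_size;             // next cache line
 *      }
 */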
cblocks_done:   /* still CL aligned, but less than CL bytes left */
        cmplwi  cr1, r_len, 0
        cmplwi  r_len, 8
        beq-    cr1, sf_return

        blt-    sf_bytewise             /* <8 remaining? */
        b       sf_aligned_w

/*----------------------------------------------------------------------*/
wbzero: li      r_val, 0

        cmplwi  r_len, 0
        beqlr-                          /* Nothing to do */

simple_fill:
#if USE_STSWX
        cmplwi  cr1, r_len, 12          /* < 12 bytes to move? */
#else
        cmplwi  cr1, r_len, 8           /* < 8 bytes to move? */
#endif
        andi.   r5, r_dst, 0x03         /* bytes to fill to align4 */
        blt     cr1, sf_bytewise        /* trivial byte mover */

        li      r6, 4
        subf    r5, r5, r6
        beq+    sf_aligned_w            /* dest is word aligned */

#if USE_STSWX
        mtxer   r5
        stswx   r0, 0, r_dst
        add     r_dst, r5, r_dst
#else
        mtctr   r5                      /* nope, then fill bytewise */
        subi    r_dst, r_dst, 1         /* until it is */
1:      stbu    r_val, 1(r_dst)
        bdnz    1b

        addi    r_dst, r_dst, 1
#endif
        subf    r_len, r5, r_len

sf_aligned_w:   /* no need to check r_len since it was >= 8 bytes initially */
#if USE_STSWX
        mr      r6, r0
        mr      r7, r0

        srwi    r5, r_len, 3
        mtctr   r5

        slwi    r5, r5, 3               /* adjust len */
        subf.   r_len, r5, r_len

1:      stswi   r6, r_dst, 8
        addi    r_dst, r_dst, 8
        bdnz    1b
#else
        srwi    r5, r_len, 2            /* words to fill */
        mtctr   r5

        slwi    r5, r5, 2
        subf.   r_len, r5, r_len        /* adjust len for fill */

        subi    r_dst, r_dst, 4
1:      stwu    r_val, 4(r_dst)
        bdnz    1b
        addi    r_dst, r_dst, 4
#endif

sf_word_done:   bne-    sf_bytewise

sf_return:      mr      r3, r8          /* restore orig ptr */
        blr                             /* for memset functionality */

sf_bytewise:
#if USE_STSWX
        mr      r5, r0
        mr      r6, r0
        mr      r7, r0

        mtxer   r_len
        stswx   r5, 0, r_dst
#else
        mtctr   r_len

        subi    r_dst, r_dst, 1
1:      stbu    r_val, 1(r_dst)
        bdnz    1b
#endif
        mr      r3, r8                  /* restore orig ptr */
        blr                             /* for memset functionality */

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
        .data
cache_size:     .long -1
cache_sh:       .long 0

#endif
/*----------------------------------------------------------------------*/