/*	$NetBSD: bzero.S,v 1.3.2.3 2002/03/22 20:41:54 nathanw Exp $	*/

/*-
 * Copyright (C) 2001 Martin J. Laubach <mjl@netbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/
30 1.3.2.2 nathanw
31 1.3.2.2 nathanw #include <machine/asm.h>
32 1.3.2.2 nathanw #ifdef _KERNEL
33 1.3.2.2 nathanw #include <assym.h>
34 1.3.2.2 nathanw #endif
35 1.3.2.2 nathanw
36 1.3.2.2 nathanw #define USE_STSWX 0 /* don't. slower than trivial copy loop */
37 1.3.2.2 nathanw
38 1.3.2.2 nathanw /*----------------------------------------------------------------------*/
39 1.3.2.2 nathanw /*
40 1.3.2.2 nathanw void bzero(void *b r3, size_t len r4);
41 1.3.2.2 nathanw void * memset(void *b r3, int c r4, size_t len r5);
42 1.3.2.2 nathanw */
43 1.3.2.2 nathanw /*----------------------------------------------------------------------*/
44 1.3.2.2 nathanw
45 1.3.2.2 nathanw #define r_dst r3
46 1.3.2.2 nathanw #define r_len r4
47 1.3.2.2 nathanw #define r_val r0
48 1.3.2.2 nathanw
49 1.3.2.2 nathanw .text
50 1.3.2.2 nathanw .align 4
51 1.3.2.2 nathanw ENTRY(bzero)
52 1.3.2.2 nathanw li r_val, 0 /* Value to stuff in */
53 1.3.2.2 nathanw b cb_memset
54 1.3.2.2 nathanw
55 1.3.2.2 nathanw ENTRY(memset)
56 1.3.2.2 nathanw cmplwi cr1, r5, 0
57 1.3.2.2 nathanw mr. r0, r4
58 1.3.2.2 nathanw mr r8, r3
59 1.3.2.2 nathanw beqlr- cr1 /* Nothing to do */
60 1.3.2.2 nathanw
61 1.3.2.2 nathanw rlwimi r0, r4, 8, 16, 23 /* word extend fill value */
62 1.3.2.2 nathanw rlwimi r0, r0, 16, 0, 15
63 1.3.2.2 nathanw mr r4, r5
64 1.3.2.2 nathanw bne- simple_fill /* =! 0, use trivial fill */
65 1.3.2.2 nathanw cb_memset:
66 1.3.2.2 nathanw
67 1.3.2.2 nathanw /*----------------------------------------------------------------------*/
68 1.3.2.2 nathanw #ifndef _KERNEL
69 1.3.2.2 nathanw /* First find out cache line size */
70 1.3.2.2 nathanw #ifdef PIC
71 1.3.2.2 nathanw mflr r9
72 1.3.2.2 nathanw bl _GLOBAL_OFFSET_TABLE_@local-4
73 1.3.2.2 nathanw mflr r10
74 1.3.2.2 nathanw mtlr r9
75 1.3.2.3 nathanw lwz r5,cache_info@got(r10)
76 1.3.2.2 nathanw #else
77 1.3.2.3 nathanw lis r5,cache_info@h
78 1.3.2.3 nathanw ori r5,r5,cache_info@l
79 1.3.2.2 nathanw #endif
80 1.3.2.3 nathanw lwz r6, 4(r5)
81 1.3.2.2 nathanw cmpwi r6, -1
82 1.3.2.2 nathanw bne+ cb_cacheline_known
83 1.3.2.2 nathanw
84 1.3.2.2 nathanw /*----------------------------------------------------------------------*/
85 1.3.2.2 nathanw #define CTL_MACHDEP 7
86 1.3.2.2 nathanw #define CPU_CACHELINE 1
87 1.3.2.3 nathanw #define CPU_CACHEINFO 5
88 1.3.2.2 nathanw
89 1.3.2.2 nathanw #define STKFRAME_SZ 48
90 1.3.2.2 nathanw #define MIB 8
91 1.3.2.2 nathanw #define OLDPLEN 16
92 1.3.2.2 nathanw #define R3_SAVE 20
93 1.3.2.2 nathanw #define R4_SAVE 24
94 1.3.2.2 nathanw #define R0_SAVE 28
95 1.3.2.2 nathanw #define R8_SAVE 32
96 1.3.2.2 nathanw
97 1.3.2.2 nathanw mflr r6
98 1.3.2.2 nathanw stw r6, 4(r1)
99 1.3.2.2 nathanw stwu r1, -STKFRAME_SZ(r1)
100 1.3.2.2 nathanw
101 1.3.2.2 nathanw stw r8, R8_SAVE(r1)
102 1.3.2.2 nathanw stw r3, R3_SAVE(r1)
103 1.3.2.2 nathanw stw r4, R4_SAVE(r1)
104 1.3.2.2 nathanw stw r0, R0_SAVE(r1)
105 1.3.2.2 nathanw
106 1.3.2.3 nathanw
107 1.3.2.3 nathanw
108 1.3.2.3 nathanw li r0, CTL_MACHDEP /* Construct MIB */
109 1.3.2.3 nathanw stw r0, MIB(r1)
110 1.3.2.3 nathanw li r0, CPU_CACHEINFO
111 1.3.2.3 nathanw stw r0, MIB+4(r1)
112 1.3.2.3 nathanw
113 1.3.2.3 nathanw li r0, 4*4 /* Oldlenp := 4*4 */
114 1.3.2.3 nathanw stw r0, OLDPLEN(r1)
115 1.3.2.3 nathanw
116 1.3.2.3 nathanw addi r3, r1, MIB
117 1.3.2.3 nathanw li r4, 2 /* namelen */
118 1.3.2.3 nathanw /* r5 already contains &cache_info */
119 1.3.2.3 nathanw addi r6, r1, OLDPLEN
120 1.3.2.3 nathanw li r7, 0
121 1.3.2.3 nathanw li r8, 0
122 1.3.2.3 nathanw bl PIC_PLT(_C_LABEL(sysctl))
123 1.3.2.3 nathanw
124 1.3.2.3 nathanw cmpwi r3, 0 /* Check result */
125 1.3.2.3 nathanw beq 1f
126 1.3.2.3 nathanw
127 1.3.2.3 nathanw /* Failure, try older sysctl */
128 1.3.2.3 nathanw
129 1.3.2.2 nathanw li r0, CTL_MACHDEP /* Construct MIB */
130 1.3.2.2 nathanw stw r0, MIB(r1)
131 1.3.2.2 nathanw li r0, CPU_CACHELINE
132 1.3.2.2 nathanw stw r0, MIB+4(r1)
133 1.3.2.2 nathanw
134 1.3.2.2 nathanw li r0, 4 /* Oldlenp := 4 */
135 1.3.2.2 nathanw stw r0, OLDPLEN(r1)
136 1.3.2.2 nathanw
137 1.3.2.2 nathanw addi r3, r1, MIB
138 1.3.2.2 nathanw li r4, 2 /* namelen */
139 1.3.2.3 nathanw #ifdef PIC
140 1.3.2.3 nathanw mflr r9
141 1.3.2.3 nathanw bl _GLOBAL_OFFSET_TABLE_@local-4
142 1.3.2.3 nathanw mflr r10
143 1.3.2.3 nathanw mtlr r9
144 1.3.2.3 nathanw lwz r5,cache_info@got(r10)
145 1.3.2.3 nathanw addi r5, r5, 4
146 1.3.2.3 nathanw #else
147 1.3.2.3 nathanw lis r5,cache_info+4@h
148 1.3.2.3 nathanw ori r5,r5,cache_info+4@l
149 1.3.2.3 nathanw #endif
150 1.3.2.2 nathanw addi r6, r1, OLDPLEN
151 1.3.2.2 nathanw li r7, 0
152 1.3.2.2 nathanw li r8, 0
153 1.3.2.2 nathanw bl PIC_PLT(_C_LABEL(sysctl))
154 1.3.2.3 nathanw 1:
155 1.3.2.2 nathanw lwz r8, R8_SAVE(r1)
156 1.3.2.2 nathanw lwz r3, R3_SAVE(r1)
157 1.3.2.2 nathanw lwz r4, R4_SAVE(r1)
158 1.3.2.2 nathanw lwz r0, R0_SAVE(r1)
159 1.3.2.2 nathanw
160 1.3.2.2 nathanw #ifdef PIC
161 1.3.2.2 nathanw bl _GLOBAL_OFFSET_TABLE_@local-4
162 1.3.2.2 nathanw mflr r10
163 1.3.2.3 nathanw lwz r9, cache_info@got(r10)
164 1.3.2.3 nathanw lwz r9, 4(r9)
165 1.3.2.2 nathanw #else
166 1.3.2.3 nathanw lis r5, cache_info+4@ha
167 1.3.2.3 nathanw lwz r9, cache_info+4@l(r5)
168 1.3.2.2 nathanw #endif
169 1.3.2.2 nathanw la r1, STKFRAME_SZ(r1)
170 1.3.2.2 nathanw lwz r5, 4(r1)
171 1.3.2.2 nathanw mtlr r5
172 1.3.2.2 nathanw
173 1.3.2.2 nathanw cntlzw r6, r9 /* compute shift value */
174 1.3.2.2 nathanw li r5, 31
175 1.3.2.2 nathanw subf r5, r6, r5
176 1.3.2.2 nathanw
177 1.3.2.2 nathanw #ifdef PIC
178 1.3.2.2 nathanw lwz r6, cache_sh@got(r10)
179 1.3.2.2 nathanw stw r5, 0(r6)
180 1.3.2.2 nathanw #else
181 1.3.2.2 nathanw lis r6, cache_sh@ha
182 1.3.2.2 nathanw stw r5, cache_sh@l(r6)
183 1.3.2.2 nathanw #endif
184 1.3.2.2 nathanw /*----------------------------------------------------------------------*/
185 1.3.2.2 nathanw /* Okay, we know the cache line size (r9) and shift value (r10) */
186 1.3.2.2 nathanw cb_cacheline_known:
187 1.3.2.2 nathanw #ifdef PIC
188 1.3.2.3 nathanw lwz r5, cache_info@got(r10)
189 1.3.2.3 nathanw lwz r9, 4(r5)
190 1.3.2.2 nathanw lwz r5, cache_sh@got(r10)
191 1.3.2.2 nathanw lwz r10, 0(r5)
192 1.3.2.2 nathanw #else
193 1.3.2.3 nathanw lis r9, cache_info+4@ha
194 1.3.2.3 nathanw lwz r9, cache_info+4@l(r9)
195 1.3.2.2 nathanw lis r10, cache_sh@ha
196 1.3.2.2 nathanw lwz r10, cache_sh@l(r10)
197 1.3.2.2 nathanw #endif
198 1.3.2.2 nathanw
199 1.3.2.2 nathanw #else /* _KERNEL */
200 1.3.2.3 nathanw #ifdef MULTIPROCESSOR
201 1.3.2.3 nathanw mfspr r10, 0 /* Get cpu_info pointer */
202 1.3.2.2 nathanw #else
203 1.3.2.3 nathanw lis r10, cpu_info_store@ha
204 1.3.2.3 nathanw addi r10, r10, cpu_info_store@l
205 1.3.2.2 nathanw #endif
206 1.3.2.3 nathanw lwz r9, CPU_CI+4(r10) /* Load D$ line size */
207 1.3.2.3 nathanw cntlzw r10, r9 /* Calculate shift.. */
208 1.3.2.3 nathanw li r6, 31
209 1.3.2.3 nathanw subf r10, r10, r6
210 1.3.2.2 nathanw #endif /* _KERNEL */
211 1.3.2.2 nathanw /* Back in memory filling business */
212 1.3.2.2 nathanw
213 1.3.2.2 nathanw cmplwi cr1, r_len, 0 /* Nothing to do? */
214 1.3.2.2 nathanw add r5, r9, r9
215 1.3.2.2 nathanw cmplw r_len, r5 /* <= 2*CL bytes to move? */
216 1.3.2.2 nathanw beqlr- cr1 /* then do nothing */
217 1.3.2.2 nathanw
218 1.3.2.2 nathanw blt+ simple_fill /* a trivial fill routine */
219 1.3.2.2 nathanw
220 1.3.2.2 nathanw /* Word align the block, fill bytewise until dst even*/
221 1.3.2.2 nathanw
222 1.3.2.2 nathanw andi. r5, r_dst, 0x03
223 1.3.2.2 nathanw li r6, 4
224 1.3.2.2 nathanw beq+ cb_aligned_w /* already aligned to word? */
225 1.3.2.2 nathanw
226 1.3.2.2 nathanw subf r5, r5, r6 /* bytes to fill to align4 */
227 1.3.2.2 nathanw #if USE_STSWX
228 1.3.2.2 nathanw mtxer r5
229 1.3.2.2 nathanw stswx r0, 0, r_dst
230 1.3.2.2 nathanw add r_dst, r5, r_dst
231 1.3.2.2 nathanw #else
232 1.3.2.2 nathanw mtctr r5
233 1.3.2.2 nathanw
234 1.3.2.2 nathanw subi r_dst, r_dst, 1
235 1.3.2.2 nathanw 1: stbu r_val, 1(r_dst) /* Fill bytewise */
236 1.3.2.2 nathanw bdnz 1b
237 1.3.2.2 nathanw
238 1.3.2.2 nathanw addi r_dst, r_dst, 1
239 1.3.2.2 nathanw #endif
240 1.3.2.2 nathanw subf r_len, r5, r_len
241 1.3.2.2 nathanw
242 1.3.2.2 nathanw cb_aligned_w: /* Cache block align, fill wordwise until dst aligned */
243 1.3.2.2 nathanw
244 1.3.2.2 nathanw /* I know I have something to do since we had > 2*CL initially */
245 1.3.2.2 nathanw /* so no need to check for r_len = 0 */
246 1.3.2.2 nathanw
247 1.3.2.2 nathanw rlwinm. r5, r_dst, 30, 29, 31
248 1.3.2.2 nathanw srwi r6, r9, 2
249 1.3.2.2 nathanw beq cb_aligned_cb /* already on CL boundary? */
250 1.3.2.2 nathanw
251 1.3.2.2 nathanw subf r5, r5, r6 /* words to fill to alignment */
252 1.3.2.2 nathanw mtctr r5
253 1.3.2.2 nathanw slwi r5, r5, 2
254 1.3.2.2 nathanw subf r_len, r5, r_len
255 1.3.2.2 nathanw
256 1.3.2.2 nathanw subi r_dst, r_dst, 4
257 1.3.2.2 nathanw 1: stwu r_val, 4(r_dst) /* Fill wordwise */
258 1.3.2.2 nathanw bdnz 1b
259 1.3.2.2 nathanw addi r_dst, r_dst, 4
260 1.3.2.2 nathanw
261 1.3.2.2 nathanw cb_aligned_cb: /* no need to check r_len, see above */
262 1.3.2.2 nathanw
263 1.3.2.2 nathanw srw. r5, r_len, r10 /* Number of cache blocks */
264 1.3.2.2 nathanw mtctr r5
265 1.3.2.2 nathanw beq cblocks_done
266 1.3.2.2 nathanw
267 1.3.2.2 nathanw slw r5, r5, r10
268 1.3.2.2 nathanw subf r_len, r5, r_len
269 1.3.2.2 nathanw
270 1.3.2.2 nathanw 1: dcbz 0, r_dst /* Clear blockwise */
271 1.3.2.2 nathanw add r_dst, r_dst, r9
272 1.3.2.2 nathanw bdnz 1b
273 1.3.2.2 nathanw
274 1.3.2.2 nathanw cblocks_done: /* still CL aligned, but less than CL bytes left */
275 1.3.2.2 nathanw cmplwi cr1, r_len, 0
276 1.3.2.2 nathanw cmplwi r_len, 8
277 1.3.2.2 nathanw beq- cr1, sf_return
278 1.3.2.2 nathanw
279 1.3.2.2 nathanw blt- sf_bytewise /* <8 remaining? */
280 1.3.2.2 nathanw b sf_aligned_w
281 1.3.2.2 nathanw
282 1.3.2.2 nathanw /*----------------------------------------------------------------------*/
283 1.3.2.2 nathanw wbzero: li r_val, 0
284 1.3.2.2 nathanw
285 1.3.2.2 nathanw cmplwi r_len, 0
286 1.3.2.2 nathanw beqlr- /* Nothing to do */
287 1.3.2.2 nathanw
288 1.3.2.2 nathanw simple_fill:
289 1.3.2.2 nathanw #if USE_STSWX
290 1.3.2.2 nathanw cmplwi cr1, r_len, 12 /* < 12 bytes to move? */
291 1.3.2.2 nathanw #else
292 1.3.2.2 nathanw cmplwi cr1, r_len, 8 /* < 8 bytes to move? */
293 1.3.2.2 nathanw #endif
294 1.3.2.2 nathanw andi. r5, r_dst, 0x03 /* bytes to fill to align4 */
295 1.3.2.2 nathanw blt cr1, sf_bytewise /* trivial byte mover */
296 1.3.2.2 nathanw
297 1.3.2.2 nathanw li r6, 4
298 1.3.2.2 nathanw subf r5, r5, r6
299 1.3.2.2 nathanw beq+ sf_aligned_w /* dest is word aligned */
300 1.3.2.2 nathanw
301 1.3.2.2 nathanw #if USE_STSWX
302 1.3.2.2 nathanw mtxer r5
303 1.3.2.2 nathanw stswx r0, 0, r_dst
304 1.3.2.2 nathanw add r_dst, r5, r_dst
305 1.3.2.2 nathanw #else
306 1.3.2.2 nathanw mtctr r5 /* nope, then fill bytewise */
307 1.3.2.2 nathanw subi r_dst, r_dst, 1 /* until it is */
308 1.3.2.2 nathanw 1: stbu r_val, 1(r_dst)
309 1.3.2.2 nathanw bdnz 1b
310 1.3.2.2 nathanw
311 1.3.2.2 nathanw addi r_dst, r_dst, 1
312 1.3.2.2 nathanw #endif
313 1.3.2.2 nathanw subf r_len, r5, r_len
314 1.3.2.2 nathanw
315 1.3.2.2 nathanw sf_aligned_w: /* no need to check r_len since it were >= 8 bytes initially */
316 1.3.2.2 nathanw #if USE_STSWX
317 1.3.2.2 nathanw mr r6, r0
318 1.3.2.2 nathanw mr r7, r0
319 1.3.2.2 nathanw
320 1.3.2.2 nathanw srwi r5, r_len, 3
321 1.3.2.2 nathanw mtctr r5
322 1.3.2.2 nathanw
323 1.3.2.2 nathanw slwi r5, r5, 3 /* adjust len */
324 1.3.2.2 nathanw subf. r_len, r5, r_len
325 1.3.2.2 nathanw
326 1.3.2.2 nathanw 1: stswi r6, r_dst, 8
327 1.3.2.2 nathanw addi r_dst, r_dst, 8
328 1.3.2.2 nathanw bdnz 1b
329 1.3.2.2 nathanw #else
330 1.3.2.2 nathanw srwi r5, r_len, 2 /* words to fill */
331 1.3.2.2 nathanw mtctr r5
332 1.3.2.2 nathanw
333 1.3.2.2 nathanw slwi r5, r5, 2
334 1.3.2.2 nathanw subf. r_len, r5, r_len /* adjust len for fill */
335 1.3.2.2 nathanw
336 1.3.2.2 nathanw subi r_dst, r_dst, 4
337 1.3.2.2 nathanw 1: stwu r_val, 4(r_dst)
338 1.3.2.2 nathanw bdnz 1b
339 1.3.2.2 nathanw addi r_dst, r_dst, 4
340 1.3.2.2 nathanw #endif
341 1.3.2.2 nathanw
342 1.3.2.2 nathanw sf_word_done: bne- sf_bytewise
343 1.3.2.2 nathanw
344 1.3.2.2 nathanw sf_return: mr r3, r8 /* restore orig ptr */
345 1.3.2.2 nathanw blr /* for memset functionality */
346 1.3.2.2 nathanw
347 1.3.2.2 nathanw sf_bytewise:
348 1.3.2.2 nathanw #if USE_STSWX
349 1.3.2.2 nathanw mr r5, r0
350 1.3.2.2 nathanw mr r6, r0
351 1.3.2.2 nathanw mr r7, r0
352 1.3.2.2 nathanw
353 1.3.2.2 nathanw mtxer r_len
354 1.3.2.2 nathanw stswx r5, 0, r_dst
355 1.3.2.2 nathanw #else
356 1.3.2.2 nathanw mtctr r_len
357 1.3.2.2 nathanw
358 1.3.2.2 nathanw subi r_dst, r_dst, 1
359 1.3.2.2 nathanw 1: stbu r_val, 1(r_dst)
360 1.3.2.2 nathanw bdnz 1b
361 1.3.2.2 nathanw #endif
362 1.3.2.2 nathanw mr r3, r8 /* restore orig ptr */
363 1.3.2.2 nathanw blr /* for memset functionality */
364 1.3.2.2 nathanw
365 1.3.2.2 nathanw /*----------------------------------------------------------------------*/
366 1.3.2.2 nathanw #ifndef _KERNEL
367 1.3.2.2 nathanw .data
368 1.3.2.3 nathanw cache_info: .long -1, -1, -1, -1
369 1.3.2.2 nathanw cache_sh: .long 0
370 1.3.2.2 nathanw
371 1.3.2.2 nathanw #endif
372 1.3.2.2 nathanw /*----------------------------------------------------------------------*/
373