/*	$NetBSD: bzero.S,v 1.1 2006/07/01 16:37:20 ross Exp $ */

/*-
 * Copyright (C) 2001 Martin J. Laubach <mjl@NetBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/

#include <machine/asm.h>
#ifdef _KERNEL
#include <assym.h>
#endif

#define USE_STSWX 0	/* don't. slower than trivial copy loop */

/*----------------------------------------------------------------------*/
/*
     void bzero(void *b %r3, size_t len %r4);
     void * memset(void *b %r3, int c %r4, size_t len %r5);
*/
/*----------------------------------------------------------------------*/

#define r_dst	%r3
#define r_len	%r4
#define r_val	%r0

		.text
		.align 4
ENTRY(bzero)
		li	r_val, 0		/* Value to stuff in */
		b	cb_memset

ENTRY(memset)
		cmplwi	cr1, %r5, 0
		mr.	%r0, %r4
		mr	%r8, %r3
		beqlr-	cr1			/* Nothing to do */

		rlwimi	%r0, %r4, 8, 16, 23	/* word extend fill value */
		rlwimi	%r0, %r0, 16, 0, 15
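		/*
		 * The two rlwimi above replicate the low byte of c into
		 * all four bytes of %r0. A rough C sketch of the same
		 * computation (memset uses c only as an unsigned char):
		 *
		 *	uint32_t v = c & 0xff;
		 *	v |= v << 8;
		 *	v |= v << 16;
		 */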
		mr	%r4, %r5
		bne-	simple_fill		/* c != 0, use trivial fill */
cb_memset:

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		/* First find out cache line size */
#ifdef PIC
		mflr	%r9
		bl	_GLOBAL_OFFSET_TABLE_@local-4
		mflr	%r10
		mtlr	%r9
		lwz	%r5,cache_info@got(%r10)
#else
		lis	%r5,cache_info@h
		ori	%r5,%r5,cache_info@l
#endif
		lwz	%r6, 4(%r5)
		cmpwi	%r6, -1
		bne+	cb_cacheline_known

/*----------------------------------------------------------------------*/
#define CTL_MACHDEP	7
#define CPU_CACHELINE	1
#define CPU_CACHEINFO	5

#define STKFRAME_SZ	48
#define MIB		8
#define OLDPLEN		16
#define R3_SAVE		20
#define R4_SAVE		24
#define R0_SAVE		28
#define R8_SAVE		32
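		/*
		 * What follows is, roughly, this C probe done by hand
		 * (CPU_CACHEINFO returns four words; cache_info[1] is
		 * the D-cache line size):
		 *
		 *	int mib[2] = { CTL_MACHDEP, CPU_CACHEINFO };
		 *	size_t len = 4 * 4;
		 *	if (sysctl(mib, 2, cache_info, &len, NULL, 0) != 0) {
		 *		mib[1] = CPU_CACHELINE;   (older fallback)
		 *		len = 4;
		 *		sysctl(mib, 2, &cache_info[1], &len, NULL, 0);
		 *	}
		 */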

		mflr	%r6
		stw	%r6, 4(%r1)
		stwu	%r1, -STKFRAME_SZ(%r1)

		stw	%r8, R8_SAVE(%r1)
		stw	%r3, R3_SAVE(%r1)
		stw	%r4, R4_SAVE(%r1)
		stw	%r0, R0_SAVE(%r1)

		li	%r0, CTL_MACHDEP	/* Construct MIB */
		stw	%r0, MIB(%r1)
		li	%r0, CPU_CACHEINFO
		stw	%r0, MIB+4(%r1)

		li	%r0, 4*4		/* Oldlenp := 4*4 */
		stw	%r0, OLDPLEN(%r1)

		addi	%r3, %r1, MIB
		li	%r4, 2			/* namelen */
		/* %r5 already contains &cache_info */
		addi	%r6, %r1, OLDPLEN
		li	%r7, 0
		li	%r8, 0
		bl	PIC_PLT(_C_LABEL(sysctl))

		cmpwi	%r3, 0			/* Check result */
		beq	1f

		/* Failure, try older sysctl */

		li	%r0, CTL_MACHDEP	/* Construct MIB */
		stw	%r0, MIB(%r1)
		li	%r0, CPU_CACHELINE
		stw	%r0, MIB+4(%r1)

		li	%r0, 4			/* Oldlenp := 4 */
		stw	%r0, OLDPLEN(%r1)

		addi	%r3, %r1, MIB
		li	%r4, 2			/* namelen */
#ifdef PIC
		mflr	%r9
		bl	_GLOBAL_OFFSET_TABLE_@local-4
		mflr	%r10
		mtlr	%r9
		lwz	%r5,cache_info@got(%r10)
		addi	%r5, %r5, 4
#else
		lis	%r5,cache_info+4@h
		ori	%r5,%r5,cache_info+4@l
#endif
		addi	%r6, %r1, OLDPLEN
		li	%r7, 0
		li	%r8, 0
		bl	PIC_PLT(_C_LABEL(sysctl))
1:
		lwz	%r8, R8_SAVE(%r1)
		lwz	%r3, R3_SAVE(%r1)
		lwz	%r4, R4_SAVE(%r1)
		lwz	%r0, R0_SAVE(%r1)

#ifdef PIC
		bl	_GLOBAL_OFFSET_TABLE_@local-4
		mflr	%r10
		lwz	%r9, cache_info@got(%r10)
		lwz	%r9, 4(%r9)
#else
		lis	%r5, cache_info+4@ha
		lwz	%r9, cache_info+4@l(%r5)
#endif
		la	%r1, STKFRAME_SZ(%r1)
		lwz	%r5, 4(%r1)
		mtlr	%r5

		cntlzw	%r6, %r9		/* compute shift value */
		li	%r5, 31
		subf	%r5, %r6, %r5
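		/*
		 * 31 - cntlzw(x) is floor(log2(x)); since the cache line
		 * size is a power of two this is the exact shift count,
		 * e.g. cntlzw(32) = 26 and 31 - 26 = 5.
		 */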

#ifdef PIC
		lwz	%r6, cache_sh@got(%r10)
		stw	%r5, 0(%r6)
#else
		lis	%r6, cache_sh@ha
		stw	%r5, cache_sh@l(%r6)
#endif
/*----------------------------------------------------------------------*/
/* Okay, we know the cache line size (%r9) and shift value (%r10) */
cb_cacheline_known:
#ifdef PIC
		lwz	%r5, cache_info@got(%r10)
		lwz	%r9, 4(%r5)
		lwz	%r5, cache_sh@got(%r10)
		lwz	%r10, 0(%r5)
#else
		lis	%r9, cache_info+4@ha
		lwz	%r9, cache_info+4@l(%r9)
		lis	%r10, cache_sh@ha
		lwz	%r10, cache_sh@l(%r10)
#endif

#else /* _KERNEL */
#ifdef MULTIPROCESSOR
		mfsprg	%r10, 0			/* Get cpu_info pointer */
#else
		lis	%r10, cpu_info_store@ha
		addi	%r10, %r10, cpu_info_store@l
#endif
		lwz	%r9, CPU_CI+4(%r10)	/* Load D$ line size */
		cntlzw	%r10, %r9		/* Calculate shift.. */
		li	%r6, 31
		subf	%r10, %r10, %r6
#endif /* _KERNEL */
		/* Back in memory filling business */

		cmplwi	cr1, r_len, 0		/* Nothing to do? */
		add	%r5, %r9, %r9
		cmplw	r_len, %r5		/* <= 2*CL bytes to move? */
		beqlr-	cr1			/* then do nothing */

		blt+	simple_fill		/* a trivial fill routine */

		/* Word align the block, fill bytewise until dst is word aligned */

		andi.	%r5, r_dst, 0x03
		li	%r6, 4
		beq+	cb_aligned_w		/* already aligned to word? */

		subf	%r5, %r5, %r6		/* bytes to fill to align4 */
#if USE_STSWX
		mtxer	%r5
		stswx	%r0, 0, r_dst
		add	r_dst, %r5, r_dst
#else
		mtctr	%r5

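		/*
		 * Bias the pointer down by one byte so the update form
		 * stbu can store and advance it in a single instruction.
		 */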
		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)		/* Fill bytewise */
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, %r5, r_len

cb_aligned_w:	/* Cache block align, fill wordwise until dst aligned */

		/* I know I have something to do since we had >= 2*CL initially */
		/* so no need to check for r_len == 0 */

		subi	%r6, %r9, 1		/* CL mask */
		and.	%r5, r_dst, %r6
		srwi	%r5, %r5, 2
		srwi	%r6, %r9, 2
		beq	cb_aligned_cb		/* already on CL boundary? */

		subf	%r5, %r5, %r6		/* words to fill to alignment */
		mtctr	%r5
		slwi	%r5, %r5, 2
		subf	r_len, %r5, r_len

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)		/* Fill wordwise */
		bdnz	1b
		addi	r_dst, r_dst, 4

cb_aligned_cb:	/* no need to check r_len, see above */

		srw.	%r5, r_len, %r10	/* Number of cache blocks */
		mtctr	%r5
		beq	cblocks_done

		slw	%r5, %r5, %r10
		subf	r_len, %r5, r_len

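		/*
		 * dcbz zeroes one full cache block per iteration, which is
		 * why only zero fills (bzero, or memset with c == 0) reach
		 * this loop, and why r_dst was cache-block aligned above.
		 */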
1:		dcbz	0, r_dst		/* Clear blockwise */
		add	r_dst, r_dst, %r9
		bdnz	1b

cblocks_done:	/* still CL aligned, but less than CL bytes left */
		cmplwi	cr1, r_len, 0
		cmplwi	r_len, 8
		beq-	cr1, sf_return

		blt-	sf_bytewise		/* <8 remaining? */
		b	sf_aligned_w

/*----------------------------------------------------------------------*/
wbzero:		li	r_val, 0

		cmplwi	r_len, 0
		beqlr-				/* Nothing to do */

simple_fill:
#if USE_STSWX
		cmplwi	cr1, r_len, 12		/* < 12 bytes to move? */
#else
		cmplwi	cr1, r_len, 8		/* < 8 bytes to move? */
#endif
		andi.	%r5, r_dst, 0x03	/* dst misaligned? */
		blt	cr1, sf_bytewise	/* trivial byte mover */

		li	%r6, 4
		subf	%r5, %r5, %r6		/* bytes to fill to align4 */
		beq+	sf_aligned_w		/* dest is word aligned */

#if USE_STSWX
		mtxer	%r5
		stswx	%r0, 0, r_dst
		add	r_dst, %r5, r_dst
#else
		mtctr	%r5			/* nope, then fill bytewise */
		subi	r_dst, r_dst, 1		/* until it is */
1:		stbu	r_val, 1(r_dst)
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, %r5, r_len

sf_aligned_w:	/* no need to check r_len since it was >= 8 bytes initially */
#if USE_STSWX
		mr	%r6, %r0
		mr	%r7, %r0

		srwi	%r5, r_len, 3
		mtctr	%r5

		slwi	%r5, %r5, 3		/* adjust len */
		subf.	r_len, %r5, r_len

1:		stswi	%r6, r_dst, 8
		addi	r_dst, r_dst, 8
		bdnz	1b
#else
		srwi	%r5, r_len, 2		/* words to fill */
		mtctr	%r5

		slwi	%r5, %r5, 2
		subf.	r_len, %r5, r_len	/* adjust len for fill */

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)
		bdnz	1b
		addi	r_dst, r_dst, 4
#endif

sf_word_done:	bne-	sf_bytewise

sf_return:	mr	%r3, %r8		/* restore orig ptr */
		blr				/* for memset functionality */

sf_bytewise:
#if USE_STSWX
		mr	%r5, %r0
		mr	%r6, %r0
		mr	%r7, %r0

		mtxer	r_len
		stswx	%r5, 0, r_dst
#else
		mtctr	r_len

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)
		bdnz	1b
#endif
		mr	%r3, %r8		/* restore orig ptr */
		blr				/* for memset functionality */

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		.data
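/*
 * cache_info mirrors the four words returned by the CPU_CACHEINFO
 * sysctl; cache_info[1] is the D-cache line size and -1 means "not
 * yet probed". cache_sh holds the derived log2(line size) shift.
 */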
cache_info:	.long -1, -1, -1, -1
cache_sh:	.long 0

#endif
/*----------------------------------------------------------------------*/