/*	$NetBSD: bzero.S,v 1.1 2006/07/01 16:37:20 ross Exp $ */

/*-
 * Copyright (C) 2001	Martin J. Laubach <mjl (at) NetBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/

#include <machine/asm.h>
#ifdef _KERNEL
#include <assym.h>
#endif

#define USE_STSWX 0	/* don't. slower than trivial copy loop */

/*----------------------------------------------------------------------*/
/*
     void bzero(void *b %r3, size_t len %r4);
     void * memset(void *b %r3, int c %r4, size_t len %r5);
*/
/*----------------------------------------------------------------------*/
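/*
 * Overview: the fill byte is first replicated into a full word. Large
 * zero fills are then cleared one cache line at a time with dcbz; the
 * alignment head/tail and all non-zero fills go through the simple
 * word/byte loops below.
 */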

#define r_dst	%r3
#define r_len	%r4
#define r_val	%r0

		.text
		.align 4
ENTRY(bzero)
		li	r_val, 0	/* Value to stuff in */
		b	cb_memset

ENTRY(memset)
		cmplwi	cr1, %r5, 0
		mr.	%r0, %r4
		mr	%r8, %r3
		beqlr-	cr1		/* Nothing to do */

		rlwimi	%r0, %r4, 8, 16, 23	/* word extend fill value */
		rlwimi	%r0, %r0, 16, 0, 15
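		/* e.g. c = 0x2a: %r0 goes 0x0000002a -> 0x00002a2a -> 0x2a2a2a2a */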
		mr	%r4, %r5
		bne-	simple_fill	/* != 0, use trivial fill */
cb_memset:

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		/* First find out cache line size */
#ifdef PIC
		mflr	%r9
		bl	_GLOBAL_OFFSET_TABLE_@local-4
		mflr	%r10
		mtlr	%r9
		lwz	%r5,cache_info@got(%r10)
#else
		lis	%r5,cache_info@h
		ori	%r5,%r5,cache_info@l
#endif
		lwz	%r6, 4(%r5)
		cmpwi	%r6, -1
		bne+	cb_cacheline_known

/*----------------------------------------------------------------------*/
#define CTL_MACHDEP	7
#define CPU_CACHELINE	1
#define CPU_CACHEINFO	5

#define STKFRAME_SZ	48
#define MIB		8
#define OLDPLEN		16
#define R3_SAVE		20
#define R4_SAVE		24
#define R0_SAVE		28
#define R8_SAVE		32

		mflr	%r6
		stw	%r6, 4(%r1)
		stwu	%r1, -STKFRAME_SZ(%r1)

		stw	%r8, R8_SAVE(%r1)
		stw	%r3, R3_SAVE(%r1)
		stw	%r4, R4_SAVE(%r1)
		stw	%r0, R0_SAVE(%r1)

		li	%r0, CTL_MACHDEP	/* Construct MIB */
		stw	%r0, MIB(%r1)
		li	%r0, CPU_CACHEINFO
		stw	%r0, MIB+4(%r1)

		li	%r0, 4*4		/* Oldlenp := 4*4 */
		stw	%r0, OLDPLEN(%r1)

		addi	%r3, %r1, MIB
		li	%r4, 2			/* namelen */
		/* %r5 already contains &cache_info */
		addi	%r6, %r1, OLDPLEN
		li	%r7, 0
		li	%r8, 0
		bl	PIC_PLT(_C_LABEL(sysctl))
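		/*
		 * In C terms, the call above is roughly:
		 *   int mib[2] = { CTL_MACHDEP, CPU_CACHEINFO };
		 *   size_t len = 4 * 4;
		 *   sysctl(mib, 2, cache_info, &len, NULL, 0);
		 */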

		cmpwi	%r3, 0			/* Check result */
		beq	1f

		/* Failure, try older sysctl */

		li	%r0, CTL_MACHDEP	/* Construct MIB */
		stw	%r0, MIB(%r1)
		li	%r0, CPU_CACHELINE
		stw	%r0, MIB+4(%r1)

		li	%r0, 4			/* Oldlenp := 4 */
		stw	%r0, OLDPLEN(%r1)

		addi	%r3, %r1, MIB
		li	%r4, 2			/* namelen */
#ifdef PIC
		mflr	%r9
		bl	_GLOBAL_OFFSET_TABLE_@local-4
		mflr	%r10
		mtlr	%r9
		lwz	%r5,cache_info@got(%r10)
		addi	%r5, %r5, 4
#else
		lis	%r5,cache_info+4@h
		ori	%r5,%r5,cache_info+4@l
#endif
		addi	%r6, %r1, OLDPLEN
		li	%r7, 0
		li	%r8, 0
		bl	PIC_PLT(_C_LABEL(sysctl))
1:
		lwz	%r8, R8_SAVE(%r1)
		lwz	%r3, R3_SAVE(%r1)
		lwz	%r4, R4_SAVE(%r1)
		lwz	%r0, R0_SAVE(%r1)

#ifdef PIC
		bl	_GLOBAL_OFFSET_TABLE_@local-4
		mflr	%r10
		lwz	%r9, cache_info@got(%r10)
		lwz	%r9, 4(%r9)
#else
		lis	%r5, cache_info+4@ha
		lwz	%r9, cache_info+4@l(%r5)
#endif
		la	%r1, STKFRAME_SZ(%r1)
		lwz	%r5, 4(%r1)
		mtlr	%r5

		cntlzw	%r6, %r9		/* compute shift value */
		li	%r5, 31
		subf	%r5, %r6, %r5
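		/* e.g. a 32-byte line: cntlzw(32) = 26, shift = 31 - 26 = 5 */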

#ifdef PIC
		lwz	%r6, cache_sh@got(%r10)
		stw	%r5, 0(%r6)
#else
		lis	%r6, cache_sh@ha
		stw	%r5, cache_sh@l(%r6)
#endif
/*----------------------------------------------------------------------*/
/* Okay, we know the cache line size (%r9) and shift value (%r10) */
cb_cacheline_known:
#ifdef PIC
		lwz	%r5, cache_info@got(%r10)
		lwz	%r9, 4(%r5)
		lwz	%r5, cache_sh@got(%r10)
		lwz	%r10, 0(%r5)
#else
		lis	%r9, cache_info+4@ha
		lwz	%r9, cache_info+4@l(%r9)
		lis	%r10, cache_sh@ha
		lwz	%r10, cache_sh@l(%r10)
#endif

#else /* _KERNEL */
#ifdef MULTIPROCESSOR
		mfsprg	%r10, 0			/* Get cpu_info pointer */
#else
		lis	%r10, cpu_info_store@ha
		addi	%r10, %r10, cpu_info_store@l
#endif
		lwz	%r9, CPU_CI+4(%r10)	/* Load D$ line size */
		cntlzw	%r10, %r9		/* Calculate shift.. */
		li	%r6, 31
		subf	%r10, %r10, %r6
#endif /* _KERNEL */
		/* Back in memory filling business */

		cmplwi	cr1, r_len, 0		/* Nothing to do? */
		add	%r5, %r9, %r9
		cmplw	r_len, %r5		/* <= 2*CL bytes to move? */
		beqlr-	cr1			/* then do nothing */

		blt+	simple_fill		/* a trivial fill routine */

		/* Word align the block, fill bytewise until dst is word aligned */

		andi.	%r5, r_dst, 0x03
		li	%r6, 4
		beq+	cb_aligned_w		/* already aligned to word? */

		subf	%r5, %r5, %r6		/* bytes to fill to align4 */
#if USE_STSWX
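		/* stswx stores the byte count held in the low 7 bits of the
		   XER (set via mtxer below), from registers starting at %r0 */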
		mtxer	%r5
		stswx	%r0, 0, r_dst
		add	r_dst, %r5, r_dst
#else
		mtctr	%r5

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)		/* Fill bytewise */
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, %r5, r_len

cb_aligned_w:	/* Cache block align, fill wordwise until dst aligned */

		/* We know there is something to do since we had > 2*CL bytes
		   initially, so no need to check for r_len = 0 */

		subi	%r6, %r9, 1		/* CL mask */
		and.	%r5, r_dst, %r6
		srwi	%r5, %r5, 2
		srwi	%r6, %r9, 2
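		/* %r5 = words already into the cache line, %r6 = words per line */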
		beq	cb_aligned_cb		/* already on CL boundary? */

		subf	%r5, %r5, %r6		/* words to fill to alignment */
		mtctr	%r5
		slwi	%r5, %r5, 2
		subf	r_len, %r5, r_len

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)		/* Fill wordwise */
		bdnz	1b
		addi	r_dst, r_dst, 4

cb_aligned_cb:	/* no need to check r_len, see above */

		srw.	%r5, r_len, %r10	/* Number of cache blocks */
		mtctr	%r5
		beq	cblocks_done

		slw	%r5, %r5, %r10
		subf	r_len, %r5, r_len

1:		dcbz	0, r_dst		/* Clear blockwise */
		add	r_dst, r_dst, %r9
		bdnz	1b
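		/* dcbz zeroes a whole cache block without first reading it
		   from memory, which is why only zero fills take this path */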

cblocks_done:	/* still CL aligned, but less than CL bytes left */
		cmplwi	cr1, r_len, 0
		cmplwi	r_len, 8
		beq-	cr1, sf_return

		blt-	sf_bytewise		/* <8 remaining? */
		b	sf_aligned_w

/*----------------------------------------------------------------------*/
wbzero:		li	r_val, 0

		cmplwi	r_len, 0
		beqlr-				/* Nothing to do */

simple_fill:
#if USE_STSWX
		cmplwi	cr1, r_len, 12		/* < 12 bytes to move? */
#else
		cmplwi	cr1, r_len, 8		/* < 8 bytes to move? */
#endif
		andi.	%r5, r_dst, 0x03	/* bytes to fill to align4 */
		blt	cr1, sf_bytewise	/* trivial byte mover */

		li	%r6, 4
		subf	%r5, %r5, %r6
		beq+	sf_aligned_w		/* dest is word aligned */

#if USE_STSWX
		mtxer	%r5
		stswx	%r0, 0, r_dst
		add	r_dst, %r5, r_dst
#else
		mtctr	%r5			/* nope, then fill bytewise */
		subi	r_dst, r_dst, 1		/* until it is */
1:		stbu	r_val, 1(r_dst)
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, %r5, r_len

sf_aligned_w:	/* no need to check r_len since it was >= 8 bytes initially */
#if USE_STSWX
		mr	%r6, %r0
		mr	%r7, %r0

		srwi	%r5, r_len, 3
		mtctr	%r5

		slwi	%r5, %r5, 3		/* adjust len */
		subf.	r_len, %r5, r_len

1:		stswi	%r6, r_dst, 8
		addi	r_dst, r_dst, 8
		bdnz	1b
#else
		srwi	%r5, r_len, 2		/* words to fill */
		mtctr	%r5

		slwi	%r5, %r5, 2
		subf.	r_len, %r5, r_len	/* adjust len for fill */

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)
		bdnz	1b
		addi	r_dst, r_dst, 4
#endif

sf_word_done:	bne-	sf_bytewise

sf_return:	mr	%r3, %r8		/* restore orig ptr */
		blr				/* for memset functionality */

sf_bytewise:
#if USE_STSWX
		mr	%r5, %r0
		mr	%r6, %r0
		mr	%r7, %r0

		mtxer	r_len
		stswx	%r5, 0, r_dst
#else
		mtctr	r_len

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)
		bdnz	1b
#endif
		mr	%r3, %r8		/* restore orig ptr */
		blr				/* for memset functionality */

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		.data
cache_info:	.long	-1, -1, -1, -1
cache_sh:	.long	0

#endif
/*----------------------------------------------------------------------*/