/*	$NetBSD: bzero.S,v 1.8 2011/01/15 07:31:12 matt Exp $	*/

/*-
 * Copyright (C) 2001	Martin J. Laubach <mjl (at) NetBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/

#include <machine/asm.h>


#if defined(LIBC_SCCS) && !defined(lint)
__RCSID("$NetBSD: bzero.S,v 1.8 2011/01/15 07:31:12 matt Exp $")
#endif /* LIBC_SCCS && !lint */

#ifdef _KERNEL
#include <assym.h>
#endif

#define USE_STSWX 0     /* don't. slower than trivial copy loop */
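/*
 * Note: stswx/stswi (store string) are microcoded and slow on many
 * PowerPC implementations, which is why the plain store loops below
 * are preferred and USE_STSWX stays 0.
 */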

/*----------------------------------------------------------------------*/
/*
        void bzero(void *b %r3, size_t len %r4);
        void * memset(void *b %r3, int c %r4, size_t len %r5);
*/
/*----------------------------------------------------------------------*/

#define r_dst   %r3
#define r_len   %r4
#define r_val   %r0

        .text
        .align 4
ENTRY(bzero)
        li      r_val, 0                /* Value to stuff in */
        b       cb_memset
END(bzero)

ENTRY(memset)
        cmplwi  cr1, %r5, 0
        mr.     %r0, %r4
        mr      %r8, %r3
        beqlr-  cr1                     /* Nothing to do */

        rlwimi  %r0, %r4, 8, 16, 23     /* word extend fill value */
        rlwimi  %r0, %r0, 16, 0, 15
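
/*
 * The two rlwimi instructions above replicate the low byte of the
 * fill value into all four bytes of %r0; an illustrative C sketch
 * (not part of the build):
 *
 *      val  = c & 0xff;        // 0x000000AB
 *      val |= val << 8;        // 0x0000ABAB
 *      val |= val << 16;       // 0xABABABAB
 */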
        mr      %r4, %r5
        bne-    simple_fill             /* != 0, use trivial fill */
cb_memset:

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
        /* First find out cache line size */
#ifdef PIC
        mflr    %r9
        bl      _GLOBAL_OFFSET_TABLE_@local-4
        mflr    %r10
        mtlr    %r9
        lwz     %r5,cache_info@got(%r10)
#else
        lis     %r5,cache_info@h
        ori     %r5,%r5,cache_info@l
#endif
        lwz     %r6, 4(%r5)
        cmpwi   %r6, -1
        bne+    cb_cacheline_known

/*----------------------------------------------------------------------*/
#define CTL_MACHDEP     7
#define CPU_CACHELINE   1
#define CPU_CACHEINFO   5

#define STKFRAME_SZ     48
#define MIB             8
#define OLDPLEN         16
#define R3_SAVE         20
#define R4_SAVE         24
#define R0_SAVE         28
#define R8_SAVE         32

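/*
 * What follows is, roughly, this C call sequence done by hand on a
 * temporary stack frame (an illustrative sketch, not part of the build):
 *
 *      int mib[2] = { CTL_MACHDEP, CPU_CACHEINFO };
 *      size_t olen = sizeof(cache_info);
 *      if (sysctl(mib, 2, cache_info, &olen, NULL, 0) != 0) {
 *              mib[1] = CPU_CACHELINE;   // fall back for older kernels
 *              olen = 4;
 *              sysctl(mib, 2, &cache_info[1], &olen, NULL, 0);
 *      }
 */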
        mflr    %r6
        stw     %r6, 4(%r1)
        stwu    %r1, -STKFRAME_SZ(%r1)

        stw     %r8, R8_SAVE(%r1)
        stw     %r3, R3_SAVE(%r1)
        stw     %r4, R4_SAVE(%r1)
        stw     %r0, R0_SAVE(%r1)

        li      %r0, CTL_MACHDEP        /* Construct MIB */
        stw     %r0, MIB(%r1)
        li      %r0, CPU_CACHEINFO
        stw     %r0, MIB+4(%r1)

        li      %r0, 4*4                /* Oldlenp := 4*4 */
        stw     %r0, OLDPLEN(%r1)

        addi    %r3, %r1, MIB
        li      %r4, 2                  /* namelen */
        /* %r5 already contains &cache_info */
        addi    %r6, %r1, OLDPLEN
        li      %r7, 0
        li      %r8, 0
        bl      PIC_PLT(_C_LABEL(sysctl))

        cmpwi   %r3, 0                  /* Check result */
        beq     1f

        /* Failure, try older sysctl */

        li      %r0, CTL_MACHDEP        /* Construct MIB */
        stw     %r0, MIB(%r1)
        li      %r0, CPU_CACHELINE
        stw     %r0, MIB+4(%r1)

        li      %r0, 4                  /* Oldlenp := 4 */
        stw     %r0, OLDPLEN(%r1)

        addi    %r3, %r1, MIB
        li      %r4, 2                  /* namelen */
#ifdef PIC
        mflr    %r9
        bl      _GLOBAL_OFFSET_TABLE_@local-4
        mflr    %r10
        mtlr    %r9
        lwz     %r5,cache_info@got(%r10)
        addi    %r5, %r5, 4
#else
        lis     %r5,cache_info+4@h
        ori     %r5,%r5,cache_info+4@l
#endif
        addi    %r6, %r1, OLDPLEN
        li      %r7, 0
        li      %r8, 0
        bl      PIC_PLT(_C_LABEL(sysctl))
1:
        lwz     %r8, R8_SAVE(%r1)
        lwz     %r3, R3_SAVE(%r1)
        lwz     %r4, R4_SAVE(%r1)
        lwz     %r0, R0_SAVE(%r1)

#ifdef PIC
        bl      _GLOBAL_OFFSET_TABLE_@local-4
        mflr    %r10
        lwz     %r9, cache_info@got(%r10)
        lwz     %r9, 4(%r9)
#else
        lis     %r5, cache_info+4@ha
        lwz     %r9, cache_info+4@l(%r5)
#endif
        la      %r1, STKFRAME_SZ(%r1)
        lwz     %r5, 4(%r1)
        mtlr    %r5

        cntlzw  %r6, %r9                /* compute shift value */
        li      %r5, 31
        subf    %r5, %r6, %r5
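
/*
 * %r5 is now log2 of the cache line size: e.g. for a 32-byte line,
 * cntlzw(32) == 26 and 31 - 26 == 5, so r_len >> 5 later counts blocks.
 */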

#ifdef PIC
        lwz     %r6, cache_sh@got(%r10)
        stw     %r5, 0(%r6)
#else
        lis     %r6, cache_sh@ha
        stw     %r5, cache_sh@l(%r6)
#endif
/*----------------------------------------------------------------------*/
/* Okay, we know the cache line size (%r9) and shift value (%r10) */
cb_cacheline_known:
#ifdef PIC
        lwz     %r5, cache_info@got(%r10)
        lwz     %r9, 4(%r5)
        lwz     %r5, cache_sh@got(%r10)
        lwz     %r10, 0(%r5)
#else
        lis     %r9, cache_info+4@ha
        lwz     %r9, cache_info+4@l(%r9)
        lis     %r10, cache_sh@ha
        lwz     %r10, cache_sh@l(%r10)
#endif

#else /* _KERNEL */
#ifdef MULTIPROCESSOR
        mfsprg  %r10, 0                 /* Get cpu_info pointer */
#else
        lis     %r10, cpu_info_store@ha
        addi    %r10, %r10, cpu_info_store@l
#endif
        lwz     %r9, CPU_CI+4(%r10)     /* Load D$ line size */
        cntlzw  %r10, %r9               /* Calculate shift.. */
        li      %r6, 31
        subf    %r10, %r10, %r6
#endif /* _KERNEL */
        /* Back in memory filling business */

        cmplwi  cr1, r_len, 0           /* Nothing to do? */
        add     %r5, %r9, %r9
        cmplw   r_len, %r5              /* <= 2*CL bytes to move? */
        beqlr-  cr1                     /* then do nothing */

        blt+    simple_fill             /* a trivial fill routine */

        /* Word align the block, fill bytewise until dst even */

        andi.   %r5, r_dst, 0x03
        li      %r6, 4
        beq+    cb_aligned_w            /* already aligned to word? */

        subf    %r5, %r5, %r6           /* bytes to fill to align4 */
#if USE_STSWX
        mtxer   %r5
        stswx   %r0, 0, r_dst
        add     r_dst, %r5, r_dst
#else
        mtctr   %r5

        subi    r_dst, r_dst, 1
1:      stbu    r_val, 1(r_dst)         /* Fill bytewise */
        bdnz    1b

        addi    r_dst, r_dst, 1
#endif
        subf    r_len, %r5, r_len
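
        /*
         * Worked example: dst == 0x1003 gives %r5 = 4 - (0x1003 & 3) = 1,
         * so one byte is stored before dst is word aligned.
         */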

cb_aligned_w:   /* Cache block align, fill wordwise until dst aligned */

        /* We know there is work to do since r_len was >= 2*CL initially, */
        /* so no need to check for r_len == 0 */

        subi    %r6, %r9, 1             /* CL mask */
        and.    %r5, r_dst, %r6
        srwi    %r5, %r5, 2
        srwi    %r6, %r9, 2
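
        /*
         * E.g. with a 32-byte line and dst == 0x1010: %r5 = 0x10 >> 2 = 4
         * words past the block boundary, %r6 = 8 words per block, so
         * 8 - 4 = 4 word stores reach the next cache block.
         */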
        beq     cb_aligned_cb           /* already on CL boundary? */

        subf    %r5, %r5, %r6           /* words to fill to alignment */
        mtctr   %r5
        slwi    %r5, %r5, 2
        subf    r_len, %r5, r_len

        subi    r_dst, r_dst, 4
1:      stwu    r_val, 4(r_dst)         /* Fill wordwise */
        bdnz    1b
        addi    r_dst, r_dst, 4

cb_aligned_cb:  /* no need to check r_len, see above */

        srw.    %r5, r_len, %r10        /* Number of cache blocks */
        mtctr   %r5
        beq     cblocks_done

        slw     %r5, %r5, %r10
        subf    r_len, %r5, r_len

1:      dcbz    0, r_dst                /* Clear blockwise */
        add     r_dst, r_dst, %r9
        bdnz    1b
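
        /*
         * dcbz zeroes an entire data cache block without fetching it
         * from memory first, which is why only the zero fill takes this
         * path; a nonzero memset branched to simple_fill earlier.
         */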

cblocks_done:   /* still CL aligned, but less than CL bytes left */
        cmplwi  cr1, r_len, 0
        cmplwi  r_len, 8
        beq-    cr1, sf_return

        blt-    sf_bytewise             /* <8 remaining? */
        b       sf_aligned_w

/*----------------------------------------------------------------------*/
wbzero: li      r_val, 0

        cmplwi  r_len, 0
        beqlr-                          /* Nothing to do */

simple_fill:
#if USE_STSWX
        cmplwi  cr1, r_len, 12          /* < 12 bytes to move? */
#else
        cmplwi  cr1, r_len, 8           /* < 8 bytes to move? */
#endif
        andi.   %r5, r_dst, 0x03        /* bytes to fill to align4 */
        blt     cr1, sf_bytewise        /* trivial byte mover */

        li      %r6, 4
        subf    %r5, %r5, %r6
        beq+    sf_aligned_w            /* dest is word aligned */

#if USE_STSWX
        mtxer   %r5
        stswx   %r0, 0, r_dst
        add     r_dst, %r5, r_dst
#else
        mtctr   %r5                     /* nope, then fill bytewise */
        subi    r_dst, r_dst, 1         /* until it is */
1:      stbu    r_val, 1(r_dst)
        bdnz    1b

        addi    r_dst, r_dst, 1
#endif
        subf    r_len, %r5, r_len

sf_aligned_w:   /* no need to check r_len since it was >= 8 bytes initially */
#if USE_STSWX
        mr      %r6, %r0
        mr      %r7, %r0

        srwi    %r5, r_len, 3
        mtctr   %r5

        slwi    %r5, %r5, 3             /* adjust len */
        subf.   r_len, %r5, r_len

1:      stswi   %r6, r_dst, 8
        addi    r_dst, r_dst, 8
        bdnz    1b
#else
        srwi    %r5, r_len, 2           /* words to fill */
        mtctr   %r5

        slwi    %r5, %r5, 2
        subf.   r_len, %r5, r_len       /* adjust len for fill */

        subi    r_dst, r_dst, 4
1:      stwu    r_val, 4(r_dst)
        bdnz    1b
        addi    r_dst, r_dst, 4
#endif

sf_word_done:   bne-    sf_bytewise

sf_return:      mr      %r3, %r8        /* restore orig ptr */
        blr                             /* for memset functionality */

sf_bytewise:
#if USE_STSWX
        mr      %r5, %r0
        mr      %r6, %r0
        mr      %r7, %r0

        mtxer   r_len
        stswx   %r5, 0, r_dst
#else
        mtctr   r_len

        subi    r_dst, r_dst, 1
1:      stbu    r_val, 1(r_dst)
        bdnz    1b
#endif
        mr      %r3, %r8                /* restore orig ptr */
        blr                             /* for memset functionality */
END(memset)

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
        .data
cache_info:     .long -1, -1, -1, -1
cache_sh:       .long 0
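
/*
 * Layout note: cache_info[1] (offset 4) is what the code above reads
 * as the D-cache line size; -1 marks it as not yet probed via sysctl.
 * cache_sh caches the derived log2(line size) shift.
 */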

#endif
/*----------------------------------------------------------------------*/