/*	$NetBSD: bzero.S,v 1.12 2013/07/18 12:20:41 matt Exp $ */

/*-
 * Copyright (C) 2001 Martin J. Laubach <mjl@NetBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/

#include <machine/asm.h>


#if defined(LIBC_SCCS) && !defined(lint)
__RCSID("$NetBSD: bzero.S,v 1.12 2013/07/18 12:20:41 matt Exp $")
#endif /* LIBC_SCCS && !lint */

#ifdef _KERNEL
#include <assym.h>
#endif

#define USE_STSWX 0     /* don't; slower than trivial copy loop */

/*----------------------------------------------------------------------*/
/*
     void bzero(void *b %r3, size_t len %r4);
     void * memset(void *b %r3, int c %r4, size_t len %r5);
*/
/*----------------------------------------------------------------------*/
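
/*
 * Overview (descriptive note added for clarity): memset first
 * replicates the fill byte through a whole word.  Fills shorter than
 * two cache lines, and all non-zero fills, go through simple_fill.
 * Large zero fills align the destination bytewise to a word and
 * wordwise to a cache line, clear whole lines with dcbz, and finish
 * the tail word- and bytewise.
 */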

#define r_dst   %r3
#define r_len   %r4
#define r_val   %r0

        .text
        .align 4
ENTRY(bzero)
        li      r_val, 0                /* Value to stuff in */
        b       cb_memset
END(bzero)

ENTRY(memset)
        cmplwi  %cr1, %r5, 0
        mr.     %r0, %r4
        mr      %r8, %r3
        beqlr-  %cr1                    /* Nothing to do */

        rlwimi  %r0, %r4, 8, 16, 23     /* word extend fill value */
        rlwimi  %r0, %r0, 16, 0, 15
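        /*
         * The two rlwimi's above spread the low byte of the fill value
         * into all four bytes of %r0; in C terms (illustrative sketch):
         *
         *      v = c & 0xff;
         *      v |= v << 8;
         *      v |= v << 16;
         */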
        mr      %r4, %r5
        bne-    simple_fill             /* != 0, use trivial fill */
cb_memset:

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
        /* First find out cache line size */
        mflr    %r9
#ifdef PIC
        bcl     20,31,1f
1:      mflr    %r5
        mtlr    %r9
        addis   %r5,%r5,cache_info+4-1b@ha
        lwzu    %r9,cache_info+4-1b@l(%r5)
#else
        lis     %r5,cache_info+4@ha
        lwzu    %r9,cache_info+4@l(%r5)
#endif
        lwz     %r10,cache_sh-(cache_info+4)(%r5)
        cmpwi   %r9, -1
        bne+    cb_cacheline_known

        addi    %r5, %r5, -4            /* point %r5 at beginning of cache_info */

/*----------------------------------------------------------------------*/
#define CTL_MACHDEP     7
#define CPU_CACHELINE   1
#define CPU_CACHEINFO   5

#define STKFRAME_SZ     64
#define MIB             8
#define OLDPLEN         16
#define R3_SAVE         20
#define R4_SAVE         24
#define R0_SAVE         28
#define R8_SAVE         32
#define R31_SAVE        36
#ifdef PIC
#define R30_SAVE        40
#endif
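
/*
 * In effect the code below does the following (illustrative C sketch,
 * not part of the build; cache_info is the data word array defined at
 * the bottom of this file):
 *
 *      int mib[2] = { CTL_MACHDEP, CPU_CACHEINFO };
 *      size_t len = 4 * 4;
 *      if (sysctl(mib, 2, cache_info, &len, NULL, 0) != 0) {
 *              mib[1] = CPU_CACHELINE;         (older kernels)
 *              len = 4;
 *              sysctl(mib, 2, &cache_info[1], &len, NULL, 0);
 *      }
 *
 * i.e. probe the D-cache line size once and remember it in cache_info.
 */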

        stw     %r9, 4(%r1)
        stwu    %r1, -STKFRAME_SZ(%r1)

        stw     %r31, R31_SAVE(%r1)
        mr      %r31, %r5               /* cache info */

#ifdef PIC
        stw     %r30, R30_SAVE(%r1)
        PIC_TOCSETUP(cb_memset,%r30)
#endif

        stw     %r8, R8_SAVE(%r1)
        stw     %r3, R3_SAVE(%r1)
        stw     %r4, R4_SAVE(%r1)
        stw     %r0, R0_SAVE(%r1)

        li      %r0, CTL_MACHDEP        /* Construct MIB */
        stw     %r0, MIB(%r1)
        li      %r0, CPU_CACHEINFO
        stw     %r0, MIB+4(%r1)

        li      %r0, 4*4                /* Oldlenp := 4*4 */
        stw     %r0, OLDPLEN(%r1)

        addi    %r3, %r1, MIB
        li      %r4, 2                  /* namelen */
        /* %r5 already contains &cache_info */
        addi    %r6, %r1, OLDPLEN
        li      %r7, 0
        li      %r8, 0
        bl      PIC_PLT(_C_LABEL(sysctl))

        cmpwi   %r3, 0                  /* Check result */
        beq     1f

        /* Failure, try older sysctl */

        li      %r0, CTL_MACHDEP        /* Construct MIB */
        stw     %r0, MIB(%r1)
        li      %r0, CPU_CACHELINE
        stw     %r0, MIB+4(%r1)

        li      %r0, 4                  /* Oldlenp := 4 */
        stw     %r0, OLDPLEN(%r1)

        addi    %r3, %r1, MIB
        li      %r4, 2                  /* namelen */
        addi    %r5, %r31, 4
        addi    %r6, %r1, OLDPLEN
        li      %r7, 0
        li      %r8, 0
        bl      PIC_PLT(_C_LABEL(sysctl))
1:
        lwz     %r3, R3_SAVE(%r1)
        lwz     %r4, R4_SAVE(%r1)
        lwz     %r8, R8_SAVE(%r1)
        lwz     %r0, R0_SAVE(%r1)
        lwz     %r9, 4(%r31)
        lwz     %r31, R31_SAVE(%r1)
#ifdef PIC
        lwz     %r30, R30_SAVE(%r1)
#endif
        addi    %r1, %r1, STKFRAME_SZ
        lwz     %r7, 4(%r1)
        mtlr    %r7

        cntlzw  %r6, %r9                /* compute shift value */
        li      %r5, 31
        subf    %r10, %r6, %r5
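        /*
         * Added note: for a power-of-two line size, 31 - cntlzw(size)
         * equals log2(size); e.g. a 32-byte line gives 31 - 26 = 5,
         * and 1 << 5 == 32.
         */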

#ifdef PIC
        mflr    %r9
        bcl     20,31,1f
1:      mflr    %r5
        mtlr    %r9

        addis   %r5, %r5, cache_info+4-1b@ha
        lwzu    %r9, cache_info+4-1b@l(%r5)
#else
        lis     %r5, cache_info+4@ha
        lwzu    %r9, cache_info+4@l(%r5)
#endif
        stw     %r10, cache_sh-(cache_info+4)(%r5)

/*----------------------------------------------------------------------*/
/* Okay, we know the cache line size (%r9) and shift value (%r10) */
cb_cacheline_known:
#else /* _KERNEL */
#ifdef MULTIPROCESSOR
        mfsprg  %r10, 0                 /* Get cpu_info pointer */
#else
        lis     %r10, cpu_info_store@ha
        addi    %r10, %r10, cpu_info_store@l
#endif
        lwz     %r9, CPU_CI+4(%r10)     /* Load D$ line size */
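        /*
         * Added note: CPU_CI (from assym.h) is assumed to be the offset
         * of the cache-info block within struct cpu_info; the word at
         * offset 4 inside it is taken to be the D-cache line size,
         * mirroring cache_info[1] in the userland path above.
         */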
        cntlzw  %r10, %r9               /* Calculate shift.. */
        li      %r6, 31
        subf    %r10, %r10, %r6
#endif /* _KERNEL */
/* Back in memory filling business */

        cmplwi  %cr1, r_len, 0          /* Nothing to do? */
        add     %r5, %r9, %r9
        cmplw   r_len, %r5              /* < 2*CL bytes to move? */
        beqlr-  %cr1                    /* then do nothing */

        blt+    simple_fill             /* a trivial fill routine */

/* Word align the block, fill bytewise until dst even */

        andi.   %r5, r_dst, 0x03
        li      %r6, 4
        beq+    cb_aligned_w            /* already aligned to word? */

        subf    %r5, %r5, %r6           /* bytes to fill to align4 */
#if USE_STSWX
        mtxer   %r5
        stswx   %r0, 0, r_dst
        add     r_dst, %r5, r_dst
#else
        mtctr   %r5

        subi    r_dst, r_dst, 1
1:      stbu    r_val, 1(r_dst)         /* Fill bytewise */
        bdnz    1b

        addi    r_dst, r_dst, 1
#endif
        subf    r_len, %r5, r_len

cb_aligned_w:   /* Cache block align, fill wordwise until dst aligned */

        /* We know there is something left to do, since the length was
           greater than 2*CL initially, so there is no need to check
           for r_len == 0 here. */

        subi    %r6, %r9, 1             /* CL mask */
        and.    %r5, r_dst, %r6
        srwi    %r5, %r5, 2
        srwi    %r6, %r9, 2
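        /* Added note: %r5 now holds how many words dst sits past the
           previous cache-line boundary and %r6 the line size in words,
           so %r6 - %r5 below is the word count up to the next line. */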
        beq     cb_aligned_cb           /* already on CL boundary? */

        subf    %r5, %r5, %r6           /* words to fill to alignment */
        mtctr   %r5
        slwi    %r5, %r5, 2
        subf    r_len, %r5, r_len

        subi    r_dst, r_dst, 4
1:      stwu    r_val, 4(r_dst)         /* Fill wordwise */
        bdnz    1b
        addi    r_dst, r_dst, 4

cb_aligned_cb:  /* no need to check r_len, see above */

        srw.    %r5, r_len, %r10        /* Number of cache blocks */
        mtctr   %r5
        beq     cblocks_done

        slw     %r5, %r5, %r10
        subf    r_len, %r5, r_len

1:      dcbz    0, r_dst                /* Clear blockwise */
        add     r_dst, r_dst, %r9
        bdnz    1b
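
        /*
         * Added note: dcbz zeroes a whole data cache block without
         * fetching it from memory first, which is why this path is
         * used only for zero fills (non-zero memset branched off to
         * simple_fill earlier) and only once r_dst is block aligned.
         */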

cblocks_done:   /* still CL aligned, but less than CL bytes left */
        cmplwi  %cr1, r_len, 0
        cmplwi  r_len, 8
        beq-    %cr1, sf_return

        blt-    sf_bytewise             /* < 8 remaining? */
        b       sf_aligned_w

/*----------------------------------------------------------------------*/
wbzero: li      r_val, 0

        cmplwi  r_len, 0
        beqlr-                          /* Nothing to do */

simple_fill:
#if USE_STSWX
        cmplwi  %cr1, r_len, 12         /* < 12 bytes to move? */
#else
        cmplwi  %cr1, r_len, 8          /* < 8 bytes to move? */
#endif
        andi.   %r5, r_dst, 0x03        /* bytes to fill to align4 */
        blt     %cr1, sf_bytewise       /* trivial byte mover */

        li      %r6, 4
        subf    %r5, %r5, %r6
        beq+    sf_aligned_w            /* dest is word aligned */

#if USE_STSWX
        mtxer   %r5
        stswx   %r0, 0, r_dst
        add     r_dst, %r5, r_dst
#else
        mtctr   %r5                     /* nope, then fill bytewise */
        subi    r_dst, r_dst, 1         /* until it is */
1:      stbu    r_val, 1(r_dst)
        bdnz    1b

        addi    r_dst, r_dst, 1
#endif
        subf    r_len, %r5, r_len

sf_aligned_w:   /* no need to check r_len: it was >= 8 bytes initially */
#if USE_STSWX
        mr      %r6, %r0
        mr      %r7, %r0

        srwi    %r5, r_len, 3
        mtctr   %r5

        slwi    %r5, %r5, 3             /* adjust len */
        subf.   r_len, %r5, r_len

1:      stswi   %r6, r_dst, 8
        addi    r_dst, r_dst, 8
        bdnz    1b
#else
        srwi    %r5, r_len, 2           /* words to fill */
        mtctr   %r5

        slwi    %r5, %r5, 2
        subf.   r_len, %r5, r_len       /* adjust len for fill */

        subi    r_dst, r_dst, 4
1:      stwu    r_val, 4(r_dst)
        bdnz    1b
        addi    r_dst, r_dst, 4
#endif

sf_word_done:   bne-    sf_bytewise

sf_return:      mr      %r3, %r8        /* restore orig ptr */
        blr                             /* for memset functionality */

sf_bytewise:
#if USE_STSWX
        mr      %r5, %r0
        mr      %r6, %r0
        mr      %r7, %r0

        mtxer   r_len
        stswx   %r5, 0, r_dst
#else
        mtctr   r_len

        subi    r_dst, r_dst, 1
1:      stbu    r_val, 1(r_dst)
        bdnz    1b
#endif
        mr      %r3, %r8                /* restore orig ptr */
        blr                             /* for memset functionality */
END(memset)

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
        .data
        .p2align 2
cache_info:     .long -1, -1, -1, -1
cache_sh:       .long 0
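
/* Added note: cache_info caches the sysctl probe across calls (word 1
   is the D-cache line size; -1 means "not probed yet") and cache_sh
   holds the matching log2(line size) shift count. */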

#endif
/*----------------------------------------------------------------------*/