/*	$NetBSD: bzero.S,v 1.2 2001/11/29 00:20:37 mjl Exp $	*/

/*-
 * Copyright (C) 2001 Martin J. Laubach <mjl (at) netbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/

#include <machine/asm.h>
#ifdef _KERNEL
#include <assym.h>
#endif

#define USE_STSWX 0	/* don't: stswx is slower than the trivial copy loop */

/*----------------------------------------------------------------------*/
/*
     void bzero(void *b r3, size_t len r4);
     void *memset(void *b r3, int c r4, size_t len r5);
*/
/*----------------------------------------------------------------------*/
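
/*
 * Overview: for requests of at least two cache lines, align the
 * destination bytewise to a word boundary, then wordwise to a cache
 * line boundary, clear whole lines with dcbz, and finish the tail
 * word- and bytewise.  Shorter requests, and memset with a non-zero
 * fill byte (dcbz can only store zeroes), take the simple_fill path.
 */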

#define r_dst	r3
#define r_len	r4
#define r_val	r0

	.text
	.align 4
ENTRY(bzero)
	li	r_val, 0		/* Value to stuff in */
	b	cb_memset

ENTRY(memset)
	cmplwi	cr1, r5, 0
	mr.	r0, r4
	mr	r8, r3
	beqlr-	cr1			/* Nothing to do */

	rlwimi	r0, r4, 8, 16, 23	/* word extend fill value */
	rlwimi	r0, r0, 16, 0, 15
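	/*
	 * The two rlwimi replicate the fill byte into all four bytes
	 * of the word: e.g. c = 0xAB gives r0 = 0x0000ABAB after the
	 * first insert and r0 = 0xABABABAB after the second.
	 */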
	mr	r4, r5
	bne-	simple_fill		/* c != 0, use trivial fill */
cb_memset:

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
	/* First find out cache line size */
#ifdef PIC
	mflr	r9
	bl	_GLOBAL_OFFSET_TABLE_@local-4
	mflr	r10
	mtlr	r9
	lwz	r5,cache_size@got(r10)
#else
	lis	r5,cache_size@h
	ori	r5,r5,cache_size@l
#endif
	lwz	r6, 0(r5)
	cmpwi	r6, -1
	bne+	cb_cacheline_known

/*----------------------------------------------------------------------*/
#define CTL_MACHDEP	7
#define CPU_CACHELINE	1

#define STKFRAME_SZ	48
#define MIB		8
#define OLDPLEN		16
#define R3_SAVE		20
#define R4_SAVE		24
#define R0_SAVE		28
#define R8_SAVE		32
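
/*
 * Layout of the temporary stack frame (offsets from r1 after the
 * stwu below): the two-word sysctl name at MIB, the length word that
 * the oldlenp argument points to at OLDPLEN, and the caller state
 * (r3, r4, r0, r8) that must survive the call in the *_SAVE slots.
 */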

	mflr	r6
	stw	r6, 4(r1)
	stwu	r1, -STKFRAME_SZ(r1)

	stw	r8, R8_SAVE(r1)
	stw	r3, R3_SAVE(r1)
	stw	r4, R4_SAVE(r1)
	stw	r0, R0_SAVE(r1)

	li	r0, CTL_MACHDEP		/* Construct MIB */
	stw	r0, MIB(r1)
	li	r0, CPU_CACHELINE
	stw	r0, MIB+4(r1)

	li	r0, 4			/* Oldlenp := 4 */
	stw	r0, OLDPLEN(r1)

	addi	r3, r1, MIB
	li	r4, 2			/* namelen */
	/* r5 already contains &cache_size */
	addi	r6, r1, OLDPLEN
	li	r7, 0
	li	r8, 0
	bl	PIC_PLT(_C_LABEL(sysctl))
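	/*
	 * The call above is roughly equivalent to this C sketch:
	 *
	 *	int mib[2] = { CTL_MACHDEP, CPU_CACHELINE };
	 *	size_t len = sizeof(cache_size);
	 *	sysctl(mib, 2, &cache_size, &len, NULL, 0);
	 */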

	lwz	r8, R8_SAVE(r1)
	lwz	r3, R3_SAVE(r1)
	lwz	r4, R4_SAVE(r1)
	lwz	r0, R0_SAVE(r1)

#ifdef PIC
	bl	_GLOBAL_OFFSET_TABLE_@local-4
	mflr	r10
	lwz	r9, cache_size@got(r10)
	lwz	r9, 0(r9)
#else
	lis	r5, cache_size@ha
	lwz	r9, cache_size@l(r5)
#endif
	la	r1, STKFRAME_SZ(r1)
	lwz	r5, 4(r1)
	mtlr	r5

	cntlzw	r6, r9			/* compute shift value */
	li	r5, 31
	subf	r5, r6, r5
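	/*
	 * For the power-of-two line sizes we expect, 31 - cntlzw(size)
	 * is log2(size): e.g. size 32 -> cntlzw 26 -> shift 5.
	 */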

#ifdef PIC
	lwz	r6, cache_sh@got(r10)
	stw	r5, 0(r6)
#else
	lis	r6, cache_sh@ha
	stw	r5, cache_sh@l(r6)
#endif
/*----------------------------------------------------------------------*/
/* Okay, cache_size and cache_sh are valid now; load them into r9 (size)
   and r10 (shift) */
cb_cacheline_known:
#ifdef PIC
	lwz	r5, cache_size@got(r10)
	lwz	r9, 0(r5)
	lwz	r5, cache_sh@got(r10)
	lwz	r10, 0(r5)
#else
	lis	r9, cache_size@ha
	lwz	r9, cache_size@l(r9)
	lis	r10, cache_sh@ha
	lwz	r10, cache_sh@l(r10)
#endif

#else /* _KERNEL */
	li	r9, CACHELINESIZE
#if CACHELINESIZE == 32
#define CACHELINESHIFT	5
#else
#error Define CACHELINESHIFT for your CACHELINESIZE
#endif
	li	r10, CACHELINESHIFT
#endif /* _KERNEL */
	/* Back in memory filling business */

	cmplwi	cr1, r_len, 0		/* Nothing to do? */
	add	r5, r9, r9
	cmplw	r_len, r5		/* < 2*CL bytes to move? */
	beqlr-	cr1			/* then do nothing */

	blt+	simple_fill		/* a trivial fill routine */
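	/*
	 * Rationale for the 2*CL threshold (roughly): below two cache
	 * lines the alignment prologue could consume most of the
	 * request, leaving little for dcbz to win on.
	 */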

/* Word align the block, fill bytewise until dst is word aligned */

	andi.	r5, r_dst, 0x03
	li	r6, 4
	beq+	cb_aligned_w		/* already aligned to word? */

	subf	r5, r5, r6		/* bytes to fill to align4 */
#if USE_STSWX
	mtxer	r5
	stswx	r0, 0, r_dst
	add	r_dst, r5, r_dst
#else
	mtctr	r5

	subi	r_dst, r_dst, 1
1:	stbu	r_val, 1(r_dst)		/* Fill bytewise */
	bdnz	1b

	addi	r_dst, r_dst, 1
#endif
	subf	r_len, r5, r_len

cb_aligned_w:	/* Cache block align, fill wordwise until dst aligned */

/* We know there is work left since r_len was >= 2*CL initially, */
/* so no need to check for r_len == 0 */

	rlwinm.	r5, r_dst, 30, 29, 31	/* (r_dst >> 2) & 7, word offset in line */
	srwi	r6, r9, 2
	beq	cb_aligned_cb		/* already on CL boundary? */

	subf	r5, r5, r6		/* words to fill to alignment */
	mtctr	r5
	slwi	r5, r5, 2
	subf	r_len, r5, r_len

	subi	r_dst, r_dst, 4
1:	stwu	r_val, 4(r_dst)		/* Fill wordwise */
	bdnz	1b
	addi	r_dst, r_dst, 4

cb_aligned_cb:	/* no need to check r_len, see above */

	srw.	r5, r_len, r10		/* Number of cache blocks */
	mtctr	r5
	beq	cblocks_done

	slw	r5, r5, r10
	subf	r_len, r5, r_len

1:	dcbz	0, r_dst		/* Clear blockwise */
	add	r_dst, r_dst, r9
	bdnz	1b
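	/*
	 * Each dcbz zeroes one full cache line; this is why memset
	 * with a non-zero fill value branched off to simple_fill
	 * earlier.
	 */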

cblocks_done:	/* still CL aligned, but less than CL bytes left */
	cmplwi	cr1, r_len, 0
	cmplwi	r_len, 8
	beq-	cr1, sf_return

	blt-	sf_bytewise		/* <8 remaining? */
	b	sf_aligned_w

/*----------------------------------------------------------------------*/
wbzero:	li	r_val, 0

	cmplwi	r_len, 0
	beqlr-				/* Nothing to do */

simple_fill:
#if USE_STSWX
	cmplwi	cr1, r_len, 12		/* < 12 bytes to move? */
#else
	cmplwi	cr1, r_len, 8		/* < 8 bytes to move? */
#endif
	andi.	r5, r_dst, 0x03		/* bytes to fill to align4 */
	blt	cr1, sf_bytewise	/* trivial byte mover */

	li	r6, 4
	subf	r5, r5, r6
	beq+	sf_aligned_w		/* dest is word aligned */

#if USE_STSWX
	mtxer	r5
	stswx	r0, 0, r_dst
	add	r_dst, r5, r_dst
#else
	mtctr	r5			/* nope, then fill bytewise */
	subi	r_dst, r_dst, 1		/* until it is */
1:	stbu	r_val, 1(r_dst)
	bdnz	1b

	addi	r_dst, r_dst, 1
#endif
	subf	r_len, r5, r_len

sf_aligned_w:	/* no need to check r_len since it was >= 8 bytes initially */
#if USE_STSWX
	mr	r6, r0
	mr	r7, r0

	srwi	r5, r_len, 3
	mtctr	r5

	slwi	r5, r5, 3		/* adjust len */
	subf.	r_len, r5, r_len

1:	stswi	r6, r_dst, 8
	addi	r_dst, r_dst, 8
	bdnz	1b
#else
	srwi	r5, r_len, 2		/* words to fill */
	mtctr	r5

	slwi	r5, r5, 2
	subf.	r_len, r5, r_len	/* adjust len for fill */

	subi	r_dst, r_dst, 4
1:	stwu	r_val, 4(r_dst)
	bdnz	1b
	addi	r_dst, r_dst, 4
#endif

sf_word_done:	bne-	sf_bytewise

sf_return:	mr	r3, r8		/* restore orig ptr */
	blr				/* for memset functionality */

sf_bytewise:
#if USE_STSWX
	mr	r5, r0
	mr	r6, r0
	mr	r7, r0

	mtxer	r_len
	stswx	r5, 0, r_dst
#else
	mtctr	r_len

	subi	r_dst, r_dst, 1
1:	stbu	r_val, 1(r_dst)
	bdnz	1b
#endif
	mr	r3, r8			/* restore orig ptr */
	blr				/* for memset functionality */

/*----------------------------------------------------------------------*/

	.data
cache_size:	.long -1
cache_sh:	.long 0

/*----------------------------------------------------------------------*/