/*	$NetBSD: bzero.S,v 1.1 2001/11/25 01:09:59 mjl Exp $ */

/*-
 * Copyright (C) 2001	Martin J. Laubach <mjl (at) netbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/

31 1.1 mjl #include <machine/asm.h>
32 1.1 mjl
33 1.1 mjl #define USE_STSWX 0 /* don't. slower than trivial copy loop */
34 1.1 mjl
35 1.1 mjl /*----------------------------------------------------------------------*/
36 1.1 mjl /*
37 1.1 mjl void bzero(void *b r3, size_t len r4);
38 1.1 mjl void * memset(void *b r3, int c r4, size_t len r5);
39 1.1 mjl */
40 1.1 mjl /*----------------------------------------------------------------------*/
41 1.1 mjl
42 1.1 mjl #define r_dst r3
43 1.1 mjl #define r_len r4
44 1.1 mjl #define r_val r0
45 1.1 mjl
46 1.1 mjl .text
47 1.1 mjl .align 4
48 1.1 mjl ENTRY(bzero)
49 1.1 mjl li r_val, 0 /* Value to stuff in */
50 1.1 mjl b cb_memset
51 1.1 mjl
52 1.1 mjl ENTRY(memset)
53 1.1 mjl cmplwi cr1, r5, 0
54 1.1 mjl mr. r0, r4
55 1.1 mjl mr r8, r3
56 1.1 mjl beqlr- cr1 /* Nothing to do */
57 1.1 mjl
58 1.1 mjl rlwimi r0, r4, 8, 16, 23 /* word extend fill value */
59 1.1 mjl rlwimi r0, r0, 16, 0, 15
60 1.1 mjl mr r4, r5
61 1.1 mjl bne- simple_fill /* =! 0, use trivial fill */
62 1.1 mjl cb_memset:
63 1.1 mjl
64 1.1 mjl /*----------------------------------------------------------------------*/
65 1.1 mjl /* First find out cache line size */
66 1.1 mjl #ifdef PIC
67 1.1 mjl mflr r9
68 1.1 mjl bl _GLOBAL_OFFSET_TABLE_@local-4
69 1.1 mjl mflr r10
70 1.1 mjl mtlr r9
71 1.1 mjl lwz r5,cache_size@got(r10)
72 1.1 mjl #else
73 1.1 mjl lis r5,cache_size@h
74 1.1 mjl ori r5,r5,cache_size@l
75 1.1 mjl #endif
76 1.1 mjl lwz r6, 0(r5)
77 1.1 mjl cmpwi r6, -1
78 1.1 mjl bne+ cb_cacheline_known
79 1.1 mjl
80 1.1 mjl /*----------------------------------------------------------------------*/
81 1.1 mjl #define CTL_MACHDEP 7
82 1.1 mjl #define CPU_CACHELINE 1
83 1.1 mjl
84 1.1 mjl #define STKFRAME_SZ 48
85 1.1 mjl #define MIB 8
86 1.1 mjl #define OLDPLEN 16
87 1.1 mjl #define R3_SAVE 20
88 1.1 mjl #define R4_SAVE 24
89 1.1 mjl #define R0_SAVE 28
90 1.1 mjl #define R8_SAVE 32
91 1.1 mjl
92 1.1 mjl mflr r6
93 1.1 mjl stw r6, 4(r1)
94 1.1 mjl stwu r1, -STKFRAME_SZ(r1)
95 1.1 mjl
96 1.1 mjl stw r8, R8_SAVE(r1)
97 1.1 mjl stw r3, R3_SAVE(r1)
98 1.1 mjl stw r4, R4_SAVE(r1)
99 1.1 mjl stw r0, R0_SAVE(r1)
100 1.1 mjl
101 1.1 mjl li r0, CTL_MACHDEP /* Construct MIB */
102 1.1 mjl stw r0, MIB(r1)
103 1.1 mjl li r0, CPU_CACHELINE
104 1.1 mjl stw r0, MIB+4(r1)
105 1.1 mjl
106 1.1 mjl li r0, 4 /* Oldlenp := 4 */
107 1.1 mjl stw r0, OLDPLEN(r1)
108 1.1 mjl
109 1.1 mjl addi r3, r1, MIB
110 1.1 mjl li r4, 2 /* namelen */
111 1.1 mjl /* r5 already contains &cache_size */
112 1.1 mjl addi r6, r1, OLDPLEN
113 1.1 mjl li r7, 0
114 1.1 mjl li r8, 0
115 1.1 mjl bl PIC_PLT(_C_LABEL(sysctl))
116 1.1 mjl
117 1.1 mjl lwz r8, R8_SAVE(r1)
118 1.1 mjl lwz r3, R3_SAVE(r1)
119 1.1 mjl lwz r4, R4_SAVE(r1)
120 1.1 mjl lwz r0, R0_SAVE(r1)
121 1.1 mjl
122 1.1 mjl #ifdef PIC
123 1.1 mjl bl _GLOBAL_OFFSET_TABLE_@local-4
124 1.1 mjl mflr r10
125 1.1 mjl lwz r9, cache_size@got(r10)
126 1.1 mjl lwz r9, 0(r9)
127 1.1 mjl #else
128 1.1 mjl lis r5, cache_size@ha
129 1.1 mjl lwz r9, cache_size@l(r5)
130 1.1 mjl #endif
131 1.1 mjl la r1, STKFRAME_SZ(r1)
132 1.1 mjl lwz r5, 4(r1)
133 1.1 mjl mtlr r5
134 1.1 mjl
135 1.1 mjl cntlzw r6, r9 /* compute shift value */
136 1.1 mjl li r5, 31
137 1.1 mjl subf r5, r6, r5
138 1.1 mjl
139 1.1 mjl #ifdef PIC
140 1.1 mjl lwz r6, cache_sh@got(r10)
141 1.1 mjl stw r5, 0(r6)
142 1.1 mjl #else
143 1.1 mjl lis r6, cache_sh@ha
144 1.1 mjl stw r5, cache_sh@l(r6)
145 1.1 mjl #endif
146 1.1 mjl /*----------------------------------------------------------------------*/
147 1.1 mjl /* Okay, we know the cache line size (r9) and shift value (r10) */
148 1.1 mjl cb_cacheline_known:
149 1.1 mjl #ifdef PIC
150 1.1 mjl lwz r5, cache_size@got(r10)
151 1.1 mjl lwz r9, 0(r5)
152 1.1 mjl lwz r5, cache_sh@got(r10)
153 1.1 mjl lwz r10, 0(r5)
154 1.1 mjl #else
155 1.1 mjl lis r9, cache_size@ha
156 1.1 mjl lwz r9, cache_size@l(r9)
157 1.1 mjl lis r10, cache_sh@ha
158 1.1 mjl lwz r10, cache_sh@l(r10)
159 1.1 mjl #endif
160 1.1 mjl /* Back in memory filling business */
161 1.1 mjl
162 1.1 mjl cmplwi cr1, r_len, 0 /* Nothing to do? */
163 1.1 mjl add r5, r9, r9
164 1.1 mjl cmplw r_len, r5 /* <= 2*CL bytes to move? */
165 1.1 mjl beqlr- cr1 /* then do nothing */
166 1.1 mjl
167 1.1 mjl blt+ simple_fill /* a trivial fill routine */
168 1.1 mjl
169 1.1 mjl /* Word align the block, fill bytewise until dst even*/
170 1.1 mjl
171 1.1 mjl andi. r5, r_dst, 0x03
172 1.1 mjl li r6, 4
173 1.1 mjl beq+ cb_aligned_w /* already aligned to word? */
174 1.1 mjl
175 1.1 mjl subf r5, r5, r6 /* bytes to fill to align4 */
176 1.1 mjl #if USE_STSWX
177 1.1 mjl mtxer r5
178 1.1 mjl stswx r0, 0, r_dst
179 1.1 mjl add r_dst, r5, r_dst
180 1.1 mjl #else
181 1.1 mjl mtctr r5
182 1.1 mjl
183 1.1 mjl subi r_dst, r_dst, 1
184 1.1 mjl 1: stbu r_val, 1(r_dst) /* Fill bytewise */
185 1.1 mjl bdnz 1b
186 1.1 mjl
187 1.1 mjl addi r_dst, r_dst, 1
188 1.1 mjl #endif
189 1.1 mjl subf r_len, r5, r_len
190 1.1 mjl
191 1.1 mjl cb_aligned_w: /* Cache block align, fill wordwise until dst aligned */
192 1.1 mjl
193 1.1 mjl /* I know I have something to do since we had > 2*CL initially */
194 1.1 mjl /* so no need to check for r_len = 0 */
195 1.1 mjl
196 1.1 mjl rlwinm. r5, r_dst, 30, 29, 31
197 1.1 mjl srwi r6, r9, 2
198 1.1 mjl beq cb_aligned_cb /* already on CL boundary? */
199 1.1 mjl
200 1.1 mjl subf r5, r5, r6 /* words to fill to alignment */
201 1.1 mjl mtctr r5
202 1.1 mjl slwi r5, r5, 2
203 1.1 mjl subf r_len, r5, r_len
204 1.1 mjl
205 1.1 mjl subi r_dst, r_dst, 4
206 1.1 mjl 1: stwu r_val, 4(r_dst) /* Fill wordwise */
207 1.1 mjl bdnz 1b
208 1.1 mjl addi r_dst, r_dst, 4
209 1.1 mjl
210 1.1 mjl cb_aligned_cb: /* no need to check r_len, see above */
211 1.1 mjl
212 1.1 mjl srw. r5, r_len, r10 /* Number of cache blocks */
213 1.1 mjl mtctr r5
214 1.1 mjl beq cblocks_done
215 1.1 mjl
216 1.1 mjl slw r5, r5, r10
217 1.1 mjl subf r_len, r5, r_len
218 1.1 mjl
219 1.1 mjl 1: dcbz 0, r_dst /* Clear blockwise */
220 1.1 mjl add r_dst, r_dst, r9
221 1.1 mjl bdnz 1b
222 1.1 mjl
223 1.1 mjl cblocks_done: /* still CL aligned, but less than CL bytes left */
224 1.1 mjl cmplwi cr1, r_len, 0
225 1.1 mjl cmplwi r_len, 8
226 1.1 mjl beq- cr1, sf_return
227 1.1 mjl
228 1.1 mjl blt- sf_bytewise /* <8 remaining? */
229 1.1 mjl b sf_aligned_w
230 1.1 mjl
231 1.1 mjl /*----------------------------------------------------------------------*/
232 1.1 mjl wbzero: li r_val, 0
233 1.1 mjl
234 1.1 mjl cmplwi r_len, 0
235 1.1 mjl beqlr- /* Nothing to do */
236 1.1 mjl
237 1.1 mjl simple_fill:
238 1.1 mjl #if USE_STSWX
239 1.1 mjl cmplwi cr1, r_len, 12 /* < 12 bytes to move? */
240 1.1 mjl #else
241 1.1 mjl cmplwi cr1, r_len, 8 /* < 8 bytes to move? */
242 1.1 mjl #endif
243 1.1 mjl andi. r5, r_dst, 0x03 /* bytes to fill to align4 */
244 1.1 mjl blt cr1, sf_bytewise /* trivial byte mover */
245 1.1 mjl
246 1.1 mjl li r6, 4
247 1.1 mjl subf r5, r5, r6
248 1.1 mjl beq+ sf_aligned_w /* dest is word aligned */
249 1.1 mjl
250 1.1 mjl #if USE_STSWX
251 1.1 mjl mtxer r5
252 1.1 mjl stswx r0, 0, r_dst
253 1.1 mjl add r_dst, r5, r_dst
254 1.1 mjl #else
255 1.1 mjl mtctr r5 /* nope, then fill bytewise */
256 1.1 mjl subi r_dst, r_dst, 1 /* until it is */
257 1.1 mjl 1: stbu r_val, 1(r_dst)
258 1.1 mjl bdnz 1b
259 1.1 mjl
260 1.1 mjl addi r_dst, r_dst, 1
261 1.1 mjl #endif
262 1.1 mjl subf r_len, r5, r_len
263 1.1 mjl
264 1.1 mjl sf_aligned_w: /* no need to check r_len since it were >= 8 bytes initially */
265 1.1 mjl #if USE_STSWX
266 1.1 mjl mr r6, r0
267 1.1 mjl mr r7, r0
268 1.1 mjl
269 1.1 mjl srwi r5, r_len, 3
270 1.1 mjl mtctr r5
271 1.1 mjl
272 1.1 mjl slwi r5, r5, 3 /* adjust len */
273 1.1 mjl subf. r_len, r5, r_len
274 1.1 mjl
275 1.1 mjl 1: stswi r6, r_dst, 8
276 1.1 mjl addi r_dst, r_dst, 8
277 1.1 mjl bdnz 1b
278 1.1 mjl #else
279 1.1 mjl srwi r5, r_len, 2 /* words to fill */
280 1.1 mjl mtctr r5
281 1.1 mjl
282 1.1 mjl slwi r5, r5, 2
283 1.1 mjl subf. r_len, r5, r_len /* adjust len for fill */
284 1.1 mjl
285 1.1 mjl subi r_dst, r_dst, 4
286 1.1 mjl 1: stwu r_val, 4(r_dst)
287 1.1 mjl bdnz 1b
288 1.1 mjl addi r_dst, r_dst, 4
289 1.1 mjl #endif
290 1.1 mjl
291 1.1 mjl sf_word_done: bne- sf_bytewise
292 1.1 mjl
293 1.1 mjl sf_return: mr r3, r8 /* restore orig ptr */
294 1.1 mjl blr /* for memset functionality */
295 1.1 mjl
296 1.1 mjl sf_bytewise:
297 1.1 mjl #if USE_STSWX
298 1.1 mjl mr r5, r0
299 1.1 mjl mr r6, r0
300 1.1 mjl mr r7, r0
301 1.1 mjl
302 1.1 mjl mtxer r_len
303 1.1 mjl stswx r5, 0, r_dst
304 1.1 mjl #else
305 1.1 mjl mtctr r_len
306 1.1 mjl
307 1.1 mjl subi r_dst, r_dst, 1
308 1.1 mjl 1: stbu r_val, 1(r_dst)
309 1.1 mjl bdnz 1b
310 1.1 mjl #endif
311 1.1 mjl mr r3, r8 /* restore orig ptr */
312 1.1 mjl blr /* for memset functionality */
313 1.1 mjl
314 1.1 mjl /*----------------------------------------------------------------------*/
315 1.1 mjl
316 1.1 mjl .data
317 1.1 mjl cache_size: .long -1
318 1.1 mjl cache_sh: .long 0
319 1.1 mjl
320 1.1 mjl /*----------------------------------------------------------------------*/
321