/*	$NetBSD: bzero.S,v 1.15 2016/12/19 14:30:23 christos Exp $ */

/*-
 * Copyright (C) 2001	Martin J. Laubach <mjl@NetBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/

#include <machine/asm.h>


#if defined(LIBC_SCCS) && !defined(lint)
__RCSID("$NetBSD: bzero.S,v 1.15 2016/12/19 14:30:23 christos Exp $")
#endif /* LIBC_SCCS && !lint */

#include "assym.h"

#define USE_STSWX 0	/* don't. slower than trivial copy loop */

/*----------------------------------------------------------------------*/
/*
     void bzero(void *b %r3, size_t len %r4);
     void * memset(void *b %r3, int c %r4, size_t len %r5);
 */
/*----------------------------------------------------------------------*/

/* Register aliases shared by bzero/memset below. */
#define r_dst	%r3		/* current destination pointer */
#define r_len	%r4		/* bytes still to fill */
#define r_val	%r0		/* fill value (replicated into all 4 bytes) */

	.text
	.align 4
/*
 * void bzero(void *b, size_t len)
 *
 * In:  %r3 = b (aliased r_dst), %r4 = len (aliased r_len).
 * Returns immediately for len == 0; otherwise falls into the shared
 * fill path in memset (cb_memset) with a zero fill value, which enables
 * the dcbz cache-block-zero fast path there.
 */
ENTRY(bzero)
	li	r_val, 0		/* Value to stuff in */
	cmplwi	%cr1, %r4, 0		/* Zero length? */
	beqlr-	%cr1			/* Yes, do nothing */
	b	cb_memset		/* r_dst/r_len already in place */
END(bzero)

/*
 * void *memset(void *b, int c, size_t len)
 *
 * In:   %r3 = b, %r4 = c, %r5 = len.
 * Out:  %r3 = b (original pointer, saved in %r8 and restored on return).
 *
 * Strategy: replicate the fill byte into a full word.  For a zero fill
 * value and a region larger than two cache lines, align the destination
 * to a cache-line boundary (byte fill, then word fill) and clear whole
 * lines with dcbz; the tail and all other cases go through simple_fill.
 * The D-cache line size comes from _libc_powerpc_cache_info (userland)
 * or cpu_info (kernel); if it is unknown, simple_fill is used instead.
 */
ENTRY(memset)
	cmplwi	%cr1, %r5, 0
	mr.	%r0, %r4		/* also tests c == 0 for the dcbz path */
	mr	%r8, %r3		/* remember b so we can return it */
	beqlr-	%cr1			/* Nothing to do */

	rlwimi	%r0, %r4, 8, 16, 23	/* word extend fill value */
	rlwimi	%r0, %r0, 16, 0, 15
	mr	%r4, %r5		/* r_len = len */
	bne-	simple_fill		/* != 0, use trivial fill */
cb_memset:

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
	/* First get cache line size */
#ifdef	__PIC__
	mflr	%r9			/* save LR around the bcl trick */
	bcl	20,31,1f		/* PIC: get current address into LR */
1:	mflr	%r10
	mtlr	%r9
	addis	%r10,%r10,_libc_powerpc_cache_info+CACHE_INFO_DCACHE_LINE_SIZE-1b@ha
	lwz	%r9,_libc_powerpc_cache_info+CACHE_INFO_DCACHE_LINE_SIZE-1b@l(%r10)
#else
	lis	%r10,_libc_powerpc_cache_info+CACHE_INFO_DCACHE_LINE_SIZE@ha
	lwz	%r9,_libc_powerpc_cache_info+CACHE_INFO_DCACHE_LINE_SIZE@l(%r10)
#endif
	cmplwi	%cr1, %r9, 0		/* Unknown? */
	beq-	simple_fill		/* then use the trivial fill routine */
#else /* _KERNEL */
#ifdef	MULTIPROCESSOR
	mfsprg	%r10, 0			/* Get cpu_info pointer */
#else
	lis	%r10, cpu_info_store@ha
	addi	%r10, %r10, cpu_info_store@l
#endif
	lwz	%r9, CPU_CI+CACHE_INFO_DCACHE_LINE_SIZE(%r10)	/* Load D$ line size */
#endif /* _KERNEL */
	cntlzw	%r10, %r9		/* %r10 = log2(line size)... */
	li	%r6, 31
	subf	%r10, %r10, %r6		/* ... = 31 - cntlzw(size) */
	/* Back in memory filling business */

	cmplwi	%cr1, r_len, 0		/* Nothing to do? */
	add	%r5, %r9, %r9		/* %r5 = 2*CL */
	cmplw	r_len, %r5		/* <= 2*CL bytes to move? */
	beqlr-	%cr1			/* then do nothing */

	blt+	simple_fill		/* too small for dcbz to pay off */

	/* Word align the block, fill bytewise until dst even */

	andi.	%r5, r_dst, 0x03
	li	%r6, 4
	beq+	cb_aligned_w		/* already aligned to word? */

	subf	%r5, %r5, %r6		/* bytes to fill to align4 */
#if USE_STSWX
	mtxer	%r5
	stswx	%r0, 0, r_dst
	add	r_dst, %r5, r_dst
#else
	mtctr	%r5

	subi	r_dst, r_dst, 1		/* pre-decrement for stbu */
1:	stbu	r_val, 1(r_dst)		/* Fill bytewise */
	bdnz	1b

	addi	r_dst, r_dst, 1
#endif
	subf	r_len, %r5, r_len

cb_aligned_w:	/* Cache block align, fill wordwise until dst aligned */

	/* I know I have something to do since we had > 2*CL initially */
	/* so no need to check for r_len = 0 */

	subi	%r6, %r9, 1		/* CL mask */
	and.	%r5, r_dst, %r6		/* offset within cache line */
	srwi	%r5, %r5, 2		/* ... in words */
	srwi	%r6, %r9, 2		/* CL size in words */
	beq	cb_aligned_cb		/* already on CL boundary? */

	subf	%r5, %r5, %r6		/* words to fill to alignment */
	mtctr	%r5
	slwi	%r5, %r5, 2		/* back to bytes */
	subf	r_len, %r5, r_len

	subi	r_dst, r_dst, 4		/* pre-decrement for stwu */
1:	stwu	r_val, 4(r_dst)		/* Fill wordwise */
	bdnz	1b
	addi	r_dst, r_dst, 4

cb_aligned_cb:	/* no need to check r_len, see above */

	srw.	%r5, r_len, %r10	/* Number of cache blocks */
	mtctr	%r5
	beq	cblocks_done

	slw	%r5, %r5, %r10		/* bytes covered by whole blocks */
	subf	r_len, %r5, r_len

1:	dcbz	0, r_dst		/* Clear blockwise */
	add	r_dst, r_dst, %r9
	bdnz	1b

cblocks_done:	/* still CL aligned, but less than CL bytes left */
	cmplwi	%cr1, r_len, 0
	cmplwi	r_len, 8
	beq-	%cr1, sf_return		/* nothing left at all */

	blt-	sf_bytewise		/* <8 remaining? */
	b	sf_aligned_w		/* dst still word aligned here */

/*----------------------------------------------------------------------*/
/* NOTE(review): wbzero is not referenced from the visible code; it may be
 * an entry point used elsewhere — confirm before removing. */
wbzero:	li	r_val, 0

	cmplwi	r_len, 0
	beqlr-				/* Nothing to do */

simple_fill:
#if USE_STSWX
	cmplwi	%cr1, r_len, 12		/* < 12 bytes to move? */
#else
	cmplwi	%cr1, r_len, 8		/* < 8 bytes to move? */
#endif
	andi.	%r5, r_dst, 0x03	/* bytes to fill to align4 */
	blt	%cr1, sf_bytewise	/* trivial byte mover */

	li	%r6, 4
	subf	%r5, %r5, %r6
	beq+	sf_aligned_w		/* dest is word aligned */

#if USE_STSWX
	mtxer	%r5
	stswx	%r0, 0, r_dst
	add	r_dst, %r5, r_dst
#else
	mtctr	%r5			/* nope, then fill bytewise */
	subi	r_dst, r_dst, 1		/* until it is */
1:	stbu	r_val, 1(r_dst)
	bdnz	1b

	addi	r_dst, r_dst, 1
#endif
	subf	r_len, %r5, r_len

sf_aligned_w:	/* no need to check r_len since it was >= 8 bytes initially */
#if USE_STSWX
	mr	%r6, %r0
	mr	%r7, %r0

	srwi	%r5, r_len, 3		/* 8-byte chunks */
	mtctr	%r5

	slwi	%r5, %r5, 3		/* adjust len */
	subf.	r_len, %r5, r_len

1:	stswi	%r6, r_dst, 8
	addi	r_dst, r_dst, 8
	bdnz	1b
#else
	srwi	%r5, r_len, 2		/* words to fill */
	mtctr	%r5

	slwi	%r5, %r5, 2
	subf.	r_len, %r5, r_len	/* adjust len for fill */

	subi	r_dst, r_dst, 4		/* pre-decrement for stwu */
1:	stwu	r_val, 4(r_dst)
	bdnz	1b
	addi	r_dst, r_dst, 4
#endif

sf_word_done:	bne-	sf_bytewise	/* CR still set from subf. above */

sf_return:	mr	%r3, %r8	/* restore orig ptr */
	blr				/* for memset functionality */

sf_bytewise:
#if USE_STSWX
	mr	%r5, %r0
	mr	%r6, %r0
	mr	%r7, %r0

	mtxer	r_len
	stswx	%r5, 0, r_dst
#else
	mtctr	r_len

	subi	r_dst, r_dst, 1		/* pre-decrement for stbu */
1:	stbu	r_val, 1(r_dst)
	bdnz	1b
#endif
	mr	%r3, %r8		/* restore orig ptr */
	blr				/* for memset functionality */
END(memset)

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
	/*
	 * NOTE(review): nothing in the visible code references these
	 * symbols any more — the D-cache line size is read from
	 * _libc_powerpc_cache_info above.  Presumably leftovers from an
	 * earlier revision; confirm no external references before removing.
	 */
	.data
	.p2align 2
cache_info:	.long -1, -1, -1, -1
cache_sh:	.long 0

#endif
/*----------------------------------------------------------------------*/