memset.S revision 1.2 1 1.2 apb /* $NetBSD: memset.S,v 1.2 2008/02/16 17:37:13 apb Exp $ */
2 1.1 christos
3 1.1 christos /*-
4 1.1 christos * Copyright (c) 2002 SHIMIZU Ryo. All rights reserved.
5 1.1 christos *
6 1.1 christos * Redistribution and use in source and binary forms, with or without
7 1.1 christos * modification, are permitted provided that the following conditions
8 1.1 christos * are met:
9 1.1 christos * 1. Redistributions of source code must retain the above copyright
10 1.1 christos * notice, this list of conditions and the following disclaimer.
11 1.1 christos * 2. Redistributions in binary form must reproduce the above copyright
12 1.1 christos * notice, this list of conditions and the following disclaimer in the
13 1.1 christos * documentation and/or other materials provided with the distribution.
14 1.1 christos * 3. The name of the author may not be used to endorse or promote products
15 1.1 christos * derived from this software without specific prior written permission.
16 1.1 christos *
17 1.1 christos * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 1.1 christos * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 1.1 christos * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 1.1 christos * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 1.1 christos * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 1.1 christos * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 1.1 christos * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 1.1 christos * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 1.1 christos * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 1.1 christos * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 1.1 christos */
28 1.1 christos
29 1.1 christos #include <machine/asm.h>
30 1.1 christos
31 1.1 christos #if defined(LIBC_SCCS) && !defined(lint)
32 1.2 apb RCSID("$NetBSD: memset.S,v 1.2 2008/02/16 17:37:13 apb Exp $")
33 1.1 christos #endif
34 1.1 christos
/*
 * Register assignments.
 *
 * r0 (REG_PTR) and r1 (REG_TMP1) are scratch in both builds.  r0 is also
 * the register this file uses for every "tst #imm" test and as the source
 * of every "mov.b/mov.w r0,@(disp,Rn)" store.
 *
 * bzero build:  REG_DST = r4, REG_LEN = r5; REG_C = r2 is loaded with 0
 *               by the code itself.
 * memset build: REG_DST = r4, REG_C = r5, REG_LEN = r6; REG_DST0 = r3
 *               keeps a copy of the original dst for the return value.
 *
 * NOTE(review): r4..r6 are presumably the incoming argument registers of
 * the calling convention in use -- confirm against the ABI.
 */
35 1.1 christos #define REG_PTR r0
36 1.1 christos #define REG_TMP1 r1
37 1.1 christos
38 1.1 christos #ifdef BZERO
39 1.1 christos # define REG_C r2
40 1.1 christos # define REG_DST r4
41 1.1 christos # define REG_LEN r5
42 1.1 christos #else
43 1.1 christos # define REG_DST0 r3
44 1.1 christos # define REG_DST r4
45 1.1 christos # define REG_C r5
46 1.1 christos # define REG_LEN r6
47 1.1 christos #endif
48 1.1 christos
/*
 * Entry point: bzero when BZERO is defined, memset otherwise.
 *
 * Dispatches on REG_LEN:
 *   len >= 28      -> large
 *   12 <= len < 28 -> small
 *   len < 12       -> filled right here, one byte at a time, walking
 *                     from the end of the buffer back to dst.
 *
 * bt/s and bf/s are delayed branches: the instruction that follows them
 * (the delay slot) is executed whether or not the branch is taken.
 */
49 1.1 christos #ifdef BZERO
50 1.1 christos ENTRY(bzero)
51 1.1 christos #else
52 1.1 christos ENTRY(memset)
53 1.1 christos mov REG_DST,REG_DST0 /* keep the original dst; it is the return value */
54 1.1 christos #endif
55 1.1 christos /* small amount to fill ? */
56 1.1 christos mov #28,REG_TMP1
57 1.1 christos cmp/hs REG_TMP1,REG_LEN /* if (len >= 28) goto large; */
58 1.1 christos bt/s large
59 1.1 christos mov #12,REG_TMP1 /* (delay slot) threshold for the next test */
60 1.1 christos cmp/hs REG_TMP1,REG_LEN /* if (len >= 12) goto small; */
61 1.1 christos bt/s small
/*
 * Delay slot of "bt/s small": in the bzero build it is the load of the
 * zero fill value (so REG_C is already 0 on arrival at small); in the
 * memset build it is the "tst" below, which is harmless on the taken path.
 */
62 1.1 christos #ifdef BZERO
63 1.1 christos mov #0,REG_C
64 1.1 christos #endif
65 1.1 christos /* very little fill (0 ~ 11 bytes) */
66 1.1 christos tst REG_LEN,REG_LEN /* T = (len == 0) */
67 1.1 christos add REG_DST,REG_LEN /* REG_LEN = dst + len (end pointer) */
68 1.1 christos bt/s done /* nothing to do for len == 0 */
69 1.1 christos add #1,REG_DST /* (delay slot) REG_DST = dst + 1, loop sentinel */
70 1.1 christos
/*
 * Each step: T says whether exactly one byte is left (end == dst + 1),
 * the byte at --end is stored, and the loop exits if that was the last
 * one.  The cmp/eq in each delay slot prepares T for the next store.
 */
71 1.1 christos /* unroll 4 loops */
72 1.1 christos cmp/eq REG_DST,REG_LEN
73 1.1 christos 1: mov.b REG_C,@-REG_LEN
74 1.1 christos bt/s done
75 1.1 christos cmp/eq REG_DST,REG_LEN
76 1.1 christos mov.b REG_C,@-REG_LEN
77 1.1 christos bt/s done
78 1.1 christos cmp/eq REG_DST,REG_LEN
79 1.1 christos mov.b REG_C,@-REG_LEN
80 1.1 christos bt/s done
81 1.1 christos cmp/eq REG_DST,REG_LEN
82 1.1 christos mov.b REG_C,@-REG_LEN
83 1.1 christos bf/s 1b
84 1.1 christos cmp/eq REG_DST,REG_LEN
85 1.1 christos done:
86 1.1 christos #ifdef BZERO
87 1.1 christos rts
88 1.1 christos nop
89 1.1 christos #else
90 1.1 christos rts
91 1.1 christos mov REG_DST0,r0 /* (delay slot) return the original dst */
92 1.1 christos #endif
93 1.1 christos
94 1.1 christos
/*
 * small: fill 12..27 bytes.
 *
 * If dst is even, control goes to small_aligned.  Otherwise the fill is
 * done by a computed jump into the table of byte stores below: every
 * instruction in the table occupies 2 bytes, so jumping to
 * (label 1) - 2*len runs exactly the last len instructions in front of
 * label 1.  The table is laid out from the highest offset down to offset 0,
 * so those instructions cover bytes dst[0] .. dst[len-1].
 *
 * The first 16 table entries use REG_TMP1 = dst + 16 as base and so reach
 * bytes 16..31; the remaining entries use REG_DST for bytes 0..15.
 */
95 1.1 christos small:
96 1.1 christos mov REG_DST,r0
97 1.1 christos tst #1,r0 /* T = (dst is even) */
98 1.1 christos bt/s small_aligned
99 1.1 christos mov REG_DST,REG_TMP1 /* (delay slot) REG_TMP1 = dst */
100 1.1 christos shll REG_LEN /* REG_LEN = 2 * len = size of len instructions */
101 1.1 christos mova 1f,r0 /* 1f must be 4bytes aligned! */
102 1.1 christos add #16,REG_TMP1 /* REG_TMP1 = dst+16; */
103 1.1 christos sub REG_LEN,r0 /* r0 = address of the first store to execute */
104 1.1 christos jmp @r0
105 1.1 christos mov REG_C,r0 /* (delay slot) the table stores from r0 */
106 1.1 christos
107 1.1 christos .align 2
108 1.1 christos mov.b r0,@(15,REG_TMP1)
109 1.1 christos mov.b r0,@(14,REG_TMP1)
110 1.1 christos mov.b r0,@(13,REG_TMP1)
111 1.1 christos mov.b r0,@(12,REG_TMP1)
112 1.1 christos mov.b r0,@(11,REG_TMP1)
113 1.1 christos mov.b r0,@(10,REG_TMP1)
114 1.1 christos mov.b r0,@(9,REG_TMP1)
115 1.1 christos mov.b r0,@(8,REG_TMP1)
116 1.1 christos mov.b r0,@(7,REG_TMP1)
117 1.1 christos mov.b r0,@(6,REG_TMP1)
118 1.1 christos mov.b r0,@(5,REG_TMP1)
119 1.1 christos mov.b r0,@(4,REG_TMP1)
120 1.1 christos mov.b r0,@(3,REG_TMP1)
121 1.1 christos mov.b r0,@(2,REG_TMP1)
122 1.1 christos mov.b r0,@(1,REG_TMP1)
123 1.1 christos mov.b r0,@REG_TMP1
124 1.1 christos mov.b r0,@(15,REG_DST)
125 1.1 christos mov.b r0,@(14,REG_DST)
126 1.1 christos mov.b r0,@(13,REG_DST)
127 1.1 christos mov.b r0,@(12,REG_DST)
128 1.1 christos mov.b r0,@(11,REG_DST)
129 1.1 christos mov.b r0,@(10,REG_DST)
130 1.1 christos mov.b r0,@(9,REG_DST)
131 1.1 christos mov.b r0,@(8,REG_DST)
132 1.1 christos mov.b r0,@(7,REG_DST)
133 1.1 christos mov.b r0,@(6,REG_DST)
134 1.1 christos mov.b r0,@(5,REG_DST)
135 1.1 christos mov.b r0,@(4,REG_DST)
136 1.1 christos mov.b r0,@(3,REG_DST)
137 1.1 christos mov.b r0,@(2,REG_DST)
138 1.1 christos mov.b r0,@(1,REG_DST)
/*
 * The position of label 1 differs between the builds, but either way
 * len bytes are stored:
 *  - bzero:  the rts is one of the len instructions counted back from
 *            label 1, and the dst[0] store at label 1 runs in its delay
 *            slot (len-1 table stores + 1 delay-slot store).
 *  - memset: label 1 is on the rts itself, so all len counted
 *            instructions are stores; the delay slot loads the return
 *            value instead.
 * In both builds 32 instructions lie between the .align 2 and label 1,
 * which keeps label 1 4-byte aligned as mova requires.
 */
139 1.1 christos #ifdef BZERO
140 1.1 christos rts
141 1.1 christos 1: mov.b r0,@REG_DST
142 1.1 christos #else
143 1.1 christos mov.b r0,@REG_DST
144 1.1 christos 1: rts
145 1.1 christos mov REG_DST0,r0 /* (delay slot) return the original dst */
146 1.1 christos #endif
147 1.1 christos
148 1.1 christos
/*
 * small_aligned: fill 12..27 bytes when dst is 2-byte aligned.
 *
 * Reached only from small (dst even).  The fill byte is first replicated
 * into the low 16 bits of REG_C (memset build only; in the bzero build
 * REG_C is already 0).  If len is odd the last byte is stored separately
 * and len is decremented, leaving an even count.  The rest is done by a
 * computed jump into the table of 16-bit stores below: each mov.w is 2
 * bytes of code and stores 2 bytes of data, so jumping to (label 1) - len
 * executes len/2 stores covering dst[0] .. dst[len-1].
 */
149 1.1 christos /* 2 bytes aligned small fill */
150 1.1 christos small_aligned:
151 1.1 christos #ifndef BZERO
152 1.1 christos extu.b REG_C,REG_TMP1 /* REG_C = ??????xx, REG_TMP1 = ????00xx */
153 1.1 christos shll8 REG_C /* REG_C = ????xx00, REG_TMP1 = ????00xx */
154 1.1 christos or REG_TMP1,REG_C /* REG_C = ????xxxx */
155 1.1 christos #endif
156 1.1 christos
157 1.1 christos mov REG_LEN,r0
158 1.1 christos tst #1,r0 /* is len even? */
159 1.1 christos bt/s 1f
160 1.1 christos add #-1,r0 /* (delay slot) r0 = len - 1 */
161 1.1 christos mov.b REG_C,@(r0,REG_DST) /* fill the last byte: dst[len-1] = c */
162 1.1 christos mov r0,REG_LEN /* len is now even */
163 1.1 christos 1:
164 1.1 christos
165 1.1 christos mova 1f,r0 /* 1f must be 4bytes aligned! */
166 1.1 christos sub REG_LEN,r0 /* r0 = address of the first store to execute */
167 1.1 christos jmp @r0
168 1.1 christos mov REG_C,r0 /* (delay slot) the table stores from r0 */
169 1.1 christos
170 1.1 christos .align 2
171 1.1 christos mov.w r0,@(30,REG_DST)
172 1.1 christos mov.w r0,@(28,REG_DST)
173 1.1 christos mov.w r0,@(26,REG_DST)
174 1.1 christos mov.w r0,@(24,REG_DST)
175 1.1 christos mov.w r0,@(22,REG_DST)
176 1.1 christos mov.w r0,@(20,REG_DST)
177 1.1 christos mov.w r0,@(18,REG_DST)
178 1.1 christos mov.w r0,@(16,REG_DST)
179 1.1 christos mov.w r0,@(14,REG_DST)
180 1.1 christos mov.w r0,@(12,REG_DST)
181 1.1 christos mov.w r0,@(10,REG_DST)
182 1.1 christos mov.w r0,@(8,REG_DST)
183 1.1 christos mov.w r0,@(6,REG_DST)
184 1.1 christos mov.w r0,@(4,REG_DST)
185 1.1 christos mov.w r0,@(2,REG_DST)
/*
 * As in the byte table of small, label 1 is placed so that both builds
 * store the same amount: in the bzero build the rts is one of the counted
 * instructions and the store to dst[0..1] runs in its delay slot; in the
 * memset build label 1 is the rts and its delay slot loads the return
 * value.  16 instructions lie between the .align 2 and label 1 in both
 * builds, which keeps label 1 4-byte aligned.
 */
186 1.1 christos #ifdef BZERO
187 1.1 christos rts
188 1.1 christos 1: mov.w r0,@REG_DST
189 1.1 christos #else
190 1.1 christos mov.w r0,@REG_DST
191 1.1 christos 1: rts
192 1.1 christos mov REG_DST0,r0 /* (delay slot) return the original dst */
193 1.1 christos #endif
194 1.1 christos
195 1.1 christos
196 1.1 christos
/*
 * large: fill 28 bytes or more.
 *
 * The fill byte is replicated into all four bytes of REG_C (or REG_C is
 * set to 0 for bzero) and REG_PTR is set to dst + len.  If dst or
 * dst + len is not 4-byte aligned, unaligned_dst / unaligned_len store the
 * odd head and tail bytes first and then branch back to aligned with
 * REG_LEN recomputed.
 *
 * aligned: fills [REG_DST, REG_PTR) with 32-bit stores, walking REG_PTR
 * backwards: first in 32-byte chunks while len >= 32, then 4 bytes at a
 * time until REG_PTR reaches REG_DST.
 */
197 1.1 christos .align 2
198 1.1 christos large:
199 1.1 christos #ifdef BZERO
200 1.1 christos mov #0,REG_C
201 1.1 christos #else
202 1.1 christos extu.b REG_C,REG_TMP1 /* REG_C = ??????xx, REG_TMP1 = ????00xx */
203 1.1 christos shll8 REG_C /* REG_C = ????xx00, REG_TMP1 = ????00xx */
204 1.1 christos or REG_C,REG_TMP1 /* REG_C = ????xx00, REG_TMP1 = ????xxxx */
205 1.1 christos swap.w REG_TMP1,REG_C /* REG_C = xxxx????, REG_TMP1 = ????xxxx */
206 1.1 christos xtrct REG_TMP1,REG_C /* REG_C = xxxxxxxx */
207 1.1 christos #endif
208 1.1 christos
209 1.1 christos mov #3,REG_TMP1
210 1.1 christos tst REG_TMP1,REG_DST /* T = ((dst & 3) == 0) */
211 1.1 christos mov REG_DST,REG_PTR
212 1.1 christos bf/s unaligned_dst
213 1.1 christos add REG_LEN,REG_PTR /* REG_PTR = dst + len; */
214 1.1 christos tst REG_TMP1,REG_LEN /* T = ((len & 3) == 0) */
/*
 * The delay slot of this bf/s is the first instruction of aligned
 * ("mov #32,REG_TMP1"); it is executed on both paths and is harmless when
 * the branch to unaligned_len is taken.
 */
215 1.1 christos bf/s unaligned_len
216 1.1 christos
217 1.1 christos aligned:
218 1.1 christos /* fill 32*n bytes */
219 1.1 christos mov #32,REG_TMP1
220 1.1 christos cmp/hi REG_LEN,REG_TMP1 /* T = (32 > len), unsigned */
221 1.1 christos bt 9f /* fewer than 32 bytes: skip the chunk loop */
222 1.1 christos .align 2
223 1.1 christos 1: sub REG_TMP1,REG_PTR /* ptr -= 32 */
224 1.1 christos mov.l REG_C,@REG_PTR
225 1.1 christos sub REG_TMP1,REG_LEN /* len -= 32 */
226 1.1 christos mov.l REG_C,@(4,REG_PTR)
227 1.1 christos cmp/hi REG_LEN,REG_TMP1 /* T = (32 > len): last full chunk done? */
228 1.1 christos mov.l REG_C,@(8,REG_PTR)
229 1.1 christos mov.l REG_C,@(12,REG_PTR)
230 1.1 christos mov.l REG_C,@(16,REG_PTR)
231 1.1 christos mov.l REG_C,@(20,REG_PTR)
232 1.1 christos mov.l REG_C,@(24,REG_PTR)
233 1.1 christos bf/s 1b /* loop while len >= 32 */
234 1.1 christos mov.l REG_C,@(28,REG_PTR) /* (delay slot) last store of the chunk */
235 1.1 christos 9:
236 1.1 christos
/*
 * Same scheme as the byte loop at the function entry, but with 32-bit
 * stores: REG_DST is advanced by 4 so that "ptr == dst" before a
 * pre-decrement store means "this is the last word".
 */
237 1.1 christos /* fill left 4*n bytes */
238 1.1 christos cmp/eq REG_DST,REG_PTR
239 1.1 christos bt 9f /* nothing left */
240 1.1 christos add #4,REG_DST
241 1.1 christos cmp/eq REG_DST,REG_PTR
242 1.1 christos 1: mov.l REG_C,@-REG_PTR
243 1.1 christos bt/s 9f
244 1.1 christos cmp/eq REG_DST,REG_PTR
245 1.1 christos mov.l REG_C,@-REG_PTR
246 1.1 christos bt/s 9f
247 1.1 christos cmp/eq REG_DST,REG_PTR
248 1.1 christos mov.l REG_C,@-REG_PTR
249 1.1 christos bt/s 9f
250 1.1 christos cmp/eq REG_DST,REG_PTR
251 1.1 christos mov.l REG_C,@-REG_PTR
252 1.1 christos bf/s 1b
253 1.1 christos cmp/eq REG_DST,REG_PTR
254 1.1 christos 9:
255 1.1 christos #ifdef BZERO
256 1.1 christos rts
257 1.1 christos nop
258 1.1 christos #else
259 1.1 christos rts
260 1.1 christos mov REG_DST0,r0 /* (delay slot) return the original dst */
261 1.1 christos #endif
262 1.1 christos
263 1.1 christos
/*
 * unaligned_dst / unaligned_len: trim the unaligned head and tail of a
 * large fill, then continue at aligned.
 *
 * unaligned_dst is entered from large when (dst & 3) != 0, with
 * REG_PTR = dst + len.  It stores one byte and/or one 16-bit word at dst
 * until dst is 4-byte aligned, then does the same for the tail.
 *
 * unaligned_len is entered from large when dst is aligned but
 * (len & 3) != 0.  It stores one byte and/or one 16-bit word
 * just below REG_PTR, moving REG_PTR down to a 4-byte boundary.
 *
 * Finally REG_LEN is recomputed as REG_PTR - REG_DST for the aligned loop.
 */
264 1.1 christos unaligned_dst:
265 1.1 christos mov #1,REG_TMP1
266 1.1 christos tst REG_TMP1,REG_DST /* if (dst & 1) { */
267 1.1 christos add #1,REG_TMP1 /* REG_TMP1 = 2, mask for the next test */
268 1.1 christos bt/s 2f
269 1.1 christos tst REG_TMP1,REG_DST /* (delay slot) T = ((dst & 2) == 0) */
270 1.1 christos mov.b REG_C,@REG_DST /* *dst++ = c; */
271 1.1 christos add #1,REG_DST
272 1.1 christos tst REG_TMP1,REG_DST /* redo the (dst & 2) test on the new dst */
273 1.1 christos 2: /* } */
274 1.1 christos /* if (dst & 2) { */
275 1.1 christos bt 4f
276 1.2 apb mov.w REG_C,@REG_DST /* *(uint16_t *)dst = c; */
277 1.1 christos add #2,REG_DST /* dst += 2; */
278 1.1 christos 4: /* } */
279 1.1 christos
280 1.1 christos
281 1.1 christos tst #3,REG_PTR /* if (ptr & 3) { */
282 1.1 christos bt/s 4f /* */
/*
 * The "tst #1" below is the delay slot of the bt/s above as well as the
 * first instruction of unaligned_len.
 */
283 1.1 christos unaligned_len:
284 1.1 christos tst #1,REG_PTR /* if (ptr & 1) { */
285 1.1 christos bt/s 2f
286 1.1 christos tst #2,REG_PTR /* (delay slot) T = ((ptr & 2) == 0); bit 1 is unchanged by the byte store below */
287 1.1 christos mov.b REG_C,@-REG_PTR /* *--ptr = c; */
288 1.1 christos 2: /* } */
289 1.1 christos /* if (ptr & 2) { */
290 1.1 christos bt 4f
291 1.2 apb mov.w REG_C,@-REG_PTR /* *--(uint16_t*)ptr = c; */
292 1.1 christos 4: /* } */
293 1.1 christos /* } */
294 1.1 christos
295 1.1 christos mov REG_PTR,REG_LEN
296 1.1 christos bra aligned
297 1.1 christos sub REG_DST,REG_LEN /* (delay slot) len = ptr - dst */
298 1.1 christos
299