1 /* $NetBSD: memset.S,v 1.1 2005/12/20 19:28:50 christos Exp $ */
2
3 /*-
4 * Copyright (c) 2002 SHIMIZU Ryo. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include <machine/asm.h>
30
/*
 * Emit the RCS identification string, but only when LIBC_SCCS is defined
 * and lint is not.
 */
31 #if defined(LIBC_SCCS) && !defined(lint)
32 RCSID("$NetBSD: memset.S,v 1.1 2005/12/20 19:28:50 christos Exp $")
33 #endif
34
/*
 * Register aliases used by the code in this file.
 *
 * Common to both builds:
 *   REG_PTR  (r0)  end pointer (dst + len) in the large-fill path.  r0 is
 *                  also used directly by name for mova/jmp targets, for
 *                  "tst #imm,r0" and as the store source in the jump tables.
 *   REG_TMP1 (r1)  scratch.
 *
 * BZERO build (bzero):
 *   REG_C    (r2)  fill value; the code loads it with 0.
 *   REG_DST  (r4)  destination, read on entry without being set here.
 *   REG_LEN  (r5)  length, read on entry without being set here.
 *
 * Default build (memset):
 *   REG_DST0 (r3)  copy of the original dst, moved to r0 on return.
 *   REG_DST  (r4)  destination, read on entry without being set here.
 *   REG_C    (r5)  fill value, read on entry without being set here.
 *   REG_LEN  (r6)  length, read on entry without being set here.
 */
35 #define REG_PTR r0
36 #define REG_TMP1 r1
37
38 #ifdef BZERO
39 # define REG_C r2
40 # define REG_DST r4
41 # define REG_LEN r5
42 #else
43 # define REG_DST0 r3
44 # define REG_DST r4
45 # define REG_C r5
46 # define REG_LEN r6
47 #endif
48
/*
 * Entry point: bzero when BZERO is defined, memset otherwise.
 *
 * Dispatch on the length:
 *   len >= 28  -> large
 *   len >= 12  -> small
 *   otherwise  -> byte loop below (0 to 11 bytes)
 *
 * memset returns the original dst in r0; bzero returns nothing.
 *
 * bt/s and bf/s are delayed branches: the instruction following them (the
 * delay slot) is executed whether or not the branch is taken.
 */
49 #ifdef BZERO
50 ENTRY(bzero)
51 #else
52 ENTRY(memset)
53 mov REG_DST,REG_DST0 /* keep the original dst for the return value */
54 #endif
55 /* small amount to fill ? */
56 mov #28,REG_TMP1
57 cmp/hs REG_TMP1,REG_LEN /* if (len >= 28) goto large; (unsigned compare) */
58 bt/s large
59 mov #12,REG_TMP1 /* (delay slot) if (len >= 12) goto small; */
60 cmp/hs REG_TMP1,REG_LEN
61 bt/s small
/*
 * Delay slot of the branch above.  BZERO build: it clears the fill value.
 * memset build: the mov is compiled out, so the "tst" below occupies the
 * delay slot instead.
 */
62 #ifdef BZERO
63 mov #0,REG_C
64 #endif
65 /* very little fill (0 ~ 11 bytes) */
66 tst REG_LEN,REG_LEN /* T = (len == 0) */
67 add REG_DST,REG_LEN /* REG_LEN = dst + len (one past the last byte) */
68 bt/s done /* nothing to do when len == 0 */
69 add #1,REG_DST /* (delay slot) REG_DST = dst + 1 */
70
/*
 * Store bytes from the end backwards using pre-decrement.  Each cmp/eq is
 * evaluated before the store it guards: T is set when REG_LEN == dst + 1,
 * i.e. when the next store writes dst[0] and is therefore the last one.
 * The cmp/eq in each delay slot prepares T for the following store.
 */
71 /* loop body unrolled 4 times */
72 cmp/eq REG_DST,REG_LEN
73 1: mov.b REG_C,@-REG_LEN
74 bt/s done
75 cmp/eq REG_DST,REG_LEN
76 mov.b REG_C,@-REG_LEN
77 bt/s done
78 cmp/eq REG_DST,REG_LEN
79 mov.b REG_C,@-REG_LEN
80 bt/s done
81 cmp/eq REG_DST,REG_LEN
82 mov.b REG_C,@-REG_LEN
83 bf/s 1b /* not at dst yet: go round again */
84 cmp/eq REG_DST,REG_LEN
85 done:
86 #ifdef BZERO
87 rts
88 nop /* (delay slot) */
89 #else
90 rts
91 mov REG_DST0,r0 /* (delay slot) return the original dst */
92 #endif
93
94
/*
 * small: reached from the entry dispatch with 12 <= len < 28.
 *
 * If dst is even, control goes to small_aligned.  Otherwise the fill is
 * done with byte stores through a computed jump into the table below.
 *
 * The table holds 32 byte stores, for offsets 15..0 from REG_TMP1
 * (= dst + 16) followed by offsets 15..0 from REG_DST, i.e. dst[31] down
 * to dst[0].  Each store is one 2-byte instruction, so jumping to
 * (label 1) - 2*len executes exactly len stores, dst[len-1] .. dst[0].
 *
 * BZERO build: label 1 is the last store, sitting in the delay slot of
 * rts; the rts occupies one table slot, so the same arithmetic holds.
 * memset build: label 1 is the rts, whose delay slot loads the return
 * value.
 */
95 small:
96 mov REG_DST,r0
97 tst #1,r0 /* T = ((dst & 1) == 0) */
98 bt/s small_aligned /* even dst: use the word-store table */
99 mov REG_DST,REG_TMP1 /* (delay slot) REG_TMP1 = dst */
100 shll REG_LEN /* REG_LEN = len * 2 = bytes of code to execute */
101 mova 1f,r0 /* 1f must be 4-byte aligned! */
102 add #16,REG_TMP1 /* REG_TMP1 = dst+16; */
103 sub REG_LEN,r0 /* r0 = 1f - 2*len: entry point into the table */
104 jmp @r0
105 mov REG_C,r0 /* (delay slot) r0 = fill value for the stores */
106
107 .align 2
108 mov.b r0,@(15,REG_TMP1)
109 mov.b r0,@(14,REG_TMP1)
110 mov.b r0,@(13,REG_TMP1)
111 mov.b r0,@(12,REG_TMP1)
112 mov.b r0,@(11,REG_TMP1)
113 mov.b r0,@(10,REG_TMP1)
114 mov.b r0,@(9,REG_TMP1)
115 mov.b r0,@(8,REG_TMP1)
116 mov.b r0,@(7,REG_TMP1)
117 mov.b r0,@(6,REG_TMP1)
118 mov.b r0,@(5,REG_TMP1)
119 mov.b r0,@(4,REG_TMP1)
120 mov.b r0,@(3,REG_TMP1)
121 mov.b r0,@(2,REG_TMP1)
122 mov.b r0,@(1,REG_TMP1)
123 mov.b r0,@REG_TMP1
124 mov.b r0,@(15,REG_DST)
125 mov.b r0,@(14,REG_DST)
126 mov.b r0,@(13,REG_DST)
127 mov.b r0,@(12,REG_DST)
128 mov.b r0,@(11,REG_DST)
129 mov.b r0,@(10,REG_DST)
130 mov.b r0,@(9,REG_DST)
131 mov.b r0,@(8,REG_DST)
132 mov.b r0,@(7,REG_DST)
133 mov.b r0,@(6,REG_DST)
134 mov.b r0,@(5,REG_DST)
135 mov.b r0,@(4,REG_DST)
136 mov.b r0,@(3,REG_DST)
137 mov.b r0,@(2,REG_DST)
138 mov.b r0,@(1,REG_DST)
139 #ifdef BZERO
140 rts
141 1: mov.b r0,@REG_DST /* (delay slot of rts) final store, dst[0] */
142 #else
143 mov.b r0,@REG_DST
144 1: rts
145 mov REG_DST0,r0 /* (delay slot) return the original dst */
146 #endif
147
148
/*
 * small_aligned: reached from small when dst is even (12 <= len < 28).
 *
 * memset build: the fill byte is first replicated into the low 16 bits of
 * REG_C.  (BZERO build: REG_C is already 0.)
 *
 * An odd len is made even by storing the last byte, dst[len-1], on its
 * own.  The rest is filled with 16-bit stores through a computed jump
 * into the table below.  Each mov.w is 2 bytes of code and writes 2 bytes
 * of data, so jumping to (label 1) - len executes exactly len/2 stores.
 *
 * The placement of label 1 relative to rts follows the same scheme as in
 * the byte-store table of "small".
 */
149 /* 2 bytes aligned small fill */
150 small_aligned:
151 #ifndef BZERO
152 extu.b REG_C,REG_TMP1 /* REG_C = ??????xx, REG_TMP1 = ????00xx */
153 shll8 REG_C /* REG_C = ????xx00, REG_TMP1 = ????00xx */
154 or REG_TMP1,REG_C /* REG_C = ????xxxx */
155 #endif
156
157 mov REG_LEN,r0
158 tst #1,r0 /* is len even? */
159 bt/s 1f
160 add #-1,r0 /* (delay slot) r0 = len - 1 */
161 mov.b REG_C,@(r0,REG_DST) /* odd len: fill the last byte, dst[len-1] */
162 mov r0,REG_LEN /* REG_LEN = len - 1, now even */
163 1:
164
165 mova 1f,r0 /* 1f must be 4-byte aligned! */
166 sub REG_LEN,r0 /* r0 = 1f - len: entry point into the table */
167 jmp @r0
168 mov REG_C,r0 /* (delay slot) r0 = fill pattern for the stores */
169
170 .align 2
171 mov.w r0,@(30,REG_DST)
172 mov.w r0,@(28,REG_DST)
173 mov.w r0,@(26,REG_DST)
174 mov.w r0,@(24,REG_DST)
175 mov.w r0,@(22,REG_DST)
176 mov.w r0,@(20,REG_DST)
177 mov.w r0,@(18,REG_DST)
178 mov.w r0,@(16,REG_DST)
179 mov.w r0,@(14,REG_DST)
180 mov.w r0,@(12,REG_DST)
181 mov.w r0,@(10,REG_DST)
182 mov.w r0,@(8,REG_DST)
183 mov.w r0,@(6,REG_DST)
184 mov.w r0,@(4,REG_DST)
185 mov.w r0,@(2,REG_DST)
186 #ifdef BZERO
187 rts
188 1: mov.w r0,@REG_DST /* (delay slot of rts) final store at dst */
189 #else
190 mov.w r0,@REG_DST
191 1: rts
192 mov REG_DST0,r0 /* (delay slot) return the original dst */
193 #endif
194
195
196
/*
 * large: reached from the entry dispatch with len >= 28.
 *
 * Builds a 32-bit fill pattern in REG_C (0 for bzero, the fill byte
 * replicated four times for memset) and sets REG_PTR = dst + len.
 * If dst is not 4-byte aligned, control goes to unaligned_dst; if dst is
 * aligned but len is not a multiple of 4, to unaligned_len.  Both come
 * back to "aligned" with REG_DST and REG_PTR 4-byte aligned and
 * REG_LEN = REG_PTR - REG_DST.
 *
 * aligned: fills [REG_DST, REG_PTR) from the end backwards, first in
 * 32-byte chunks while len >= 32, then one 32-bit word at a time.
 */
197 .align 2
198 large:
199 #ifdef BZERO
200 mov #0,REG_C
201 #else
202 extu.b REG_C,REG_TMP1 /* REG_C = ??????xx, REG_TMP1 = ????00xx */
203 shll8 REG_C /* REG_C = ????xx00, REG_TMP1 = ????00xx */
204 or REG_C,REG_TMP1 /* REG_C = ????xx00, REG_TMP1 = ????xxxx */
205 swap.w REG_TMP1,REG_C /* REG_C = xxxx????, REG_TMP1 = ????xxxx */
206 xtrct REG_TMP1,REG_C /* REG_C = xxxxxxxx */
207 #endif
208
209 mov #3,REG_TMP1
210 tst REG_TMP1,REG_DST /* T = ((dst & 3) == 0) */
211 mov REG_DST,REG_PTR
212 bf/s unaligned_dst
213 add REG_LEN,REG_PTR /* (delay slot) REG_PTR = dst + len; */
214 tst REG_TMP1,REG_LEN /* T = ((len & 3) == 0) */
215 bf/s unaligned_len /* delay slot is the "mov #32" at aligned: below */
216
217 aligned:
218 /* fill 32*n bytes */
219 mov #32,REG_TMP1
220 cmp/hi REG_LEN,REG_TMP1 /* T = (32 > len), unsigned */
221 bt 9f /* fewer than 32 bytes left: skip the chunk loop */
/*
 * NOTE(review): any padding emitted by this .align lies on the
 * fall-through path from the bt above, so it must assemble to no-ops --
 * confirm against the assembler in use.
 */
222 .align 2
223 1: sub REG_TMP1,REG_PTR /* ptr -= 32 */
224 mov.l REG_C,@REG_PTR
225 sub REG_TMP1,REG_LEN /* len -= 32 */
226 mov.l REG_C,@(4,REG_PTR)
227 cmp/hi REG_LEN,REG_TMP1 /* T = (32 > len): stop after this chunk */
228 mov.l REG_C,@(8,REG_PTR)
229 mov.l REG_C,@(12,REG_PTR)
230 mov.l REG_C,@(16,REG_PTR)
231 mov.l REG_C,@(20,REG_PTR)
232 mov.l REG_C,@(24,REG_PTR)
233 bf/s 1b
234 mov.l REG_C,@(28,REG_PTR) /* (delay slot) last word of the chunk */
235 9:
236
237 /* fill the remaining 4*n bytes */
238 cmp/eq REG_DST,REG_PTR
239 bt 9f /* ptr == dst: nothing left */
/*
 * Same scheme as the byte loop at the entry point, with 32-bit stores:
 * REG_DST = dst + 4, and T is set when the next pre-decrement store is
 * the one that writes at dst.
 */
240 add #4,REG_DST
241 cmp/eq REG_DST,REG_PTR
242 1: mov.l REG_C,@-REG_PTR
243 bt/s 9f
244 cmp/eq REG_DST,REG_PTR
245 mov.l REG_C,@-REG_PTR
246 bt/s 9f
247 cmp/eq REG_DST,REG_PTR
248 mov.l REG_C,@-REG_PTR
249 bt/s 9f
250 cmp/eq REG_DST,REG_PTR
251 mov.l REG_C,@-REG_PTR
252 bf/s 1b
253 cmp/eq REG_DST,REG_PTR
254 9:
255 #ifdef BZERO
256 rts
257 nop /* (delay slot) */
258 #else
259 rts
260 mov REG_DST0,r0 /* (delay slot) return the original dst */
261 #endif
262
263
/*
 * unaligned_dst: reached from large when dst is not 4-byte aligned.
 * Stores at most one byte and one 16-bit word at the front so that
 * REG_DST becomes 4-byte aligned, then falls into the tail fix-up unless
 * REG_PTR (the end pointer) is already 4-byte aligned.
 *
 * unaligned_len: also entered directly from large when dst is aligned but
 * len is not a multiple of 4.  Stores at most one byte and one 16-bit word
 * just below REG_PTR so that REG_PTR becomes 4-byte aligned.
 *
 * Finally REG_LEN is recomputed as REG_PTR - REG_DST and control returns
 * to "aligned".
 *
 * The "tst #imm" forms below operate on r0, which is what REG_PTR is
 * defined as.
 */
264 unaligned_dst:
265 mov #1,REG_TMP1
266 tst REG_TMP1,REG_DST /* if (dst & 1) { */
267 add #1,REG_TMP1 /* REG_TMP1 = 2; T is left untouched */
268 bt/s 2f
269 tst REG_TMP1,REG_DST /* (delay slot) T = ((dst & 2) == 0) */
270 mov.b REG_C,@REG_DST /* *dst++ = c; */
271 add #1,REG_DST
272 tst REG_TMP1,REG_DST /* re-test bit 1 of the incremented dst */
273 2: /* } */
274 /* if (dst & 2) { */
275 bt 4f
276 mov.w REG_C,@REG_DST /* *(u_int16_t *)dst = c; dst += 2; */
277 add #2,REG_DST
278 4: /* } */
279
280
281 tst #3,REG_PTR /* if (ptr & 3) { */
282 bt/s 4f /* delay slot is the "tst #1" just after the label */
283 unaligned_len:
284 tst #1,REG_PTR /* if (ptr & 1) { */
285 bt/s 2f
286 tst #2,REG_PTR /* (delay slot) T = ((ptr & 2) == 0) */
287 mov.b REG_C,@-REG_PTR /* *--ptr = c; (bit 1 of ptr is unchanged) */
288 2: /* } */
289 /* if (ptr & 2) { */
290 bt 4f
291 mov.w REG_C,@-REG_PTR /* ptr -= 2; *(u_int16_t *)ptr = c; */
292 4: /* } */
293 /* } */
294
295 mov REG_PTR,REG_LEN
296 bra aligned
297 sub REG_DST,REG_LEN /* (delay slot) REG_LEN = ptr - dst */
298
299