memcpy.S revision 1.2.94.2 1 /* $NetBSD: memcpy.S,v 1.2.94.2 2020/04/21 19:37:46 martin Exp $ */
2
3 /*
4 * Copyright (c) 2000 SHIMIZU Ryo <ryo (at) misakimix.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #include <machine/asm.h>
31
32 #if defined(LIBC_SCCS) && !defined(lint)
33 RCSID("$NetBSD: memcpy.S,v 1.2.94.2 2020/04/21 19:37:46 martin Exp $")
34 #endif
35
/*
 * Build-variant selection and register roles.
 *
 * Exactly one of MEMCOPY, MEMMOVE or BCOPY selects which entry point this
 * file emits; when none is defined, MEMCOPY is the default.
 *
 * REG_DST / REG_SRC name the registers holding the destination and source
 * pointers.  They are r4/r5 for MEMCOPY and MEMMOVE and swapped (r5/r4)
 * for BCOPY -- presumably because bcopy takes (src, dst, len) while
 * memcpy/memmove take (dst, src, len).
 *
 * REG_DST0 (r3) exists only for MEMCOPY/MEMMOVE: the entry code stores the
 * original destination there and the return paths move it to r0.  The
 * code uses "#ifdef REG_DST0" to tell the variants apart.
 *
 * REG_LEN (r6) is the byte count for every variant.
 */
36 #if !defined(MEMCOPY) && !defined(MEMMOVE) && !defined(BCOPY)
37 #define MEMCOPY
38 #endif
39
40 #if defined(MEMCOPY) || defined(MEMMOVE)
41 #define REG_DST0 r3
42 #define REG_SRC r5
43 #define REG_DST r4
44 #else
45 #define REG_SRC r4
46 #define REG_DST r5
47 #endif
48
49 #define REG_LEN r6
50
/*
 * memcpy / memmove / bcopy entry point (the emitted name depends on the
 * MEMCOPY / MEMMOVE / BCOPY build macro) and the forward copy path.
 *
 * In:      REG_DST = destination, REG_SRC = source, REG_LEN = byte count.
 * Out:     MEMCOPY/MEMMOVE: r0 = original destination (kept in REG_DST0).
 *          BCOPY: no return value is set (the rts delay slot is a nop).
 * Scratch: r0, r1 and the T bit.  REG_SRC, REG_DST and REG_LEN are
 *          modified in place as the copy advances.
 *
 * Flow:
 *   - src == dst: return immediately.
 *   - src <  dst (unsigned): branch to bcopy_overlap, which copies
 *     backwards from the end of the buffers.
 *   - otherwise copy forwards, using the widest unit that the relative
 *     alignment (src ^ dst) & 3 permits:
 *         0       longword_align  (byte/word prologue, then 32-bit units)
 *         2       word_align      (optional byte, then 16-bit units)
 *         1 or 3  no_align        (byte loop only)
 *     Whatever the wide loops leave over is finished by the byte loop at
 *     no_align.
 *
 * NOTE: the instruction that follows each bt/s, bf/s, bra and rts sits in
 * that branch's delay slot and is executed whether or not the branch is
 * taken (plain bt/bf have no delay slot).  Several delay slots here are
 * the first instruction under the next label, and the T bit tested at
 * no_align_delay is always produced by a "tst REG_LEN,REG_LEN" in the
 * delay slot of the branch that jumps there.  Do not reorder instructions
 * around branches without re-checking this.
 */
51 #if defined(MEMCOPY)
52 ENTRY(memcpy)
53 #elif defined(MEMMOVE)
54 ENTRY(memmove)
55 #elif defined(BCOPY)
56 ENTRY(bcopy)
57 #endif
58 #ifdef REG_DST0
/* Remember the original dst; the return paths move it to r0. */
59 mov REG_DST,REG_DST0
60 #endif
61 cmp/eq REG_DST,REG_SRC /* if ( src == dst ) return; */
62 bt/s bcopy_return
63 cmp/hi REG_DST,REG_SRC /* delay slot: T = ( src > dst ), unsigned */
64 bf/s bcopy_overlap /* src < dst: copy backwards instead */
65
66 mov REG_SRC,r0 /* delay slot of the bf/s above */
67 xor REG_DST,r0
68 and #3,r0
69 mov r0,r1 /* r1 = (src ^ dst) & 3, re-read at word_align */
70 tst r0,r0 /* (src ^ dst) & 3 */
71 bf/s word_align /* low bits differ: not longword-compatible */
72
/* src and dst share the same alignment modulo 4. */
73 longword_align:
74 tst REG_LEN,REG_LEN /* if ( len==0 ) return; (also the delay slot of bf/s word_align) */
75 bt/s bcopy_return
76
77
78 mov REG_SRC,r0 /* delay slot of the bt/s above */
79 tst #1,r0 /* if ( src & 1 ) */
80 bt 1f
81 mov.b @REG_SRC+,r0 /* *dst++ = *src++; */
82 add #-1,REG_LEN
83 mov.b r0,@REG_DST
84 add #1,REG_DST
85 1:
86
87
/* src is now even; copy one 16-bit word if that is needed to reach a 4-byte boundary. */
88 mov #1,r0
89 cmp/hi r0,REG_LEN /* if ( (len > 1) && */
90 bf/s 1f
91 mov REG_SRC,r0 /* delay slot of the bf/s above */
92 tst #2,r0 /* (src & 2) { */
93 bt 1f
94 mov.w @REG_SRC+,r0 /* *((unsigned short*)dst)++ = *((unsigned short*)src)++; */
95 add #-2,REG_LEN /* len -= 2; */
96 mov.w r0,@REG_DST
97 add #2,REG_DST /* } */
98 1:
99
100
/* Main 32-bit loop; r1 holds the constant 3 for the "len > 3" test. */
101 mov #3,r1
102 cmp/hi r1,REG_LEN /* while ( len > 3 ) { */
103 bf/s no_align_delay
104 tst REG_LEN,REG_LEN /* delay slot: T = ( len == 0 ) for no_align_delay */
105 2:
106 mov.l @REG_SRC+,r0 /* *((unsigned long*)dst)++ = *((unsigned long*)src)++; */
107 add #-4,REG_LEN /* len -= 4; */
108 mov.l r0,@REG_DST
109 cmp/hi r1,REG_LEN
110 bt/s 2b
111 add #4,REG_DST /* } (delay slot: runs on every iteration, including the last) */
112
113 bra no_align_delay /* finish the 0..3 remaining bytes */
114 tst REG_LEN,REG_LEN /* delay slot: T = ( len == 0 ) for no_align_delay */
115
116
/* (src ^ dst) & 3 is non-zero here; r1 still holds that value. */
117 word_align:
118 mov r1,r0
119 tst #1,r0
120 bf/s no_align_delay /* bit 0 differs: only byte copies are possible */
121 tst REG_LEN,REG_LEN /* if ( len == 0 ) return; (delay slot of the bf/s above) */
122 bt bcopy_return
123
124
125 mov REG_SRC,r0 /* if ( src & 1 ) */
126 tst #1,r0
127 bt 1f
128 mov.b @REG_SRC+,r0 /* *dst++ = *src++; */
129 add #-1,REG_LEN
130 mov.b r0,@REG_DST
131 add #1,REG_DST
132 1:
133
134
/* 16-bit loop; r1 holds the constant 1 for the "len > 1" test. */
135 mov #1,r1
136 cmp/hi r1,REG_LEN /* while ( len > 1 ) { */
137 bf/s no_align_delay
138 tst REG_LEN,REG_LEN /* delay slot: T = ( len == 0 ) for no_align_delay */
139 2:
140 mov.w @REG_SRC+,r0 /* *((unsigned short*)dst)++ = *((unsigned short*)src)++; */
141 add #-2,REG_LEN /* len -= 2; */
142 mov.w r0,@REG_DST
143 cmp/hi r1,REG_LEN
144 bt/s 2b
145 add #2,REG_DST /* } (delay slot: runs on every iteration, including the last) */
146
147
/* Byte loop.  Entering at no_align_delay skips the tst because the jumping branch already set T. */
148 no_align:
149 tst REG_LEN,REG_LEN /* while ( len != 0 ) { */
150 no_align_delay:
151 bt bcopy_return
152 1:
153 mov.b @REG_SRC+,r0 /* *dst++ = *src++; */
154 add #-1,REG_LEN /* len--; */
155 mov.b r0,@REG_DST
156 tst REG_LEN,REG_LEN
157 bf/s 1b
158 add #1,REG_DST /* } (delay slot: runs on every iteration, including the last) */
/* Common exit for the forward path and the early-return cases. */
159 bcopy_return:
160 rts
161 #ifdef REG_DST0
162 mov REG_DST0,r0 /* rts delay slot: return the original dst */
163 #else
164 nop
165 #endif
166
167
/*
 * bcopy_overlap: backward copy, entered from the entry code when src is
 * below dst (unsigned compare).  Copying from the highest address down is
 * presumably what keeps an overlapping destination from overwriting
 * source bytes before they have been read.
 *
 * Both pointers are first advanced by len so they point one past the end
 * of their buffers; every transfer then pre-decrements.  The source
 * pointer is decremented with an explicit add before each load, while the
 * destination uses the @-Rn pre-decrement store.
 *
 * The structure mirrors the forward path: the relative alignment
 * (src ^ dst) & 3 selects 32-bit (ov_longword_align), 16-bit
 * (ov_word_align) or byte-only (ov_no_align) copying, and the byte loop
 * finishes whatever the wide loops leave over.
 *
 * As in the forward path, the instruction after each bt/s, bf/s, bra and
 * rts is a delay slot that executes regardless of the branch outcome; the
 * T bit tested at ov_no_align_delay comes from the "tst REG_LEN,REG_LEN"
 * in the delay slot of the branch that jumps there.
 */
168 bcopy_overlap:
169 add REG_LEN,REG_SRC /* src += len; */
170 add REG_LEN,REG_DST /* dst += len; */
171
172 mov REG_SRC,r0
173 xor REG_DST,r0
174 and #3,r0
175 mov r0,r1 /* r1 = (src ^ dst) & 3, re-read at ov_word_align */
176 tst r0,r0 /* (src ^ dst) & 3 */
177 bf/s ov_word_align /* low bits differ: not longword-compatible */
178
/* The end pointers share the same alignment modulo 4. */
179 ov_longword_align:
180 tst REG_LEN,REG_LEN /* if ( len==0 ) return; (also the delay slot of bf/s ov_word_align) */
181 bt/s bcopy_return
182
183
184 mov REG_SRC,r0 /* delay slot of the bt/s above */
185 tst #1,r0 /* if ( src & 1 ) */
186 bt 1f
187 add #-1,REG_SRC /* *--dst = *--src; */
188 mov.b @REG_SRC,r0
189 mov.b r0,@-REG_DST
190 add #-1,REG_LEN
191 1:
192
193
/* src is now even; copy one 16-bit word if that is needed to reach a 4-byte boundary. */
194 mov #1,r0
195 cmp/hi r0,REG_LEN /* if ( (len > 1) && */
196 bf/s 1f
197 mov REG_SRC,r0 /* delay slot of the bf/s above */
198 tst #2,r0 /* (src & 2) { */
199 bt 1f
200 add #-2,REG_SRC /* *--((unsigned short*)dst) = *--((unsigned short*)src); */
201 mov.w @REG_SRC,r0
202 add #-2,REG_LEN /* len -= 2; */
203 mov.w r0,@-REG_DST /* } */
204 1:
205
206
/* Main 32-bit loop; r1 holds the constant 3 for the "len > 3" test. */
207 mov #3,r1
208 cmp/hi r1,REG_LEN /* while ( len > 3 ) { */
209 bf/s ov_no_align_delay
210 tst REG_LEN,REG_LEN /* delay slot: T = ( len == 0 ) for ov_no_align_delay */
211 2:
212 add #-4,REG_SRC
213 mov.l @REG_SRC,r0 /* *--((unsigned long*)dst) = *--((unsigned long*)src); */
214 add #-4,REG_LEN /* len -= 4; */
215 cmp/hi r1,REG_LEN
216 bt/s 2b
217 mov.l r0,@-REG_DST /* } (delay slot: the store runs on every iteration, including the last) */
218
219 bra ov_no_align_delay /* finish the 0..3 remaining bytes */
220 tst REG_LEN,REG_LEN /* delay slot: T = ( len == 0 ) for ov_no_align_delay */
221
222
/* (src ^ dst) & 3 is non-zero here; r1 still holds that value. */
223 ov_word_align:
224 mov r1,r0
225 tst #1,r0
226 bf/s ov_no_align_delay /* bit 0 differs: only byte copies are possible */
227 tst REG_LEN,REG_LEN /* if ( len == 0 ) return; (delay slot of the bf/s above) */
228 bt bcopy_return
229
230
231 mov REG_SRC,r0 /* if ( src & 1 ) */
232 tst #1,r0
233 bt 1f
234 add #-1,REG_SRC
235 mov.b @REG_SRC,r0 /* *--dst = *--src; */
236 add #-1,REG_LEN
237 mov.b r0,@-REG_DST
238 1:
239
240
/* 16-bit loop; r1 holds the constant 1 for the "len > 1" test. */
241 mov #1,r1
242 cmp/hi r1,REG_LEN /* while ( len > 1 ) { */
243 bf/s ov_no_align_delay
244 tst REG_LEN,REG_LEN /* delay slot: T = ( len == 0 ) for ov_no_align_delay */
245 2:
246 add #-2,REG_SRC
247 mov.w @REG_SRC,r0 /* *--((unsigned short*)dst) = *--((unsigned short*)src); */
248 add #-2,REG_LEN /* len -= 2; */
249 cmp/hi r1,REG_LEN
250 bt/s 2b
251 mov.w r0,@-REG_DST /* } (delay slot: the store runs on every iteration, including the last) */
252
253
/* Byte loop.  Entering at ov_no_align_delay skips the tst because the jumping branch already set T. */
254 ov_no_align:
255 tst REG_LEN,REG_LEN /* while ( len != 0 ) { */
256 ov_no_align_delay:
257 bt 9f
258 1:
259 add #-1,REG_SRC
260 mov.b @REG_SRC,r0 /* *--dst = *--src; */
261 add #-1,REG_LEN /* len--; */
262 tst REG_LEN,REG_LEN
263 bf/s 1b
264 mov.b r0,@-REG_DST /* } (delay slot: the store runs on every iteration, including the last) */
/* Exit of the backward path; same return sequence as bcopy_return. */
265 9:
266 rts
267 #ifdef REG_DST0
268 mov REG_DST0,r0 /* rts delay slot: return the original dst */
269 #else
270 nop
271 #endif
272