memcpy.S revision 1.3 1 1.3 msaitoh /* $NetBSD: memcpy.S,v 1.3 2024/02/07 04:20:25 msaitoh Exp $ */
2 1.1 christos
3 1.2 uwe /*
4 1.3 msaitoh * Copyright (c) 2000 SHIMIZU Ryo
5 1.2 uwe * All rights reserved.
6 1.2 uwe *
7 1.2 uwe * Redistribution and use in source and binary forms, with or without
8 1.2 uwe * modification, are permitted provided that the following conditions
9 1.2 uwe * are met:
10 1.2 uwe * 1. Redistributions of source code must retain the above copyright
11 1.2 uwe * notice, this list of conditions and the following disclaimer.
12 1.2 uwe * 2. Redistributions in binary form must reproduce the above copyright
13 1.2 uwe * notice, this list of conditions and the following disclaimer in the
14 1.2 uwe * documentation and/or other materials provided with the distribution.
15 1.2 uwe * 3. The name of the author may not be used to endorse or promote products
16 1.2 uwe * derived from this software without specific prior written permission.
17 1.2 uwe *
18 1.2 uwe * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 1.2 uwe * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 1.2 uwe * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 1.2 uwe * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 1.2 uwe * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 1.2 uwe * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 1.2 uwe * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 1.2 uwe * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 1.2 uwe * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 1.2 uwe * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 1.2 uwe */
29 1.2 uwe
30 1.2 uwe #include <machine/asm.h>
31 1.2 uwe
32 1.2 uwe #if defined(LIBC_SCCS) && !defined(lint)
33 1.3 msaitoh RCSID("$NetBSD: memcpy.S,v 1.3 2024/02/07 04:20:25 msaitoh Exp $")
34 1.2 uwe #endif
35 1.2 uwe
/*
 * Build configuration.
 *
 * This one source produces memcpy, memmove or bcopy depending on which
 * of MEMCOPY / MEMMOVE / BCOPY is defined; MEMCOPY is the default when
 * none of them is.
 *
 * Register roles:
 *   REG_DST   destination pointer (r4 for memcpy/memmove, r5 for bcopy)
 *   REG_SRC   source pointer      (r5 for memcpy/memmove, r4 for bcopy)
 *   REG_LEN   byte count          (r6 in every variant)
 *   REG_DST0  r3, defined only for memcpy/memmove; the code uses it to
 *             keep the original destination so it can be returned in r0.
 */
36 1.2 uwe #if !defined(MEMCOPY) && !defined(MEMMOVE) && !defined(BCOPY)
37 1.1 christos #define MEMCOPY
38 1.2 uwe #endif
39 1.2 uwe
/* memcpy/memmove: (dst, src, len); the original dst is the return value. */
40 1.2 uwe #if defined(MEMCOPY) || defined(MEMMOVE)
41 1.2 uwe #define REG_DST0 r3
42 1.2 uwe #define REG_SRC r5
43 1.2 uwe #define REG_DST r4
44 1.2 uwe #else
/* bcopy: (src, dst, len); REG_DST0 stays undefined, so nothing is returned. */
45 1.2 uwe #define REG_SRC r4
46 1.2 uwe #define REG_DST r5
47 1.2 uwe #endif
48 1.2 uwe
49 1.2 uwe #define REG_LEN r6
50 1.2 uwe
/*
 * Entry point: memcpy, memmove or bcopy, chosen by which of
 * MEMCOPY / MEMMOVE / BCOPY is defined.
 *
 * In:   REG_DST = destination, REG_SRC = source, REG_LEN = byte count.
 * Out:  r0 = REG_DST0 (the destination as passed in) when REG_DST0 is
 *       defined; otherwise the return delay slot is a nop.
 * Uses: r0 and r1 as scratch, REG_DST0 when defined, and the T bit.
 *       REG_DST, REG_SRC and REG_LEN are modified.
 *
 * This part handles src == dst (nothing to do) and src > dst, which is
 * copied in ascending address order.  src < dst branches to
 * bcopy_overlap, which copies in descending order, so overlapping
 * buffers are handled in every variant.
 *
 * Strategy for the ascending copy, chosen from (src ^ dst) & 3:
 *   == 0          align src, then copy longwords   (longword_align)
 *   bit 0 clear   align src, then copy words       (word_align)
 *   bit 0 set     copy bytes only                  (no_align)
 * Whatever is left over is finished by the byte loop at no_align.
 *
 * Delay slots: the instruction following bt/s, bf/s, bra and rts is
 * executed whether or not the branch is taken.  Several labels below
 * (longword_align, no_align_delay) are placed so that a "tst" executed
 * in a delay slot supplies the T bit tested at the branch target.
 */
51 1.2 uwe #if defined(MEMCOPY)
52 1.2 uwe ENTRY(memcpy)
53 1.2 uwe #elif defined(MEMMOVE)
54 1.2 uwe ENTRY(memmove)
55 1.2 uwe #elif defined(BCOPY)
56 1.2 uwe ENTRY(bcopy)
57 1.2 uwe #endif
58 1.2 uwe #ifdef REG_DST0
59 1.2 uwe mov REG_DST,REG_DST0 /* keep the original dst for the return value */
60 1.2 uwe #endif
61 1.2 uwe cmp/eq REG_DST,REG_SRC /* if ( src == dst ) return; */
62 1.2 uwe bt/s bcopy_return
63 1.2 uwe cmp/hi REG_DST,REG_SRC /* (delay slot) T = ( src > dst ), unsigned */
64 1.2 uwe bf/s bcopy_overlap /* src < dst: copy from the end instead */
65 1.2 uwe
66 1.2 uwe mov REG_SRC,r0 /* (delay slot of the bf/s above) */
67 1.2 uwe xor REG_DST,r0
68 1.2 uwe and #3,r0
69 1.2 uwe mov r0,r1 /* r1 = (src ^ dst) & 3, read again at word_align */
70 1.2 uwe tst r0,r0 /* (src ^ dst) & 3 */
71 1.2 uwe bf/s word_align /* low two bits differ: longword copy impossible */
72 1.2 uwe
/* src and dst agree in their low two bits: both can be longword aligned. */
73 1.2 uwe longword_align:
74 1.2 uwe tst REG_LEN,REG_LEN /* (delay slot) if ( len==0 ) return; */
75 1.2 uwe bt/s bcopy_return
76 1.2 uwe
77 1.2 uwe
78 1.2 uwe mov REG_SRC,r0 /* (delay slot) */
79 1.2 uwe tst #1,r0 /* if ( src & 1 ) */
80 1.2 uwe bt 1f
81 1.2 uwe mov.b @REG_SRC+,r0 /* *dst++ = *src++; */
82 1.2 uwe add #-1,REG_LEN /* len--; */
83 1.2 uwe mov.b r0,@REG_DST
84 1.2 uwe add #1,REG_DST
85 1.2 uwe 1:
86 1.2 uwe
87 1.2 uwe
88 1.2 uwe mov #1,r0
89 1.2 uwe cmp/hi r0,REG_LEN /* if ( (len > 1) && */
90 1.2 uwe bf/s 1f
91 1.2 uwe mov REG_SRC,r0 /* (delay slot) */
92 1.2 uwe tst #2,r0 /* (src & 2) { */
93 1.2 uwe bt 1f
94 1.2 uwe mov.w @REG_SRC+,r0 /* *((unsigned short*)dst)++ = *((unsigned short*)src)++; */
95 1.2 uwe add #-2,REG_LEN /* len -= 2; */
96 1.2 uwe mov.w r0,@REG_DST
97 1.2 uwe add #2,REG_DST /* } */
98 1.2 uwe 1:
99 1.2 uwe
100 1.2 uwe
101 1.2 uwe mov #3,r1
102 1.2 uwe cmp/hi r1,REG_LEN /* while ( len > 3 ) { */
103 1.2 uwe bf/s no_align_delay
104 1.2 uwe tst REG_LEN,REG_LEN /* (delay slot) T = ( len == 0 ) for no_align_delay */
105 1.2 uwe 2:
106 1.2 uwe mov.l @REG_SRC+,r0 /* *((unsigned long*)dst)++ = *((unsigned long*)src)++; */
107 1.2 uwe add #-4,REG_LEN /* len -= 4; */
108 1.2 uwe mov.l r0,@REG_DST
109 1.2 uwe cmp/hi r1,REG_LEN
110 1.2 uwe bt/s 2b
111 1.2 uwe add #4,REG_DST /* (delay slot) } */
112 1.2 uwe
113 1.2 uwe bra no_align_delay /* finish the 0..3 remaining bytes */
114 1.2 uwe tst REG_LEN,REG_LEN /* (delay slot) T = ( len == 0 ) */
115 1.2 uwe
116 1.2 uwe
/*
 * src and dst differ in their low two bits.  r1 still holds
 * (src ^ dst) & 3; if bit 0 is set the pointers can never be word
 * aligned together, so go straight to the byte loop.
 */
117 1.2 uwe word_align:
118 1.2 uwe mov r1,r0
119 1.2 uwe tst #1,r0
120 1.2 uwe bf/s no_align_delay
121 1.2 uwe tst REG_LEN,REG_LEN /* (delay slot) if ( len == 0 ) return; */
122 1.2 uwe bt bcopy_return
123 1.2 uwe
124 1.2 uwe
125 1.2 uwe mov REG_SRC,r0 /* if ( src & 1 ) */
126 1.2 uwe tst #1,r0
127 1.2 uwe bt 1f
128 1.2 uwe mov.b @REG_SRC+,r0 /* *dst++ = *src++; */
129 1.2 uwe add #-1,REG_LEN /* len--; */
130 1.2 uwe mov.b r0,@REG_DST
131 1.2 uwe add #1,REG_DST
132 1.2 uwe 1:
133 1.2 uwe
134 1.2 uwe
135 1.2 uwe mov #1,r1
136 1.2 uwe cmp/hi r1,REG_LEN /* while ( len > 1 ) { */
137 1.2 uwe bf/s no_align_delay
138 1.2 uwe tst REG_LEN,REG_LEN /* (delay slot) T = ( len == 0 ) for no_align_delay */
139 1.2 uwe 2:
140 1.2 uwe mov.w @REG_SRC+,r0 /* *((unsigned short*)dst)++ = *((unsigned short*)src)++; */
141 1.2 uwe add #-2,REG_LEN /* len -= 2; */
142 1.2 uwe mov.w r0,@REG_DST
143 1.2 uwe cmp/hi r1,REG_LEN
144 1.2 uwe bt/s 2b
145 1.2 uwe add #2,REG_DST /* (delay slot) } */
146 1.2 uwe
147 1.2 uwe
/*
 * Byte loop for whatever is left.  Code that jumps to no_align_delay
 * has already executed "tst REG_LEN,REG_LEN" in a delay slot; code that
 * falls in from above executes the one at no_align.
 */
148 1.2 uwe no_align:
149 1.2 uwe tst REG_LEN,REG_LEN /* while ( len != 0 ) { */
150 1.2 uwe no_align_delay:
151 1.2 uwe bt bcopy_return
152 1.2 uwe 1:
153 1.2 uwe mov.b @REG_SRC+,r0 /* *dst++ = *src++; */
154 1.2 uwe add #-1,REG_LEN /* len--; */
155 1.2 uwe mov.b r0,@REG_DST
156 1.2 uwe tst REG_LEN,REG_LEN
157 1.2 uwe bf/s 1b
158 1.2 uwe add #1,REG_DST /* (delay slot) } */
/* Common exit; the instruction after rts is its delay slot. */
159 1.2 uwe bcopy_return:
160 1.2 uwe rts
161 1.2 uwe #ifdef REG_DST0
162 1.2 uwe mov REG_DST0,r0 /* (delay slot) return the original dst */
163 1.2 uwe #else
164 1.2 uwe nop
165 1.2 uwe #endif
166 1.2 uwe
167 1.2 uwe
/*
 * bcopy_overlap: descending copy, taken when src < dst.
 *
 * REG_SRC and REG_DST are first advanced by REG_LEN so that both point
 * one past the last byte; data is then moved from the end of the
 * buffers toward the start.  The source pointer is decremented with an
 * explicit "add" before each load, while stores use the pre-decrement
 * form @-REG_DST.
 *
 * Strategy, chosen from (src ^ dst) & 3 of the end pointers:
 *   == 0          align src, then copy longwords   (ov_longword_align)
 *   bit 0 clear   align src, then copy words       (ov_word_align)
 *   bit 0 set     copy bytes only                  (ov_no_align)
 * Whatever is left over is finished by the byte loop at ov_no_align.
 *
 * Uses r0 and r1 as scratch and the T bit.  Returns through
 * bcopy_return or local label 9; with REG_DST0 defined, r0 = REG_DST0.
 *
 * Delay slots: the instruction following bt/s, bf/s, bra and rts is
 * executed whether or not the branch is taken.  ov_longword_align and
 * ov_no_align_delay are placed so that a "tst" executed in a delay slot
 * supplies the T bit tested at the branch target.
 */
168 1.2 uwe bcopy_overlap:
169 1.2 uwe add REG_LEN,REG_SRC /* src += len; */
170 1.2 uwe add REG_LEN,REG_DST /* dst += len; */
171 1.2 uwe
172 1.2 uwe mov REG_SRC,r0
173 1.2 uwe xor REG_DST,r0
174 1.2 uwe and #3,r0
175 1.2 uwe mov r0,r1 /* r1 = (src ^ dst) & 3, read again at ov_word_align */
176 1.2 uwe tst r0,r0 /* (src ^ dst) & 3 */
177 1.2 uwe bf/s ov_word_align /* low two bits differ: longword copy impossible */
178 1.2 uwe
/* src and dst agree in their low two bits: both can be longword aligned. */
179 1.2 uwe ov_longword_align:
180 1.2 uwe tst REG_LEN,REG_LEN /* (delay slot) if ( len==0 ) return; */
181 1.2 uwe bt/s bcopy_return
182 1.2 uwe
183 1.2 uwe
184 1.2 uwe mov REG_SRC,r0 /* (delay slot) */
185 1.2 uwe tst #1,r0 /* if ( src & 1 ) */
186 1.2 uwe bt 1f
187 1.2 uwe add #-1,REG_SRC /* *--dst = *--src; */
188 1.2 uwe mov.b @REG_SRC,r0
189 1.2 uwe mov.b r0,@-REG_DST
190 1.2 uwe add #-1,REG_LEN /* len--; */
191 1.2 uwe 1:
192 1.2 uwe
193 1.2 uwe
194 1.2 uwe mov #1,r0
195 1.2 uwe cmp/hi r0,REG_LEN /* if ( (len > 1) && */
196 1.2 uwe bf/s 1f
197 1.2 uwe mov REG_SRC,r0 /* (delay slot) */
198 1.2 uwe tst #2,r0 /* (src & 2) { */
199 1.2 uwe bt 1f
200 1.2 uwe add #-2,REG_SRC /* *--((unsigned short*)dst) = *--((unsigned short*)src); */
201 1.2 uwe mov.w @REG_SRC,r0
202 1.2 uwe add #-2,REG_LEN /* len -= 2; */
203 1.2 uwe mov.w r0,@-REG_DST /* } */
204 1.2 uwe 1:
205 1.2 uwe
206 1.2 uwe
207 1.2 uwe mov #3,r1
208 1.2 uwe cmp/hi r1,REG_LEN /* while ( len > 3 ) { */
209 1.2 uwe bf/s ov_no_align_delay
210 1.2 uwe tst REG_LEN,REG_LEN /* (delay slot) T = ( len == 0 ) for ov_no_align_delay */
211 1.2 uwe 2:
212 1.2 uwe add #-4,REG_SRC
213 1.2 uwe mov.l @REG_SRC,r0 /* *--((unsigned long*)dst) = *--((unsigned long*)src); */
214 1.2 uwe add #-4,REG_LEN /* len -= 4; */
215 1.2 uwe cmp/hi r1,REG_LEN
216 1.2 uwe bt/s 2b
217 1.2 uwe mov.l r0,@-REG_DST /* (delay slot) } */
218 1.2 uwe
219 1.2 uwe bra ov_no_align_delay /* finish the 0..3 remaining bytes */
220 1.2 uwe tst REG_LEN,REG_LEN /* (delay slot) T = ( len == 0 ) */
221 1.2 uwe
222 1.2 uwe
/*
 * src and dst differ in their low two bits.  r1 still holds
 * (src ^ dst) & 3; if bit 0 is set the pointers can never be word
 * aligned together, so go straight to the byte loop.
 */
223 1.2 uwe ov_word_align:
224 1.2 uwe mov r1,r0
225 1.2 uwe tst #1,r0
226 1.2 uwe bf/s ov_no_align_delay
227 1.2 uwe tst REG_LEN,REG_LEN /* (delay slot) if ( len == 0 ) return; */
228 1.2 uwe bt bcopy_return
229 1.2 uwe
230 1.2 uwe
231 1.2 uwe mov REG_SRC,r0 /* if ( src & 1 ) */
232 1.2 uwe tst #1,r0
233 1.2 uwe bt 1f
234 1.2 uwe add #-1,REG_SRC
235 1.2 uwe mov.b @REG_SRC,r0 /* *--dst = *--src; */
236 1.2 uwe add #-1,REG_LEN /* len--; */
237 1.2 uwe mov.b r0,@-REG_DST
238 1.2 uwe 1:
239 1.2 uwe
240 1.2 uwe
241 1.2 uwe mov #1,r1
242 1.2 uwe cmp/hi r1,REG_LEN /* while ( len > 1 ) { */
243 1.2 uwe bf/s ov_no_align_delay
244 1.2 uwe tst REG_LEN,REG_LEN /* (delay slot) T = ( len == 0 ) for ov_no_align_delay */
245 1.2 uwe 2:
246 1.2 uwe add #-2,REG_SRC
247 1.2 uwe mov.w @REG_SRC,r0 /* *--((unsigned short*)dst) = *--((unsigned short*)src); */
248 1.2 uwe add #-2,REG_LEN /* len -= 2; */
249 1.2 uwe cmp/hi r1,REG_LEN
250 1.2 uwe bt/s 2b
251 1.2 uwe mov.w r0,@-REG_DST /* (delay slot) } */
252 1.2 uwe
253 1.2 uwe
/*
 * Byte loop for whatever is left.  Code that jumps to ov_no_align_delay
 * has already executed "tst REG_LEN,REG_LEN" in a delay slot; code that
 * falls in from above executes the one at ov_no_align.
 */
254 1.2 uwe ov_no_align:
255 1.2 uwe tst REG_LEN,REG_LEN /* while ( len != 0 ) { */
256 1.2 uwe ov_no_align_delay:
257 1.2 uwe bt 9f
258 1.2 uwe 1:
259 1.2 uwe add #-1,REG_SRC
260 1.2 uwe mov.b @REG_SRC,r0 /* *--dst = *--src; */
261 1.2 uwe add #-1,REG_LEN /* len--; */
262 1.2 uwe tst REG_LEN,REG_LEN
263 1.2 uwe bf/s 1b
264 1.2 uwe mov.b r0,@-REG_DST /* (delay slot) } */
/* Exit for the descending copy; the instruction after rts is its delay slot. */
265 1.2 uwe 9:
266 1.2 uwe rts
267 1.2 uwe #ifdef REG_DST0
268 1.2 uwe mov REG_DST0,r0 /* (delay slot) return the original dst */
269 1.2 uwe #else
270 1.2 uwe nop
271 1.2 uwe #endif
272