bcopy.S revision 1.1 1 1.1 christos /* $NetBSD: bcopy.S,v 1.1 2005/12/20 19:28:49 christos Exp $ */
2 1.1 christos
3 1.1 christos /*
4 1.1 christos * Copyright (c) 1995 Carnegie-Mellon University.
5 1.1 christos * All rights reserved.
6 1.1 christos *
7 1.1 christos * Author: Trevor Blackwell. Support for use as memcpy() and memmove()
8 1.1 christos * added by Chris Demetriou.
9 1.1 christos *
10 1.1 christos * Permission to use, copy, modify and distribute this software and
11 1.1 christos * its documentation is hereby granted, provided that both the copyright
12 1.1 christos * notice and this permission notice appear in all copies of the
13 1.1 christos * software, derivative works or modified versions, and any portions
14 1.1 christos * thereof, and that both notices appear in supporting documentation.
15 1.1 christos *
16 1.1 christos * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17 1.1 christos * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18 1.1 christos * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19 1.1 christos *
20 1.1 christos * Carnegie Mellon requests users of this software to return to
21 1.1 christos *
22 1.1 christos * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
23 1.1 christos * School of Computer Science
24 1.1 christos * Carnegie Mellon University
25 1.1 christos * Pittsburgh PA 15213-3890
26 1.1 christos *
27 1.1 christos * any improvements or extensions that they make and grant Carnegie the
28 1.1 christos * rights to redistribute these changes.
29 1.1 christos */
30 1.1 christos
31 1.1 christos #include <machine/asm.h>
32 1.1 christos
33 1.1 christos #if defined(MEMCOPY) || defined(MEMMOVE) /* select entry-point name and argument registers for the variant being built */
34 1.1 christos #ifdef MEMCOPY
35 1.1 christos #define FUNCTION memcpy
36 1.1 christos #else
37 1.1 christos #define FUNCTION memmove
38 1.1 christos #endif
39 1.1 christos #define SRCREG a1 /* (to, from, len) argument order: the source is the second argument */
40 1.1 christos #define DSTREG a0 /* the destination is the first argument */
41 1.1 christos #else /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
42 1.1 christos #define FUNCTION bcopy
43 1.1 christos #define SRCREG a0 /* (from, to, len) argument order: the source is the first argument */
44 1.1 christos #define DSTREG a1 /* the destination is the second argument */
45 1.1 christos #endif /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
46 1.1 christos
47 1.1 christos #define SIZEREG a2 /* the byte count is the third argument in every variant */
48 1.1 christos
49 1.1 christos /*
50 1.1 christos * Copy bytes.
51 1.1 christos *
52 1.1 christos * void bcopy(char *from, char *to, size_t len);
53 1.1 christos * char *memcpy(void *to, const void *from, size_t len);
54 1.1 christos * char *memmove(void *to, const void *from, size_t len);
55 1.1 christos *
56 1.1 christos * No matter how invoked, the source and destination registers are used
57 1.1 christos * directly for calculation. There's no point in copying them to "working"
58 1.1 christos * registers, since the code uses their values "in place," and
59 1.1 christos * copying them would be slower.
60 1.1 christos */
61 1.1 christos
62 1.1 christos LEAF(FUNCTION,3) /* NOTE(review): the 3 is presumably the argument count; confirm against <machine/asm.h> */
63 1.1 christos /*
 * Entry point, overlap test, and the forward copy used when source and
 * destination have the same offset within a quadword.
 *
 * In:  SRCREG = source address, DSTREG = destination address,
 *      SIZEREG = byte count (tested as a signed value by the ble below).
 * Out: v0 = original destination when built as memcpy or memmove.
 * SRCREG, DSTREG and SIZEREG are modified in place; this section also
 * uses t0-t5 and a3 as scratch.
 *
 * Data moves a quadword at a time with ldq_u/stq_u. The first and last
 * quadwords are merged with the bytes already in the destination, so
 * bytes outside the requested range keep their values.
 */
64 1.1 christos #if defined(MEMCOPY) || defined(MEMMOVE)
65 1.1 christos /* set up return value, while we still can (DSTREG is advanced in place below) */
66 1.1 christos mov DSTREG,v0
67 1.1 christos #endif
68 1.1 christos
69 1.1 christos /* Check for zero or negative length; there is nothing to copy in either case */
70 1.1 christos ble SIZEREG,bcopy_done
71 1.1 christos
72 1.1 christos /* Check for overlap */
73 1.1 christos subq DSTREG,SRCREG,t5 /* t5 = dst - src */
74 1.1 christos cmpult t5,SIZEREG,t5 /* unsigned compare: set when dst lies in [src, src + len) */
75 1.1 christos bne t5,bcopy_overlap /* such a destination is handled by the backward copy */
76 1.1 christos
77 1.1 christos /* a3 = source end address (one past the last source byte) */
78 1.1 christos addq SRCREG,SIZEREG,a3
79 1.1 christos
80 1.1 christos /* Get the first word: the aligned quadword containing the first source byte */
81 1.1 christos ldq_u t2,0(SRCREG)
82 1.1 christos
83 1.1 christos /* Do they have the same alignment? */
84 1.1 christos xor SRCREG,DSTREG,t0
85 1.1 christos and t0,7,t0 /* t0 != 0 when src and dst differ in their low three address bits */
86 1.1 christos and DSTREG,7,t1 /* t1 = offset of dst within its quadword */
87 1.1 christos bne t0,bcopy_different_alignment
88 1.1 christos
89 1.1 christos /* src & dst have same alignment */
90 1.1 christos beq t1,bcopy_all_aligned /* offset 0: both pointers are already 8-byte aligned */
91 1.1 christos
92 1.1 christos ldq_u t3,0(DSTREG) /* t3 = current contents of the first destination quadword */
93 1.1 christos addq SIZEREG,t1,SIZEREG /* from now on the count is measured from the start of that quadword */
94 1.1 christos mskqh t2,SRCREG,t2 /* keep the source bytes at and above the starting offset */
95 1.1 christos mskql t3,SRCREG,t3 /* keep the destination bytes below the starting offset */
96 1.1 christos or t2,t3,t2 /* t2 = merged image of the first destination quadword */
97 1.1 christos
98 1.1 christos /* Either dst is 8-byte aligned, or t2 was merged above so it can be stored as a whole quadword */
99 1.1 christos
100 1.1 christos bcopy_all_aligned:
101 1.1 christos /* If 8 bytes or fewer remain, skip loop */
102 1.1 christos subq SIZEREG,1,t0
103 1.1 christos and SIZEREG,7,SIZEREG /* SIZEREG = bytes in the final quadword; 0 means all 8 */
104 1.1 christos bic t0,7,t0 /* t0 = (count - 1) & ~7 = bytes stored by the loop, final quadword excluded */
105 1.1 christos beq t0,bcopy_samealign_lp_end
106 1.1 christos
107 1.1 christos bcopy_samealign_lp:
108 1.1 christos stq_u t2,0(DSTREG) /* store the quadword fetched on the previous pass */
109 1.1 christos addq DSTREG,8,DSTREG
110 1.1 christos ldq_u t2,8(SRCREG) /* fetch the next source quadword */
111 1.1 christos subq t0,8,t0
112 1.1 christos addq SRCREG,8,SRCREG
113 1.1 christos bne t0,bcopy_samealign_lp
114 1.1 christos
115 1.1 christos bcopy_samealign_lp_end:
116 1.1 christos /* t2 holds the final quadword (bcopy_da_finish1 also branches here); if it is a full one, store it and exit */
117 1.1 christos bne SIZEREG,bcopy_small_left
118 1.1 christos stq_u t2,0(DSTREG)
119 1.1 christos RET
120 1.1 christos
121 1.1 christos bcopy_small_left:
122 1.1 christos mskql t2,SIZEREG,t4 /* keep the low SIZEREG bytes of the new data */
123 1.1 christos ldq_u t3,0(DSTREG)
124 1.1 christos mskqh t3,SIZEREG,t3 /* keep the destination bytes past the end of the copy */
125 1.1 christos or t4,t3,t4
126 1.1 christos stq_u t4,0(DSTREG)
127 1.1 christos RET
128 1.1 christos
129 1.1 christos bcopy_different_alignment:
130 1.1 christos /*
131 1.1 christos * this is the fun part
 *
 * src and dst sit at different offsets within their quadwords, so each
 * destination quadword is assembled from two neighbouring source
 * quadwords with extql/extqh. On entry t2 holds the quadword containing
 * the first source byte and t1 holds dst & 7.
132 1.1 christos */
133 1.1 christos addq SRCREG,SIZEREG,a3 /* a3 = source end address */
134 1.1 christos cmpule SIZEREG,8,t0
135 1.1 christos bne t0,bcopy_da_finish /* 8 bytes or fewer: handled in one shot */
136 1.1 christos
137 1.1 christos beq t1,bcopy_da_noentry /* dst already 8-byte aligned: no leading partial quadword */
138 1.1 christos
139 1.1 christos /* Do the initial partial word */
140 1.1 christos subq zero,DSTREG,t0
141 1.1 christos and t0,7,t0 /* t0 = bytes from dst up to the next quadword boundary */
142 1.1 christos ldq_u t3,7(SRCREG)
143 1.1 christos extql t2,SRCREG,t2
144 1.1 christos extqh t3,SRCREG,t3
145 1.1 christos or t2,t3,t5 /* t5 = the 8 source bytes starting at src */
146 1.1 christos insql t5,DSTREG,t5 /* move them up to the dst offset within its quadword */
147 1.1 christos ldq_u t6,0(DSTREG)
148 1.1 christos mskql t6,DSTREG,t6 /* keep the destination bytes below dst */
149 1.1 christos or t5,t6,t5
150 1.1 christos stq_u t5,0(DSTREG)
151 1.1 christos addq SRCREG,t0,SRCREG
152 1.1 christos addq DSTREG,t0,DSTREG /* dst is now 8-byte aligned */
153 1.1 christos subq SIZEREG,t0,SIZEREG
154 1.1 christos ldq_u t2,0(SRCREG) /* reload t2 for the advanced source pointer */
155 1.1 christos
156 1.1 christos bcopy_da_noentry:
157 1.1 christos subq SIZEREG,1,t0
158 1.1 christos bic t0,7,t0 /* t0 = (len - 1) & ~7 = bytes stored by the loop, final quadword excluded */
159 1.1 christos and SIZEREG,7,SIZEREG /* SIZEREG = bytes in the final quadword; 0 means all 8 */
160 1.1 christos beq t0,bcopy_da_finish2
161 1.1 christos
162 1.1 christos bcopy_da_lp: /* unrolled twice; t2 and t3 take turns holding the older and the newer source quadword */
163 1.1 christos ldq_u t3,7(SRCREG) /* t3 = next source quadword */
164 1.1 christos addq SRCREG,8,SRCREG
165 1.1 christos extql t2,SRCREG,t4 /* only the low three bits of SRCREG select the byte shift */
166 1.1 christos extqh t3,SRCREG,t5
167 1.1 christos subq t0,8,t0
168 1.1 christos or t4,t5,t5
169 1.1 christos stq t5,0(DSTREG) /* aligned store of one assembled quadword */
170 1.1 christos addq DSTREG,8,DSTREG
171 1.1 christos beq t0,bcopy_da_finish1 /* leave with the newest quadword in t3 */
172 1.1 christos ldq_u t2,7(SRCREG) /* second half: the same step with t2 and t3 swapped */
173 1.1 christos addq SRCREG,8,SRCREG
174 1.1 christos extql t3,SRCREG,t4
175 1.1 christos extqh t2,SRCREG,t5
176 1.1 christos subq t0,8,t0
177 1.1 christos or t4,t5,t5
178 1.1 christos stq t5,0(DSTREG)
179 1.1 christos addq DSTREG,8,DSTREG
180 1.1 christos bne t0,bcopy_da_lp
181 1.1 christos
182 1.1 christos bcopy_da_finish2:
183 1.1 christos /* The newest source quadword is in t2 here; move it to t3, where bcopy_da_finish1 expects it */
184 1.1 christos mov t2,t3
185 1.1 christos
186 1.1 christos bcopy_da_finish1:
187 1.1 christos /* Do the last partial word */
188 1.1 christos ldq_u t2,-1(a3) /* quadword containing the last source byte */
189 1.1 christos extql t3,SRCREG,t3
190 1.1 christos extqh t2,SRCREG,t2
191 1.1 christos or t2,t3,t2 /* t2 = image of the final destination quadword */
192 1.1 christos br zero,bcopy_samealign_lp_end /* shared tail stores all 8 bytes, or only the low SIZEREG bytes */
193 1.1 christos
194 1.1 christos bcopy_da_finish:
195 1.1 christos /* Copy of at most 8 bytes at any alignment (bcopy_ov_short also branches here); expects t2 = first source quadword, a3 = source end. All loads precede both stores. */
196 1.1 christos ldq_u t3,-1(a3) /* quadword containing the last source byte */
197 1.1 christos extql t2,SRCREG,t2
198 1.1 christos extqh t3,SRCREG,t3
199 1.1 christos or t2,t3,t2 /* t2 = source bytes starting at src; bytes past the length are masked off below */
200 1.1 christos insqh t2,DSTREG,t3 /* t3 = the part of the data that lands in the following destination quadword */
201 1.1 christos insql t2,DSTREG,t2 /* t2 = the part of the data that lands in the first destination quadword */
202 1.1 christos lda t4,-1(zero) /* t4 = all ones */
203 1.1 christos mskql t4,SIZEREG,t5 /* t5 = 0xff in each of the low (len & 7) bytes */
204 1.1 christos cmovne t5,t5,t4 /* use t5 as the byte mask unless it is 0 (len is 8), in which case keep all ones */
205 1.1 christos insqh t4,DSTREG,t5 /* position the mask exactly like the data */
206 1.1 christos insql t4,DSTREG,t4
207 1.1 christos addq DSTREG,SIZEREG,a4 /* a4 = destination end address */
208 1.1 christos ldq_u t6,0(DSTREG)
209 1.1 christos ldq_u t7,-1(a4) /* quadword containing the last destination byte */
210 1.1 christos bic t6,t4,t6 /* clear the destination bytes that are being replaced */
211 1.1 christos bic t7,t5,t7
212 1.1 christos and t2,t4,t2 /* drop data bytes that fall outside the mask */
213 1.1 christos and t3,t5,t3
214 1.1 christos or t2,t6,t2
215 1.1 christos or t3,t7,t3
216 1.1 christos stq_u t3,-1(a4) /* high part first: if the copy stays inside one quadword this rewrites the old contents and the next store supplies the new bytes */
217 1.1 christos stq_u t2,0(DSTREG)
218 1.1 christos RET
219 1.1 christos
220 1.1 christos bcopy_overlap:
221 1.1 christos /*
222 1.1 christos * Basically equivalent to previous case, only backwards.
223 1.1 christos * Not quite as highly optimized
 *
 * Reached when dst lies in [src, src + len). The copy runs from the
 * high addresses down, with a3 and a4 tracking the source and
 * destination ends. Source data is always fetched with the unaligned
 * ldq_u/extql/extqh sequence, whatever the relative alignment.
224 1.1 christos */
225 1.1 christos addq SRCREG,SIZEREG,a3 /* a3 = source end address */
226 1.1 christos addq DSTREG,SIZEREG,a4 /* a4 = destination end address */
227 1.1 christos
228 1.1 christos /* 8 bytes or fewer - don't worry about overlap */
229 1.1 christos cmpule SIZEREG,8,t0
230 1.1 christos bne t0,bcopy_ov_short
231 1.1 christos
232 1.1 christos /* Possibly do a partial first word */
233 1.1 christos and a4,7,t4 /* t4 = bytes of the copy that lie in the last, partial destination quadword */
234 1.1 christos beq t4,bcopy_ov_nostart2
235 1.1 christos subq a3,t4,a3
236 1.1 christos subq a4,t4,a4 /* a4 is now 8-byte aligned */
237 1.1 christos ldq_u t1,0(a3)
238 1.1 christos subq SIZEREG,t4,SIZEREG
239 1.1 christos ldq_u t2,7(a3)
240 1.1 christos ldq t3,0(a4) /* current contents of the last destination quadword */
241 1.1 christos extql t1,a3,t1
242 1.1 christos extqh t2,a3,t2
243 1.1 christos or t1,t2,t1 /* t1 = the 8 source bytes starting at a3 */
244 1.1 christos mskqh t3,t4,t3 /* keep the destination bytes past the end of the copy */
245 1.1 christos mskql t1,t4,t1 /* keep the low t4 source bytes */
246 1.1 christos or t1,t3,t1
247 1.1 christos stq t1,0(a4)
248 1.1 christos
249 1.1 christos bcopy_ov_nostart2:
250 1.1 christos bic SIZEREG,7,t4 /* t4 = bytes to move as whole quadwords */
251 1.1 christos and SIZEREG,7,SIZEREG /* SIZEREG = bytes left over at the low end afterwards */
252 1.1 christos beq t4,bcopy_ov_lp_end
253 1.1 christos
254 1.1 christos bcopy_ov_lp:
255 1.1 christos /* This could be more pipelined, but it doesn't seem worth it */
256 1.1 christos ldq_u t0,-8(a3)
257 1.1 christos subq a4,8,a4
258 1.1 christos ldq_u t1,-1(a3)
259 1.1 christos subq a3,8,a3
260 1.1 christos extql t0,a3,t0
261 1.1 christos extqh t1,a3,t1
262 1.1 christos subq t4,8,t4
263 1.1 christos or t0,t1,t0 /* t0 = the 8 source bytes starting at the updated a3 */
264 1.1 christos stq t0,0(a4) /* aligned store, one quadword lower on each pass */
265 1.1 christos bne t4,bcopy_ov_lp
266 1.1 christos
267 1.1 christos bcopy_ov_lp_end:
268 1.1 christos beq SIZEREG,bcopy_done
269 1.1 christos /* SIZEREG bytes remain at the start of the buffers; a4 is aligned and equals DSTREG + SIZEREG, so they run from dst to the end of its quadword */
270 1.1 christos ldq_u t0,0(SRCREG)
271 1.1 christos ldq_u t1,7(SRCREG)
272 1.1 christos ldq_u t2,0(DSTREG)
273 1.1 christos extql t0,SRCREG,t0
274 1.1 christos extqh t1,SRCREG,t1
275 1.1 christos or t0,t1,t0 /* t0 = the 8 source bytes starting at src */
276 1.1 christos insql t0,DSTREG,t0 /* move them up to the dst offset within its quadword */
277 1.1 christos mskql t2,DSTREG,t2 /* keep the destination bytes below dst */
278 1.1 christos or t2,t0,t2
279 1.1 christos stq_u t2,0(DSTREG)
280 1.1 christos
281 1.1 christos bcopy_done:
282 1.1 christos RET
283 1.1 christos
284 1.1 christos bcopy_ov_short: /* a3 was set at bcopy_overlap; bcopy_da_finish reads everything before it stores, so the overlap does not matter */
285 1.1 christos ldq_u t2,0(SRCREG) /* bcopy_da_finish expects the first source quadword in t2 */
286 1.1 christos br zero,bcopy_da_finish
287 1.1 christos
288 1.1 christos END(FUNCTION)
289