/*	$NetBSD: bcopy.S,v 1.1 2005/12/21 00:25:56 christos Exp $	*/
2
3 /*
4 * Mach Operating System
5 * Copyright (c) 1993 Carnegie Mellon University
6 * All Rights Reserved.
7 *
8 * Permission to use, copy, modify and distribute this software and its
9 * documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
16 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie Mellon
26 * the rights to redistribute these changes.
27 */
28
29 /*
30 * File: mips_bcopy.s
31 * Author: Chris Maeda
32 * Date: June 1993
33 *
34 * Fast copy routine. Derived from aligned_block_copy.
35 */
36
37
38 #include <mips/asm.h>
39 #define _LOCORE /* XXX not really, just assembly-code source */
40 #include <machine/endian.h>
41
42
/*
 * Source-identification strings, included only when LIBC_SCCS is defined
 * and lint is not.  ASMSTR is defined outside this file (presumably it
 * places the string in the object file -- confirm in <mips/asm.h>).
 */
#if !defined(lint) && defined(LIBC_SCCS)
	ASMSTR("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93")
	ASMSTR("$NetBSD: bcopy.S,v 1.1 2005/12/21 00:25:56 christos Exp $")
#endif /* !lint && LIBC_SCCS */

/* Emit the .abicalls directive only when __ABICALLS__ is defined. */
#if defined(__ABICALLS__)
	.abicalls
#endif /* __ABICALLS__ */
51
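/*
 *	bcopy(caddr_t src, caddr_t dst, unsigned int len)
 *
 *	a0	src address
 *	a1	dst address
 *	a2	length
 *
 *	When this file is built as memcpy or memmove (MEMCOPY or MEMMOVE
 *	defined) the first two arguments are swapped: a0 is the dst
 *	address and a1 is the src address.
 */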
59
/*
 * Build-time selection of the entry-point name and of the registers that
 * hold the source and destination pointers:
 *
 *	neither macro defined	FUNCTION = bcopy,   src in a0, dst in a1
 *	MEMCOPY defined		FUNCTION = memcpy,  dst in a0, src in a1
 *	only MEMMOVE defined	FUNCTION = memmove, dst in a0, src in a1
 *
 * The byte count is in a2 in every configuration.
 */
#if !defined(MEMCOPY) && !defined(MEMMOVE)
#define	FUNCTION	bcopy
#define	SRCREG		a0
#define	DSTREG		a1
#else
#if defined(MEMCOPY)
#define	FUNCTION	memcpy
#else
#define	FUNCTION	memmove
#endif
#define	SRCREG		a1
#define	DSTREG		a0
#endif

#define	SIZEREG		a2
75
/*
 * FUNCTION -- bcopy, memcpy or memmove, depending on MEMCOPY/MEMMOVE.
 *
 * Copies SIZEREG bytes from the address in SRCREG to the address in
 * DSTREG.  When src < dst the copy runs backwards from the end of both
 * buffers, otherwise it runs forwards, so overlapping buffers are
 * copied correctly in either configuration.
 *
 * In:		SRCREG	source address
 *		DSTREG	destination address
 *		SIZEREG	byte count
 * Out:		v0	original destination address (memcpy/memmove
 *			builds only; not written for bcopy)
 * Clobbers:	AT, v1, a3, t0-t3; SRCREG, DSTREG and SIZEREG are
 *		modified as the copy proceeds.
 *
 * The whole routine is assembled with ".set noreorder": the instruction
 * written after each branch or jump is its delay slot and executes
 * whether or not the branch is taken.  Do not reorder instructions
 * without re-checking every delay slot.
 *
 * Numeric labels are reused by the forward and backward halves:
 *	1  loop heads		2  word-at-a-time copy
 *	3  byte-at-a-time copy	4  return
 *	5  unaligned source, aligned destination
 *	6  start of the backward copy
 *
 * LWHI and LWLO are defined outside this file (presumably the lwl/lwr
 * pair chosen by byte order -- confirm in the included headers).
 */
LEAF(FUNCTION)
	.set	noat
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	move	v0,DSTREG
#endif
	/*
	 *	Make sure we can copy forwards.
	 *	The delay slot of this branch is the first andi below; it is
	 *	harmless on the backward path, which recomputes t1 at label 6.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 = (src < dst)
	bne	t0,zero,6f		# src below dst: copy backwards

	/*
	 *	There are four alignment cases (with frequency)
	 *	(Based on measurements taken with a DECstation 5000/200
	 *	 inside a Mach kernel.)
	 *
	 *		aligned   -> aligned		(mostly)
	 *		unaligned -> aligned		(sometimes)
	 *		aligned,unaligned -> unaligned	(almost never)
	 *
	 *	Note that we could add another case that checks if
	 *	the destination and source are unaligned but the
	 *	copy is alignable.  eg if src and dest are both
	 *	on a halfword boundary.
	 */
	andi	t1,DSTREG,3		# t1 = low 2 bits of dest
	bne	t1,zero,3f		# dest not word aligned: copy bytes
	andi	t0,SRCREG,3		# (delay slot) t0 = low 2 bits of src
	bne	t0,zero,5f		# only src unaligned
					# (delay slot is the li below)

	/*
	 *	Forward aligned->aligned copy, 8*4 bytes at a time.
	 *
	 *	The rounded count itself is tested, not the stop address:
	 *	SIZEREG is reduced in the delay slot whether or not the
	 *	branch is taken, so the loop has to run whenever t0 is
	 *	non-zero, including when src + t0 wraps to 0 at the top of
	 *	the address space.  The loop ends on address equality, so
	 *	the wrapped stop address still terminates it.
	 */
	li	AT,-32
	and	t0,SIZEREG,AT		# count truncated to multiple of 32
	beq	t0,zero,2f		# fewer than 32 bytes: skip fast loop
	subu	SIZEREG,t0		# (delay slot) bytes left after fast loop
	addu	a3,SRCREG,t0		# run fast loop up to this address

	/*
	 *	loop body: 32 bytes per pass, as two groups of four words.
	 *	Both pointers are bumped in mid-pass, so the second group
	 *	uses negative offsets from the updated pointers.
	 */
1:	# cp
	lw	t3,0(SRCREG)
	lw	v1,4(SRCREG)
	lw	t0,8(SRCREG)
	lw	t1,12(SRCREG)
	addu	SRCREG,32
	sw	t3,0(DSTREG)
	sw	v1,4(DSTREG)
	sw	t0,8(DSTREG)
	sw	t1,12(DSTREG)
	lw	t1,-4(SRCREG)
	lw	t0,-8(SRCREG)
	lw	v1,-12(SRCREG)
	lw	t3,-16(SRCREG)
	addu	DSTREG,32
	sw	t1,-4(DSTREG)
	sw	t0,-8(DSTREG)
	sw	v1,-12(DSTREG)
	bne	SRCREG,a3,1b
	sw	t3,-16(DSTREG)		# (delay slot) last store of the pass

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,3		# t2 = byte count mod 4
	subu	t2,SIZEREG,t2		# t2 = number of words to copy * 4
	beq	t2,zero,3f
	addu	t0,SRCREG,t2		# (delay slot) stop at t0
	subu	SIZEREG,SIZEREG,t2	# bytes left for the byte loop
1:
	lw	t3,0(SRCREG)
	addu	SRCREG,4
	sw	t3,0(DSTREG)
	bne	SRCREG,t0,1b
	addu	DSTREG,4		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,0(SRCREG)
	addu	SRCREG,1
	sb	t3,0(DSTREG)
	subu	SIZEREG,1
	bgtz	SIZEREG,1b		# signed test of the remaining count
	addu	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Copy from unaligned source to aligned dest.
	 *	Each word is assembled from the unaligned source with the
	 *	LWHI/LWLO pair and stored with an ordinary sw.
	 */
5:	# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	subu	a3,SIZEREG,t0		# number of bytes in whole words
	beq	a3,zero,3b		# less than one word: copy bytes
	nop
	move	SIZEREG,t0		# this many to do after we are done
	addu	a3,SRCREG,a3		# stop point

1:
	LWHI	t3,0(SRCREG)
	LWLO	t3,3(SRCREG)
	addu	SRCREG,4		# addu, not addi: address arithmetic
					# must not trap on signed overflow
	sw	t3,0(DSTREG)
	bne	SRCREG,a3,1b
	addu	DSTREG,4		# (delay slot) addu for the same reason

	j	3b			# finish the 0-3 trailing bytes
	nop

	/*
	 *	Backward copy: mirror image of the code above.  Both
	 *	pointers are first moved one past the end of their buffers,
	 *	and every access uses a negative offset or follows a
	 *	decrement.  The alignment tests look at the end addresses.
	 */
6:	# backcopy -- based on above
	addu	SRCREG,SIZEREG
	addu	DSTREG,SIZEREG
	andi	t1,DSTREG,3		# t1 = low 2 bits of dest end
	bne	t1,zero,3f		# dest end not word aligned: copy bytes
	andi	t0,SRCREG,3		# (delay slot) t0 = low 2 bits of src end
	bne	t0,zero,5f		# only src unaligned
					# (delay slot is the li below)

	/*
	 *	Backward aligned->aligned copy, 8*4 bytes at a time.
	 */
	li	AT,-32
	and	t0,SIZEREG,AT		# count truncated to multiple of 32
	beq	t0,zero,2f		# any work to do?
	subu	SIZEREG,t0		# (delay slot) bytes left after fast loop
	subu	a3,SRCREG,t0		# run fast loop down to this address

	/*
	 *	loop body: 32 bytes per pass, highest addresses first.
	 */
1:	# cp
	lw	t3,-16(SRCREG)
	lw	v1,-12(SRCREG)
	lw	t0,-8(SRCREG)
	lw	t1,-4(SRCREG)
	subu	SRCREG,32
	sw	t3,-16(DSTREG)
	sw	v1,-12(DSTREG)
	sw	t0,-8(DSTREG)
	sw	t1,-4(DSTREG)
	lw	t1,12(SRCREG)
	lw	t0,8(SRCREG)
	lw	v1,4(SRCREG)
	lw	t3,0(SRCREG)
	subu	DSTREG,32
	sw	t1,12(DSTREG)
	sw	t0,8(DSTREG)
	sw	v1,4(DSTREG)
	bne	SRCREG,a3,1b
	sw	t3,0(DSTREG)		# (delay slot) last store of the pass

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,3		# t2 = byte count mod 4
	subu	t2,SIZEREG,t2		# t2 = number of words to copy * 4
	beq	t2,zero,3f
	subu	t0,SRCREG,t2		# (delay slot) stop at t0
	subu	SIZEREG,SIZEREG,t2	# bytes left for the byte loop
1:
	lw	t3,-4(SRCREG)
	subu	SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,t0,1b
	subu	DSTREG,4		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,-1(SRCREG)
	subu	SRCREG,1
	sb	t3,-1(DSTREG)
	subu	SIZEREG,1
	bgtz	SIZEREG,1b		# signed test of the remaining count
	subu	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Copy from unaligned source to aligned dest, backwards.
	 */
5:	# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	subu	a3,SIZEREG,t0		# number of bytes in whole words
	beq	a3,zero,3b		# less than one word: copy bytes
	nop
	move	SIZEREG,t0		# this many to do after we are done
	subu	a3,SRCREG,a3		# stop point

1:
	LWHI	t3,-4(SRCREG)
	LWLO	t3,-1(SRCREG)
	subu	SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,a3,1b
	subu	DSTREG,4		# (delay slot)

	j	3b			# finish the 0-3 leading bytes
	nop

	.set	reorder
	.set	at
	END(FUNCTION)
293