bcopy.S revision 1.2 1 /* $NetBSD: bcopy.S,v 1.2 2005/12/27 11:23:53 tsutsui Exp $ */
2
3 /*
4 * Mach Operating System
5 * Copyright (c) 1993 Carnegie Mellon University
6 * All Rights Reserved.
7 *
8 * Permission to use, copy, modify and distribute this software and its
9 * documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
16 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie Mellon
26 * the rights to redistribute these changes.
27 */
28
29 /*
30 * File: mips_bcopy.s
31 * Author: Chris Maeda
32 * Date: June 1993
33 *
34 * Fast copy routine. Derived from aligned_block_copy.
35 */
36
37
38 #include <mips/asm.h>
39 #ifndef _LOCORE
40 #define _LOCORE /* XXX not really, just assembly-code source */
41 #endif
42 #include <machine/endian.h>
43
44
45 #if defined(LIBC_SCCS) && !defined(lint)
46 ASMSTR("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93")
47 ASMSTR("$NetBSD: bcopy.S,v 1.2 2005/12/27 11:23:53 tsutsui Exp $")
48 #endif /* LIBC_SCCS and not lint */
49
50 #ifdef __ABICALLS__
51 .abicalls
52 #endif
53
/*
 * Entry point and argument register selection.
 *
 * One source builds three routines, chosen by the preprocessor:
 *
 *	(default)	bcopy(caddr_t src, caddr_t dst, unsigned int len)
 *				a0 = src address, a1 = dst address
 *	MEMCOPY		memcpy:  a0 = dst address, a1 = src address
 *	MEMMOVE		memmove: a0 = dst address, a1 = src address
 *
 * In every variant a2 holds the length in bytes.  The code below only
 * refers to SRCREG, DSTREG and SIZEREG, never to a0-a2 directly.
 */

#define	SIZEREG		a2

#if defined(MEMCOPY)
#define	FUNCTION	memcpy
#elif defined(MEMMOVE)
#define	FUNCTION	memmove
#else
#define	FUNCTION	bcopy
#endif

#if defined(MEMCOPY) || defined(MEMMOVE)
#define	DSTREG		a0
#define	SRCREG		a1
#else
#define	SRCREG		a0
#define	DSTREG		a1
#endif
77
/*
 * FUNCTION -- copy SIZEREG bytes from SRCREG to DSTREG.
 *
 * In:	SRCREG	source address
 *	DSTREG	destination address
 *	SIZEREG	byte count
 * Out:	v0	original destination (MEMCOPY/MEMMOVE builds only)
 * Uses: AT, v1, a3, t0-t3; SRCREG, DSTREG and SIZEREG are modified.
 *
 * The copy runs forwards when SRCREG >= DSTREG and backwards (from the
 * end of both buffers) when SRCREG < DSTREG, so source bytes are always
 * read before the copy can overwrite them.
 *
 * Strategy, identical in both directions:
 *	dest unaligned		  -> byte copy of everything
 *	dest aligned, src aligned -> 32-byte unrolled loop, then words,
 *				     then trailing bytes
 *	dest aligned, src unaligned -> LWHI/LWLO word loop, then bytes
 *
 * The whole body is assembled with ".set noreorder": the instruction
 * that follows every branch or jump is its delay slot and executes
 * whether or not the branch is taken.
 */
LEAF(FUNCTION)
	.set	noat
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	move	v0,DSTREG
#endif
	/*
	 * Make sure we can copy forwards.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 = (SRCREG < DSTREG)
	bne	t0,zero,6f		# src below dst: copy backwards

	/*
	 * There are four alignment cases (with frequency)
	 * (Based on measurements taken with a DECstation 5000/200
	 * inside a Mach kernel.)
	 *
	 *	aligned   -> aligned	(mostly)
	 *	unaligned -> aligned	(sometimes)
	 *	aligned   -> unaligned	(almost never)
	 *	unaligned -> unaligned	(almost never)
	 *
	 * Note that we could add another case that checks if
	 * the destination and source are unaligned but the
	 * copy is alignable, e.g. if src and dest are both
	 * on a halfword boundary.
	 */
	andi	t1,DSTREG,3		# (delay slot) low 2 bits of dest
	bne	t1,zero,3f		# dest unaligned: byte copy it all
	andi	t0,SRCREG,3		# (delay slot) low 2 bits of src
	bne	t0,zero,5f		# src unaligned, dest aligned

	/*
	 * Forward aligned->aligned copy, 8*4 bytes at a time.
	 */
	li	AT,-32			# (delay slot) mask: multiples of 32
	and	t0,SIZEREG,AT		# count truncated to multiple of 32
	addu	a3,SRCREG,t0		# run fast loop up to this address
	sltu	AT,SRCREG,a3		# any work to do?
	beq	AT,zero,2f
	subu	SIZEREG,t0		# (delay slot) bytes left afterwards

	/*
	 * loop body: two groups of four words; the pointers are bumped
	 * between the loads and the stores of each group, which is why
	 * the second group uses negative offsets.
	 */
1:	# cp
	lw	t3,0(SRCREG)
	lw	v1,4(SRCREG)
	lw	t0,8(SRCREG)
	lw	t1,12(SRCREG)
	addu	SRCREG,32
	sw	t3,0(DSTREG)
	sw	v1,4(DSTREG)
	sw	t0,8(DSTREG)
	sw	t1,12(DSTREG)
	lw	t1,-4(SRCREG)
	lw	t0,-8(SRCREG)
	lw	v1,-12(SRCREG)
	lw	t3,-16(SRCREG)
	addu	DSTREG,32
	sw	t1,-4(DSTREG)
	sw	t0,-8(DSTREG)
	sw	v1,-12(DSTREG)
	bne	SRCREG,a3,1b
	sw	t3,-16(DSTREG)		# (delay slot) last store of the group

	/*
	 * Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,3		# t2 = byte count mod 4
	subu	t2,SIZEREG,t2		# t2 = number of words to copy * 4
	beq	t2,zero,3f
	addu	t0,SRCREG,t2		# (delay slot) stop at t0
	subu	SIZEREG,SIZEREG,t2	# trailing bytes for the byte loop
1:
	lw	t3,0(SRCREG)
	addu	SRCREG,4
	sw	t3,0(DSTREG)
	bne	SRCREG,t0,1b
	addu	DSTREG,4		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
	/*
	 * NOTE: bgtz is a signed test, so this loop relies on the
	 * remaining count being below 2^31.
	 */
1:
	lb	t3,0(SRCREG)
	addu	SRCREG,1
	sb	t3,0(DSTREG)
	subu	SIZEREG,1
	bgtz	SIZEREG,1b
	addu	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 * Copy from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	subu	a3,SIZEREG,t0		# number of bytes to move as words
	beq	a3,zero,3b		# less than one word: bytes only
	nop
	move	SIZEREG,t0		# this many to do after we are done
	addu	a3,SRCREG,a3		# stop point

	/*
	 * Pointer updates use addu, like the rest of the file: addi
	 * traps on signed overflow, which an address increment must
	 * never do.
	 */
1:
	LWHI	t3,0(SRCREG)
	LWLO	t3,3(SRCREG)
	addu	SRCREG,4
	sw	t3,0(DSTREG)
	bne	SRCREG,a3,1b
	addu	DSTREG,4		# (delay slot)

	j	3b			# finish the tail in the byte loop
	nop

	/*
	 * Backward copy -- mirrors the forward code above.  Both
	 * pointers are first moved one past the end of their buffers,
	 * so all accesses use negative offsets (or follow a decrement)
	 * and the alignment tests apply to the end addresses.
	 */
6:	# backcopy -- based on above
	addu	SRCREG,SIZEREG
	addu	DSTREG,SIZEREG
	andi	t1,DSTREG,3		# low 2 bits of dest end
	bne	t1,zero,3f		# dest unaligned: byte copy it all
	andi	t0,SRCREG,3		# (delay slot) low 2 bits of src end
	bne	t0,zero,5f		# src unaligned, dest aligned

	/*
	 * Backward aligned->aligned copy, 8*4 bytes at a time.
	 */
	li	AT,-32			# (delay slot) mask: multiples of 32
	and	t0,SIZEREG,AT		# count truncated to multiple of 32
	beq	t0,zero,2f		# any work to do?
	subu	SIZEREG,t0		# (delay slot) bytes left afterwards
	subu	a3,SRCREG,t0		# run fast loop down to this address

	/*
	 * loop body: upper four words first, then the lower four after
	 * the pointers have been moved down by 32.
	 */
1:	# cp
	lw	t3,-16(SRCREG)
	lw	v1,-12(SRCREG)
	lw	t0,-8(SRCREG)
	lw	t1,-4(SRCREG)
	subu	SRCREG,32
	sw	t3,-16(DSTREG)
	sw	v1,-12(DSTREG)
	sw	t0,-8(DSTREG)
	sw	t1,-4(DSTREG)
	lw	t1,12(SRCREG)
	lw	t0,8(SRCREG)
	lw	v1,4(SRCREG)
	lw	t3,0(SRCREG)
	subu	DSTREG,32
	sw	t1,12(DSTREG)
	sw	t0,8(DSTREG)
	sw	v1,4(DSTREG)
	bne	SRCREG,a3,1b
	sw	t3,0(DSTREG)		# (delay slot) last store of the group

	/*
	 * Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,3		# t2 = byte count mod 4
	subu	t2,SIZEREG,t2		# t2 = number of words to copy * 4
	beq	t2,zero,3f
	subu	t0,SRCREG,t2		# (delay slot) stop at t0
	subu	SIZEREG,SIZEREG,t2	# leading bytes for the byte loop
1:
	lw	t3,-4(SRCREG)
	subu	SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,t0,1b
	subu	DSTREG,4		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
	/*
	 * NOTE: bgtz is a signed test, so this loop relies on the
	 * remaining count being below 2^31.
	 */
1:
	lb	t3,-1(SRCREG)
	subu	SRCREG,1
	sb	t3,-1(DSTREG)
	subu	SIZEREG,1
	bgtz	SIZEREG,1b
	subu	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 * Copy from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	subu	a3,SIZEREG,t0		# number of bytes to move as words
	beq	a3,zero,3b		# less than one word: bytes only
	nop
	move	SIZEREG,t0		# this many to do after we are done
	subu	a3,SRCREG,a3		# stop point

1:
	LWHI	t3,-4(SRCREG)
	LWLO	t3,-1(SRCREG)
	subu	SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,a3,1b
	subu	DSTREG,4		# (delay slot)

	j	3b			# finish the rest in the byte loop
	nop

	.set	reorder
	.set	at
END(FUNCTION)
295