/*	$NetBSD: bcopy.S,v 1.2.40.3 2009/08/20 10:03:43 matt Exp $	*/
2
3 /*
4 * Mach Operating System
5 * Copyright (c) 1993 Carnegie Mellon University
6 * All Rights Reserved.
7 *
8 * Permission to use, copy, modify and distribute this software and its
9 * documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
16 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie Mellon
26 * the rights to redistribute these changes.
27 */
28
29 /*
30 * File: mips_bcopy.s
31 * Author: Chris Maeda
32 * Date: June 1993
33 *
34 * Fast copy routine. Derived from aligned_block_copy.
35 */
36
37
38 #include <mips/asm.h>
39 #ifndef _LOCORE
40 #define _LOCORE /* XXX not really, just assembly-code source */
41 #endif
42 #include <machine/endian.h>
43
44
45 #if defined(LIBC_SCCS) && !defined(lint)
46 /* RCSID("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93") */
47 RCSID("$NetBSD: bcopy.S,v 1.2.40.3 2009/08/20 10:03:43 matt Exp $")
48 #endif /* LIBC_SCCS and not lint */
49
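/*
 *	bcopy(caddr_t src, caddr_t dst, unsigned int len)
 *	memcpy(void *dst, const void *src, size_t len)
 *	memmove(void *dst, const void *src, size_t len)
 *
 *	bcopy:			a0 = src address, a1 = dst address
 *	memcpy/memmove:		a0 = dst address, a1 = src address
 *	all three:		a2 = length
 *
 *	Which variant is built is selected by MEMCOPY / MEMMOVE below.
 */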
57
/*
 * Name of the entry point to emit.  MEMCOPY takes precedence when both
 * MEMCOPY and MEMMOVE are defined; with neither defined this is bcopy.
 */
#if defined(MEMCOPY)
#define	FUNCTION	memcpy
#elif defined(MEMMOVE)
#define	FUNCTION	memmove
#else
#define	FUNCTION	bcopy
#endif

/*
 * Argument registers.  memcpy/memmove take (dst, src, len) while bcopy
 * takes (src, dst, len), so the first two registers swap roles.
 */
#if defined(MEMCOPY) || defined(MEMMOVE)
#define	DSTREG		a0
#define	SRCREG		a1
#else
#define	SRCREG		a0
#define	DSTREG		a1
#endif
#define	SIZEREG		a2
73
/*
 * FUNCTION: copy SIZEREG bytes from SRCREG to DSTREG.
 *
 * When built as memcpy or memmove the original destination pointer is
 * returned in v0; the bcopy build does not set v0.
 *
 * Direction: if SRCREG < DSTREG (unsigned compare) the copy runs
 * backwards, starting from the end of both buffers; otherwise it runs
 * forwards.  The test does not look at the length.
 *
 * Each direction uses the same strategies:
 *   - dest not SZREG-aligned:      byte loop for the whole length
 *   - dest and src SZREG-aligned:  unrolled loop (8 registers per pass),
 *                                  then single-register loop, then bytes
 *   - dest aligned, src unaligned: REG_LHI/REG_LLO pair per register,
 *                                  then bytes
 *
 * Registers written: AT, v1, t0-t3, a3, SRCREG, DSTREG, SIZEREG
 * (plus v0 for memcpy/memmove).
 *
 * The body is assembled under .set noreorder, so the instruction that
 * follows each branch is its delay slot and executes whether or not
 * the branch is taken.  Numeric labels 1-6 are reused: "Nf" resolves to
 * the next definition, "Nb" to the previous one.
 */
LEAF(FUNCTION)
	.set	noat
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	move	v0,DSTREG
#endif
	/*
	 *	Make sure we can copy forwards.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 == SRCREG < DSTREG
	bne	t0,zero,6f		# copy backwards

	/*
	 * 	There are four alignment cases (with frequency)
	 *	(Based on measurements taken with a DECstation 5000/200
	 *	 inside a Mach kernel.)
	 *
	 * 	aligned   -> aligned		(mostly)
	 * 	unaligned -> aligned		(sometimes)
	 * 	aligned,unaligned -> unaligned	(almost never)
	 *
	 *	Note that we could add another case that checks if
	 *	the destination and source are unaligned but the
	 *	copy is alignable.  eg if src and dest are both
	 *	on a halfword boundary.
	 *
	 *	The andi below sits in the delay slot of the branch to
	 *	the backward copy; that path recomputes t1 from the
	 *	adjusted DSTREG, so executing it there is harmless.
	 */
	andi	t1,DSTREG,(SZREG-1)	# get last bits of dest
	bne	t1,zero,3f		# dest unaligned
	andi	t0,SRCREG,(SZREG-1)	# get last bits of src (delay slot)
	bne	t0,zero,5f

	/*
	 *	Forward aligned->aligned copy, 8 words at a time.
	 *	(The li below is the delay slot of the branch above.)
	 */
98:					# label not referenced in this function
	li	AT,-(SZREG*8)
	and	t0,SIZEREG,AT		# count truncated to multiple of 8*SZREG
	PTR_ADDU a3,SRCREG,t0		# run fast loop up to this addr
	sltu	AT,SRCREG,a3		# any work to do?
	beq	AT,zero,2f
	PTR_SUBU SIZEREG,t0		# (delay slot) t0 is 0 if branch taken

	/*
	 *	loop body: two groups of four loads followed by four
	 *	stores; the pointers are bumped between the groups, so
	 *	the second group uses negative offsets.
	 */
1:	# cp
	REG_L	t3,(0*SZREG)(SRCREG)
	REG_L	v1,(1*SZREG)(SRCREG)
	REG_L	t0,(2*SZREG)(SRCREG)
	REG_L	t1,(3*SZREG)(SRCREG)
	PTR_ADDU SRCREG,SZREG*8
	REG_S	t3,(0*SZREG)(DSTREG)
	REG_S	v1,(1*SZREG)(DSTREG)
	REG_S	t0,(2*SZREG)(DSTREG)
	REG_S	t1,(3*SZREG)(DSTREG)
	REG_L	t1,(-1*SZREG)(SRCREG)
	REG_L	t0,(-2*SZREG)(SRCREG)
	REG_L	v1,(-3*SZREG)(SRCREG)
	REG_L	t3,(-4*SZREG)(SRCREG)
	PTR_ADDU DSTREG,SZREG*8
	REG_S	t1,(-1*SZREG)(DSTREG)
	REG_S	t0,(-2*SZREG)(DSTREG)
	REG_S	v1,(-3*SZREG)(DSTREG)
	bne	SRCREG,a3,1b
	REG_S	t3,(-4*SZREG)(DSTREG)	# (delay slot)

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,(SZREG-1)	# t2 = byte count mod SZREG
	PTR_SUBU t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq	t2,zero,3f
	PTR_ADDU t0,SRCREG,t2		# stop at t0 (delay slot)
	PTR_SUBU SIZEREG,SIZEREG,t2	# SIZEREG = trailing byte count
1:
	REG_L	t3,0(SRCREG)
	PTR_ADDU SRCREG,SZREG
	REG_S	t3,0(DSTREG)
	bne	SRCREG,t0,1b
	PTR_ADDU DSTREG,SZREG		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,0(SRCREG)
	PTR_ADDU SRCREG,1
	sb	t3,0(DSTREG)
	PTR_SUBU SIZEREG,1
	bgtz	SIZEREG,1b		# signed test on the remaining count
	PTR_ADDU DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Copy from unaligned source to aligned dest.
	 *
	 *	The pointer increments use PTR_ADDU like the rest of
	 *	this routine.  The trapping add-immediate form
	 *	(PTR_ADDI) would raise an integer overflow exception
	 *	if a pointer step crossed the signed boundary, which
	 *	is never wanted for address arithmetic.
	 */
5:	# destaligned
	andi	t0,SIZEREG,(SZREG-1)	# t0 = bytecount mod SZREG
	PTR_SUBU a3,SIZEREG,t0		# number of words to transfer * SZREG
	beq	a3,zero,3b
	nop
	move	SIZEREG,t0		# this many to do after we are done
	PTR_ADDU a3,SRCREG,a3		# stop point

1:
	REG_LHI	t3,0(SRCREG)
	REG_LLO	t3,SZREG-1(SRCREG)
	PTR_ADDU SRCREG,SZREG
	REG_S	t3,0(DSTREG)
	bne	SRCREG,a3,1b
	PTR_ADDU DSTREG,SZREG		# (delay slot)

	b	3b
	nop

	/*
	 *	Backward copy.  Both pointers are first moved to one
	 *	past the end of their buffers; every access below then
	 *	uses a negative offset or follows a pointer decrement.
	 */
6:	# backcopy -- based on above
	PTR_ADDU SRCREG,SIZEREG
	PTR_ADDU DSTREG,SIZEREG
	andi	t1,DSTREG,SZREG-1	# get last bits of dest
	bne	t1,zero,3f		# dest unaligned
	andi	t0,SRCREG,SZREG-1	# get last bits of src (delay slot)
	bne	t0,zero,5f

	/*
	 *	Backward aligned->aligned copy, 8 words at a time.
	 *	(The li below is the delay slot of the branch above.)
	 */
	li	AT,(-8*SZREG)
	and	t0,SIZEREG,AT		# count truncated to multiple of 8*SZREG
	beq	t0,zero,2f		# any work to do?
	PTR_SUBU SIZEREG,t0		# (delay slot) t0 is 0 if branch taken
	PTR_SUBU a3,SRCREG,t0		# run fast loop down to this addr

	/*
	 *	loop body: mirror image of the forward loop, highest
	 *	addresses first.
	 */
1:	# cp
	REG_L	t3,(-4*SZREG)(SRCREG)
	REG_L	v1,(-3*SZREG)(SRCREG)
	REG_L	t0,(-2*SZREG)(SRCREG)
	REG_L	t1,(-1*SZREG)(SRCREG)
	PTR_SUBU SRCREG,8*SZREG
	REG_S	t3,(-4*SZREG)(DSTREG)
	REG_S	v1,(-3*SZREG)(DSTREG)
	REG_S	t0,(-2*SZREG)(DSTREG)
	REG_S	t1,(-1*SZREG)(DSTREG)
	REG_L	t1,(3*SZREG)(SRCREG)
	REG_L	t0,(2*SZREG)(SRCREG)
	REG_L	v1,(1*SZREG)(SRCREG)
	REG_L	t3,(0*SZREG)(SRCREG)
	PTR_SUBU DSTREG,8*SZREG
	REG_S	t1,(3*SZREG)(DSTREG)
	REG_S	t0,(2*SZREG)(DSTREG)
	REG_S	v1,(1*SZREG)(DSTREG)
	bne	SRCREG,a3,1b
	REG_S	t3,(0*SZREG)(DSTREG)	# (delay slot)

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,SZREG-1	# t2 = byte count mod SZREG
	PTR_SUBU t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq	t2,zero,3f
	PTR_SUBU t0,SRCREG,t2		# stop at t0 (delay slot)
	PTR_SUBU SIZEREG,SIZEREG,t2	# SIZEREG = leading byte count
1:
	REG_L	t3,-SZREG(SRCREG)
	PTR_SUBU SRCREG,SZREG
	REG_S	t3,-SZREG(DSTREG)
	bne	SRCREG,t0,1b
	PTR_SUBU DSTREG,SZREG		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,-1(SRCREG)
	PTR_SUBU SRCREG,1
	sb	t3,-1(DSTREG)
	PTR_SUBU SIZEREG,1
	bgtz	SIZEREG,1b		# signed test on the remaining count
	PTR_SUBU DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Copy from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi	t0,SIZEREG,SZREG-1	# t0 = bytecount mod SZREG
	PTR_SUBU a3,SIZEREG,t0		# number of words to transfer * SZREG
	beq	a3,zero,3b
	nop
	move	SIZEREG,t0		# this many to do after we are done
	PTR_SUBU a3,SRCREG,a3		# stop point

1:
	REG_LHI	t3,-SZREG(SRCREG)
	REG_LLO	t3,-1(SRCREG)
	PTR_SUBU SRCREG,SZREG
	REG_S	t3,-SZREG(DSTREG)
	bne	SRCREG,a3,1b
	PTR_SUBU DSTREG,SZREG		# (delay slot)

	b	3b
	nop

	.set	reorder
	.set	at
END(FUNCTION)
292