bcopy.S revision 1.2.40.2 1 /* $NetBSD: bcopy.S,v 1.2.40.2 2009/08/19 06:56:13 matt Exp $ */
2
3 /*
4 * Mach Operating System
5 * Copyright (c) 1993 Carnegie Mellon University
6 * All Rights Reserved.
7 *
8 * Permission to use, copy, modify and distribute this software and its
9 * documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
16 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie Mellon
26 * the rights to redistribute these changes.
27 */
28
29 /*
30 * File: mips_bcopy.s
31 * Author: Chris Maeda
32 * Date: June 1993
33 *
34 * Fast copy routine. Derived from aligned_block_copy.
35 */
36
37
38 #include <mips/asm.h>
39 #ifndef _LOCORE
40 #define _LOCORE /* XXX not really, just assembly-code source */
41 #endif
42 #include <machine/endian.h>
43
44
45 #if defined(LIBC_SCCS) && !defined(lint)
46 /* RCSID("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93") */
47 RCSID("$NetBSD: bcopy.S,v 1.2.40.2 2009/08/19 06:56:13 matt Exp $")
48 #endif /* LIBC_SCCS and not lint */
49
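/*
 *	bcopy(caddr_t src, caddr_t dst, unsigned int len)
 *
 *	a0	src address
 *	a1	dst address
 *	a2	length
 *
 *	When this file is built with MEMCOPY or MEMMOVE defined, the
 *	entry point is memcpy() or memmove() instead of bcopy().  In that
 *	case the first two arguments are swapped (a0 is the dst address,
 *	a1 is the src address) and the original dst address is returned
 *	in v0.
 */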
57
/*
 * Select the entry-point name and the argument registers.
 *
 *	MEMCOPY defined		-> memcpy(dst, src, len)
 *	MEMMOVE defined		-> memmove(dst, src, len)
 *	neither defined		-> bcopy(src, dst, len)
 *
 * MEMCOPY takes precedence if both are defined.  The length is always
 * the third argument.
 */
#define	SIZEREG		a2

#if defined(MEMCOPY)
#define	FUNCTION	memcpy
#define	SRCREG		a1
#define	DSTREG		a0
#elif defined(MEMMOVE)
#define	FUNCTION	memmove
#define	SRCREG		a1
#define	DSTREG		a0
#else
#define	FUNCTION	bcopy
#define	SRCREG		a0
#define	DSTREG		a1
#endif
73
/*
 * FUNCTION -- overlap-safe block copy (bcopy, memcpy or memmove,
 * depending on how the file is built).
 *
 * In:	SRCREG	source address
 *	DSTREG	destination address
 *	SIZEREG	number of bytes to copy
 * Out:	v0	original destination address (memcpy/memmove builds only)
 * Uses:	t0-t3, v1, a3, AT as scratch; SRCREG, DSTREG and SIZEREG are
 *	modified.
 *
 * If src < dst the copy runs backwards from the end of the buffers,
 * otherwise it runs forwards, so overlapping regions are handled in
 * either case.  Each direction has three paths:
 *
 *	dst and src both SZREG-aligned	-> unrolled register-size copy,
 *					   then single registers, then bytes
 *	dst aligned, src unaligned	-> 32-bit LWHI/LWLO loads with
 *					   aligned 32-bit stores, then bytes
 *	dst unaligned			-> byte copy
 *
 * The whole body is assembled with ".set noreorder": the instruction
 * following every branch/jump is its delay slot and is always executed.
 */
LEAF(FUNCTION)
	.set	noat
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	move	v0,DSTREG
#endif
	/*
	 *	Make sure we can copy forwards.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 == SRCREG < DSTREG
	bne	t0,zero,6f		# copy backwards
	/*
	 * The delay slot of the branch above is the first instruction
	 * below; it only writes t1, which the backward path recomputes.
	 */

	/*
	 * 	There are four alignment cases (with frequency)
	 *	(Based on measurements taken with a DECstation 5000/200
	 *	 inside a Mach kernel.)
	 *
	 * 	aligned   -> aligned		(mostly)
	 * 	unaligned -> aligned		(sometimes)
	 * 	aligned,unaligned -> unaligned	(almost never)
	 *
	 *	Note that we could add another case that checks if
	 *	the destination and source are unaligned but the
	 *	copy is alignable.  eg if src and dest are both
	 *	on a halfword boundary.
	 */
#if 1
	andi	t1,DSTREG,(SZREG-1)	# get last bits of dest
	bne	t1,zero,3f		# dest unaligned
	andi	t0,SRCREG,(SZREG-1)	# get last bits of src
	bne	t0,zero,5f
	/* delay slot: the "li AT" at label 98 below (AT is unused at 5:) */
#else
	/*
	 * Disabled experiment: align dst/src when they share the same
	 * misalignment.  NOTE(review): this code is never assembled and
	 * does not look finished -- t2 is read before it is written (the
	 * comment says -t0 was intended), and t1 is overwritten by the
	 * sltiu before it is used as a shift count.  Audit before
	 * enabling.
	 */
	andi	t1,DSTREG,(SZREG-1)	# get last bits of dest
	andi	t0,SRCREG,(SZREG-1)	# get last bits of src
	beq	t1,t0,97f		# aligned on non-word
	nop;
	bne	t1,zero,3f		# dest unaligned
	nop
	b	5f			# source unaligned
	nop

97:
	sltiu	t1,SIZEREG,SZREG
	bne	t1,zero,3f
	nop
	subu	t2,zero,t2		# t2 = -t0
	andi	t2,t2,(SZREG-1)		# t2 &= (SZREG-1)
					# t0 + t2 == SZREG
	PTR_SUBU SIZEREG,SIZEREG,t2	# retreat to word boundary
	PTR_ADDU DSTREG,DSTREG,t2	# advance to word boundary
	PTR_ADDU SRCREG,SRCREG,t2	# advance to word boundary
	REG_L	a3,-SZREG(DSTREG)
	REG_L	v1,-SZREG(SRCREG)
	sll	t0,t0,3			# bits to clear in dest
	sll	t1,t1,3			# bits to clear in source
	/*
	 * DST = 01 23 45 67	BE: 01234567	LE: 67452301
	 *			    01000000	    00000001
	 * SRC = 89 ab cd ef	BE: 89abcdef	LE: efcdab89
	 *			    00abcdef	    efcdab00
	 *			    01abcdef
	 */
#if _BYTE_ORDER == _BIG_ENDIAN
	REG_SRLV a3,a3,t1		# clear lsb dest bits
	REG_SLLV a3,a3,t1
	REG_SLLV v1,v1,t0		# clear msb source bits
	REG_SRLV v1,v1,t0
#endif
#if _BYTE_ORDER == _LITTLE_ENDIAN
	REG_SLLV a3,a3,t1		# clear msb dest bits
	REG_SRLV a3,a3,t1
	REG_SRLV v1,v1,t0		# clear lsb source bits
	REG_SLLV v1,v1,t0
#endif
	or	a3,a3,v1		# merge
	REG_S	a3,-SZREG(DSTREG)	# and save
99:
#endif

	/*
	 *	Forward aligned->aligned copy, 8*SZREG bytes at a time.
	 */
98:
	li	AT,-(SZREG*8)
	and	t0,SIZEREG,AT		# count truncated to multiple of 8*SZREG
	PTR_ADDU a3,SRCREG,t0		# run fast loop up to this addr
	sltu	AT,SRCREG,a3		# any work to do?
	beq	AT,zero,2f
	PTR_SUBU SIZEREG,t0		# (delay slot) bytes left after fast loop

	/*
	 *	loop body
	 */
1:	# cp
	REG_L	t3,(0*SZREG)(SRCREG)
	REG_L	v1,(1*SZREG)(SRCREG)
	REG_L	t0,(2*SZREG)(SRCREG)
	REG_L	t1,(3*SZREG)(SRCREG)
	PTR_ADDU SRCREG,SZREG*8
	REG_S	t3,(0*SZREG)(DSTREG)
	REG_S	v1,(1*SZREG)(DSTREG)
	REG_S	t0,(2*SZREG)(DSTREG)
	REG_S	t1,(3*SZREG)(DSTREG)
	REG_L	t1,(-1*SZREG)(SRCREG)
	REG_L	t0,(-2*SZREG)(SRCREG)
	REG_L	v1,(-3*SZREG)(SRCREG)
	REG_L	t3,(-4*SZREG)(SRCREG)
	PTR_ADDU DSTREG,SZREG*8
	REG_S	t1,(-1*SZREG)(DSTREG)
	REG_S	t0,(-2*SZREG)(DSTREG)
	REG_S	v1,(-3*SZREG)(DSTREG)
	bne	SRCREG,a3,1b
	REG_S	t3,(-4*SZREG)(DSTREG)	# (delay slot)

	/*
	 *	Copy a register (SZREG bytes) at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,(SZREG-1)	# t2 = byte count mod SZREG
	PTR_SUBU t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq	t2,zero,3f
	PTR_ADDU t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU SIZEREG,SIZEREG,t2
1:
	REG_L	t3,0(SRCREG)
	PTR_ADDU SRCREG,SZREG
	REG_S	t3,0(DSTREG)
	bne	SRCREG,t0,1b
	PTR_ADDU DSTREG,SZREG		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,0(SRCREG)
	PTR_ADDU SRCREG,1
	sb	t3,0(DSTREG)
	PTR_SUBU SIZEREG,1
	bgtz	SIZEREG,1b
	PTR_ADDU DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Copy from unaligned source to aligned dest.
	 *
	 *	LWHI/LWLO assemble one 32-bit word and sw stores one 32-bit
	 *	word, so this loop must step by 4 bytes regardless of SZREG.
	 *	Stepping by SZREG would skip half of the data whenever
	 *	SZREG is 8.
	 */
5:	# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	PTR_SUBU a3,SIZEREG,t0		# number of bytes to move as words
	beq	a3,zero,3b
	nop
	move	SIZEREG,t0		# this many to do after we are done
	PTR_ADDU a3,SRCREG,a3		# stop point

1:
	LWHI	t3,0(SRCREG)
	LWLO	t3,3(SRCREG)
	PTR_ADDU SRCREG,4
	sw	t3,0(DSTREG)
	bne	SRCREG,a3,1b
	PTR_ADDU DSTREG,4		# (delay slot)

	j	3b
	nop

	/*
	 *	Backward copy: SRCREG/DSTREG are moved to one past the end
	 *	of each buffer and walk downwards.
	 */
6:	# backcopy -- based on above
	PTR_ADDU SRCREG,SIZEREG
	PTR_ADDU DSTREG,SIZEREG
	/*
	 * The aligned loops below use REG_L/REG_S, so the pointers must
	 * be SZREG-aligned, not merely 4-byte aligned.
	 */
	andi	t1,DSTREG,(SZREG-1)	# get last bits of dest
	bne	t1,zero,3f
	andi	t0,SRCREG,(SZREG-1)	# (delay slot) get last bits of src
	bne	t0,zero,5f
	/* delay slot: the "li AT" below (AT is unused at 5:) */

	/*
	 *	Backward aligned->aligned copy, 8*SZREG bytes at a time.
	 */
	li	AT,(-8*SZREG)
	and	t0,SIZEREG,AT		# count truncated to multiple of 8*SZREG
	beq	t0,zero,2f		# any work to do?
	PTR_SUBU SIZEREG,t0		# (delay slot)
	PTR_SUBU a3,SRCREG,t0		# run fast loop down to this addr

	/*
	 *	loop body
	 */
1:	# cp
	REG_L	t3,(-4*SZREG)(SRCREG)
	REG_L	v1,(-3*SZREG)(SRCREG)
	REG_L	t0,(-2*SZREG)(SRCREG)
	REG_L	t1,(-1*SZREG)(SRCREG)
	PTR_SUBU SRCREG,8*SZREG
	REG_S	t3,(-4*SZREG)(DSTREG)
	REG_S	v1,(-3*SZREG)(DSTREG)
	REG_S	t0,(-2*SZREG)(DSTREG)
	REG_S	t1,(-1*SZREG)(DSTREG)
	REG_L	t1,(3*SZREG)(SRCREG)
	REG_L	t0,(2*SZREG)(SRCREG)
	REG_L	v1,(1*SZREG)(SRCREG)
	REG_L	t3,(0*SZREG)(SRCREG)
	PTR_SUBU DSTREG,8*SZREG
	REG_S	t1,(3*SZREG)(DSTREG)
	REG_S	t0,(2*SZREG)(DSTREG)
	REG_S	v1,(1*SZREG)(DSTREG)
	bne	SRCREG,a3,1b
	REG_S	t3,(0*SZREG)(DSTREG)	# (delay slot)

	/*
	 *	Copy a register (SZREG bytes) at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,SZREG-1	# t2 = byte count mod SZREG
	PTR_SUBU t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq	t2,zero,3f
	PTR_SUBU t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU SIZEREG,SIZEREG,t2
1:
	REG_L	t3,-SZREG(SRCREG)
	PTR_SUBU SRCREG,SZREG
	REG_S	t3,-SZREG(DSTREG)
	bne	SRCREG,t0,1b
	PTR_SUBU DSTREG,SZREG		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,-1(SRCREG)
	PTR_SUBU SRCREG,1
	sb	t3,-1(DSTREG)
	PTR_SUBU SIZEREG,1
	bgtz	SIZEREG,1b
	PTR_SUBU DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Copy from unaligned source to aligned dest, backwards,
	 *	one 32-bit word at a time.
	 */
5:	# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	PTR_SUBU a3,SIZEREG,t0		# number of bytes to move as words
	beq	a3,zero,3b
	nop
	move	SIZEREG,t0		# this many to do after we are done
	PTR_SUBU a3,SRCREG,a3		# stop point

1:
	LWHI	t3,-4(SRCREG)
	LWLO	t3,-1(SRCREG)
	PTR_SUBU SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,a3,1b
	PTR_SUBU DSTREG,4		# (delay slot)

	j	3b
	nop

	.set	reorder
	.set	at
	END(FUNCTION)
340