bcopy.S revision 1.1 1 1.1 christos /* $NetBSD: bcopy.S,v 1.1 2005/12/21 00:25:56 christos Exp $ */
2 1.1 christos
3 1.1 christos /*
4 1.1 christos * Mach Operating System
5 1.1 christos * Copyright (c) 1993 Carnegie Mellon University
6 1.1 christos * All Rights Reserved.
7 1.1 christos *
8 1.1 christos * Permission to use, copy, modify and distribute this software and its
9 1.1 christos * documentation is hereby granted, provided that both the copyright
10 1.1 christos * notice and this permission notice appear in all copies of the
11 1.1 christos * software, derivative works or modified versions, and any portions
12 1.1 christos * thereof, and that both notices appear in supporting documentation.
13 1.1 christos *
14 1.1 christos * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 1.1 christos * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
16 1.1 christos * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 1.1 christos *
18 1.1 christos * Carnegie Mellon requests users of this software to return to
19 1.1 christos *
20 1.1 christos * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 1.1 christos * School of Computer Science
22 1.1 christos * Carnegie Mellon University
23 1.1 christos * Pittsburgh PA 15213-3890
24 1.1 christos *
25 1.1 christos * any improvements or extensions that they make and grant Carnegie Mellon
26 1.1 christos * the rights to redistribute these changes.
27 1.1 christos */
28 1.1 christos
29 1.1 christos /*
30 1.1 christos * File: mips_bcopy.s
31 1.1 christos * Author: Chris Maeda
32 1.1 christos * Date: June 1993
33 1.1 christos *
34 1.1 christos * Fast copy routine. Derived from aligned_block_copy.
35 1.1 christos */
36 1.1 christos
37 1.1 christos
38 1.1 christos #include <mips/asm.h>
39 1.1 christos #define _LOCORE /* XXX not really, just assembly-code source */
40 1.1 christos #include <machine/endian.h>
41 1.1 christos
42 1.1 christos
43 1.1 christos #if defined(LIBC_SCCS) && !defined(lint)
44 1.1 christos ASMSTR("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93")
45 1.1 christos ASMSTR("$NetBSD: bcopy.S,v 1.1 2005/12/21 00:25:56 christos Exp $")
46 1.1 christos #endif /* LIBC_SCCS and not lint */
47 1.1 christos
48 1.1 christos #ifdef __ABICALLS__
49 1.1 christos .abicalls
50 1.1 christos #endif
51 1.1 christos
52 1.1 christos /*
53 1.1 christos * bcopy(caddr_t src, caddr_t dst, unsigned int len)
54 1.1 christos *
55 1.1 christos * a0 src address
56 1.1 christos * a1 dst address
57 1.1 christos * a2 length
58 1.1 christos */
59 1.1 christos
/*
 * Pick the entry-point name and map the symbolic register names used by
 * the copy routine onto the argument registers.
 *
 *   MEMCOPY defined            -> memcpy,  dst in a0, src in a1
 *   MEMMOVE defined (only)     -> memmove, dst in a0, src in a1
 *   neither defined            -> bcopy,   src in a0, dst in a1
 *
 * The byte count is in a2 in every configuration.
 */
#define	SIZEREG		a2

#if defined(MEMCOPY)
#define	FUNCTION	memcpy
#define	DSTREG		a0
#define	SRCREG		a1
#elif defined(MEMMOVE)
#define	FUNCTION	memmove
#define	DSTREG		a0
#define	SRCREG		a1
#else
#define	FUNCTION	bcopy
#define	SRCREG		a0
#define	DSTREG		a1
#endif
75 1.1 christos
/*
 * FUNCTION -- copy SIZEREG bytes from SRCREG to DSTREG.
 *
 * FUNCTION, SRCREG, DSTREG and SIZEREG are preprocessor names defined
 * earlier in this file; depending on the build this routine is bcopy,
 * memcpy or memmove.
 *
 * Inputs:
 *	SRCREG	source address
 *	DSTREG	destination address
 *	SIZEREG	number of bytes to copy
 *
 * Returns:
 *	memcpy/memmove builds only: v0 = original destination address.
 *
 * Registers written: v0 (memcpy/memmove builds), v1, a3, t0-t3, AT, and
 * the three argument registers themselves.
 *
 * Direction: if SRCREG < DSTREG (unsigned) the copy runs from the end of
 * the buffers towards the start, otherwise from the start towards the
 * end.  This is done in every build, memcpy included.
 *
 * Each direction has the same four stages:
 *	aligned dst, aligned src	32 bytes per iteration, then words
 *	aligned dst, unaligned src	word at a time via LWHI/LWLO
 *	unaligned dst			byte at a time
 *	remaining 0-3 bytes		byte at a time
 *
 * The whole routine is assembled under ".set noreorder", so the
 * instruction written after each branch or jump is its delay slot and is
 * executed whether or not the branch is taken.  Statement order below is
 * therefore significant.
 *
 * LEAF/END come from <mips/asm.h> and LWHI/LWLO from <machine/endian.h>;
 * neither definition is visible here.  LWHI/LWLO are presumably the
 * endian-appropriate lwl/lwr pair -- confirm against that header.
 *
 * Numeric local labels are reused: the forward half and the backward half
 * each have their own 1-5, and every "Nf"/"Nb" reference resolves to the
 * nearest one in that direction.
 */
LEAF(FUNCTION)
	.set	noat			# AT is used explicitly as a scratch register
	.set	noreorder		# delay slots are filled by hand

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* Set up the return value while DSTREG still holds the original dst. */
	move	v0,DSTREG
#endif
	/*
	 * Choose the copy direction.  The andi that follows the branch is
	 * in its delay slot; on the backward path its result is simply
	 * recomputed at label 6.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 = (src < dst), unsigned
	bne	t0,zero,6f		# src below dst: copy backwards

	/*
	 * Forward copy: dispatch on alignment.
	 *
	 * Cases and their observed frequency, per the original author
	 * (measured on a DECstation 5000/200 inside a Mach kernel):
	 *
	 *	aligned -> aligned		(mostly)
	 *	unaligned -> aligned		(sometimes)
	 *	aligned,unaligned -> unaligned	(almost never)
	 *
	 * A further case could be added for source and destination that
	 * are both unaligned but mutually alignable, e.g. both on the
	 * same halfword boundary.  It is not implemented; any unaligned
	 * destination takes the byte loop.
	 */
	andi	t1,DSTREG,3		# low 2 bits of dst (word misalignment)
	bne	t1,zero,3f		# dst unaligned: byte copy
	andi	t0,SRCREG,3		# (delay slot) low 2 bits of src
	bne	t0,zero,5f		# src unaligned, dst aligned
					# (delay slot is the li below)

	/*
	 * Forward aligned -> aligned copy, 8*4 bytes at a time.
	 */
	li	AT,-32			# mask that clears the low 5 bits
	and	t0,SIZEREG,AT		# count truncated to a multiple of 32
	addu	a3,SRCREG,t0		# run fast loop up to this address
	sltu	AT,SRCREG,a3		# any work to do?
	beq	AT,zero,2f
	subu	SIZEREG,t0		# (delay slot) bytes left after fast loop

	/*
	 * Fast loop body: two groups of four words.  The pointer bumps are
	 * placed between the loads and the stores, so the second group
	 * addresses its words with negative offsets from the already
	 * advanced pointers.
	 */
1:	# cp
	lw	t3,0(SRCREG)
	lw	v1,4(SRCREG)
	lw	t0,8(SRCREG)
	lw	t1,12(SRCREG)
	addu	SRCREG,32
	sw	t3,0(DSTREG)
	sw	v1,4(DSTREG)
	sw	t0,8(DSTREG)
	sw	t1,12(DSTREG)
	lw	t1,-4(SRCREG)
	lw	t0,-8(SRCREG)
	lw	v1,-12(SRCREG)
	lw	t3,-16(SRCREG)
	addu	DSTREG,32
	sw	t1,-4(DSTREG)
	sw	t0,-8(DSTREG)
	sw	v1,-12(DSTREG)
	bne	SRCREG,a3,1b
	sw	t3,-16(DSTREG)		# (delay slot) last store of the block

	/*
	 * Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,3		# t2 = byte count mod 4
	subu	t2,SIZEREG,t2		# t2 = number of words to copy * 4
	beq	t2,zero,3f
	addu	t0,SRCREG,t2		# (delay slot) stop at t0
	subu	SIZEREG,SIZEREG,t2	# 0-3 bytes remain for the byte loop
1:
	lw	t3,0(SRCREG)
	addu	SRCREG,4
	sw	t3,0(DSTREG)
	bne	SRCREG,t0,1b
	addu	DSTREG,4		# (delay slot)

	/*
	 * Byte loop: tail of the word paths and the whole copy when the
	 * destination is unaligned.
	 *
	 * NOTE(review): bgtz is a signed test, so a remaining count of
	 * 2^31 or more ends this loop after a single byte.
	 */
3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,0(SRCREG)
	addu	SRCREG,1
	sb	t3,0(DSTREG)
	subu	SIZEREG,1
	bgtz	SIZEREG,1b
	addu	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 * Forward copy from unaligned source to aligned dest.
	 * Each word is assembled from the unaligned source with the
	 * LWHI/LWLO pair and stored with an ordinary sw.
	 */
5:	# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	subu	a3,SIZEREG,t0		# number of bytes moved as whole words
	beq	a3,zero,3b		# fewer than 4 bytes: byte loop only
	nop
	move	SIZEREG,t0		# this many to do after we are done
	addu	a3,SRCREG,a3		# stop point

	/*
	 * The pointer bumps use addu, not addi: addi raises an integer
	 * overflow exception when an address crosses 0x7fffffff, and the
	 * rest of this routine already uses the non-trapping form.
	 */
1:
	LWHI	t3,0(SRCREG)
	LWLO	t3,3(SRCREG)
	addu	SRCREG,4
	sw	t3,0(DSTREG)
	bne	SRCREG,a3,1b
	addu	DSTREG,4		# (delay slot)

	j	3b			# finish the 0-3 trailing bytes
	nop

	/*
	 * Backward copy -- mirror image of the forward code above.
	 * Both pointers are first moved one past the end of their buffers
	 * and then walk downwards, so all accesses use negative offsets
	 * (or positive offsets from an already decremented pointer).
	 */
6:	# backcopy -- based on above
	addu	SRCREG,SIZEREG		# src = one past end of source
	addu	DSTREG,SIZEREG		# dst = one past end of destination
	andi	t1,DSTREG,3		# low 2 bits of dst end
	bne	t1,zero,3f		# dst end unaligned: byte copy
	andi	t0,SRCREG,3		# (delay slot) low 2 bits of src end
	bne	t0,zero,5f		# src unaligned, dst aligned
					# (delay slot is the li below)

	/*
	 * Backward aligned -> aligned copy, 8*4 bytes at a time.
	 */
	li	AT,-32			# mask that clears the low 5 bits
	and	t0,SIZEREG,AT		# count truncated to a multiple of 32
	beq	t0,zero,2f		# any work to do?
	subu	SIZEREG,t0		# (delay slot) bytes left after fast loop
	subu	a3,SRCREG,t0		# run fast loop down to this address

	/*
	 * Fast loop body: the upper four words of the 32-byte block are
	 * moved first, then the lower four, with the pointer decrements
	 * placed between the loads and the stores.
	 */
1:	# cp
	lw	t3,-16(SRCREG)
	lw	v1,-12(SRCREG)
	lw	t0,-8(SRCREG)
	lw	t1,-4(SRCREG)
	subu	SRCREG,32
	sw	t3,-16(DSTREG)
	sw	v1,-12(DSTREG)
	sw	t0,-8(DSTREG)
	sw	t1,-4(DSTREG)
	lw	t1,12(SRCREG)
	lw	t0,8(SRCREG)
	lw	v1,4(SRCREG)
	lw	t3,0(SRCREG)
	subu	DSTREG,32
	sw	t1,12(DSTREG)
	sw	t0,8(DSTREG)
	sw	v1,4(DSTREG)
	bne	SRCREG,a3,1b
	sw	t3,0(DSTREG)		# (delay slot) last store of the block

	/*
	 * Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,3		# t2 = byte count mod 4
	subu	t2,SIZEREG,t2		# t2 = number of words to copy * 4
	beq	t2,zero,3f
	subu	t0,SRCREG,t2		# (delay slot) stop at t0
	subu	SIZEREG,SIZEREG,t2	# 0-3 bytes remain for the byte loop
1:
	lw	t3,-4(SRCREG)
	subu	SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,t0,1b
	subu	DSTREG,4		# (delay slot)

	/*
	 * Byte loop, backward.  The same signed-compare caveat as the
	 * forward byte loop applies to the bgtz below.
	 */
3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,-1(SRCREG)
	subu	SRCREG,1
	sb	t3,-1(DSTREG)
	subu	SIZEREG,1
	bgtz	SIZEREG,1b
	subu	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 * Backward copy from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	subu	a3,SIZEREG,t0		# number of bytes moved as whole words
	beq	a3,zero,3b		# fewer than 4 bytes: byte loop only
	nop
	move	SIZEREG,t0		# this many to do after we are done
	subu	a3,SRCREG,a3		# stop point

1:
	LWHI	t3,-4(SRCREG)
	LWLO	t3,-1(SRCREG)
	subu	SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,a3,1b
	subu	DSTREG,4		# (delay slot)

	j	3b			# finish the 0-3 leading bytes
	nop

	.set	reorder
	.set	at
END(FUNCTION)
293