bcopy.S revision 1.2.40.3 1 1.2.40.3 matt /* $NetBSD: bcopy.S,v 1.2.40.3 2009/08/20 10:03:43 matt Exp $ */
2 1.1 christos
3 1.1 christos /*
4 1.1 christos * Mach Operating System
5 1.1 christos * Copyright (c) 1993 Carnegie Mellon University
6 1.1 christos * All Rights Reserved.
7 1.1 christos *
8 1.1 christos * Permission to use, copy, modify and distribute this software and its
9 1.1 christos * documentation is hereby granted, provided that both the copyright
10 1.1 christos * notice and this permission notice appear in all copies of the
11 1.1 christos * software, derivative works or modified versions, and any portions
12 1.1 christos * thereof, and that both notices appear in supporting documentation.
13 1.1 christos *
14 1.1 christos * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 1.1 christos * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
16 1.1 christos * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 1.1 christos *
18 1.1 christos * Carnegie Mellon requests users of this software to return to
19 1.1 christos *
20 1.1 christos * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 1.1 christos * School of Computer Science
22 1.1 christos * Carnegie Mellon University
23 1.1 christos * Pittsburgh PA 15213-3890
24 1.1 christos *
25 1.1 christos * any improvements or extensions that they make and grant Carnegie Mellon
26 1.1 christos * the rights to redistribute these changes.
27 1.1 christos */
28 1.1 christos
29 1.1 christos /*
30 1.1 christos * File: mips_bcopy.s
31 1.1 christos * Author: Chris Maeda
32 1.1 christos * Date: June 1993
33 1.1 christos *
34 1.1 christos * Fast copy routine. Derived from aligned_block_copy.
35 1.1 christos */
36 1.1 christos
37 1.1 christos
38 1.1 christos #include <mips/asm.h>
39 1.2 tsutsui #ifndef _LOCORE
40 1.1 christos #define _LOCORE /* XXX not really, just assembly-code source */
41 1.2 tsutsui #endif
42 1.1 christos #include <machine/endian.h>
43 1.1 christos
44 1.1 christos
45 1.1 christos #if defined(LIBC_SCCS) && !defined(lint)
46 1.2.40.1 matt /* RCSID("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93") */
47 1.2.40.3 matt RCSID("$NetBSD: bcopy.S,v 1.2.40.3 2009/08/20 10:03:43 matt Exp $")
48 1.1 christos #endif /* LIBC_SCCS and not lint */
49 1.1 christos
/*
 * bcopy(caddr_t src, caddr_t dst, unsigned int len)
 *
 *	a0	src address
 *	a1	dst address
 *	a2	length
 *
 * When this file is built with MEMCOPY or MEMMOVE defined, the entry
 * point is memcpy or memmove instead and the pointer arguments are
 * swapped: a0 is the dst address and a1 is the src address.
 */
57 1.1 christos
/*
 * Pick the exported name and the argument registers for this build.
 * memcpy/memmove take (dst, src, len); bcopy takes (src, dst, len).
 * MEMCOPY wins when both MEMCOPY and MEMMOVE are defined.
 */
#if defined(MEMCOPY)
#define	FUNCTION	memcpy
#define	SRCREG		a1
#define	DSTREG		a0
#elif defined(MEMMOVE)
#define	FUNCTION	memmove
#define	SRCREG		a1
#define	DSTREG		a0
#else
#define	FUNCTION	bcopy
#define	SRCREG		a0
#define	DSTREG		a1
#endif

/* The byte count is the third argument in every variant. */
#define	SIZEREG		a2
73 1.1 christos
/*
 * FUNCTION: copy SIZEREG bytes from SRCREG to DSTREG.
 *
 * In:	SRCREG	source address
 *	DSTREG	destination address
 *	SIZEREG	byte count, treated as unsigned
 * Out:	v0 = original DSTREG in MEMCOPY/MEMMOVE builds only.
 * Clobbers: t0-t3, v1, a3, AT; a0-a2 are consumed.
 *
 * Direction: when src < dst (unsigned) both pointers are advanced to the
 * end of their buffers and the copy runs backwards; otherwise it runs
 * forwards.  Every build variant does this.
 *
 * Strategy, in each direction:
 *	dst unaligned			-> byte loop for the whole length
 *	dst aligned, src unaligned	-> REG_LHI/REG_LLO + REG_S loop, then bytes
 *	both aligned			-> 8-register unrolled loop, then a
 *					   one-register loop, then bytes
 * REG_LHI/REG_LLO come from <mips/asm.h>; presumably they are the
 * endian-appropriate unaligned-load pair -- confirm against that header.
 *
 * The body is assembled with ".set noreorder": the instruction that
 * follows each branch is its delay slot and executes whether or not the
 * branch is taken.  Delay slots are indented by one extra space.  Do not
 * insert or reorder instructions without re-checking every slot.
 */
LEAF(FUNCTION)
	.set	noat
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	move	v0,DSTREG
#endif
	/*
	 * Make sure we can copy forwards.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 = (src < dst), unsigned
	bne	t0,zero,6f		# src below dst: copy backwards
	/* delay slot: the andi below; t1 is recomputed at 6: so harmless */

	/*
	 * There are four alignment cases (with frequency)
	 * (Based on measurements taken with a DECstation 5000/200
	 * inside a Mach kernel.)
	 *
	 *	aligned		  -> aligned	(mostly)
	 *	unaligned	  -> aligned	(sometimes)
	 *	aligned,unaligned -> unaligned	(almost never)
	 *
	 * Note that we could add another case that checks if
	 * the destination and source are unaligned but the
	 * copy is alignable.  eg if src and dest are both
	 * on a halfword boundary.
	 */
	 andi	t1,DSTREG,(SZREG-1)	# low bits of dest
	bne	t1,zero,3f		# dest unaligned: byte copy it all
	 andi	t0,SRCREG,(SZREG-1)	# low bits of src
	bne	t0,zero,5f		# src unaligned, dest aligned
	/* delay slot: the li at 98: below; AT is not used at 5: */

	/*
	 * Forward aligned->aligned copy, 8 registers (8*SZREG bytes)
	 * at a time.
	 */
98:
	 li	AT,-(SZREG*8)
	and	t0,SIZEREG,AT		# count truncated to multiple of 8*SZREG
	PTR_ADDU a3,SRCREG,t0		# run fast loop up to this addr
	sltu	AT,SRCREG,a3		# any work to do?
	beq	AT,zero,2f
	 PTR_SUBU SIZEREG,t0		# bytes left for the slower loops

	/*
	 * loop body: four loads, four stores, twice per iteration.  Each
	 * pointer is bumped in the middle, so the second half uses
	 * negative offsets.
	 */
1:	# cp
	REG_L	t3,(0*SZREG)(SRCREG)
	REG_L	v1,(1*SZREG)(SRCREG)
	REG_L	t0,(2*SZREG)(SRCREG)
	REG_L	t1,(3*SZREG)(SRCREG)
	PTR_ADDU SRCREG,SZREG*8
	REG_S	t3,(0*SZREG)(DSTREG)
	REG_S	v1,(1*SZREG)(DSTREG)
	REG_S	t0,(2*SZREG)(DSTREG)
	REG_S	t1,(3*SZREG)(DSTREG)
	REG_L	t1,(-1*SZREG)(SRCREG)
	REG_L	t0,(-2*SZREG)(SRCREG)
	REG_L	v1,(-3*SZREG)(SRCREG)
	REG_L	t3,(-4*SZREG)(SRCREG)
	PTR_ADDU DSTREG,SZREG*8
	REG_S	t1,(-1*SZREG)(DSTREG)
	REG_S	t0,(-2*SZREG)(DSTREG)
	REG_S	v1,(-3*SZREG)(DSTREG)
	bne	SRCREG,a3,1b
	 REG_S	t3,(-4*SZREG)(DSTREG)

	/*
	 * Copy one register at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,(SZREG-1)	# t2 = byte count mod SZREG
	PTR_SUBU t2,SIZEREG,t2		# t2 = bytes to move in whole registers
	beq	t2,zero,3f
	 PTR_ADDU t0,SRCREG,t2		# stop at t0
	PTR_SUBU SIZEREG,SIZEREG,t2	# leftover bytes for the byte loop
1:
	REG_L	t3,0(SRCREG)
	PTR_ADDU SRCREG,SZREG
	REG_S	t3,0(DSTREG)
	bne	SRCREG,t0,1b
	 PTR_ADDU DSTREG,SZREG

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	 nop
1:
	lb	t3,0(SRCREG)
	PTR_ADDU SRCREG,1
	sb	t3,0(DSTREG)
	PTR_SUBU SIZEREG,1
	/*
	 * The count is unsigned and non-zero on entry, so loop until it
	 * reaches exactly zero.  A signed test (bgtz) would stop after
	 * one byte for lengths with the top bit set.
	 */
	bne	SIZEREG,zero,1b
	 PTR_ADDU DSTREG,1

4:	# copydone
	j	ra
	 nop

	/*
	 * Forward copy from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi	t0,SIZEREG,(SZREG-1)	# t0 = bytecount mod SZREG
	PTR_SUBU a3,SIZEREG,t0		# bytes to move in whole registers
	beq	a3,zero,3b
	 nop
	move	SIZEREG,t0		# this many to do after we are done
	PTR_ADDU a3,SRCREG,a3		# stop point

1:
	REG_LHI	t3,0(SRCREG)
	REG_LLO	t3,SZREG-1(SRCREG)
	/*
	 * Pointer arithmetic must not trap on signed overflow, so use the
	 * non-trapping add here, as the rest of the routine does.
	 */
	PTR_ADDU SRCREG,SZREG
	REG_S	t3,0(DSTREG)
	bne	SRCREG,a3,1b
	 PTR_ADDU DSTREG,SZREG

	b	3b			# finish the tail bytewise
	 nop

	/*
	 * Backward copy -- mirrors the forward code above.  Both pointers
	 * are first moved one past the end of their buffers, so the
	 * alignment tests below apply to the END addresses.
	 */
6:	# backcopy
	PTR_ADDU SRCREG,SIZEREG
	PTR_ADDU DSTREG,SIZEREG
	andi	t1,DSTREG,SZREG-1	# low bits of dest end
	bne	t1,zero,3f		# dest end unaligned: byte copy it all
	 andi	t0,SRCREG,SZREG-1	# low bits of src end
	bne	t0,zero,5f		# src end unaligned, dest end aligned
	/* delay slot: the li below; AT is not used at 5: */

	/*
	 * Backward aligned->aligned copy, 8 registers (8*SZREG bytes)
	 * at a time.
	 */
	 li	AT,(-8*SZREG)
	and	t0,SIZEREG,AT		# count truncated to multiple of 8*SZREG
	beq	t0,zero,2f		# any work to do?
	 PTR_SUBU SIZEREG,t0		# bytes left for the slower loops
	PTR_SUBU a3,SRCREG,t0		# run fast loop down to this addr

	/*
	 * loop body: as in the forward loop, with the pointers moving
	 * down instead of up.
	 */
1:	# cp
	REG_L	t3,(-4*SZREG)(SRCREG)
	REG_L	v1,(-3*SZREG)(SRCREG)
	REG_L	t0,(-2*SZREG)(SRCREG)
	REG_L	t1,(-1*SZREG)(SRCREG)
	PTR_SUBU SRCREG,8*SZREG
	REG_S	t3,(-4*SZREG)(DSTREG)
	REG_S	v1,(-3*SZREG)(DSTREG)
	REG_S	t0,(-2*SZREG)(DSTREG)
	REG_S	t1,(-1*SZREG)(DSTREG)
	REG_L	t1,(3*SZREG)(SRCREG)
	REG_L	t0,(2*SZREG)(SRCREG)
	REG_L	v1,(1*SZREG)(SRCREG)
	REG_L	t3,(0*SZREG)(SRCREG)
	PTR_SUBU DSTREG,8*SZREG
	REG_S	t1,(3*SZREG)(DSTREG)
	REG_S	t0,(2*SZREG)(DSTREG)
	REG_S	v1,(1*SZREG)(DSTREG)
	bne	SRCREG,a3,1b
	 REG_S	t3,(0*SZREG)(DSTREG)

	/*
	 * Copy one register at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,SZREG-1	# t2 = byte count mod SZREG
	PTR_SUBU t2,SIZEREG,t2		# t2 = bytes to move in whole registers
	beq	t2,zero,3f
	 PTR_SUBU t0,SRCREG,t2		# stop at t0
	PTR_SUBU SIZEREG,SIZEREG,t2	# leftover bytes for the byte loop
1:
	REG_L	t3,-SZREG(SRCREG)
	PTR_SUBU SRCREG,SZREG
	REG_S	t3,-SZREG(DSTREG)
	bne	SRCREG,t0,1b
	 PTR_SUBU DSTREG,SZREG

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	 nop
1:
	lb	t3,-1(SRCREG)
	PTR_SUBU SRCREG,1
	sb	t3,-1(DSTREG)
	PTR_SUBU SIZEREG,1
	/* unsigned count: loop until exactly zero (see forward byte loop) */
	bne	SIZEREG,zero,1b
	 PTR_SUBU DSTREG,1

4:	# copydone
	j	ra
	 nop

	/*
	 * Backward copy from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi	t0,SIZEREG,SZREG-1	# t0 = bytecount mod SZREG
	PTR_SUBU a3,SIZEREG,t0		# bytes to move in whole registers
	beq	a3,zero,3b
	 nop
	move	SIZEREG,t0		# this many to do after we are done
	PTR_SUBU a3,SRCREG,a3		# stop point

1:
	REG_LHI	t3,-SZREG(SRCREG)
	REG_LLO	t3,-1(SRCREG)
	PTR_SUBU SRCREG,SZREG
	REG_S	t3,-SZREG(DSTREG)
	bne	SRCREG,a3,1b
	 PTR_SUBU DSTREG,SZREG

	b	3b			# finish the tail bytewise
	 nop

	.set	reorder
	.set	at
END(FUNCTION)
292