/*	$NetBSD: bcopy.S,v 1.4 2011/08/27 13:23:52 bouyer Exp $	*/

/*
 * Mach Operating System
 * Copyright (c) 1993 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 *	File:	mips_bcopy.s
 *	Author:	Chris Maeda
 *	Date:	June 1993
 *
 *	Fast copy routine.  Derived from aligned_block_copy.
 */


/* Assembler helper macros used below (LEAF/END, RCSID, PTR_*, REG_*, SZREG). */
#include <mips/asm.h>

/*
 * <machine/endian.h> is pulled in with _LOCORE defined.
 * XXX not really locore, this is just assembly-code source.
 */
#ifndef _LOCORE
#define _LOCORE
#endif
#include <machine/endian.h>


#if defined(LIBC_SCCS) && !defined(lint)
#if 0
	/* original CMU identification, kept for reference only */
	RCSID("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93")
#else
	RCSID("$NetBSD: bcopy.S,v 1.4 2011/08/27 13:23:52 bouyer Exp $")
#endif
#endif /* LIBC_SCCS and not lint */

/*
 *	bcopy(caddr_t src, caddr_t dst, unsigned int len)
 *
 *	a0	src address
 *	a1	dst address
 *	a2	length
 *
 *	When this file is built with MEMCOPY or MEMMOVE defined, the entry
 *	point is memcpy/memmove instead and the first two arguments swap:
 *
 *	a0	dst address
 *	a1	src address
 *	a2	length
 */

/*
 * Select the entry-point name and the argument registers.
 *
 *	MEMCOPY defined		memcpy,  dst in a0, src in a1
 *	MEMMOVE defined		memmove, dst in a0, src in a1
 *	neither			bcopy,   src in a0, dst in a1
 *
 * MEMCOPY wins if both are defined.
 */
#if defined(MEMCOPY)
#define	FUNCTION	memcpy
#define	SRCREG		a1
#define	DSTREG		a0
#elif defined(MEMMOVE)
#define	FUNCTION	memmove
#define	SRCREG		a1
#define	DSTREG		a0
#else
#define	FUNCTION	bcopy
#define	SRCREG		a0
#define	DSTREG		a1
#endif

/* The byte count is the third argument in every variant. */
#define	SIZEREG		a2

/*
 * FUNCTION: copy SIZEREG bytes from SRCREG to DSTREG.
 *
 * In:	SRCREG	source address
 *	DSTREG	destination address
 *	SIZEREG	byte count (treated as unsigned)
 * Out:	v0	original destination (memcpy/memmove builds only)
 * Uses:	t0-t3, v1, a3 and AT as scratch; SRCREG, DSTREG and SIZEREG
 *	are modified.
 *
 * The direction is chosen from the pointers alone: if SRCREG < DSTREG
 * the copy runs backwards from the end of both buffers, otherwise it
 * runs forwards.  This is done in every variant, so overlapping buffers
 * are copied correctly.
 *
 * The whole body is assembled under ".set noreorder": the instruction
 * that follows a branch is its delay slot and executes whether or not
 * the branch is taken.  Several delay slots below do useful work for the
 * fall-through path and are merely harmless on the taken path.
 *
 * Numeric labels are reused between the forward and the backward half;
 * "Nf"/"Nb" always bind to the nearest definition in that direction.
 */
LEAF(FUNCTION)
	.set	noat
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	move	v0,DSTREG
#endif
	/*
	 *	Make sure we can copy forwards.
	 *	(The delay slot of this branch is the first andi below; the
	 *	backward path recomputes t1, so executing it is harmless.)
	 */
	sltu	t0,SRCREG,DSTREG	# t0 == SRCREG < DSTREG
	bne	t0,zero,6f		# copy backwards

	/*
	 *	There are four alignment cases (with frequency)
	 *	(Based on measurements taken with a DECstation 5000/200
	 *	 inside a Mach kernel.)
	 *
	 *	aligned   -> aligned		(mostly)
	 *	unaligned -> aligned		(sometimes)
	 *	aligned,unaligned -> unaligned	(almost never)
	 *
	 *	Note that we could add another case that checks if
	 *	the destination and source are unaligned but the
	 *	copy is alignable.  eg if src and dest are both
	 *	on a halfword boundary.
	 */
	andi	t1,DSTREG,(SZREG-1)	# get last bits of dest
	bne	t1,zero,3f		# dest unaligned: byte copy everything
	andi	t0,SRCREG,(SZREG-1)	# (delay slot) get last bits of src
	bne	t0,zero,5f		# src unaligned, dest aligned

	/*
	 *	Forward aligned->aligned copy, 8 words at a time.
	 *	The li below sits in the delay slot of the branch above;
	 *	AT is not used on the path to 5f.
	 */
98:
	li	AT,-(SZREG*8)
	and	t0,SIZEREG,AT		# count truncated to multiple of 8*SZREG
	PTR_ADDU a3,SRCREG,t0		# run fast loop up to this addr
	sltu	AT,SRCREG,a3		# any work to do?
	beq	AT,zero,2f
	PTR_SUBU SIZEREG,t0		# (delay slot) bytes left after fast loop

	/*
	 *	loop body
	 *	First four words are addressed from the old pointers, the
	 *	last four with negative offsets from the advanced pointers.
	 */
1:	# cp
	REG_L	t3,(0*SZREG)(SRCREG)
	REG_L	v1,(1*SZREG)(SRCREG)
	REG_L	t0,(2*SZREG)(SRCREG)
	REG_L	t1,(3*SZREG)(SRCREG)
	PTR_ADDU SRCREG,SZREG*8
	REG_S	t3,(0*SZREG)(DSTREG)
	REG_S	v1,(1*SZREG)(DSTREG)
	REG_S	t0,(2*SZREG)(DSTREG)
	REG_S	t1,(3*SZREG)(DSTREG)
	REG_L	t1,(-1*SZREG)(SRCREG)
	REG_L	t0,(-2*SZREG)(SRCREG)
	REG_L	v1,(-3*SZREG)(SRCREG)
	REG_L	t3,(-4*SZREG)(SRCREG)
	PTR_ADDU DSTREG,SZREG*8
	REG_S	t1,(-1*SZREG)(DSTREG)
	REG_S	t0,(-2*SZREG)(DSTREG)
	REG_S	v1,(-3*SZREG)(DSTREG)
	bne	SRCREG,a3,1b
	REG_S	t3,(-4*SZREG)(DSTREG)	# (delay slot) last store of the group

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,(SZREG-1)	# t2 = byte count mod SZREG
	PTR_SUBU t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq	t2,zero,3f
	PTR_ADDU t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU SIZEREG,SIZEREG,t2	# SIZEREG = trailing bytes
1:
	REG_L	t3,0(SRCREG)
	PTR_ADDU SRCREG,SZREG
	REG_S	t3,0(DSTREG)
	bne	SRCREG,t0,1b
	PTR_ADDU DSTREG,SZREG		# (delay slot)

	/*
	 *	Copy the remaining SIZEREG bytes one at a time.
	 *	SIZEREG is non-zero on loop entry and decreases by one per
	 *	iteration, so testing against zero terminates exactly; a
	 *	signed test would stop early for counts with the top bit set.
	 */
3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,0(SRCREG)
	PTR_ADDU SRCREG,1
	sb	t3,0(DSTREG)
	PTR_SUBU SIZEREG,1
	bne	SIZEREG,zero,1b
	PTR_ADDU DSTREG,1		# (delay slot)

4:	# copydone
	.set	at	#-mfix-loongson2f-btb
	j	ra
	nop
	.set	noat

	/*
	 *	Copy from unaligned source to aligned dest.
	 *	Each word is assembled with the REG_LHI/REG_LLO pair and
	 *	stored with one aligned REG_S.
	 */
5:	# destaligned
	andi	t0,SIZEREG,(SZREG-1)	# t0 = bytecount mod SZREG
	PTR_SUBU a3,SIZEREG,t0		# number of bytes in whole words
	beq	a3,zero,3b		# less than one word: byte copy
	nop
	move	SIZEREG,t0		# this many to do after we are done
	PTR_ADDU a3,SRCREG,a3		# stop point

1:
	REG_LHI	t3,0(SRCREG)
	REG_LLO	t3,SZREG-1(SRCREG)
	PTR_ADDU SRCREG,SZREG		# non-trapping add, as everywhere else
	REG_S	t3,0(DSTREG)
	bne	SRCREG,a3,1b
	PTR_ADDU DSTREG,SZREG		# (delay slot)

	b	3b			# finish the tail with the byte loop
	nop

	/*
	 *	Backward copy.  Both pointers are first moved one past the
	 *	end of their buffers; every access below uses addresses
	 *	just under the current pointers.
	 */
6:	# backcopy -- based on above
	PTR_ADDU SRCREG,SIZEREG
	PTR_ADDU DSTREG,SIZEREG
	andi	t1,DSTREG,SZREG-1	# get low bits of dest (end address)
	bne	t1,zero,3f		# dest unaligned: byte copy everything
	andi	t0,SRCREG,SZREG-1	# (delay slot) get low bits of src
	bne	t0,zero,5f		# src unaligned, dest aligned

	/*
	 *	Backward aligned->aligned copy, 8 words at a time.
	 *	The li below sits in the delay slot of the branch above.
	 */
	li	AT,(-8*SZREG)
	and	t0,SIZEREG,AT		# count truncated to multiple of 8*SZREG
	beq	t0,zero,2f		# any work to do?
	PTR_SUBU SIZEREG,t0		# (delay slot) bytes left after fast loop
	PTR_SUBU a3,SRCREG,t0		# run fast loop down to this addr

	/*
	 *	loop body
	 */
1:	# cp
	REG_L	t3,(-4*SZREG)(SRCREG)
	REG_L	v1,(-3*SZREG)(SRCREG)
	REG_L	t0,(-2*SZREG)(SRCREG)
	REG_L	t1,(-1*SZREG)(SRCREG)
	PTR_SUBU SRCREG,8*SZREG
	REG_S	t3,(-4*SZREG)(DSTREG)
	REG_S	v1,(-3*SZREG)(DSTREG)
	REG_S	t0,(-2*SZREG)(DSTREG)
	REG_S	t1,(-1*SZREG)(DSTREG)
	REG_L	t1,(3*SZREG)(SRCREG)
	REG_L	t0,(2*SZREG)(SRCREG)
	REG_L	v1,(1*SZREG)(SRCREG)
	REG_L	t3,(0*SZREG)(SRCREG)
	PTR_SUBU DSTREG,8*SZREG
	REG_S	t1,(3*SZREG)(DSTREG)
	REG_S	t0,(2*SZREG)(DSTREG)
	REG_S	v1,(1*SZREG)(DSTREG)
	bne	SRCREG,a3,1b
	REG_S	t3,(0*SZREG)(DSTREG)	# (delay slot) last store of the group

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,SZREG-1	# t2 = byte count mod SZREG
	PTR_SUBU t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq	t2,zero,3f
	PTR_SUBU t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU SIZEREG,SIZEREG,t2	# SIZEREG = leading bytes still to copy
1:
	REG_L	t3,-SZREG(SRCREG)
	PTR_SUBU SRCREG,SZREG
	REG_S	t3,-SZREG(DSTREG)
	bne	SRCREG,t0,1b
	PTR_SUBU DSTREG,SZREG		# (delay slot)

	/*
	 *	Copy the remaining SIZEREG bytes one at a time, downwards.
	 *	As in the forward loop, the count is compared against zero
	 *	rather than tested for sign.
	 */
3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,-1(SRCREG)
	PTR_SUBU SRCREG,1
	sb	t3,-1(DSTREG)
	PTR_SUBU SIZEREG,1
	bne	SIZEREG,zero,1b
	PTR_SUBU DSTREG,1		# (delay slot)

4:	# copydone
	.set	at	#-mfix-loongson2f-btb
	j	ra
	nop
	.set	noat

	/*
	 *	Copy from unaligned source to aligned dest, backwards.
	 */
5:	# destaligned
	andi	t0,SIZEREG,SZREG-1	# t0 = bytecount mod SZREG
	PTR_SUBU a3,SIZEREG,t0		# number of bytes in whole words
	beq	a3,zero,3b		# less than one word: byte copy
	nop
	move	SIZEREG,t0		# this many to do after we are done
	PTR_SUBU a3,SRCREG,a3		# stop point

1:
	REG_LHI	t3,-SZREG(SRCREG)
	REG_LLO	t3,-1(SRCREG)
	PTR_SUBU SRCREG,SZREG
	REG_S	t3,-SZREG(DSTREG)
	bne	SRCREG,a3,1b
	PTR_SUBU DSTREG,SZREG		# (delay slot)

	b	3b			# finish the tail with the byte loop
	nop

	.set	reorder
	.set	at
	END(FUNCTION)
299