bcopy.S revision 1.2.40.1 1 1.2.40.1 matt /* $NetBSD: bcopy.S,v 1.2.40.1 2009/08/16 03:02:47 matt Exp $ */
2 1.1 christos
3 1.1 christos /*
4 1.1 christos * Mach Operating System
5 1.1 christos * Copyright (c) 1993 Carnegie Mellon University
6 1.1 christos * All Rights Reserved.
7 1.1 christos *
8 1.1 christos * Permission to use, copy, modify and distribute this software and its
9 1.1 christos * documentation is hereby granted, provided that both the copyright
10 1.1 christos * notice and this permission notice appear in all copies of the
11 1.1 christos * software, derivative works or modified versions, and any portions
12 1.1 christos * thereof, and that both notices appear in supporting documentation.
13 1.1 christos *
14 1.1 christos * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 1.1 christos * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
16 1.1 christos * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 1.1 christos *
18 1.1 christos * Carnegie Mellon requests users of this software to return to
19 1.1 christos *
20 1.1 christos * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 1.1 christos * School of Computer Science
22 1.1 christos * Carnegie Mellon University
23 1.1 christos * Pittsburgh PA 15213-3890
24 1.1 christos *
25 1.1 christos * any improvements or extensions that they make and grant Carnegie Mellon
26 1.1 christos * the rights to redistribute these changes.
27 1.1 christos */
28 1.1 christos
29 1.1 christos /*
30 1.1 christos * File: mips_bcopy.s
31 1.1 christos * Author: Chris Maeda
32 1.1 christos * Date: June 1993
33 1.1 christos *
34 1.1 christos * Fast copy routine. Derived from aligned_block_copy.
35 1.1 christos */
36 1.1 christos
37 1.1 christos
38 1.1 christos #include <mips/asm.h>
39 1.2 tsutsui #ifndef _LOCORE
40 1.1 christos #define _LOCORE /* XXX not really, just assembly-code source */
41 1.2 tsutsui #endif
42 1.1 christos #include <machine/endian.h>
43 1.1 christos
44 1.1 christos
45 1.1 christos #if defined(LIBC_SCCS) && !defined(lint)
46 1.2.40.1 matt /* RCSID("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93") */
47 1.2.40.1 matt RCSID("$NetBSD: bcopy.S,v 1.2.40.1 2009/08/16 03:02:47 matt Exp $")
48 1.1 christos #endif /* LIBC_SCCS and not lint */
49 1.1 christos
50 1.1 christos /*
51 1.1 christos * bcopy(caddr_t src, caddr_t dst, unsigned int len)
52 1.1 christos *
53 1.1 christos * a0 src address
54 1.1 christos * a1 dst address
55 1.1 christos * a2 length
56 1.1 christos */
57 1.1 christos
/*
 * Select the exported symbol and the registers that carry its arguments.
 *
 *   MEMCOPY  -> memcpy:   destination in a0, source in a1
 *   MEMMOVE  -> memmove:  destination in a0, source in a1
 *   neither  -> bcopy:    source in a0, destination in a1
 *
 * MEMCOPY takes precedence if both are defined.  The length is always in a2.
 */
#if defined(MEMCOPY)
#define	FUNCTION	memcpy
#define	SRCREG		a1
#define	DSTREG		a0
#elif defined(MEMMOVE)
#define	FUNCTION	memmove
#define	SRCREG		a1
#define	DSTREG		a0
#else
#define	FUNCTION	bcopy
#define	SRCREG		a0
#define	DSTREG		a1
#endif

#define	SIZEREG		a2
73 1.1 christos
/*
 * FUNCTION (bcopy, memcpy or memmove, chosen by the macros above)
 *
 * Copies SIZEREG bytes from SRCREG to DSTREG and tolerates overlapping
 * buffers: when src < dst the copy runs from the top of the buffers down,
 * otherwise it runs from the bottom up.
 *
 * In:     SRCREG = source address
 *         DSTREG = destination address
 *         SIZEREG = byte count, treated as unsigned
 * Out:    v0 = original DSTREG when built as memcpy/memmove
 * Clobb:  AT, v1, a3, t0-t3, and the three argument registers
 *
 * The routine is assembled with ".set noreorder", so the instruction
 * written after every branch or jump is its delay slot and executes
 * whether or not the branch is taken.  Several delay slots below do
 * useful work; do not insert instructions between a branch and the
 * line that follows it.
 *
 * Three alignment cases are handled in each direction:
 *
 *	dst and src both SZREG-aligned	-> unrolled register-wide loop,
 *					   then a register at a time
 *	dst SZREG-aligned, src not	-> 32-bit unaligned loads (LWHI/LWLO)
 *					   with aligned 32-bit stores
 *	dst not SZREG-aligned		-> byte at a time
 *
 * A further case could be added for buffers that share the same
 * misalignment (for example both on a halfword boundary); an
 * unfinished, compiled-out attempt at that case has been dropped here.
 *
 * NOTE(review): LWHI/LWLO are assumed to be the endian-selected lwl/lwr
 * pair supplied by <machine/endian.h> under _LOCORE -- confirm.
 */
LEAF(FUNCTION)
	.set	noat
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while DSTREG still holds the original pointer */
	move	v0,DSTREG
#endif
	/*
	 * Pick the direction.  The delay slot of this branch is the first
	 * andi of the forward path; it only writes t1, which the backward
	 * path recomputes.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 = (src < dst)
	bne	t0,zero,6f		# src below dst: copy backwards

	/*
	 * Forward copy: classify by alignment.
	 */
	andi	t1,DSTREG,(SZREG-1)	# t1 = dst misalignment
	bne	t1,zero,3f		# dst unaligned: byte copy
	andi	t0,SRCREG,(SZREG-1)	# (delay slot) t0 = src misalignment
	bne	t0,zero,5f		# dst aligned, src unaligned

	/*
	 * Forward aligned->aligned copy, 8*SZREG bytes at a time.
	 * The li below also sits in the delay slot of the branch above;
	 * it only writes AT, which the 5: path does not read.
	 */
	li	AT,-(SZREG*8)
	and	t0,SIZEREG,AT		# t0 = count truncated to 8*SZREG
	PTR_ADDU a3,SRCREG,t0		# run fast loop up to this addr
	sltu	AT,SRCREG,a3		# any work to do?
	beq	AT,zero,2f
	PTR_SUBU SIZEREG,t0		# (delay slot) bytes left after fast loop

	/*
	 * Unrolled loop body.  Both pointers are advanced in the middle so
	 * that the second half uses negative offsets.
	 */
1:					# cp
	REG_L	t3,(0*SZREG)(SRCREG)
	REG_L	v1,(1*SZREG)(SRCREG)
	REG_L	t0,(2*SZREG)(SRCREG)
	REG_L	t1,(3*SZREG)(SRCREG)
	PTR_ADDU SRCREG,SZREG*8
	REG_S	t3,(0*SZREG)(DSTREG)
	REG_S	v1,(1*SZREG)(DSTREG)
	REG_S	t0,(2*SZREG)(DSTREG)
	REG_S	t1,(3*SZREG)(DSTREG)
	REG_L	t3,(-4*SZREG)(SRCREG)
	REG_L	v1,(-3*SZREG)(SRCREG)
	REG_L	t0,(-2*SZREG)(SRCREG)
	REG_L	t1,(-1*SZREG)(SRCREG)
	PTR_ADDU DSTREG,SZREG*8
	REG_S	t1,(-1*SZREG)(DSTREG)
	REG_S	t0,(-2*SZREG)(DSTREG)
	REG_S	v1,(-3*SZREG)(DSTREG)
	bne	SRCREG,a3,1b
	REG_S	t3,(-4*SZREG)(DSTREG)	# (delay slot)

	/*
	 * Copy one register at a time, no loop unrolling.
	 */
2:					# wordcopy
	andi	t2,SIZEREG,(SZREG-1)	# t2 = byte count mod SZREG
	xor	t2,SIZEREG,t2		# t2 = byte count rounded down to SZREG
	beq	t2,zero,3f
	PTR_ADDU t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU SIZEREG,SIZEREG,t2	# bytes left for the byte loop
1:
	REG_L	t3,0(SRCREG)
	PTR_ADDU SRCREG,SZREG
	REG_S	t3,0(DSTREG)
	bne	SRCREG,t0,1b
	PTR_ADDU DSTREG,SZREG		# (delay slot)

	/*
	 * Copy the remaining bytes one at a time.  The count is unsigned,
	 * so the loop ends on "reached zero" and not on a signed compare.
	 */
3:					# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,0(SRCREG)
	PTR_ADDU SRCREG,1
	sb	t3,0(DSTREG)
	PTR_SUBU SIZEREG,1
	bne	SIZEREG,zero,1b
	PTR_ADDU DSTREG,1		# (delay slot)

4:					# copydone
	j	ra
	nop

	/*
	 * Forward copy from unaligned source to aligned dest.
	 *
	 * The loads and the store move exactly 32 bits, so the count and
	 * both pointer steps are in units of 4 bytes regardless of SZREG.
	 * A dest that is SZREG-aligned is also 4-byte aligned, which is all
	 * the sw needs.
	 */
5:					# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	PTR_SUBU a3,SIZEREG,t0		# bytes to transfer as 32-bit words
	beq	a3,zero,3b		# less than one word: byte copy
	nop
	move	SIZEREG,t0		# this many to do after we are done
	PTR_ADDU a3,SRCREG,a3		# stop point

1:
	LWHI	t3,0(SRCREG)
	LWLO	t3,3(SRCREG)
	PTR_ADDU SRCREG,4
	sw	t3,0(DSTREG)
	bne	SRCREG,a3,1b
	PTR_ADDU DSTREG,4		# (delay slot)

	j	3b
	nop

	/*
	 * Backward copy -- mirrors the forward code above.  Both pointers
	 * are first moved one past the end of their buffers and every
	 * access uses a negative offset or follows a decrement.
	 *
	 * The alignment tests use SZREG-1, the same mask as the forward
	 * path, because the loops below use register-wide REG_L/REG_S.
	 */
6:					# backcopy
	PTR_ADDU SRCREG,SIZEREG
	PTR_ADDU DSTREG,SIZEREG
	andi	t1,DSTREG,(SZREG-1)	# t1 = misalignment of dst end
	bne	t1,zero,3f		# dst unaligned: byte copy
	andi	t0,SRCREG,(SZREG-1)	# (delay slot) misalignment of src end
	bne	t0,zero,5f		# dst aligned, src unaligned

	/*
	 * Backward aligned->aligned copy, 8*SZREG bytes at a time.
	 * The li below also sits in the delay slot of the branch above;
	 * it only writes AT, which the 5: path does not read.
	 */
	li	AT,(-8*SZREG)
	and	t0,SIZEREG,AT		# t0 = count truncated to 8*SZREG
	beq	t0,zero,2f		# any work to do?
	PTR_SUBU SIZEREG,t0		# (delay slot) bytes left after fast loop
	PTR_SUBU a3,SRCREG,t0		# run fast loop down to this addr

	/*
	 * Unrolled loop body: upper half of the chunk first, then the
	 * lower half after the pointers have been moved down.
	 */
1:					# cp
	REG_L	t3,(-4*SZREG)(SRCREG)
	REG_L	v1,(-3*SZREG)(SRCREG)
	REG_L	t0,(-2*SZREG)(SRCREG)
	REG_L	t1,(-1*SZREG)(SRCREG)
	PTR_SUBU SRCREG,8*SZREG
	REG_S	t3,(-4*SZREG)(DSTREG)
	REG_S	v1,(-3*SZREG)(DSTREG)
	REG_S	t0,(-2*SZREG)(DSTREG)
	REG_S	t1,(-1*SZREG)(DSTREG)
	REG_L	t3,(0*SZREG)(SRCREG)
	REG_L	v1,(1*SZREG)(SRCREG)
	REG_L	t0,(2*SZREG)(SRCREG)
	REG_L	t1,(3*SZREG)(SRCREG)
	PTR_SUBU DSTREG,8*SZREG
	REG_S	t1,(3*SZREG)(DSTREG)
	REG_S	t0,(2*SZREG)(DSTREG)
	REG_S	v1,(1*SZREG)(DSTREG)
	bne	SRCREG,a3,1b
	REG_S	t3,(0*SZREG)(DSTREG)	# (delay slot)

	/*
	 * Copy one register at a time, no loop unrolling.
	 */
2:					# wordcopy
	andi	t2,SIZEREG,SZREG-1	# t2 = byte count mod SZREG
	xor	t2,SIZEREG,t2		# t2 = byte count rounded down to SZREG
	beq	t2,zero,3f
	PTR_SUBU t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU SIZEREG,SIZEREG,t2	# bytes left for the byte loop
1:
	REG_L	t3,-SZREG(SRCREG)
	PTR_SUBU SRCREG,SZREG
	REG_S	t3,-SZREG(DSTREG)
	bne	SRCREG,t0,1b
	PTR_SUBU DSTREG,SZREG		# (delay slot)

	/*
	 * Copy the remaining bytes one at a time (unsigned count, see the
	 * forward byte loop).
	 */
3:					# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,-1(SRCREG)
	PTR_SUBU SRCREG,1
	sb	t3,-1(DSTREG)
	PTR_SUBU SIZEREG,1
	bne	SIZEREG,zero,1b
	PTR_SUBU DSTREG,1		# (delay slot)

4:					# copydone
	j	ra
	nop

	/*
	 * Backward copy from unaligned source to aligned dest, 32 bits at
	 * a time (see the forward variant for why the step is 4, not SZREG).
	 */
5:					# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	PTR_SUBU a3,SIZEREG,t0		# bytes to transfer as 32-bit words
	beq	a3,zero,3b		# less than one word: byte copy
	nop
	move	SIZEREG,t0		# this many to do after we are done
	PTR_SUBU a3,SRCREG,a3		# stop point

1:
	LWHI	t3,-4(SRCREG)
	LWLO	t3,-1(SRCREG)
	PTR_SUBU SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,a3,1b
	PTR_SUBU DSTREG,4		# (delay slot)

	j	3b
	nop

	.set	reorder
	.set	at
END(FUNCTION)
340