bcopy.S revision 1.4.44.2 1 /* $NetBSD: bcopy.S,v 1.4.44.2 2020/04/21 19:37:44 martin Exp $ */
2
3 /*
4 * Mach Operating System
5 * Copyright (c) 1993 Carnegie Mellon University
6 * All Rights Reserved.
7 *
8 * Permission to use, copy, modify and distribute this software and its
9 * documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
16 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie Mellon
26 * the rights to redistribute these changes.
27 */
28
29 /*
30 * File: mips_bcopy.s
31 * Author: Chris Maeda
32 * Date: June 1993
33 *
34 * Fast copy routine. Derived from aligned_block_copy.
35 */
36
37
38 #include <mips/asm.h>
39 #ifndef _LOCORE
40 #define _LOCORE /* XXX not really, just assembly-code source */
41 #endif
42 #include <machine/endian.h>
43
44
45 #if defined(LIBC_SCCS) && !defined(lint)
46 #if 0
47 RCSID("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93")
48 #else
49 RCSID("$NetBSD: bcopy.S,v 1.4.44.2 2020/04/21 19:37:44 martin Exp $")
50 #endif
51 #endif /* LIBC_SCCS and not lint */
52
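/*
 * bcopy(caddr_t src, caddr_t dst, unsigned int len)
 *
 *	a0	src address
 *	a1	dst address
 *	a2	length
 *
 * When this file is assembled with MEMCOPY or MEMMOVE defined, the same
 * code is emitted under the name memcpy or memmove instead.  In that
 * case the first two arguments are swapped (a0 is the dst address and
 * a1 is the src address) and the original dst address is returned in v0.
 */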
60
/*
 * Entry-point name: memcpy if MEMCOPY is defined, otherwise memmove if
 * MEMMOVE is defined, otherwise bcopy.
 */
#if defined(MEMCOPY)
#define	FUNCTION	memcpy
#elif defined(MEMMOVE)
#define	FUNCTION	memmove
#else
#define	FUNCTION	bcopy
#endif

/*
 * Argument registers.  memcpy/memmove take (dst, src, len) while bcopy
 * takes (src, dst, len), so the first two registers trade places.
 */
#if defined(MEMCOPY) || defined(MEMMOVE)
#define	DSTREG		a0
#define	SRCREG		a1
#else
#define	SRCREG		a0
#define	DSTREG		a1
#endif

/* The length is the third argument for all three entry points. */
#define	SIZEREG		a2
76
LEAF(FUNCTION)
	.set	noat
	.set	noreorder

	/*
	 * Copy SIZEREG bytes from the buffer at SRCREG to the buffer at
	 * DSTREG.  SRCREG, DSTREG and SIZEREG are the argument registers
	 * chosen by the preprocessor block that precedes this function.
	 *
	 * Registers modified: SRCREG, DSTREG, SIZEREG, t0-t3, v1, a3, AT,
	 * plus v0 (set to the original DSTREG) in the memcpy/memmove
	 * builds.  No stack is used.
	 *
	 * The code is assembled under "noreorder": the instruction that
	 * textually follows a branch is its delay slot and is executed
	 * whether or not the branch is taken.  Delay slots that do real
	 * work are marked below.
	 *
	 * The length is treated as an unsigned quantity throughout; all
	 * loops terminate on pointer or counter equality.
	 */

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	move	v0,DSTREG
#endif
	/*
	 * Choose the direction.  When the source starts below the
	 * destination the copy runs from the last byte down (label 6),
	 * so that a destination overlapping the tail of the source is
	 * not overwritten before it has been read.  Otherwise the copy
	 * runs forwards.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 == SRCREG < DSTREG
	bne	t0,zero,6f		# copy backwards
	/* delay slot is the andi below; label 6 recomputes t1 itself */

	/*
	 * There are four alignment cases (with frequency)
	 * (Based on measurements taken with a DECstation 5000/200
	 * inside a Mach kernel.)
	 *
	 *	aligned   -> aligned		(mostly)
	 *	unaligned -> aligned		(sometimes)
	 *	aligned,unaligned -> unaligned	(almost never)
	 *
	 * An unaligned destination is always copied byte by byte.
	 *
	 * Note that we could add another case that checks if
	 * the destination and source are unaligned but the
	 * copy is alignable, e.g. if src and dest are both
	 * on a halfword boundary.
	 */
	andi	t1,DSTREG,(SZREG-1)	# get last bits of dest
	bne	t1,zero,3f		# dest unaligned
	andi	t0,SRCREG,(SZREG-1)	# (delay slot) get last bits of src
	bne	t0,zero,5f		# src unaligned, dest aligned
	/* delay slot is the li below; AT is scratch on both paths */

	/*
	 * Forward aligned->aligned copy, 8 registers (8*SZREG bytes)
	 * per iteration.  Label 98 is not referenced in this file.
	 */
98:
	li	AT,-(SZREG*8)
	and	t0,SIZEREG,AT		# count truncated to multiples
	PTR_ADDU a3,SRCREG,t0		# run fast loop up to this addr
	sltu	AT,SRCREG,a3		# any work to do?
	beq	AT,zero,2f
	PTR_SUBU SIZEREG,t0		# (delay slot) bytes left afterwards

	/*
	 * loop body: two groups of four loads followed by four stores.
	 * Each pointer is bumped in the middle of the iteration, so the
	 * second group uses negative offsets.
	 */
1:	# cp
	REG_L	t3,(0*SZREG)(SRCREG)
	REG_L	v1,(1*SZREG)(SRCREG)
	REG_L	t0,(2*SZREG)(SRCREG)
	REG_L	t1,(3*SZREG)(SRCREG)
	PTR_ADDU SRCREG,SZREG*8
	REG_S	t3,(0*SZREG)(DSTREG)
	REG_S	v1,(1*SZREG)(DSTREG)
	REG_S	t0,(2*SZREG)(DSTREG)
	REG_S	t1,(3*SZREG)(DSTREG)
	REG_L	t1,(-1*SZREG)(SRCREG)
	REG_L	t0,(-2*SZREG)(SRCREG)
	REG_L	v1,(-3*SZREG)(SRCREG)
	REG_L	t3,(-4*SZREG)(SRCREG)
	PTR_ADDU DSTREG,SZREG*8
	REG_S	t1,(-1*SZREG)(DSTREG)
	REG_S	t0,(-2*SZREG)(DSTREG)
	REG_S	v1,(-3*SZREG)(DSTREG)
	bne	SRCREG,a3,1b
	REG_S	t3,(-4*SZREG)(DSTREG)	# (delay slot) last store of group

	/*
	 * Copy one register at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,(SZREG-1)	# t2 = byte count mod SZREG
	PTR_SUBU t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq	t2,zero,3f
	PTR_ADDU t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU SIZEREG,SIZEREG,t2	# leave only the trailing bytes
1:
	REG_L	t3,0(SRCREG)
	PTR_ADDU SRCREG,SZREG
	REG_S	t3,0(DSTREG)
	bne	SRCREG,t0,1b
	PTR_ADDU DSTREG,SZREG		# (delay slot)

	/*
	 * Copy the remaining SIZEREG bytes one at a time.  This is also
	 * the whole copy when the destination is unaligned.
	 */
3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,0(SRCREG)
	PTR_ADDU SRCREG,1
	sb	t3,0(DSTREG)
	PTR_SUBU SIZEREG,1
	/*
	 * The length is unsigned, so loop until the counter reaches
	 * zero instead of testing its sign.  For every count without
	 * the top bit set this is the same as the former bgtz test.
	 */
	bne	SIZEREG,zero,1b
	PTR_ADDU DSTREG,1		# (delay slot)

4:	# copydone
	.set	at			#-mfix-loongson2f-btb
	j	ra
	nop
	.set	noat

	/*
	 * Forward copy from unaligned source to aligned dest, one
	 * register per iteration.  REG_LHI/REG_LLO come from the
	 * included headers; they are used here as the pair that
	 * assembles one register from an unaligned address (first and
	 * last byte offsets 0 and SZREG-1).
	 */
5:	# destaligned
	andi	t0,SIZEREG,(SZREG-1)	# t0 = bytecount mod SZREG
	PTR_SUBU a3,SIZEREG,t0		# number of bytes in whole words
	beq	a3,zero,3b		# less than one word: byte copy
	nop
	move	SIZEREG,t0		# this many to do after we are done
	PTR_ADDU a3,SRCREG,a3		# stop point

1:
	REG_LHI	t3,0(SRCREG)
	REG_LLO	t3,SZREG-1(SRCREG)
	/*
	 * Pointer updates use the non-trapping PTR_ADDU, like every
	 * other pointer update in this routine, so that address
	 * arithmetic can never raise an integer overflow exception.
	 */
	PTR_ADDU SRCREG,SZREG
	REG_S	t3,0(DSTREG)
	bne	SRCREG,a3,1b
	PTR_ADDU DSTREG,SZREG		# (delay slot)

	b	3b			# finish the trailing bytes
	nop

	/*
	 * Backward copy.  Both pointers are first moved one past the
	 * end of their buffers; every access below therefore uses a
	 * negative offset or follows a pointer decrement.  The
	 * structure mirrors the forward code above.
	 */
6:	# backcopy -- based on above
	PTR_ADDU SRCREG,SIZEREG
	PTR_ADDU DSTREG,SIZEREG
	andi	t1,DSTREG,SZREG-1	# get last bits of dest end
	bne	t1,zero,3f		# dest end unaligned: byte copy
	andi	t0,SRCREG,SZREG-1	# (delay slot) last bits of src end
	bne	t0,zero,5f		# src unaligned, dest aligned
	/* delay slot is the li below; AT is scratch on both paths */

	/*
	 * Backward aligned->aligned copy, 8 registers (8*SZREG bytes)
	 * per iteration.
	 */
	li	AT,(-8*SZREG)
	and	t0,SIZEREG,AT		# count truncated to multiple of 8*SZREG
	beq	t0,zero,2f		# any work to do?
	PTR_SUBU SIZEREG,t0		# (delay slot) bytes left afterwards
	PTR_SUBU a3,SRCREG,t0		# run fast loop down to this addr

	/*
	 * loop body: the top four registers are moved first, then the
	 * pointers drop by 8*SZREG and the lower four are moved using
	 * non-negative offsets.
	 */
1:	# cp
	REG_L	t3,(-4*SZREG)(SRCREG)
	REG_L	v1,(-3*SZREG)(SRCREG)
	REG_L	t0,(-2*SZREG)(SRCREG)
	REG_L	t1,(-1*SZREG)(SRCREG)
	PTR_SUBU SRCREG,8*SZREG
	REG_S	t3,(-4*SZREG)(DSTREG)
	REG_S	v1,(-3*SZREG)(DSTREG)
	REG_S	t0,(-2*SZREG)(DSTREG)
	REG_S	t1,(-1*SZREG)(DSTREG)
	REG_L	t1,(3*SZREG)(SRCREG)
	REG_L	t0,(2*SZREG)(SRCREG)
	REG_L	v1,(1*SZREG)(SRCREG)
	REG_L	t3,(0*SZREG)(SRCREG)
	PTR_SUBU DSTREG,8*SZREG
	REG_S	t1,(3*SZREG)(DSTREG)
	REG_S	t0,(2*SZREG)(DSTREG)
	REG_S	v1,(1*SZREG)(DSTREG)
	bne	SRCREG,a3,1b
	REG_S	t3,(0*SZREG)(DSTREG)	# (delay slot) last store of group

	/*
	 * Copy one register at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,SZREG-1	# t2 = byte count mod SZREG
	PTR_SUBU t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq	t2,zero,3f
	PTR_SUBU t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU SIZEREG,SIZEREG,t2	# leave only the leading bytes
1:
	REG_L	t3,-SZREG(SRCREG)
	PTR_SUBU SRCREG,SZREG
	REG_S	t3,-SZREG(DSTREG)
	bne	SRCREG,t0,1b
	PTR_SUBU DSTREG,SZREG		# (delay slot)

	/*
	 * Copy the remaining SIZEREG bytes one at a time, moving down.
	 * This is also the whole copy when the destination end is
	 * unaligned.
	 */
3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,-1(SRCREG)
	PTR_SUBU SRCREG,1
	sb	t3,-1(DSTREG)
	PTR_SUBU SIZEREG,1
	/* unsigned length: loop until the counter reaches zero */
	bne	SIZEREG,zero,1b
	PTR_SUBU DSTREG,1		# (delay slot)

4:	# copydone
	.set	at			#-mfix-loongson2f-btb
	j	ra
	nop
	.set	noat

	/*
	 * Backward copy from unaligned source to aligned dest, one
	 * register per iteration (first and last byte offsets -SZREG
	 * and -1 relative to the current source end).
	 */
5:	# destaligned
	andi	t0,SIZEREG,SZREG-1	# t0 = bytecount mod SZREG
	PTR_SUBU a3,SIZEREG,t0		# number of bytes in whole words
	beq	a3,zero,3b		# less than one word: byte copy
	nop
	move	SIZEREG,t0		# this many to do after we are done
	PTR_SUBU a3,SRCREG,a3		# stop point

1:
	REG_LHI	t3,-SZREG(SRCREG)
	REG_LLO	t3,-1(SRCREG)
	PTR_SUBU SRCREG,SZREG
	REG_S	t3,-SZREG(DSTREG)
	bne	SRCREG,a3,1b
	PTR_SUBU DSTREG,SZREG		# (delay slot)

	b	3b			# finish the leading bytes
	nop

	.set	reorder
	.set	at
END(FUNCTION)
299