1 /* $NetBSD: bcopy.S,v 1.14.14.1 2017/12/03 11:38:46 jdolecek Exp $ */
2
3 /*
4 * Copyright (c) 2002 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Matthew Fredette.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copy routines for NetBSD/hppa.
34 */
35
36 #undef _LOCORE
37 #define _LOCORE /* XXX fredette - unfortunate */
38
39 #if defined(SPCOPY) && !defined(_STANDALONE)
40
41 #include "opt_diagnostic.h"
42 #include "opt_multiprocessor.h"
43
44 #include <machine/cpu.h>
45
46 #endif
47
48 #include <machine/asm.h>
49 #include <machine/frame.h>
50 #include <machine/reg.h>
51
52 #if defined(LIBC_SCCS) && !defined(lint)
53 RCSID("$NetBSD: bcopy.S,v 1.14.14.1 2017/12/03 11:38:46 jdolecek Exp $")
54 #endif /* LIBC_SCCS and not lint */
55
56 /*
57 * The stbys instruction is a little asymmetric. When (%r2 & 3)
58 * is zero, stbys,b,m %r1, 4(%r2) works like stws,ma. You
59 * might then wish that when (%r2 & 3) == 0, stbys,e,m %r1, -4(%r2)
60 * worked like stws,mb. But it doesn't.
61 *
62 * This macro works around this problem. It requires that %t2
63 * hold the number of bytes that will be written by this store
64 * (meaning that it ranges from one to four).
65 *
66 * Watch the delay-slot trickery here. The comib is used to set
67 * up which instruction, either the stws or the stbys, is run
68 * in the delay slot of the b instruction.
 *
 * r is the register to store; dst_spc and dst_off are the
 * destination space register and offset register. The branch
 * targets are plain numeric displacements rather than labels, so
 * the four instructions must stay together and in this order.
69 */
70 #define _STBYS_E_M(r, dst_spc, dst_off) \
71 comib,<> 4, %t2, 4 /* %t2 != 4: select the stbys below */ ! \
72 b 4 /* leave the macro; the selected store runs in its delay slot */ ! \
73 stws,mb r, -4(dst_spc, dst_off) /* %t2 == 4: whole-word store */ ! \
74 stbys,e,m r, 0(dst_spc, dst_off) /* %t2 < 4: partial-word store */
75
76 /*
77 * This macro does a bulk copy with no shifting. cmplt and m are
78 * the completer and displacement multiplier, respectively, for
79 * the load and store instructions.
 *
 * src_spc/src_off and dst_spc/dst_off name the space and offset
 * registers of the source and destination, and count names the
 * register holding the number of bytes still to be stored. %t1
 * through %t4 are used as scratch registers.
 *
 * The expansion defines _LABEL(_do1), which the other copy macros
 * in this file branch to in order to reach the byte-at-a-time loop,
 * and it ends by branching to _LABEL(_done), which the user of the
 * macro must define.
80 */
81 #define _COPY(src_spc, src_off, dst_spc, dst_off, count, cmplt, m) \
82 ! \
83 /* ! \
84 * Loop storing 16 bytes at a time. Since count ! \
85 * may be > INT_MAX, we have to be careful and ! \
86 * avoid comparisons that treat it as a signed ! \
87 * quantity, until after this loop, when count ! \
88 * is guaranteed to be less than 16. ! \
89 */ ! \
90 comib,>>=,n 15, count, _LABEL(_skip16) /* unsigned test: count <= 15 */ ! \
91 .label _LABEL(_loop16) ! \
92 addi -16, count, count ! \
93 ldws,cmplt m*4(src_spc, src_off), %t1 ! \
94 ldws,cmplt m*4(src_spc, src_off), %t2 ! \
95 ldws,cmplt m*4(src_spc, src_off), %t3 ! \
96 ldws,cmplt m*4(src_spc, src_off), %t4 ! \
97 stws,cmplt %t1, m*4(dst_spc, dst_off) ! \
98 stws,cmplt %t2, m*4(dst_spc, dst_off) ! \
99 stws,cmplt %t3, m*4(dst_spc, dst_off) ! \
100 comib,<< 15, count, _LABEL(_loop16) /* unsigned test: count > 15 */ ! \
101 stws,cmplt %t4, m*4(dst_spc, dst_off) /* delay slot: the fourth store */ ! \
102 .label _LABEL(_skip16) ! \
103 ! \
104 /* Loop storing 4 bytes at a time. */ ! \
105 addib,<,n -4, count, _LABEL(_skip4) /* count -= 4; skip the loop if negative */ ! \
106 .label _LABEL(_loop4) ! \
107 ldws,cmplt m*4(src_spc, src_off), %t1 ! \
108 addib,>= -4, count, _LABEL(_loop4) /* count -= 4; loop while >= 0 */ ! \
109 stws,cmplt %t1, m*4(dst_spc, dst_off) /* delay slot: store the word */ ! \
110 .label _LABEL(_skip4) ! \
111 /* Restore the correct count. */ ! \
112 addi 4, count, count ! \
113 ! \
114 .label _LABEL(_do1) ! \
115 ! \
116 /* Loop storing 1 byte at a time. */ ! \
117 addib,<,n -1, count, _LABEL(_skip1) /* count -= 1; skip the loop if negative */ ! \
118 .label _LABEL(_loop1) ! \
119 ldbs,cmplt m*1(src_spc, src_off), %t1 ! \
120 addib,>= -1, count, _LABEL(_loop1) /* count -= 1; loop while >= 0 */ ! \
121 stbs,cmplt %t1, m*1(dst_spc, dst_off) /* delay slot: store the byte */ ! \
122 .label _LABEL(_skip1) ! \
123 /* Restore the correct count. */ ! \
124 b _LABEL(_done) ! \
125 addi 1, count, count /* delay slot of the b */
126
127 /*
128 * This macro is definitely strange. It exists purely to
129 * allow the _COPYS macro to be reused, but because it
130 * requires this long attempt to explain it, I'm starting
131 * to doubt the value of that.
132 *
133 * Part of the expansion of the _COPYS macro below are loops
134 * that copy four words or one word at a time, performing shifts
135 * to get data to line up correctly in the destination buffer.
136 *
137 * The _COPYS macro is used when copying backwards, as well
138 * as forwards. The 4-word loop always loads into %t1, %t2, %t3,
139 * and %t4 in that order. This means that when copying forward,
140 * %t1 will have the word from the lowest address, and %t4 will
141 * have the word from the highest address. When copying
142 * backwards, the opposite is true.
143 *
144 * The shift instructions need pairs of registers with adjacent
145 * words, with the register containing the word from the lowest
146 * address *always* coming first. It is this asymmetry that
147 * gives rise to this macro - depending on which direction
148 * we're copying in, these ordered pairs are different.
149 *
150 * Fortunately, we can compute those register numbers at compile
151 * time, and assemble them manually into a shift instruction.
152 * That's what this macro does.
153 *
154 * This macro takes two arguments. n ranges from 0 to 3 and
155 * is the "shift number", i.e., n = 0 means we're doing the
156 * shift for what will be the first store.
157 *
158 * m is the displacement multiplier from the _COPYS macro call.
159 * This is 1 for a forward copy and -1 for a backwards copy.
160 * So, the ((m + 1) / 2) term yields 0 for a backwards copy and
161 * 1 for a forward copy, and the ((m - 1) / 2) term yields
162 * 0 for a forward copy, and -1 for a backwards copy.
163 * These terms are used to discriminate the register computations
164 * below.
165 *
166 * When copying forward, then, the first register used with
167 * the first vshd will be 19 + (3 - ((0 - 1) & 3)), or %t4,
168 * which matches _COPYS' requirement that the word last loaded
169 * be in %t4. The first register used for the second vshd
170 * will then "wrap" around to 19 + (3 - ((1 - 1) & 3)), or %t1.
171 * And so on to %t2 and %t3.
172 *
173 * When copying forward, the second register used with the first
174 * vshd will be (19 + (3 - ((n + 0) & 3)), or %t1. It will
175 * continue to be %t2, then %t3, and finally %t4.
176 *
177 * When copying backwards, the values for the first and second
178 * register for each vshd are reversed from the forwards case.
179 * (Symmetry reclaimed!) Proving this is "left as an exercise
180 * for the reader" (remember the different discriminating values!)
181 */
182 #define _VSHD(n, m, t) \
183 .word (0xd0000000 /* fixed bits of the hand-assembled vshd */ | \
184 ((19 + (3 - ((n - 1 * ((m + 1) / 2)) & 3))) << 16) /* first source register */ | \
185 ((19 + (3 - ((n + 1 * ((m - 1) / 2)) & 3))) << 21) /* second source register */ | \
186 (t)) /* t is the number of the target register */
187
188 /*
189 * This macro does a bulk copy with shifting. cmplt and m are
190 * the completer and displacement multiplier, respectively, for
191 * the load and store instructions. It is assumed that the
192 * word last loaded is already in %t4.
 *
 * The shift amount (in bits) must already be in %cr11: the shifts
 * are done by the _VSHD words, and %cr11 is read back at the end to
 * work out how many source bytes are still pending in %t4.
 * %r1 and %t1 through %t4 are used as scratch registers.
 *
 * The expansion finishes by branching to _LABEL(_do1), which is
 * defined by an expansion of _COPY under the same _LABEL.
193 */
194 #define _COPYS(src_spc, src_off, dst_spc, dst_off, count, cmplt, m) \
195 ! \
196 /* ! \
197 * Loop storing 16 bytes at a time. Since count ! \
198 * may be > INT_MAX, we have to be careful and ! \
199 * avoid comparisons that treat it as a signed ! \
200 * quantity, until after this loop, when count ! \
201 * is guaranteed to be less than 16. ! \
202 */ ! \
203 comib,>>=,n 15, count, _LABEL(S_skip16) /* unsigned test: count <= 15 */ ! \
204 .label _LABEL(S_loop16) ! \
205 addi -16, count, count ! \
206 ldws,cmplt m*4(src_spc, src_off), %t1 ! \
207 ldws,cmplt m*4(src_spc, src_off), %t2 ! \
208 ldws,cmplt m*4(src_spc, src_off), %t3 ! \
209 _VSHD(0, m, 1) /* vshd %t4, %t1, %r1 */ ! \
210 ldws,cmplt m*4(src_spc, src_off), %t4 ! \
211 _VSHD(1, m, 22) /* vshd %t1, %t2, %t1 */ ! \
212 _VSHD(2, m, 21) /* vshd %t2, %t3, %t2 */ ! \
213 _VSHD(3, m, 20) /* vshd %t3, %t4, %t3 */ ! \
214 stws,cmplt %r1, m*4(dst_spc, dst_off) ! \
215 stws,cmplt %t1, m*4(dst_spc, dst_off) ! \
216 stws,cmplt %t2, m*4(dst_spc, dst_off) ! \
217 comib,<< 15, count, _LABEL(S_loop16) /* unsigned test: count > 15 */ ! \
218 stws,cmplt %t3, m*4(dst_spc, dst_off) /* delay slot: the fourth store */ ! \
219 .label _LABEL(S_skip16) ! \
220 ! \
221 /* Loop storing 4 bytes at a time. */ ! \
222 addib,<,n -4, count, _LABEL(S_skip4) /* count -= 4; skip the loop if negative */ ! \
223 .label _LABEL(S_loop4) ! \
224 ldws,cmplt m*4(src_spc, src_off), %t1 ! \
225 _VSHD(0, m, 1) /* into %r1 (1) */ ! \
226 copy %t1, %t4 /* the new word becomes the "word last loaded" */ ! \
227 addib,>= -4, count, _LABEL(S_loop4) /* count -= 4; loop while >= 0 */ ! \
228 stws,cmplt %r1, m*4(dst_spc, dst_off) /* delay slot: store the shifted word */ ! \
229 .label _LABEL(S_skip4) ! \
230 ! \
231 /* ! \
232 * We now need to "back up" src_off by the ! \
233 * number of bytes remaining in the FIFO ! \
234 * (i.e., the number of bytes remaining in %t4), ! \
235 * because (the correct) count still includes ! \
236 * these bytes, and we intend to keep it that ! \
237 * way, and finish with the single-byte copier. ! \
238 * ! \
239 * The number of bytes remaining in the FIFO is ! \
240 * related to the shift count, so recover it, ! \
241 * restoring the correct count at the same time. ! \
242 */ ! \
243 mfctl %cr11, %t1 /* read back the shift amount, in bits */ ! \
244 addi 4, count, count ! \
245 shd %r0, %t1, 3, %t1 /* bits -> bytes */ ! \
246 ! \
247 /* ! \
248 * If we're copying forward, the shift count ! \
249 * is the number of bytes remaining in the ! \
250 * FIFO, and we want to subtract it from src_off. ! \
251 * If we're copying backwards, (4 - shift count) ! \
252 * is the number of bytes remaining in the FIFO, ! \
253 * and we want to add it to src_off. ! \
254 * ! \
255 * We observe that x + (4 - y) = x - (y - 4), ! \
256 * and introduce this instruction to add -4 when ! \
257 * m is -1, although this does mean one extra ! \
258 * instruction in the forward case. ! \
259 */ ! \
260 addi 4*((m - 1) / 2), %t1, %t1 /* adds 0 when m is 1, -4 when m is -1 */ ! \
261 ! \
262 /* Now branch to the byte-at-a-time loop. */ ! \
263 b _LABEL(_do1) ! \
264 sub src_off, %t1, src_off /* delay slot: back src_off up */
265
266 /*
267 * This macro copies a region in the forward direction.
 *
 * src_spc/src_off and dst_spc/dst_off name the space and offset
 * registers of the source and destination, and count names the
 * register holding the byte count. The offset and count registers
 * are modified, and %r1 and %t1 through %t4 are used as scratch.
 * Every path ends up in an expansion of _COPY or _COPYS, so control
 * finally reaches _LABEL(_done), which the user must define.
 *
 * Note that a later "#if 1" block in this file replaces this
 * definition with a plain byte-at-a-time loop.
268 */
269 #define _COPY_FORWARD(src_spc, src_off, dst_spc, dst_off, count) \
270 ! \
271 /* ! \
272 * Since in the shifting-left case we will ! \
273 * load 8 bytes before checking count, to ! \
274 * keep things simple, branch to the byte ! \
275 * copier unless we're copying at least 8. ! \
276 */ ! \
277 comib,>>,n 8, count, _LABEL(_do1) /* unsigned test: count < 8 */ ! \
278 ! \
279 /* ! \
280 * Once we 4-byte align the source offset, ! \
281 * figure out how many bytes from the region ! \
282 * will be in the first 4-byte word we read. ! \
283 * Ditto for writing the destination offset. ! \
284 */ ! \
285 extru src_off, 31, 2, %t1 /* %t1 = low two bits of src_off */ ! \
286 extru dst_off, 31, 2, %t2 /* %t2 = low two bits of dst_off */ ! \
287 subi 4, %t1, %t1 /* %t1 = 4 - %t1 */ ! \
288 subi 4, %t2, %t2 /* %t2 = 4 - %t2 */ ! \
289 ! \
290 /* ! \
291 * Calculate the byte shift required. A ! \
292 * positive value means a source 4-byte word ! \
293 * has to be shifted to the right to line up ! \
294 * as a destination 4-byte word. ! \
295 */ ! \
296 sub %t1, %t2, %t1 ! \
297 ! \
298 /* 4-byte align src_off. */ ! \
299 depi 0, 31, 2, src_off ! \
300 ! \
301 /* ! \
302 * It's somewhat important to note that this ! \
303 * code thinks of count as "the number of bytes ! \
304 * that haven't been stored yet", as opposed to ! \
305 * "the number of bytes that haven't been copied ! \
306 * yet". The distinction is subtle, but becomes ! \
307 * apparent at the end of the shifting code, where ! \
308 * we "back up" src_off to correspond to count, ! \
309 * as opposed to flushing the FIFO. ! \
310 * ! \
311 * We calculated above how many bytes our first ! \
312 * store will store, so update count now. ! \
313 * ! \
314 * If the shift is zero, strictly as an optimization ! \
315 * we use a copy loop that does no shifting. ! \
316 */ ! \
317 comb,<> %r0, %t1, _LABEL(_shifting) /* nonzero shift: use the shifting code */ ! \
318 sub count, %t2, count /* delay slot: runs on both paths */ ! \
319 ! \
320 /* Load and store the first word. */ ! \
321 ldws,ma 4(src_spc, src_off), %t4 ! \
322 stbys,b,m %t4, 4(dst_spc, dst_off) ! \
323 ! \
324 /* Do the rest of the copy. */ ! \
325 _COPY(src_spc,src_off,dst_spc,dst_off,count,ma,1) ! \
326 ! \
327 .label _LABEL(_shifting) ! \
328 ! \
329 /* ! \
330 * If shift < 0, we need to shift words to the ! \
331 * left. Since we can't do this directly, we ! \
332 * adjust the shift so it's a shift to the right ! \
333 * and load the first word into the high word of ! \
334 * the FIFO. Otherwise, we load a zero into the ! \
335 * high word of the FIFO. ! \
336 */ ! \
337 comb,<= %r0, %t1, _LABEL(_shiftingrt) /* shift >= 0: nothing to adjust */ ! \
338 copy %r0, %t3 /* delay slot: zero the high word of the FIFO */ ! \
339 addi 4, %t1, %t1 ! \
340 ldws,ma 4(src_spc, src_off), %t3 ! \
341 .label _LABEL(_shiftingrt) ! \
342 ! \
343 /* ! \
344 * Turn the shift byte count into a bit count, ! \
345 * load the next word, set the Shift Amount ! \
346 * Register, and form and store the first word. ! \
347 */ ! \
348 sh3add %t1, %r0, %t1 ! \
349 ldws,ma 4(src_spc, src_off), %t4 ! \
350 mtctl %t1, %cr11 ! \
351 vshd %t3, %t4, %r1 ! \
352 stbys,b,m %r1, 4(dst_spc, dst_off) ! \
353 ! \
354 /* Do the rest of the copy. */ ! \
355 _COPYS(src_spc,src_off,dst_spc,dst_off,count,ma,1)
356
357 /*
 * This macro copies a region in the reverse direction.
 *
 * The arguments are the same as for _COPY_FORWARD. src_off and
 * dst_off are first advanced by count, and the copy then works
 * downwards from there. %r1 and %t1 through %t4 are used as
 * scratch, and control finally reaches _LABEL(_done).
 *
 * Note that a later "#if 1" block in this file replaces this
 * definition with a plain byte-at-a-time loop.
 */
358 #define _COPY_REVERSE(src_spc, src_off, dst_spc, dst_off, count) \
359 ! \
360 /* Immediately add count to both offsets. */ ! \
361 add src_off, count, src_off ! \
362 add dst_off, count, dst_off ! \
363 ! \
364 /* ! \
365 * Since in the shifting-right case we ! \
366 * will load 8 bytes before checking ! \
367 * count, to keep things simple, branch ! \
368 * to the byte copier unless we're ! \
369 * copying at least 8 bytes. ! \
370 */ ! \
371 comib,>>,n 8, count, _LABEL(_do1) /* unsigned test: count < 8 */ ! \
372 ! \
373 /* ! \
374 * Once we 4-byte align the source offset, ! \
375 * figure out how many bytes from the region ! \
376 * will be in the first 4-byte word we read. ! \
377 * Ditto for writing the destination offset. ! \
378 */ ! \
379 extru,<> src_off, 31, 2, %t1 /* low two bits; a nonzero result skips the ldi */ ! \
380 ldi 4, %t1 /* the low bits were zero: use 4 */ ! \
381 extru,<> dst_off, 31, 2, %t2 /* likewise for the destination */ ! \
382 ldi 4, %t2 ! \
383 ! \
384 /* ! \
385 * Calculate the byte shift required. A ! \
386 * positive value means a source 4-byte ! \
387 * word has to be shifted to the right to ! \
388 * line up as a destination 4-byte word. ! \
389 */ ! \
390 sub %t2, %t1, %t1 ! \
391 ! \
392 /* ! \
393 * 4-byte align src_off, leaving it pointing ! \
394 * to the 4-byte word *after* the next word ! \
395 * we intend to load. ! \
396 * ! \
397 * It's somewhat important to note that this ! \
398 * code thinks of count as "the number of bytes ! \
399 * that haven't been stored yet", as opposed to ! \
400 * "the number of bytes that haven't been copied ! \
401 * yet". The distinction is subtle, but becomes ! \
402 * apparent at the end of the shifting code, where ! \
403 * we "back up" src_off to correspond to count, ! \
404 * as opposed to flushing the FIFO. ! \
405 * ! \
406 * We calculated above how many bytes our first ! \
407 * store will store, so update count now. ! \
408 * ! \
409 * If the shift is zero, we use a copy loop that ! \
410 * does no shifting. NB: unlike the forward case, ! \
411 * this is NOT strictly an optimization. If the ! \
412 * SAR is zero the vshds do NOT do the right thing. ! \
413 * This is another asymmetry more or less the "fault" ! \
414 * of vshd. ! \
415 */ ! \
416 addi 3, src_off, src_off ! \
417 sub count, %t2, count ! \
418 comb,<> %r0, %t1, _LABEL(_shifting) /* nonzero shift: use the shifting code */ ! \
419 depi 0, 31, 2, src_off /* delay slot: clear the low two bits on both paths */ ! \
420 ! \
421 /* Load and store the first word. */ ! \
422 ldws,mb -4(src_spc, src_off), %t4 ! \
423 _STBYS_E_M(%t4, dst_spc, dst_off) ! \
424 ! \
425 /* Do the rest of the copy. */ ! \
426 _COPY(src_spc,src_off,dst_spc,dst_off,count,mb,-1) ! \
427 ! \
428 .label _LABEL(_shifting) ! \
429 ! \
430 /* ! \
431 * If shift < 0, we need to shift words to the ! \
432 * left. Since we can't do this directly, we ! \
433 * adjust the shift so it's a shift to the right ! \
434 * and load a zero in to the low word of the FIFO. ! \
435 * Otherwise, we load the first word into the ! \
436 * low word of the FIFO. ! \
437 * ! \
438 * Note the nullification trickery here. We ! \
439 * assume that we're shifting to the left, and ! \
440 * load zero into the low word of the FIFO. Then ! \
441 * we nullify the addi if we're shifting to the ! \
442 * right. If the addi is not nullified, we are ! \
443 * shifting to the left, so we nullify the load. ! \
444 * The comb is taken only when shifting to the right. ! \
445 */ ! \
446 copy %r0, %t3 ! \
447 comb,<=,n %r0, %t1, 0 /* shift >= 0: nullify the addi, go on to the load */ ! \
448 addi,tr 4, %t1, %t1 /* shift < 0: adjust it and nullify the load */ ! \
449 ldws,mb -4(src_spc, src_off), %t3 ! \
450 ! \
451 /* ! \
452 * Turn the shift byte count into a bit count, ! \
453 * load the next word, set the Shift Amount ! \
454 * Register, and form and store the first word. ! \
455 */ ! \
456 sh3add %t1, %r0, %t1 ! \
457 ldws,mb -4(src_spc, src_off), %t4 ! \
458 mtctl %t1, %cr11 ! \
459 vshd %t4, %t3, %r1 ! \
460 _STBYS_E_M(%r1, dst_spc, dst_off) ! \
461 ! \
462 /* Do the rest of the copy. */ ! \
463 _COPYS(src_spc,src_off,dst_spc,dst_off,count,mb,-1)
464
465 /*
466 * For paranoia, when things aren't going well, enable this
467 * code to assemble byte-at-a-time-only copying.
 *
 * NOTE: the condition below is currently "#if 1", so these
 * definitions are always in effect and replace the word-at-a-time
 * _COPY_FORWARD and _COPY_REVERSE macros defined above.
 *
 * Both loops use %r1 as scratch, branch back with a numeric
 * displacement (-12) instead of a label, and leave through
 * _LABEL(_done), which the user of the macro must define.
468 */
469 #if 1
470 #undef _COPY_FORWARD
471 #define _COPY_FORWARD(src_spc, src_off, dst_spc, dst_off, count) \
472 comb,=,n %r0, count, _LABEL(_done) /* nothing to do when count is zero */ ! \
473 ldbs,ma 1(src_spc, src_off), %r1 ! \
474 addib,<> -1, count, -12 /* count -= 1; back to the ldbs while nonzero */ ! \
475 stbs,ma %r1, 1(dst_spc, dst_off) /* delay slot: store the byte */ ! \
476 b,n _LABEL(_done)
477 #undef _COPY_REVERSE
478 #define _COPY_REVERSE(src_spc, src_off, dst_spc, dst_off, count) \
479 comb,= %r0, count, _LABEL(_done) /* no ,n: the add below still runs, adding zero */ ! \
480 add src_off, count, src_off ! \
481 add dst_off, count, dst_off ! \
482 ldbs,mb -1(src_spc, src_off), %r1 ! \
483 addib,<> -1, count, -12 /* count -= 1; back to the ldbs while nonzero */ ! \
484 stbs,mb %r1, -1(dst_spc, dst_off) /* delay slot: store the byte */ ! \
485 b,n _LABEL(_done)
486 #endif
487
488 /*
489 * Build bcopy by default when no routine was selected explicitly.
490 */
491 #if !defined(SPCOPY) && !defined(MEMCPY) && !defined(MEMMOVE)
492 #define BCOPY
493 #endif
494
495 #if defined(SPCOPY) && !defined(_STANDALONE)
496
497 #include <sys/errno.h>
498 #include "assym.h"
499
500 /*
501 * int spcopy(pa_space_t ssp, const void *src, pa_space_t dsp, void *dst,
502 * size_t len)
503 *
504 * We assume that the regions do not overlap.
 *
 * ssp, src, dsp and dst arrive in %arg0 through %arg3; len is
 * fetched from the stack frame. The copy runs forward from space
 * ssp (via %sr1) to space dsp (via %sr2).
 *
 * Returns zero in %ret0 when the copy completes. A fault handler
 * address (spcopy_fault) is installed in the PCB for the duration
 * of the copy; per the comment below, that handler fills in %ret0
 * itself when triggered.
 *
 * %sr2 is saved in %ret1 on entry and restored on the way out;
 * %sr1 is overwritten and not restored. %r1, %r31 and whatever
 * _COPY_FORWARD uses as scratch are clobbered.
505 */
506 LEAF_ENTRY(spcopy)
507
508 /*
509 * Setup the fault handler, which will fill in %ret0 if triggered.
510 */
511 GET_CURLWP(%r31)
512 #ifdef DIAGNOSTIC
	/* Panic if there is no current lwp to hang the fault handler on. */
513 comb,<>,n %r0, %r31, Lspcopy_curlwp_ok
514 ldil L%panic, %r1
515 ldil L%Lspcopy_curlwp_bad, %arg0
516 ldo R%panic(%r1), %r1
517 ldo R%Lspcopy_curlwp_bad(%arg0), %arg0
518 .call
519 bv,n %r0(%r1)
520 nop
521 Lspcopy_curlwp_bad:
522 .asciz "spcopy: curlwp == NULL\n"
523 .align 8
524 Lspcopy_curlwp_ok:
525 #endif /* DIAGNOSTIC */
	/* %r31 becomes the PCB pointer; PCB_ONFAULT = &spcopy_fault. */
526 ldil L%spcopy_fault, %r1
527 ldw L_PCB(%r31), %r31
528 ldo R%spcopy_fault(%r1), %r1
529 stw %r1, PCB_ONFAULT(%r31)
530
531 /* Setup the space registers. */
532 mfsp %sr2, %ret1
533 mtsp %arg0, %sr1
534 mtsp %arg2, %sr2
535
536 /* Get the len argument and do the copy. */
537 ldw HPPA_FRAME_ARG(4)(%sp), %arg0
538 #define _LABEL(l) __CONCAT(spcopy,l)
539 _COPY_FORWARD(%sr1,%arg1,%sr2,%arg3,%arg0)
540 _LABEL(_done):
	/*
	 * _LABEL is not needed past this point. Undefine it, as the
	 * bcopy/memmove code in this file does after each use, so that
	 * it cannot clash with a later definition.
	 */
#undef _LABEL
541
542 /* Return. */
543 copy %r0, %ret0
544 ALTENTRY(spcopy_fault)
	/* Clear the fault handler; %sr2 is restored in the delay slot. */
545 stw %r0, PCB_ONFAULT(%r31)
546 bv %r0(%rp)
547 mtsp %ret1, %sr2
548 EXIT(spcopy)
549 #endif /* SPCOPY && !_STANDALONE */
550
551 #ifdef MEMCPY
552 /*
553 * void *memcpy(void *restrict dst, const void *restrict src, size_t len);
554 *
555 * memcpy is specifically restricted to working on
556 * non-overlapping regions, so we can just copy forward.
 *
 * dst arrives in %arg0, src in %arg1 and len in %arg2. The
 * original dst is copied to %ret0 first, because the copy macro
 * is handed %arg0 as its destination offset register and
 * advances it.
557 */
558 LEAF_ENTRY(memcpy)
559 copy %arg0, %ret0
560 #define _LABEL(l) __CONCAT(memcpy,l)
561 _COPY_FORWARD(%sr0,%arg1,%sr0,%arg0,%arg2)
562 _LABEL(_done):
	/*
	 * _LABEL is not needed past this point. Undefine it, as the
	 * bcopy/memmove code in this file does after each use, so that
	 * it cannot clash with a later definition.
	 */
#undef _LABEL
563 bv,n %r0(%rp)
564 nop
565 EXIT(memcpy)
566 #endif /* MEMCPY */
567
568 #if defined(BCOPY)
569 /*
570 * void bcopy(const void *src, void *dst, size_t len);
 *
 * bcopy takes (src, dst) while the shared code below expects
 * (dst, src), so exchange %arg0 and %arg1 through %r1 and fall
 * through. The shared code overwrites %r1 before reading it.
571 */
572 LEAF_ENTRY(bcopy)
573 copy %arg1, %r1
574 copy %arg0, %arg1
575 copy %r1, %arg0
576 /* FALLTHROUGH */
577 #define _LABEL_F(sfx) __CONCAT(bcopy_F,sfx)
578 #define _LABEL_R(sfx) __CONCAT(bcopy_R,sfx)
579 #endif
580
581 #if defined(MEMMOVE)
582 /*
583 * void *memmove(void *dst, const void *src, size_t len);
 *
 * Only the entry point lives here: it picks the label prefixes
 * for the shared copy code that follows and saves dst in %ret0
 * as the return value.
584 */
585 LEAF_ENTRY(memmove)
586 #define _LABEL_F(sfx) __CONCAT(memmove_F,sfx)
587 #define _LABEL_R(sfx) __CONCAT(memmove_R,sfx)
588 copy %arg0, %ret0
589 #endif /* MEMMOVE */
590
591 #if defined(BCOPY) || defined(MEMMOVE)
592
593 /*
594 * If src >= dst or src + len <= dst, we copy
595 * forward, else we copy in reverse.
 *
 * On entry here %arg0 is dst, %arg1 is src and %arg2 is len.
 * Both comparisons are unsigned. The first comb, when taken,
 * nullifies the second one and lands on the forward copy.
596 */
597 add %arg1, %arg2, %r1 /* %r1 = src + len */
598 comb,>>=,n %arg1, %arg0, 0 /* src >= dst: skip the next test */
599 comb,>>,n %r1, %arg0, _LABEL_R(_go) /* src + len > dst: copy in reverse */
600
	/* Forward copy; its labels are generated through _LABEL_F. */
601 #define _LABEL _LABEL_F
602 _COPY_FORWARD(%sr0,%arg1,%sr0,%arg0,%arg2)
603 #undef _LABEL
604
	/* Reverse copy; its labels are generated through _LABEL_R. */
605 _LABEL_R(_go):
606 #define _LABEL _LABEL_R
607 _COPY_REVERSE(%sr0,%arg1,%sr0,%arg0,%arg2)
608 #undef _LABEL
609
	/* Both directions finish here and return to the caller. */
610 _LABEL_F(_done):
611 _LABEL_R(_done):
612 bv,n %r0(%rp)
613 nop
614 #ifdef BCOPY
615 EXIT(bcopy)
616 #else
617 EXIT(memmove)
618 #endif
619 #endif /* BCOPY || MEMMOVE */
620