memcpy.S revision 1.2 1 1.2 christos /* $NetBSD: memcpy.S,v 1.2 2013/03/17 02:13:10 christos Exp $ */
2 1.1 christos
3 1.1 christos /*
4 1.1 christos * Copyright (c) 1996-2002 Eduardo Horvath
5 1.1 christos * All rights reserved.
6 1.1 christos *
7 1.1 christos * Redistribution and use in source and binary forms, with or without
8 1.1 christos * modification, are permitted provided that the following conditions
9 1.1 christos * are met:
10 1.1 christos * 1. Redistributions of source code must retain the above copyright
11 1.1 christos * notice, this list of conditions and the following disclaimer.
12 1.1 christos *
13 1.1 christos * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
14 1.1 christos * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 1.1 christos * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 1.1 christos * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
17 1.1 christos * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 1.1 christos * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 1.1 christos * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 1.1 christos * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 1.1 christos * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 1.1 christos * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 1.1 christos * SUCH DAMAGE.
24 1.1 christos *
25 1.1 christos */
26 1.1 christos #include "strmacros.h"
27 1.2 christos #if defined(LIBC_SCCS) && !defined(lint)
28 1.2 christos RCSID("$NetBSD: memcpy.S,v 1.2 2013/03/17 02:13:10 christos Exp $")
29 1.2 christos #endif /* LIBC_SCCS and not lint */
30 1.1 christos
31 1.1 christos /*
32 1.2 christos * memcpy
33 1.2 christos * Assumes regions do not overlap;
34 1.1 christos *
35 1.1 christos * Must not use %g7 (see copyin/copyout above).
36 1.1 christos */
/*
 * Entry points.
 *
 * memcpy(dest, src, size) swaps its first two arguments and falls through
 * into bcopy(src, dest, size), so past this block %o0 = src, %o1 = dest and
 * %o2 = size.  The bcopy symbol itself is only emitted when not building
 * the kernel proper (or when building a rump kernel).
 */
37 1.1 christos ENTRY(memcpy) /* dest, src, size */
38 1.1 christos /*
39 1.1 christos * Swap args for bcopy. Gcc generates calls to memcpy for
40 1.1 christos * structure assignments.
41 1.1 christos */
42 1.1 christos mov %o0, %o3
43 1.1 christos mov %o1, %o0
44 1.1 christos mov %o3, %o1
45 1.1 christos #if !defined(_KERNEL) || defined(_RUMPKERNEL)
46 1.1 christos ENTRY(bcopy) /* src, dest, size */
47 1.1 christos #endif
48 1.1 christos #ifdef DEBUG
/*
 * Optional call trace.  In the kernel the trace is skipped (branch to 3)
 * unless bit 0x80 of pmapdebug is set.
 */
49 1.1 christos #if defined(_KERNEL) && !defined(_RUMPKERNEL)
50 1.1 christos set pmapdebug, %o4
51 1.1 christos ld [%o4], %o4
52 1.1 christos btst 0x80, %o4 ! PDB_COPY
53 1.1 christos bz,pt %icc, 3f
54 1.1 christos nop
55 1.1 christos #endif
/*
 * After the save, %i0 = src, %i1 = dest, %i2 = size.  The format string
 * reads "dest<-src", so dest must be the first %p argument and src the
 * second; the size goes into %o3 in the delay slot of the call.
 */
56 1.1 christos save %sp, -CC64FSZ, %sp
57 1.1 christos mov %i1, %o1
58 1.1 christos set 2f, %o0
59 1.1 christos mov %i0, %o2
60 1.1 christos call printf
61 1.1 christos mov %i2, %o3
62 1.1 christos ! ta 1; nop
63 1.1 christos restore
64 1.1 christos .data
65 1.1 christos 2: .asciz "memcpy(%p<-%p,%x)\n"
66 1.1 christos _ALIGN
67 1.1 christos .text
68 1.1 christos 3:
69 1.1 christos #endif
70 1.1 christos
/*
 * Size dispatch and the short-copy path.
 *
 * On entry %o0 = src, %o1 = dest, %o2 = size.  Sizes >= BCOPY_SMALL branch
 * to the local label 2 further down; the "cmp %o2, 256" sits in the delay
 * slot of that branch, so it executes whether or not the branch is taken.
 */
71 1.1 christos cmp %o2, BCOPY_SMALL
72 1.1 christos
73 1.1 christos Lmemcpy_start:
74 1.1 christos bge,pt CCCR, 2f ! if >= this many, go be fancy.
75 1.1 christos cmp %o2, 256
76 1.1 christos
/* %o5 keeps the unmodified dest pointer; it is moved into %o0 on return. */
77 1.1 christos mov %o1, %o5 ! Save memcpy return value
78 1.1 christos /*
79 1.1 christos * Not much to copy, just do it a byte at a time.
80 1.1 christos */
81 1.1 christos deccc %o2 ! while (--len >= 0)
82 1.1 christos bl 1f
83 1.1 christos .empty
/*
 * One byte per iteration.  The "inc %o0" at label 0 is also the delay slot
 * of the "bl" above, and the "inc %o1" is the delay slot of the loop branch.
 */
84 1.1 christos 0:
85 1.1 christos inc %o0
86 1.1 christos ldsb [%o0 - 1], %o4 ! (++dst)[-1] = *src++;
87 1.1 christos stb %o4, [%o1]
88 1.1 christos deccc %o2
89 1.1 christos bge 0b
90 1.1 christos inc %o1
/* Leaf return: the saved dest pointer is placed in %o0 in the delay slot. */
91 1.1 christos 1:
92 1.1 christos retl
93 1.1 christos mov %o5, %o0
94 1.1 christos NOTREACHED
95 1.1 christos
/*
 * Large-copy path, stage 1: advance the destination pointer past any
 * 1-, 2- and 4-byte misalignment.  Each step tests one low bit of the
 * dest address, loads that many bytes from the (possibly unaligned)
 * source, stores them, and decrements the remaining length.
 */
96 1.1 christos /*
97 1.1 christos * Plenty of data to copy, so try to do it optimally.
98 1.1 christos */
99 1.1 christos 2:
100 1.1 christos #ifdef USE_BLOCK_STORE_LOAD
/* Uses the condition codes of the "cmp %o2, 256" done before reaching label 2. */
101 1.1 christos ! If it is big enough, use VIS instructions
102 1.1 christos bge Lmemcpy_block
103 1.1 christos nop
104 1.1 christos #endif /* USE_BLOCK_STORE_LOAD */
105 1.1 christos Lmemcpy_fancy:
106 1.1 christos
107 1.1 christos !!
108 1.1 christos !! First align the output to a 8-byte entity
109 1.1 christos !!
110 1.1 christos
/* New register window: %l0 = src, %l1 = dest, %l2 = remaining length. */
111 1.1 christos save %sp, -CC64FSZ, %sp
112 1.1 christos
113 1.1 christos mov %i0, %l0
114 1.1 christos mov %i1, %l1
115 1.1 christos
116 1.1 christos mov %i2, %l2
117 1.1 christos btst 1, %l1
118 1.1 christos
/*
 * Dest bit 0 set: move one byte.  The "btst 2" in the delay slot prepares
 * the test used at the next label 4 when the branch is taken.
 */
119 1.1 christos bz,pt %icc, 4f
120 1.1 christos btst 2, %l1
121 1.1 christos ldub [%l0], %l4 ! Load 1st byte
122 1.1 christos
/* Leave for Lmemcpy_finish if the remaining length drops to zero or below. */
123 1.1 christos deccc 1, %l2
124 1.1 christos ble,pn CCCR, Lmemcpy_finish ! XXXX
125 1.1 christos inc 1, %l0
126 1.1 christos
127 1.1 christos stb %l4, [%l1] ! Store 1st byte
128 1.1 christos inc 1, %l1 ! Update address
129 1.1 christos btst 2, %l1
/*
 * Dest bit 1 set: move one halfword.  If the source is odd the halfword is
 * assembled from two byte loads; otherwise the annulled delay slot does a
 * single lduh.
 */
130 1.1 christos 4:
131 1.1 christos bz,pt %icc, 4f
132 1.1 christos
133 1.1 christos btst 1, %l0
134 1.1 christos bz,a 1f
135 1.1 christos lduh [%l0], %l4 ! Load short
136 1.1 christos
137 1.1 christos ldub [%l0], %l4 ! Load bytes
138 1.1 christos
139 1.1 christos ldub [%l0+1], %l3
140 1.1 christos sllx %l4, 8, %l4
141 1.1 christos or %l3, %l4, %l4
142 1.1 christos
143 1.1 christos 1:
144 1.1 christos deccc 2, %l2
145 1.1 christos ble,pn CCCR, Lmemcpy_finish ! XXXX
146 1.1 christos inc 2, %l0
147 1.1 christos sth %l4, [%l1] ! Store 1st short
148 1.1 christos
149 1.1 christos inc 2, %l1
/*
 * Dest bit 2 set: move one 32-bit word.  Depending on the low two source
 * address bits the word is read with one lduw, two lduh, or
 * ldub + lduh + ldub, and merged in %l4 with shifts and ors.
 */
150 1.1 christos 4:
151 1.1 christos btst 4, %l1
152 1.1 christos bz,pt CCCR, 4f
153 1.1 christos
154 1.1 christos btst 3, %l0
155 1.1 christos bz,a,pt CCCR, 1f
156 1.1 christos lduw [%l0], %l4 ! Load word -1
157 1.1 christos
158 1.1 christos btst 1, %l0
159 1.1 christos bz,a,pt %icc, 2f
160 1.1 christos lduh [%l0], %l4
161 1.1 christos
162 1.1 christos ldub [%l0], %l4
163 1.1 christos
164 1.1 christos lduh [%l0+1], %l3
165 1.1 christos sllx %l4, 16, %l4
166 1.1 christos or %l4, %l3, %l4
167 1.1 christos
168 1.1 christos ldub [%l0+3], %l3
169 1.1 christos sllx %l4, 8, %l4
170 1.1 christos ba,pt %icc, 1f
171 1.1 christos or %l4, %l3, %l4
172 1.1 christos
173 1.1 christos 2:
174 1.1 christos lduh [%l0+2], %l3
175 1.1 christos sllx %l4, 16, %l4
176 1.1 christos or %l4, %l3, %l4
177 1.1 christos
178 1.1 christos 1:
179 1.1 christos deccc 4, %l2
180 1.1 christos ble,pn CCCR, Lmemcpy_finish ! XXXX
181 1.1 christos inc 4, %l0
182 1.1 christos
183 1.1 christos st %l4, [%l1] ! Store word
184 1.1 christos inc 4, %l1
185 1.1 christos 4:
/*
 * Large-copy path, stage 2: 8-byte stores to an aligned destination from a
 * source that may be misaligned within its 8-byte word.
 *
 * NOTE(review): the comment below says "32-bit aligned", but bits 0, 1 and 2
 * of the dest address were each handled before reaching this point and the
 * code below stores with stx, so the dest appears to be 64-bit aligned here.
 */
186 1.1 christos !!
187 1.1 christos !! We are now 32-bit aligned in the dest.
188 1.1 christos !!
189 1.1 christos Lmemcpy_common:
190 1.1 christos
/*
 * %l4 = source offset within its 8-byte word, %l0 = source rounded down to
 * an 8-byte boundary.  A zero offset takes the no-shift copy; the sllx in
 * the delay slot of that branch leaves a zero %l4 unchanged.
 */
191 1.1 christos and %l0, 7, %l4 ! Shift amount
192 1.1 christos andn %l0, 7, %l0 ! Source addr
193 1.1 christos
194 1.1 christos brz,pt %l4, Lmemcpy_noshift8 ! No shift version...
195 1.1 christos
196 1.1 christos sllx %l4, 3, %l4 ! In bits
197 1.1 christos mov 8<<3, %l3
198 1.1 christos
/*
 * %l4 = left shift in bits, %l3 = 64 - %l4 (right shift).  %o0 is primed
 * with the first source word already shifted left.  %l2 is biased by
 * -12*8 here; a negative result skips the unrolled loop via label 2.
 */
199 1.1 christos ldx [%l0], %o0 ! Load word -1
200 1.1 christos sub %l3, %l4, %l3 ! Reverse shift
201 1.1 christos deccc 12*8, %l2 ! Have enough room?
202 1.1 christos
203 1.1 christos sllx %o0, %l4, %o0
204 1.1 christos bl,pn CCCR, 2f
205 1.1 christos and %l3, 0x38, %l3
206 1.1 christos Lmemcpy_unrolled8:
207 1.1 christos
208 1.1 christos /*
209 1.1 christos * This is about as close to optimal as you can get, since
210 1.1 christos * the shifts require EU0 and cannot be paired, and you have
211 1.1 christos * 3 dependent operations on the data.
212 1.1 christos */
213 1.1 christos
214 1.1 christos ! ldx [%l0+0*8], %o0 ! Already done
215 1.1 christos ! sllx %o0, %l4, %o0 ! Already done
216 1.1 christos ldx [%l0+1*8], %o1
217 1.1 christos ldx [%l0+2*8], %o2
218 1.1 christos ldx [%l0+3*8], %o3
219 1.1 christos ldx [%l0+4*8], %o4
220 1.1 christos ba,pt %icc, 1f
221 1.1 christos ldx [%l0+5*8], %o5
222 1.1 christos .align 8
/*
 * Loop body: six 8-byte stores per iteration.  Each stored word is
 * (previous word << %l4) | (next word >> %l3); as soon as a register has
 * been consumed it is reloaded from the next 48-byte group.  %g1 and %g6
 * are scratch.
 */
223 1.1 christos 1:
224 1.1 christos srlx %o1, %l3, %g1
225 1.1 christos inc 6*8, %l0
226 1.1 christos
227 1.1 christos sllx %o1, %l4, %o1
228 1.1 christos or %g1, %o0, %g6
229 1.1 christos ldx [%l0+0*8], %o0
230 1.1 christos
231 1.1 christos stx %g6, [%l1+0*8]
232 1.1 christos srlx %o2, %l3, %g1
233 1.1 christos
234 1.1 christos sllx %o2, %l4, %o2
235 1.1 christos or %g1, %o1, %g6
236 1.1 christos ldx [%l0+1*8], %o1
237 1.1 christos
238 1.1 christos stx %g6, [%l1+1*8]
239 1.1 christos srlx %o3, %l3, %g1
240 1.1 christos
241 1.1 christos sllx %o3, %l4, %o3
242 1.1 christos or %g1, %o2, %g6
243 1.1 christos ldx [%l0+2*8], %o2
244 1.1 christos
245 1.1 christos stx %g6, [%l1+2*8]
246 1.1 christos srlx %o4, %l3, %g1
247 1.1 christos
248 1.1 christos sllx %o4, %l4, %o4
249 1.1 christos or %g1, %o3, %g6
250 1.1 christos ldx [%l0+3*8], %o3
251 1.1 christos
252 1.1 christos stx %g6, [%l1+3*8]
253 1.1 christos srlx %o5, %l3, %g1
254 1.1 christos
255 1.1 christos sllx %o5, %l4, %o5
256 1.1 christos or %g1, %o4, %g6
257 1.1 christos ldx [%l0+4*8], %o4
258 1.1 christos
259 1.1 christos stx %g6, [%l1+4*8]
260 1.1 christos srlx %o0, %l3, %g1
261 1.1 christos deccc 6*8, %l2 ! Have enough room?
262 1.1 christos
263 1.1 christos sllx %o0, %l4, %o0 ! Next loop
264 1.1 christos or %g1, %o5, %g6
265 1.1 christos ldx [%l0+5*8], %o5
266 1.1 christos
/* The dest pointer is advanced in the delay slot of the loop branch. */
267 1.1 christos stx %g6, [%l1+5*8]
268 1.1 christos bge,pt CCCR, 1b
269 1.1 christos inc 6*8, %l1
270 1.1 christos
/*
 * Drain of the unrolled shift loop, followed by the one-word-at-a-time
 * shifted copy and the hand-off to Lmemcpy_finish.
 *
 * The cleanup stores five more merged words from %o0..%o5 without issuing
 * further loads, then keeps the shifted leftover of %o5 in %o0 as the
 * pending partial word.
 */
271 1.1 christos Lmemcpy_unrolled8_cleanup:
272 1.1 christos !!
273 1.1 christos !! Finished 8 byte block, unload the regs.
274 1.1 christos !!
275 1.1 christos srlx %o1, %l3, %g1
276 1.1 christos inc 5*8, %l0
277 1.1 christos
278 1.1 christos sllx %o1, %l4, %o1
279 1.1 christos or %g1, %o0, %g6
280 1.1 christos
281 1.1 christos stx %g6, [%l1+0*8]
282 1.1 christos srlx %o2, %l3, %g1
283 1.1 christos
284 1.1 christos sllx %o2, %l4, %o2
285 1.1 christos or %g1, %o1, %g6
286 1.1 christos
287 1.1 christos stx %g6, [%l1+1*8]
288 1.1 christos srlx %o3, %l3, %g1
289 1.1 christos
290 1.1 christos sllx %o3, %l4, %o3
291 1.1 christos or %g1, %o2, %g6
292 1.1 christos
293 1.1 christos stx %g6, [%l1+2*8]
294 1.1 christos srlx %o4, %l3, %g1
295 1.1 christos
296 1.1 christos sllx %o4, %l4, %o4
297 1.1 christos or %g1, %o3, %g6
298 1.1 christos
299 1.1 christos stx %g6, [%l1+3*8]
300 1.1 christos srlx %o5, %l3, %g1
301 1.1 christos
302 1.1 christos sllx %o5, %l4, %o5
303 1.1 christos or %g1, %o4, %g6
304 1.1 christos
305 1.1 christos stx %g6, [%l1+4*8]
306 1.1 christos inc 5*8, %l1
307 1.1 christos
308 1.1 christos mov %o5, %o0 ! Save our unused data
309 1.1 christos dec 5*8, %l2
/*
 * Label 2 adds 12*8 back to %l2, removing the bias that was subtracted
 * before the unrolled loop.  The "deccc 8" below is the delay slot of the
 * bz, so it executes on both outcomes.
 */
310 1.1 christos 2:
311 1.1 christos inccc 12*8, %l2
312 1.1 christos bz,pn %icc, Lmemcpy_complete
313 1.1 christos
/*
 * NOTE(review): despite the "Unrolled 8 times" remark, the loop at label 1
 * below moves a single 8-byte word per iteration.
 */
314 1.1 christos !! Unrolled 8 times
315 1.1 christos Lmemcpy_aligned8:
316 1.1 christos ! ldx [%l0], %o0 ! Already done
317 1.1 christos ! sllx %o0, %l4, %o0 ! Shift high word
318 1.1 christos
319 1.1 christos deccc 8, %l2 ! Pre-decrement
320 1.1 christos bl,pn CCCR, Lmemcpy_finish
/*
 * The ldx at label 1 is also the delay slot of the bl above.  Each pass
 * merges the pending bits in %o0 with the top of the next source word,
 * stores the result, and leaves the rest of that word in %o0 (the sllx in
 * the loop branch's delay slot).
 */
321 1.1 christos 1:
322 1.1 christos ldx [%l0+8], %o1 ! Load word 0
323 1.1 christos inc 8, %l0
324 1.1 christos
325 1.1 christos srlx %o1, %l3, %g6
326 1.1 christos or %g6, %o0, %g6 ! Combine
327 1.1 christos
328 1.1 christos stx %g6, [%l1] ! Store result
329 1.1 christos inc 8, %l1
330 1.1 christos
331 1.1 christos deccc 8, %l2
332 1.1 christos bge,pn CCCR, 1b
333 1.1 christos sllx %o1, %l4, %o0
334 1.1 christos
/* Low three bits of %l2 clear: no trailing bytes remain. */
335 1.1 christos btst 7, %l2 ! Done?
336 1.1 christos bz,pt CCCR, Lmemcpy_complete
337 1.1 christos
338 1.1 christos !!
339 1.1 christos !! Loadup the last dregs into %o0 and shift it into place
340 1.1 christos !!
/*
 * Compare the bytes still needed with the bytes already held in %o0; only
 * when more are needed is one further source word loaded and or-ed in.
 */
341 1.1 christos srlx %l3, 3, %g6 ! # bytes in %o0
342 1.1 christos dec 8, %g6 ! - 8
343 1.1 christos !! n-8 - (by - 8) -> n - by
344 1.1 christos subcc %l2, %g6, %g0 ! # bytes we need
345 1.1 christos ble,pt %icc, Lmemcpy_finish
346 1.1 christos nop
347 1.1 christos ldx [%l0+8], %o1 ! Need another word
348 1.1 christos srlx %o1, %l3, %o1
349 1.1 christos ba,pt %icc, Lmemcpy_finish
350 1.1 christos or %o0, %o1, %o0 ! All loaded up.
351 1.1 christos
/*
 * Copy for the case where the source offset within its 8-byte word is zero
 * (reached from the brz on %l4): plain ldx/stx with no shifting.
 * %l0 = src, %l1 = dest, %l2 = remaining length.
 */
352 1.1 christos Lmemcpy_noshift8:
/* %l2 is biased by -6*8 while in the 48-byte loop; label 2 adds it back. */
353 1.1 christos deccc 6*8, %l2 ! Have enough room?
354 1.1 christos bl,pn CCCR, 2f
355 1.1 christos nop
356 1.1 christos ba,pt %icc, 1f
357 1.1 christos nop
358 1.1 christos .align 32
/* Six 8-byte words per iteration; dest advances in the branch delay slot. */
359 1.1 christos 1:
360 1.1 christos ldx [%l0+0*8], %o0
361 1.1 christos ldx [%l0+1*8], %o1
362 1.1 christos ldx [%l0+2*8], %o2
363 1.1 christos stx %o0, [%l1+0*8]
364 1.1 christos stx %o1, [%l1+1*8]
365 1.1 christos stx %o2, [%l1+2*8]
366 1.1 christos
367 1.1 christos
368 1.1 christos ldx [%l0+3*8], %o3
369 1.1 christos ldx [%l0+4*8], %o4
370 1.1 christos ldx [%l0+5*8], %o5
371 1.1 christos inc 6*8, %l0
372 1.1 christos stx %o3, [%l1+3*8]
373 1.1 christos deccc 6*8, %l2
374 1.1 christos stx %o4, [%l1+4*8]
375 1.1 christos stx %o5, [%l1+5*8]
376 1.1 christos bge,pt CCCR, 1b
377 1.1 christos inc 6*8, %l1
378 1.1 christos 2:
379 1.1 christos inc 6*8, %l2
/*
 * One 8-byte word per iteration.  The bg at the bottom reuses the condition
 * codes of the deccc at the top; the instructions in between do not set
 * them.
 */
380 1.1 christos 1:
381 1.1 christos deccc 8, %l2
382 1.1 christos bl,pn %icc, 1f ! < 0 --> sub word
383 1.1 christos nop
384 1.1 christos ldx [%l0], %g6
385 1.1 christos inc 8, %l0
386 1.1 christos stx %g6, [%l1]
387 1.1 christos bg,pt %icc, 1b ! Exactly 0 --> done
388 1.1 christos inc 8, %l1
/*
 * Trailing bytes: if the low three bits of %l2 are non-zero, load the final
 * source word into %o0 and fall through into Lmemcpy_finish.  %l4 is
 * cleared in the delay slot on both outcomes.
 */
389 1.1 christos 1:
390 1.1 christos btst 7, %l2 ! Done?
391 1.1 christos bz,pt CCCR, Lmemcpy_complete
392 1.1 christos clr %l4
393 1.1 christos ldx [%l0], %o0
/*
 * Tail store and return.
 *
 * On entry %o0 holds the remaining source bytes left-justified (most
 * significant byte first), %l1 = dest and %l2 = remaining length; only
 * zero, exactly 8, and the low three bits of %l2 are examined.  Bits 4, 2
 * and 1 of %l2 select a word, halfword and byte store in turn, each taken
 * from the top of what is left in %o0.  %g6 is scratch.
 */
394 1.1 christos Lmemcpy_finish:
395 1.1 christos
/* The cmp is the delay slot of the brz; the stx is annulled unless %l2 == 8. */
396 1.1 christos brz,pn %l2, 2f ! 100% complete?
397 1.1 christos cmp %l2, 8 ! Exactly 8 bytes?
398 1.1 christos bz,a,pn CCCR, 2f
399 1.1 christos stx %o0, [%l1]
400 1.1 christos
/*
 * The srlx in each delay slot always executes, so on the "bit clear" path
 * %g6 already holds the upper part, which label 1 copies back into %o0.
 */
401 1.1 christos btst 4, %l2 ! Word store?
402 1.1 christos bz CCCR, 1f
403 1.1 christos srlx %o0, 32, %g6 ! Shift high word down
404 1.1 christos stw %g6, [%l1]
405 1.1 christos inc 4, %l1
406 1.1 christos mov %o0, %g6 ! Operate on the low bits
407 1.1 christos 1:
408 1.1 christos btst 2, %l2
409 1.1 christos mov %g6, %o0
410 1.1 christos bz 1f
411 1.1 christos srlx %o0, 16, %g6
412 1.1 christos
413 1.1 christos sth %g6, [%l1] ! Store short
414 1.1 christos inc 2, %l1
415 1.1 christos mov %o0, %g6 ! Operate on low bytes
416 1.1 christos 1:
417 1.1 christos mov %g6, %o0
418 1.1 christos btst 1, %l2 ! Byte aligned?
419 1.1 christos bz 2f
420 1.1 christos srlx %o0, 8, %g6
421 1.1 christos
422 1.1 christos stb %g6, [%l1] ! Store last byte
423 1.1 christos inc 1, %l1 ! Update address
424 1.1 christos 2:
425 1.1 christos Lmemcpy_complete:
/* Compiled out: byte-by-byte comparison of source and dest after the copy. */
426 1.1 christos #if 0
427 1.1 christos !!
428 1.1 christos !! verify copy success.
429 1.1 christos !!
430 1.1 christos
431 1.1 christos mov %i0, %o2
432 1.1 christos mov %i1, %o4
433 1.1 christos mov %i2, %l4
434 1.1 christos 0:
435 1.1 christos ldub [%o2], %o1
436 1.1 christos inc %o2
437 1.1 christos ldub [%o4], %o3
438 1.1 christos inc %o4
439 1.1 christos cmp %o3, %o1
440 1.1 christos bnz 1f
441 1.1 christos dec %l4
442 1.1 christos brnz %l4, 0b
443 1.1 christos nop
444 1.1 christos ba 2f
445 1.1 christos nop
446 1.1 christos
447 1.1 christos 1:
448 1.1 christos set 0f, %o0
449 1.1 christos call printf
450 1.1 christos sub %i2, %l4, %o5
451 1.1 christos set 1f, %o0
452 1.1 christos mov %i0, %o2
453 1.1 christos mov %i1, %o1
454 1.1 christos call printf
455 1.1 christos mov %i2, %o3
456 1.1 christos ta 1
457 1.1 christos .data
458 1.1 christos 0: .asciz "memcpy failed: %x@%p != %x@%p byte %d\n"
459 1.1 christos 1: .asciz "memcpy(%p, %p, %lx)\n"
460 1.1 christos .align 8
461 1.1 christos .text
462 1.1 christos 2:
463 1.1 christos #endif
/* Return the original dest (%i1) in the caller's %o0 while popping the window. */
464 1.1 christos ret
465 1.1 christos restore %i1, %g0, %o0
466 1.1 christos
467 1.1 christos #ifdef USE_BLOCK_STORE_LOAD
468 1.1 christos
469 1.1 christos /*
470 1.1 christos * Block copy. Useful for >= 256 byte copies.
471 1.1 christos *
472 1.1 christos * Benchmarking has shown this always seems to be slower than
473 1.1 christos * the integer version, so this is disabled. Maybe someone will
474 1.1 christos * figure out why sometime.
475 1.1 christos */
476 1.1 christos
477 1.1 christos Lmemcpy_block:
478 1.1 christos sethi %hi(block_disable), %o3
479 1.1 christos ldx [ %o3 + %lo(block_disable) ], %o3
480 1.1 christos brnz,pn %o3, Lmemcpy_fancy
481 1.1 christos !! Make sure our trap table is installed
482 1.1 christos set _C_LABEL(trapbase), %o5
483 1.1 christos rdpr %tba, %o3
484 1.1 christos sub %o3, %o5, %o3
485 1.1 christos brnz,pn %o3, Lmemcpy_fancy ! No, then don't use block load/store
486 1.1 christos nop
487 1.1 christos #if defined(_KERNEL) && !defined(_RUMPKERNEL)
488 1.1 christos /*
489 1.1 christos * Kernel:
490 1.1 christos *
491 1.1 christos * Here we use VIS instructions to do a block copy.
492 1.1 christos * But before we can do that we need to save and enable the FPU.
493 1.1 christos * The last owner of the FPU registers is fplwp, and
494 1.1 christos * fplwp->l_md.md_fpstate is the current fpstate. If that's not
495 1.1 christos * null, call savefpstate() with it to store our current fp state.
496 1.1 christos *
497 1.1 christos * Next, allocate an aligned fpstate on the stack. We will properly
498 1.1 christos * nest calls on a particular stack so this should not be a problem.
499 1.1 christos *
500 1.1 christos * Now we grab either curlwp (or if we're on the interrupt stack
501 1.1 christos * lwp0). We stash its existing fpstate in a local register and
502 1.1 christos * put our new fpstate in curlwp->p_md.md_fpstate. We point
503 1.1 christos * fplwp at curlwp (or lwp0) and enable the FPU.
504 1.1 christos *
505 1.1 christos * If we are ever preempted, our FPU state will be saved in our
506 1.1 christos * fpstate. Then, when we're resumed and we take an FPDISABLED
507 1.1 christos * trap, the trap handler will be able to fish our FPU state out
508 1.1 christos * of curlwp (or lwp0).
509 1.1 christos *
510 1.1 christos * On exiting this routine we undo the damage: restore the original
511 1.1 christos * pointer to curlwp->p_md.md_fpstate, clear our fplwp, and disable
512 1.1 christos * the FPU.
513 1.1 christos *
514 1.1 christos *
515 1.1 christos * Register usage, Kernel only (after save):
516 1.1 christos *
517 1.1 christos * %i0 src
518 1.1 christos * %i1 dest
519 1.1 christos * %i2 size
520 1.1 christos *
521 1.1 christos * %l0 XXXX DEBUG old fpstate
522 1.1 christos * %l1 fplwp (hi bits only)
523 1.1 christos * %l2 orig fplwp
524 1.1 christos * %l3 orig fpstate
525 1.1 christos * %l5 curlwp
526 1.1 christos * %l6 old fpstate
527 1.1 christos *
528 1.1 christos * Register usage, Kernel and user:
529 1.1 christos *
530 1.1 christos * %g1 dest (retval for memcpy)
531 1.1 christos *
532 1.1 christos * %o0 src
533 1.1 christos * %o1 dest
534 1.1 christos * %o2 end dest
535 1.1 christos * %o5 last safe fetchable address
536 1.1 christos */
537 1.1 christos
538 1.1 christos ENABLE_FPU(0)
539 1.1 christos
540 1.1 christos mov %i0, %o0 ! Src addr.
541 1.1 christos mov %i1, %o1 ! Store our dest ptr here.
542 1.1 christos mov %i2, %o2 ! Len counter
543 1.1 christos #endif /* _KERNEL */
544 1.1 christos
545 1.1 christos !!
546 1.1 christos !! First align the output to a 64-bit entity
547 1.1 christos !!
548 1.1 christos
549 1.1 christos mov %o1, %g1 ! memcpy retval
550 1.1 christos add %o0, %o2, %o5 ! End of source block
551 1.1 christos
552 1.1 christos andn %o0, 7, %o3 ! Start of block
553 1.1 christos dec %o5
554 1.1 christos fzero %f0
555 1.1 christos
556 1.1 christos andn %o5, BLOCK_ALIGN, %o5 ! Last safe addr.
557 1.1 christos ldd [%o3], %f2 ! Load 1st word
558 1.1 christos
559 1.1 christos dec 8, %o3 ! Move %o3 1 word back
560 1.1 christos btst 1, %o1
561 1.1 christos bz 4f
562 1.1 christos
563 1.1 christos mov -7, %o4 ! Lowest src addr possible
564 1.1 christos alignaddr %o0, %o4, %o4 ! Base addr for load.
565 1.1 christos
566 1.1 christos cmp %o3, %o4
567 1.1 christos be,pt CCCR, 1f ! Already loaded?
568 1.1 christos mov %o4, %o3
569 1.1 christos fmovd %f2, %f0 ! No. Shift
570 1.1 christos ldd [%o3+8], %f2 ! And load
571 1.1 christos 1:
572 1.1 christos
573 1.1 christos faligndata %f0, %f2, %f4 ! Isolate 1st byte
574 1.1 christos
575 1.1 christos stda %f4, [%o1] ASI_FL8_P ! Store 1st byte
576 1.1 christos inc 1, %o1 ! Update address
577 1.1 christos inc 1, %o0
578 1.1 christos dec 1, %o2
579 1.1 christos 4:
580 1.1 christos btst 2, %o1
581 1.1 christos bz 4f
582 1.1 christos
583 1.1 christos mov -6, %o4 ! Calculate src - 6
584 1.1 christos alignaddr %o0, %o4, %o4 ! calculate shift mask and dest.
585 1.1 christos
586 1.1 christos cmp %o3, %o4 ! Addresses same?
587 1.1 christos be,pt CCCR, 1f
588 1.1 christos mov %o4, %o3
589 1.1 christos fmovd %f2, %f0 ! Shuffle data
590 1.1 christos ldd [%o3+8], %f2 ! Load word 0
591 1.1 christos 1:
592 1.1 christos faligndata %f0, %f2, %f4 ! Move 1st short low part of f8
593 1.1 christos
594 1.1 christos stda %f4, [%o1] ASI_FL16_P ! Store 1st short
595 1.1 christos dec 2, %o2
596 1.1 christos inc 2, %o1
597 1.1 christos inc 2, %o0
598 1.1 christos 4:
599 1.1 christos brz,pn %o2, Lmemcpy_blockfinish ! XXXX
600 1.1 christos
601 1.1 christos btst 4, %o1
602 1.1 christos bz 4f
603 1.1 christos
604 1.1 christos mov -4, %o4
605 1.1 christos alignaddr %o0, %o4, %o4 ! calculate shift mask and dest.
606 1.1 christos
607 1.1 christos cmp %o3, %o4 ! Addresses same?
608 1.1 christos beq,pt CCCR, 1f
609 1.1 christos mov %o4, %o3
610 1.1 christos fmovd %f2, %f0 ! Shuffle data
611 1.1 christos ldd [%o3+8], %f2 ! Load word 0
612 1.1 christos 1:
613 1.1 christos faligndata %f0, %f2, %f4 ! Move 1st short low part of f8
614 1.1 christos
615 1.1 christos st %f5, [%o1] ! Store word
616 1.1 christos dec 4, %o2
617 1.1 christos inc 4, %o1
618 1.1 christos inc 4, %o0
619 1.1 christos 4:
620 1.1 christos brz,pn %o2, Lmemcpy_blockfinish ! XXXX
621 1.1 christos !!
622 1.1 christos !! We are now 64-bit aligned in the dest.
623 1.1 christos !!
624 1.1 christos Lmemcpy_block_common:
625 1.1 christos
626 1.1 christos mov -0, %o4
627 1.1 christos alignaddr %o0, %o4, %o4 ! base - shift
628 1.1 christos
629 1.1 christos cmp %o3, %o4 ! Addresses same?
630 1.1 christos beq,pt CCCR, 1f
631 1.1 christos mov %o4, %o3
632 1.1 christos fmovd %f2, %f0 ! Shuffle data
633 1.1 christos ldd [%o3+8], %f2 ! Load word 0
634 1.1 christos 1:
635 1.1 christos add %o3, 8, %o0 ! now use %o0 for src
636 1.1 christos
637 1.1 christos !!
638 1.1 christos !! Continue until our dest is block aligned
639 1.1 christos !!
640 1.1 christos Lmemcpy_block_aligned8:
641 1.1 christos 1:
642 1.1 christos brz %o2, Lmemcpy_blockfinish
643 1.1 christos btst BLOCK_ALIGN, %o1 ! Block aligned?
644 1.1 christos bz 1f
645 1.1 christos
646 1.1 christos faligndata %f0, %f2, %f4 ! Generate result
647 1.1 christos deccc 8, %o2
648 1.1 christos ble,pn %icc, Lmemcpy_blockfinish ! Should never happen
649 1.1 christos fmovd %f4, %f48
650 1.1 christos
651 1.1 christos std %f4, [%o1] ! Store result
652 1.1 christos inc 8, %o1
653 1.1 christos
654 1.1 christos fmovd %f2, %f0
655 1.1 christos inc 8, %o0
656 1.1 christos ba,pt %xcc, 1b ! Not yet.
657 1.1 christos ldd [%o0], %f2 ! Load next part
658 1.1 christos Lmemcpy_block_aligned64:
659 1.1 christos 1:
660 1.1 christos
661 1.1 christos /*
662 1.1 christos * 64-byte aligned -- ready for block operations.
663 1.1 christos *
664 1.1 christos * Here we have the destination block aligned, but the
665 1.1 christos * source pointer may not be. Sub-word alignment will
666 1.1 christos * be handled by faligndata instructions. But the source
667 1.1 christos * can still be potentially aligned to 8 different words
668 1.1 christos * in our 64-byte block, so we have 8 different copy routines.
669 1.1 christos *
670 1.1 christos * Once we figure out our source alignment, we branch
671 1.1 christos * to the appropriate copy routine, which sets up the
672 1.1 christos * alignment for faligndata and loads (sets) the values
673 1.1 christos * into the source registers and does the copy loop.
674 1.1 christos *
675 1.1 christos * When we're down to less than 1 block to store, we
676 1.1 christos * exit the copy loop and execute cleanup code.
677 1.1 christos *
678 1.1 christos * Block loads and stores are not properly interlocked.
679 1.1 christos * Stores save one reg/cycle, so you can start overwriting
680 1.1 christos * registers the cycle after the store is issued.
681 1.1 christos *
682 1.1 christos * Block loads require a block load to a different register
683 1.1 christos * block or a membar #Sync before accessing the loaded
684 1.1 christos * data.
685 1.1 christos *
686 1.1 christos * Since the faligndata instructions may be offset as far
687 1.1 christos * as 7 registers into a block (if you are shifting source
688 1.1 christos * 7 -> dest 0), you need 3 source register blocks for full
689 1.1 christos * performance: one you are copying, one you are loading,
690 1.1 christos * and one for interlocking. Otherwise, we would need to
691 1.1 christos * sprinkle the code with membar #Sync and lose the advantage
692 1.1 christos * of running faligndata in parallel with block stores. This
693 1.1 christos * means we are fetching a full 128 bytes ahead of the stores.
694 1.1 christos * We need to make sure the prefetch does not inadvertently
695 1.1 christos * cross a page boundary and fault on data that we will never
696 1.1 christos * store.
697 1.1 christos *
698 1.1 christos */
699 1.1 christos #if 1
700 1.1 christos and %o0, BLOCK_ALIGN, %o3
701 1.1 christos srax %o3, 3, %o3 ! Isolate the offset
702 1.1 christos
703 1.1 christos brz %o3, L100 ! 0->0
704 1.1 christos btst 4, %o3
705 1.1 christos bnz %xcc, 4f
706 1.1 christos btst 2, %o3
707 1.1 christos bnz %xcc, 2f
708 1.1 christos btst 1, %o3
709 1.1 christos ba,pt %xcc, L101 ! 0->1
710 1.1 christos nop /* XXX spitfire bug */
711 1.1 christos 2:
712 1.1 christos bz %xcc, L102 ! 0->2
713 1.1 christos nop
714 1.1 christos ba,pt %xcc, L103 ! 0->3
715 1.1 christos nop /* XXX spitfire bug */
716 1.1 christos 4:
717 1.1 christos bnz %xcc, 2f
718 1.1 christos btst 1, %o3
719 1.1 christos bz %xcc, L104 ! 0->4
720 1.1 christos nop
721 1.1 christos ba,pt %xcc, L105 ! 0->5
722 1.1 christos nop /* XXX spitfire bug */
723 1.1 christos 2:
724 1.1 christos bz %xcc, L106 ! 0->6
725 1.1 christos nop
726 1.1 christos ba,pt %xcc, L107 ! 0->7
727 1.1 christos nop /* XXX spitfire bug */
728 1.1 christos #else
729 1.1 christos
730 1.1 christos !!
731 1.1 christos !! Isolate the word offset, which just happens to be
732 1.1 christos !! the slot in our jump table.
733 1.1 christos !!
734 1.1 christos !! This is 6 insns, most of which cannot be paired,
735 1.1 christos !! which is about the same as the above version.
736 1.1 christos !!
737 1.1 christos rd %pc, %o4
738 1.1 christos 1:
739 1.1 christos and %o0, 0x31, %o3
740 1.1 christos add %o3, (Lmemcpy_block_jmp - 1b), %o3
741 1.1 christos jmpl %o4 + %o3, %g0
742 1.1 christos nop
743 1.1 christos
744 1.1 christos !!
745 1.1 christos !! Jump table
746 1.1 christos !!
747 1.1 christos
748 1.1 christos Lmemcpy_block_jmp:
749 1.1 christos ba,a,pt %xcc, L100
750 1.1 christos nop
751 1.1 christos ba,a,pt %xcc, L101
752 1.1 christos nop
753 1.1 christos ba,a,pt %xcc, L102
754 1.1 christos nop
755 1.1 christos ba,a,pt %xcc, L103
756 1.1 christos nop
757 1.1 christos ba,a,pt %xcc, L104
758 1.1 christos nop
759 1.1 christos ba,a,pt %xcc, L105
760 1.1 christos nop
761 1.1 christos ba,a,pt %xcc, L106
762 1.1 christos nop
763 1.1 christos ba,a,pt %xcc, L107
764 1.1 christos nop
765 1.1 christos #endif
766 1.1 christos
767 1.1 christos !!
768 1.1 christos !! Source is block aligned.
769 1.1 christos !!
770 1.1 christos !! Just load a block and go.
771 1.1 christos !!
772 1.1 christos L100:
773 1.1 christos #ifdef RETURN_NAME
774 1.1 christos sethi %hi(1f), %g1
775 1.1 christos ba,pt %icc, 2f
776 1.1 christos or %g1, %lo(1f), %g1
777 1.1 christos 1:
778 1.1 christos .asciz "L100"
779 1.1 christos .align 8
780 1.1 christos 2:
781 1.1 christos #endif
782 1.1 christos fmovd %f0 , %f62
783 1.1 christos ldda [%o0] ASI_BLK_P, %f0
784 1.1 christos inc BLOCK_SIZE, %o0
785 1.1 christos cmp %o0, %o5
786 1.1 christos bleu,a,pn %icc, 3f
787 1.1 christos ldda [%o0] ASI_BLK_P, %f16
788 1.1 christos ba,pt %icc, 3f
789 1.1 christos membar #Sync
790 1.1 christos
791 1.1 christos .align 32 ! ICache align.
792 1.1 christos 3:
793 1.1 christos faligndata %f62, %f0, %f32
794 1.1 christos inc BLOCK_SIZE, %o0
795 1.1 christos faligndata %f0, %f2, %f34
796 1.1 christos dec BLOCK_SIZE, %o2
797 1.1 christos faligndata %f2, %f4, %f36
798 1.1 christos cmp %o0, %o5
799 1.1 christos faligndata %f4, %f6, %f38
800 1.1 christos faligndata %f6, %f8, %f40
801 1.1 christos faligndata %f8, %f10, %f42
802 1.1 christos faligndata %f10, %f12, %f44
803 1.1 christos brlez,pn %o2, Lmemcpy_blockdone
804 1.1 christos faligndata %f12, %f14, %f46
805 1.1 christos
806 1.1 christos bleu,a,pn %icc, 2f
807 1.1 christos ldda [%o0] ASI_BLK_P, %f48
808 1.1 christos membar #Sync
809 1.1 christos 2:
810 1.1 christos stda %f32, [%o1] ASI_STORE
811 1.1 christos faligndata %f14, %f16, %f32
812 1.1 christos inc BLOCK_SIZE, %o0
813 1.1 christos faligndata %f16, %f18, %f34
814 1.1 christos inc BLOCK_SIZE, %o1
815 1.1 christos faligndata %f18, %f20, %f36
816 1.1 christos dec BLOCK_SIZE, %o2
817 1.1 christos faligndata %f20, %f22, %f38
818 1.1 christos cmp %o0, %o5
819 1.1 christos faligndata %f22, %f24, %f40
820 1.1 christos faligndata %f24, %f26, %f42
821 1.1 christos faligndata %f26, %f28, %f44
822 1.1 christos brlez,pn %o2, Lmemcpy_blockdone
823 1.1 christos faligndata %f28, %f30, %f46
824 1.1 christos
825 1.1 christos bleu,a,pn %icc, 2f
826 1.1 christos ldda [%o0] ASI_BLK_P, %f0
827 1.1 christos membar #Sync
828 1.1 christos 2:
829 1.1 christos stda %f32, [%o1] ASI_STORE
830 1.1 christos faligndata %f30, %f48, %f32
831 1.1 christos inc BLOCK_SIZE, %o0
832 1.1 christos faligndata %f48, %f50, %f34
833 1.1 christos inc BLOCK_SIZE, %o1
834 1.1 christos faligndata %f50, %f52, %f36
835 1.1 christos dec BLOCK_SIZE, %o2
836 1.1 christos faligndata %f52, %f54, %f38
837 1.1 christos cmp %o0, %o5
838 1.1 christos faligndata %f54, %f56, %f40
839 1.1 christos faligndata %f56, %f58, %f42
840 1.1 christos faligndata %f58, %f60, %f44
841 1.1 christos brlez,pn %o2, Lmemcpy_blockdone
842 1.1 christos faligndata %f60, %f62, %f46
843 1.1 christos bleu,a,pn %icc, 2f
844 1.1 christos ldda [%o0] ASI_BLK_P, %f16 ! Increment is at top
845 1.1 christos membar #Sync
846 1.1 christos 2:
847 1.1 christos stda %f32, [%o1] ASI_STORE
848 1.1 christos ba 3b
849 1.1 christos inc BLOCK_SIZE, %o1
850 1.1 christos
851 1.1 christos !!
852 1.1 christos !! Source at BLOCK_ALIGN+8
853 1.1 christos !!
854 1.1 christos !! We need to load almost 1 complete block by hand.
855 1.1 christos !!
856 1.1 christos L101:
/*
 * L101 -- copy path for a source that begins 8 bytes past a block
 * boundary.
 *
 * Registers as used below:
 *   %o0  source pointer (address for ldd and for the ASI_BLK_P loads)
 *   %o1  destination pointer (address for the ASI_STORE stores)
 *   %o2  byte count, reduced by BLOCK_SIZE in every stage
 *   %o5  bound that %o0 is compared against before each block load
 *        (NOTE(review): set up outside this part of the file; confirm
 *        its exact meaning)
 *   %f0  first source double; it is read but never written before the
 *        loop, so it is presumably loaded before the jump to this
 *        label -- confirm
 *
 * Seven doubles are loaded by hand into %f2..%f14.  The loop at 3:
 * then runs three unrolled stages.  Each stage merges nine consecutive
 * input doubles into %f32..%f46 with faligndata, conditionally starts
 * the next block load (destinations rotate %f48 -> %f0 -> %f16), leaves
 * for Lmemcpy_blockdone once %o2 <= 0, and otherwise stores %f32..
 * to [%o1] and advances %o1 by BLOCK_SIZE.
 *
 * Every "bleu,a,pn" has the annul bit set: the ldda in its delay slot
 * runs only when the branch is taken (%o0 <= %o5, unsigned).  When the
 * branch is not taken the ldda is skipped and the membar #Sync is
 * executed instead.  The faligndata in each brlez delay slot is not
 * annulled and therefore always executes.
 */
857 1.1 christos #ifdef RETURN_NAME
/* With RETURN_NAME defined: %g1 = address of the string "L101"; the ba skips the string. */
858 1.1 christos sethi %hi(1f), %g1
859 1.1 christos ba,pt %icc, 2f
860 1.1 christos or %g1, %lo(1f), %g1
861 1.1 christos 1:
862 1.1 christos .asciz "L101"
863 1.1 christos .align 8
864 1.1 christos 2:
865 1.1 christos #endif
/* Hand-load seven doubles into %f2..%f14, advancing %o0 by 8 each time. */
866 1.1 christos ! fmovd %f0, %f0 ! Hoist fmovd
867 1.1 christos ldd [%o0], %f2
868 1.1 christos inc 8, %o0
869 1.1 christos ldd [%o0], %f4
870 1.1 christos inc 8, %o0
871 1.1 christos ldd [%o0], %f6
872 1.1 christos inc 8, %o0
873 1.1 christos ldd [%o0], %f8
874 1.1 christos inc 8, %o0
875 1.1 christos ldd [%o0], %f10
876 1.1 christos inc 8, %o0
877 1.1 christos ldd [%o0], %f12
878 1.1 christos inc 8, %o0
879 1.1 christos ldd [%o0], %f14
880 1.1 christos inc 8, %o0
881 1.1 christos
/* First block load into %f16, issued only if %o0 <= %o5 (unsigned). */
882 1.1 christos cmp %o0, %o5
883 1.1 christos bleu,a,pn %icc, 3f
884 1.1 christos ldda [%o0] ASI_BLK_P, %f16
885 1.1 christos membar #Sync
886 1.1 christos 3:
/* Stage 1: consume %f0..%f16; next block load goes to %f48. */
887 1.1 christos faligndata %f0, %f2, %f32
888 1.1 christos inc BLOCK_SIZE, %o0
889 1.1 christos faligndata %f2, %f4, %f34
890 1.1 christos cmp %o0, %o5
891 1.1 christos faligndata %f4, %f6, %f36
892 1.1 christos dec BLOCK_SIZE, %o2
893 1.1 christos faligndata %f6, %f8, %f38
894 1.1 christos faligndata %f8, %f10, %f40
895 1.1 christos faligndata %f10, %f12, %f42
896 1.1 christos faligndata %f12, %f14, %f44
897 1.1 christos bleu,a,pn %icc, 2f
898 1.1 christos ldda [%o0] ASI_BLK_P, %f48
899 1.1 christos membar #Sync
900 1.1 christos 2:
901 1.1 christos brlez,pn %o2, Lmemcpy_blockdone
902 1.1 christos faligndata %f14, %f16, %f46
903 1.1 christos
904 1.1 christos stda %f32, [%o1] ASI_STORE
905 1.1 christos
/* Stage 2: consume %f16..%f48; next block load goes to %f0. */
906 1.1 christos faligndata %f16, %f18, %f32
907 1.1 christos inc BLOCK_SIZE, %o0
908 1.1 christos faligndata %f18, %f20, %f34
909 1.1 christos inc BLOCK_SIZE, %o1
910 1.1 christos faligndata %f20, %f22, %f36
911 1.1 christos cmp %o0, %o5
912 1.1 christos faligndata %f22, %f24, %f38
913 1.1 christos dec BLOCK_SIZE, %o2
914 1.1 christos faligndata %f24, %f26, %f40
915 1.1 christos faligndata %f26, %f28, %f42
916 1.1 christos faligndata %f28, %f30, %f44
917 1.1 christos bleu,a,pn %icc, 2f
918 1.1 christos ldda [%o0] ASI_BLK_P, %f0
919 1.1 christos membar #Sync
920 1.1 christos 2:
921 1.1 christos brlez,pn %o2, Lmemcpy_blockdone
922 1.1 christos faligndata %f30, %f48, %f46
923 1.1 christos
924 1.1 christos stda %f32, [%o1] ASI_STORE
925 1.1 christos
/* Stage 3: consume %f48..%f62 and %f0; next block load goes to %f16. */
926 1.1 christos faligndata %f48, %f50, %f32
927 1.1 christos inc BLOCK_SIZE, %o0
928 1.1 christos faligndata %f50, %f52, %f34
929 1.1 christos inc BLOCK_SIZE, %o1
930 1.1 christos faligndata %f52, %f54, %f36
931 1.1 christos cmp %o0, %o5
932 1.1 christos faligndata %f54, %f56, %f38
933 1.1 christos dec BLOCK_SIZE, %o2
934 1.1 christos faligndata %f56, %f58, %f40
935 1.1 christos faligndata %f58, %f60, %f42
936 1.1 christos faligndata %f60, %f62, %f44
937 1.1 christos bleu,a,pn %icc, 2f
938 1.1 christos ldda [%o0] ASI_BLK_P, %f16
939 1.1 christos membar #Sync
940 1.1 christos 2:
941 1.1 christos brlez,pn %o2, Lmemcpy_blockdone
942 1.1 christos faligndata %f62, %f0, %f46
943 1.1 christos
/* Store, then loop; the delay slot of the ba advances %o1 for stage 1. */
944 1.1 christos stda %f32, [%o1] ASI_STORE
945 1.1 christos ba 3b
946 1.1 christos inc BLOCK_SIZE, %o1
947 1.1 christos
948 1.1 christos !!
949 1.1 christos !! Source at BLOCK_ALIGN+16
950 1.1 christos !!
951 1.1 christos !! We need to load 6 doubles by hand.
952 1.1 christos !!
953 1.1 christos L102:
/*
 * L102 -- copy path for a source that begins 16 bytes past a block
 * boundary.
 *
 * %f0 (read but not written before the loop; presumably loaded before
 * the jump here -- confirm) is moved to %f2, and six doubles are loaded
 * by hand into %f4..%f14 while %o0 advances by 8 per load.
 *
 * The loop at 3: runs three unrolled stages.  Each stage merges nine
 * consecutive input doubles into %f32..%f46 with faligndata,
 * conditionally starts the next block load (destinations rotate
 * %f48 -> %f0 -> %f16), leaves for Lmemcpy_blockdone once %o2 <= 0,
 * and otherwise stores %f32.. to [%o1] and advances %o1 by BLOCK_SIZE.
 *
 * %o0 is the source pointer, %o1 the destination pointer, and %o2 the
 * byte count reduced by BLOCK_SIZE per stage.  %o5 is the bound that
 * %o0 is compared against before each block load (NOTE(review): set up
 * outside this part of the file; confirm its exact meaning).
 *
 * Every "bleu,a,pn" has the annul bit set: the ldda in its delay slot
 * runs only when the branch is taken (%o0 <= %o5, unsigned); otherwise
 * the ldda is skipped and the membar #Sync is executed.  The faligndata
 * in each brlez delay slot always executes.
 */
954 1.1 christos #ifdef RETURN_NAME
/* With RETURN_NAME defined: %g1 = address of the string "L102"; the ba skips the string. */
955 1.1 christos sethi %hi(1f), %g1
956 1.1 christos ba,pt %icc, 2f
957 1.1 christos or %g1, %lo(1f), %g1
958 1.1 christos 1:
959 1.1 christos .asciz "L102"
960 1.1 christos .align 8
961 1.1 christos 2:
962 1.1 christos #endif
/* Hand-load six doubles into %f4..%f14; the fmovd places %f0 in %f2. */
963 1.1 christos ldd [%o0], %f4
964 1.1 christos inc 8, %o0
965 1.1 christos fmovd %f0, %f2 ! Hoist fmovd
966 1.1 christos ldd [%o0], %f6
967 1.1 christos inc 8, %o0
968 1.1 christos
969 1.1 christos ldd [%o0], %f8
970 1.1 christos inc 8, %o0
971 1.1 christos ldd [%o0], %f10
972 1.1 christos inc 8, %o0
973 1.1 christos ldd [%o0], %f12
974 1.1 christos inc 8, %o0
975 1.1 christos ldd [%o0], %f14
976 1.1 christos inc 8, %o0
977 1.1 christos
/* First block load into %f16, issued only if %o0 <= %o5 (unsigned). */
978 1.1 christos cmp %o0, %o5
979 1.1 christos bleu,a,pn %icc, 3f
980 1.1 christos ldda [%o0] ASI_BLK_P, %f16
981 1.1 christos membar #Sync
982 1.1 christos 3:
/* Stage 1: consume %f2..%f18; next block load goes to %f48. */
983 1.1 christos faligndata %f2, %f4, %f32
984 1.1 christos inc BLOCK_SIZE, %o0
985 1.1 christos faligndata %f4, %f6, %f34
986 1.1 christos cmp %o0, %o5
987 1.1 christos faligndata %f6, %f8, %f36
988 1.1 christos dec BLOCK_SIZE, %o2
989 1.1 christos faligndata %f8, %f10, %f38
990 1.1 christos faligndata %f10, %f12, %f40
991 1.1 christos faligndata %f12, %f14, %f42
992 1.1 christos bleu,a,pn %icc, 2f
993 1.1 christos ldda [%o0] ASI_BLK_P, %f48
994 1.1 christos membar #Sync
995 1.1 christos 2:
996 1.1 christos faligndata %f14, %f16, %f44
997 1.1 christos
998 1.1 christos brlez,pn %o2, Lmemcpy_blockdone
999 1.1 christos faligndata %f16, %f18, %f46
1000 1.1 christos
1001 1.1 christos stda %f32, [%o1] ASI_STORE
1002 1.1 christos
/* Stage 2: consume %f18..%f50; next block load goes to %f0. */
1003 1.1 christos faligndata %f18, %f20, %f32
1004 1.1 christos inc BLOCK_SIZE, %o0
1005 1.1 christos faligndata %f20, %f22, %f34
1006 1.1 christos inc BLOCK_SIZE, %o1
1007 1.1 christos faligndata %f22, %f24, %f36
1008 1.1 christos cmp %o0, %o5
1009 1.1 christos faligndata %f24, %f26, %f38
1010 1.1 christos dec BLOCK_SIZE, %o2
1011 1.1 christos faligndata %f26, %f28, %f40
1012 1.1 christos faligndata %f28, %f30, %f42
1013 1.1 christos bleu,a,pn %icc, 2f
1014 1.1 christos ldda [%o0] ASI_BLK_P, %f0
1015 1.1 christos membar #Sync
1016 1.1 christos 2:
1017 1.1 christos faligndata %f30, %f48, %f44
1018 1.1 christos brlez,pn %o2, Lmemcpy_blockdone
1019 1.1 christos faligndata %f48, %f50, %f46
1020 1.1 christos
1021 1.1 christos stda %f32, [%o1] ASI_STORE
1022 1.1 christos
/* Stage 3: consume %f50..%f62, %f0 and %f2; next block load goes to %f16. */
1023 1.1 christos faligndata %f50, %f52, %f32
1024 1.1 christos inc BLOCK_SIZE, %o0
1025 1.1 christos faligndata %f52, %f54, %f34
1026 1.1 christos inc BLOCK_SIZE, %o1
1027 1.1 christos faligndata %f54, %f56, %f36
1028 1.1 christos cmp %o0, %o5
1029 1.1 christos faligndata %f56, %f58, %f38
1030 1.1 christos dec BLOCK_SIZE, %o2
1031 1.1 christos faligndata %f58, %f60, %f40
1032 1.1 christos faligndata %f60, %f62, %f42
1033 1.1 christos bleu,a,pn %icc, 2f
1034 1.1 christos ldda [%o0] ASI_BLK_P, %f16
1035 1.1 christos membar #Sync
1036 1.1 christos 2:
1037 1.1 christos faligndata %f62, %f0, %f44
1038 1.1 christos brlez,pn %o2, Lmemcpy_blockdone
1039 1.1 christos faligndata %f0, %f2, %f46
1040 1.1 christos
/* Store, then loop; the delay slot of the ba advances %o1 for stage 1. */
1041 1.1 christos stda %f32, [%o1] ASI_STORE
1042 1.1 christos ba 3b
1043 1.1 christos inc BLOCK_SIZE, %o1
1044 1.1 christos
1045 1.1 christos !!
1046 1.1 christos !! Source at BLOCK_ALIGN+24
1047 1.1 christos !!
1048 1.1 christos !! We need to load 5 doubles by hand.
1049 1.1 christos !!
1050 1.1 christos L103:
/*
 * L103 -- copy path for a source that begins 24 bytes past a block
 * boundary.
 *
 * %f0 (read but not written before the loop; presumably loaded before
 * the jump here -- confirm) is moved to %f4, and five doubles are
 * loaded by hand into %f6..%f14 while %o0 advances by 8 per load.
 *
 * The loop at 3: runs three unrolled stages.  Each stage merges nine
 * consecutive input doubles into %f32..%f46 with faligndata,
 * conditionally starts the next block load (destinations rotate
 * %f48 -> %f0 -> %f16) and then advances %o0 by BLOCK_SIZE, leaves for
 * Lmemcpy_blockdone once %o2 <= 0, and otherwise stores %f32.. to
 * [%o1] and advances %o1 by BLOCK_SIZE.
 *
 * %o0 is the source pointer, %o1 the destination pointer, and %o2 the
 * byte count reduced by BLOCK_SIZE per stage.  %o5 is the bound that
 * %o0 is compared against before each block load (NOTE(review): set up
 * outside this part of the file; confirm its exact meaning).
 *
 * All limit checks are unsigned ("bleu") because they compare
 * addresses.  Each such branch has the annul bit set: the ldda in its
 * delay slot runs only when the branch is taken (%o0 <= %o5);
 * otherwise the ldda is skipped and the membar #Sync is executed.  The
 * faligndata in each brlez delay slot always executes.
 */
1051 1.1 christos #ifdef RETURN_NAME
/* With RETURN_NAME defined: %g1 = address of the string "L103"; the ba skips the string. */
1052 1.1 christos sethi %hi(1f), %g1
1053 1.1 christos ba,pt %icc, 2f
1054 1.1 christos or %g1, %lo(1f), %g1
1055 1.1 christos 1:
1056 1.1 christos .asciz "L103"
1057 1.1 christos .align 8
1058 1.1 christos 2:
1059 1.1 christos #endif
/* Place %f0 in %f4, then hand-load five doubles into %f6..%f14. */
1060 1.1 christos fmovd %f0, %f4
1061 1.1 christos ldd [%o0], %f6
1062 1.1 christos inc 8, %o0
1063 1.1 christos ldd [%o0], %f8
1064 1.1 christos inc 8, %o0
1065 1.1 christos ldd [%o0], %f10
1066 1.1 christos inc 8, %o0
1067 1.1 christos ldd [%o0], %f12
1068 1.1 christos inc 8, %o0
1069 1.1 christos ldd [%o0], %f14
1070 1.1 christos inc 8, %o0
1071 1.1 christos
/* First block load into %f16, issued only if %o0 <= %o5 (unsigned). */
1072 1.1 christos cmp %o0, %o5
1073 1.1 christos bleu,a,pn %icc, 2f
1074 1.1 christos ldda [%o0] ASI_BLK_P, %f16
1075 1.1 christos membar #Sync
1076 1.1 christos 2:
1077 1.1 christos inc BLOCK_SIZE, %o0
1078 1.1 christos 3:
/* Stage 1: consume %f4..%f20; next block load goes to %f48. */
1079 1.1 christos faligndata %f4, %f6, %f32
1080 1.1 christos cmp %o0, %o5
1081 1.1 christos faligndata %f6, %f8, %f34
1082 1.1 christos dec BLOCK_SIZE, %o2
1083 1.1 christos faligndata %f8, %f10, %f36
1084 1.1 christos faligndata %f10, %f12, %f38
1085 1.1 christos faligndata %f12, %f14, %f40
1086 1.1 christos bleu,a,pn %icc, 2f
1087 1.1 christos ldda [%o0] ASI_BLK_P, %f48
1088 1.1 christos membar #Sync
1089 1.1 christos 2:
1090 1.1 christos faligndata %f14, %f16, %f42
1091 1.1 christos inc BLOCK_SIZE, %o0
1092 1.1 christos faligndata %f16, %f18, %f44
1093 1.1 christos brlez,pn %o2, Lmemcpy_blockdone
1094 1.1 christos faligndata %f18, %f20, %f46
1095 1.1 christos
1096 1.1 christos stda %f32, [%o1] ASI_STORE
1097 1.1 christos
/* Stage 2: consume %f20..%f52; next block load goes to %f0. */
1098 1.1 christos faligndata %f20, %f22, %f32
1099 1.1 christos cmp %o0, %o5
1100 1.1 christos faligndata %f22, %f24, %f34
1101 1.1 christos dec BLOCK_SIZE, %o2
1102 1.1 christos faligndata %f24, %f26, %f36
1103 1.1 christos inc BLOCK_SIZE, %o1
1104 1.1 christos faligndata %f26, %f28, %f38
1105 1.1 christos faligndata %f28, %f30, %f40
/* Unsigned address comparison, the same test as every other block-load check in this loop. */
1106 1.1 christos bleu,a,pn %icc, 2f
1107 1.1 christos ldda [%o0] ASI_BLK_P, %f0
1108 1.1 christos membar #Sync
1109 1.1 christos 2:
1110 1.1 christos faligndata %f30, %f48, %f42
1111 1.1 christos inc BLOCK_SIZE, %o0
1112 1.1 christos faligndata %f48, %f50, %f44
1113 1.1 christos brlez,pn %o2, Lmemcpy_blockdone
1114 1.1 christos faligndata %f50, %f52, %f46
1115 1.1 christos
1116 1.1 christos stda %f32, [%o1] ASI_STORE
1117 1.1 christos
/* Stage 3: consume %f52..%f62 and %f0..%f4; next block load goes to %f16. */
1118 1.1 christos faligndata %f52, %f54, %f32
1119 1.1 christos cmp %o0, %o5
1120 1.1 christos faligndata %f54, %f56, %f34
1121 1.1 christos dec BLOCK_SIZE, %o2
1122 1.1 christos faligndata %f56, %f58, %f36
1123 1.1 christos faligndata %f58, %f60, %f38
1124 1.1 christos inc BLOCK_SIZE, %o1
1125 1.1 christos faligndata %f60, %f62, %f40
1126 1.1 christos bleu,a,pn %icc, 2f
1127 1.1 christos ldda [%o0] ASI_BLK_P, %f16
1128 1.1 christos membar #Sync
1129 1.1 christos 2:
1130 1.1 christos faligndata %f62, %f0, %f42
1131 1.1 christos inc BLOCK_SIZE, %o0
1132 1.1 christos faligndata %f0, %f2, %f44
1133 1.1 christos brlez,pn %o2, Lmemcpy_blockdone
1134 1.1 christos faligndata %f2, %f4, %f46
1135 1.1 christos
/* Store, then loop; the delay slot of the ba advances %o1 for stage 1. */
1136 1.1 christos stda %f32, [%o1] ASI_STORE
1137 1.1 christos ba 3b
1138 1.1 christos inc BLOCK_SIZE, %o1
1139 1.1 christos
1140 1.1 christos !!
1141 1.1 christos !! Source at BLOCK_ALIGN+32
1142 1.1 christos !!
1143 1.1 christos !! We need to load 4 doubles by hand.
1144 1.1 christos !!
1145 1.1 christos L104:
/*
 * L104 -- copy path for a source that begins 32 bytes past a block
 * boundary.
 *
 * %f0 (read but not written before the loop; presumably loaded before
 * the jump here -- confirm) is moved to %f6, and four doubles are
 * loaded by hand into %f8..%f14 while %o0 advances by 8 per load.
 *
 * The loop at 3: runs three unrolled stages.  Each stage merges nine
 * consecutive input doubles into %f32..%f46 with faligndata,
 * conditionally starts the next block load (destinations rotate
 * %f48 -> %f0 -> %f16) and then advances %o0 by BLOCK_SIZE, leaves for
 * Lmemcpy_blockdone once %o2 <= 0, and otherwise stores %f32.. to
 * [%o1] and advances %o1 by BLOCK_SIZE.
 *
 * %o0 is the source pointer, %o1 the destination pointer, and %o2 the
 * byte count reduced by BLOCK_SIZE per stage.  %o5 is the bound that
 * %o0 is compared against before each block load (NOTE(review): set up
 * outside this part of the file; confirm its exact meaning).
 *
 * Every "bleu,a,pn" has the annul bit set: the ldda in its delay slot
 * runs only when the branch is taken (%o0 <= %o5, unsigned); otherwise
 * the ldda is skipped and the membar #Sync is executed.  The faligndata
 * in each brlez delay slot always executes.
 */
1146 1.1 christos #ifdef RETURN_NAME
/* With RETURN_NAME defined: %g1 = address of the string "L104"; the ba skips the string. */
1147 1.1 christos sethi %hi(1f), %g1
1148 1.1 christos ba,pt %icc, 2f
1149 1.1 christos or %g1, %lo(1f), %g1
1150 1.1 christos 1:
1151 1.1 christos .asciz "L104"
1152 1.1 christos .align 8
1153 1.1 christos 2:
1154 1.1 christos #endif
/* Place %f0 in %f6, then hand-load four doubles into %f8..%f14. */
1155 1.1 christos fmovd %f0, %f6
1156 1.1 christos ldd [%o0], %f8
1157 1.1 christos inc 8, %o0
1158 1.1 christos ldd [%o0], %f10
1159 1.1 christos inc 8, %o0
1160 1.1 christos ldd [%o0], %f12
1161 1.1 christos inc 8, %o0
1162 1.1 christos ldd [%o0], %f14
1163 1.1 christos inc 8, %o0
1164 1.1 christos
/* First block load into %f16, issued only if %o0 <= %o5 (unsigned). */
1165 1.1 christos cmp %o0, %o5
1166 1.1 christos bleu,a,pn %icc, 2f
1167 1.1 christos ldda [%o0] ASI_BLK_P, %f16
1168 1.1 christos membar #Sync
1169 1.1 christos 2:
1170 1.1 christos inc BLOCK_SIZE, %o0
1171 1.1 christos 3:
/* Stage 1: consume %f6..%f22; next block load goes to %f48. */
1172 1.1 christos faligndata %f6, %f8, %f32
1173 1.1 christos cmp %o0, %o5
1174 1.1 christos faligndata %f8, %f10, %f34
1175 1.1 christos dec BLOCK_SIZE, %o2
1176 1.1 christos faligndata %f10, %f12, %f36
1177 1.1 christos faligndata %f12, %f14, %f38
1178 1.1 christos bleu,a,pn %icc, 2f
1179 1.1 christos ldda [%o0] ASI_BLK_P, %f48
1180 1.1 christos membar #Sync
1181 1.1 christos 2:
1182 1.1 christos faligndata %f14, %f16, %f40
1183 1.1 christos faligndata %f16, %f18, %f42
1184 1.1 christos inc BLOCK_SIZE, %o0
1185 1.1 christos faligndata %f18, %f20, %f44
1186 1.1 christos brlez,pn %o2, Lmemcpy_blockdone
1187 1.1 christos faligndata %f20, %f22, %f46
1188 1.1 christos
1189 1.1 christos stda %f32, [%o1] ASI_STORE
1190 1.1 christos
/* Stage 2: consume %f22..%f54; next block load goes to %f0. */
1191 1.1 christos faligndata %f22, %f24, %f32
1192 1.1 christos cmp %o0, %o5
1193 1.1 christos faligndata %f24, %f26, %f34
1194 1.1 christos faligndata %f26, %f28, %f36
1195 1.1 christos inc BLOCK_SIZE, %o1
1196 1.1 christos faligndata %f28, %f30, %f38
1197 1.1 christos bleu,a,pn %icc, 2f
1198 1.1 christos ldda [%o0] ASI_BLK_P, %f0
1199 1.1 christos membar #Sync
1200 1.1 christos 2:
1201 1.1 christos faligndata %f30, %f48, %f40
1202 1.1 christos dec BLOCK_SIZE, %o2
1203 1.1 christos faligndata %f48, %f50, %f42
1204 1.1 christos inc BLOCK_SIZE, %o0
1205 1.1 christos faligndata %f50, %f52, %f44
1206 1.1 christos brlez,pn %o2, Lmemcpy_blockdone
1207 1.1 christos faligndata %f52, %f54, %f46
1208 1.1 christos
1209 1.1 christos stda %f32, [%o1] ASI_STORE
1210 1.1 christos
/* Stage 3: consume %f54..%f62 and %f0..%f6; next block load goes to %f16. */
1211 1.1 christos faligndata %f54, %f56, %f32
1212 1.1 christos cmp %o0, %o5
1213 1.1 christos faligndata %f56, %f58, %f34
1214 1.1 christos faligndata %f58, %f60, %f36
1215 1.1 christos inc BLOCK_SIZE, %o1
1216 1.1 christos faligndata %f60, %f62, %f38
1217 1.1 christos bleu,a,pn %icc, 2f
1218 1.1 christos ldda [%o0] ASI_BLK_P, %f16
1219 1.1 christos membar #Sync
1220 1.1 christos 2:
1221 1.1 christos faligndata %f62, %f0, %f40
1222 1.1 christos dec BLOCK_SIZE, %o2
1223 1.1 christos faligndata %f0, %f2, %f42
1224 1.1 christos inc BLOCK_SIZE, %o0
1225 1.1 christos faligndata %f2, %f4, %f44
1226 1.1 christos brlez,pn %o2, Lmemcpy_blockdone
1227 1.1 christos faligndata %f4, %f6, %f46
1228 1.1 christos
/* Store, then loop; the delay slot of the ba advances %o1 for stage 1. */
1229 1.1 christos stda %f32, [%o1] ASI_STORE
1230 1.1 christos ba 3b
1231 1.1 christos inc BLOCK_SIZE, %o1
1232 1.1 christos
1233 1.1 christos !!
1234 1.1 christos !! Source at BLOCK_ALIGN+40
1235 1.1 christos !!
1236 1.1 christos !! We need to load 3 doubles by hand.
1237 1.1 christos !!
1238 1.1 christos L105:
/*
 * L105 -- copy path for a source that begins 40 bytes past a block
 * boundary.
 *
 * %f0 (read but not written before the loop; presumably loaded before
 * the jump here -- confirm) is moved to %f8, and three doubles are
 * loaded by hand into %f10..%f14 while %o0 advances by 8 per load.
 *
 * The loop at 3: runs three unrolled stages.  Each stage merges nine
 * consecutive input doubles into %f32..%f46 with faligndata,
 * conditionally starts the next block load (destinations rotate
 * %f48 -> %f0 -> %f16) and then advances %o0 by BLOCK_SIZE, leaves for
 * Lmemcpy_blockdone once %o2 <= 0, and otherwise stores %f32.. to
 * [%o1] and advances %o1 by BLOCK_SIZE.
 *
 * %o0 is the source pointer, %o1 the destination pointer, and %o2 the
 * byte count reduced by BLOCK_SIZE per stage.  %o5 is the bound that
 * %o0 is compared against before each block load (NOTE(review): set up
 * outside this part of the file; confirm its exact meaning).
 *
 * Every "bleu,a,pn" has the annul bit set: the ldda in its delay slot
 * runs only when the branch is taken (%o0 <= %o5, unsigned); otherwise
 * the ldda is skipped and the membar #Sync is executed.  The faligndata
 * in each brlez delay slot always executes.
 */
1239 1.1 christos #ifdef RETURN_NAME
/* With RETURN_NAME defined: %g1 = address of the string "L105"; the ba skips the string. */
1240 1.1 christos sethi %hi(1f), %g1
1241 1.1 christos ba,pt %icc, 2f
1242 1.1 christos or %g1, %lo(1f), %g1
1243 1.1 christos 1:
1244 1.1 christos .asciz "L105"
1245 1.1 christos .align 8
1246 1.1 christos 2:
1247 1.1 christos #endif
/* Place %f0 in %f8, then hand-load three doubles into %f10..%f14. */
1248 1.1 christos fmovd %f0, %f8
1249 1.1 christos ldd [%o0], %f10
1250 1.1 christos inc 8, %o0
1251 1.1 christos ldd [%o0], %f12
1252 1.1 christos inc 8, %o0
1253 1.1 christos ldd [%o0], %f14
1254 1.1 christos inc 8, %o0
1255 1.1 christos
/* First block load into %f16, issued only if %o0 <= %o5 (unsigned). */
1256 1.1 christos cmp %o0, %o5
1257 1.1 christos bleu,a,pn %icc, 2f
1258 1.1 christos ldda [%o0] ASI_BLK_P, %f16
1259 1.1 christos membar #Sync
1260 1.1 christos 2:
1261 1.1 christos inc BLOCK_SIZE, %o0
1262 1.1 christos 3:
/* Stage 1: consume %f8..%f24; next block load goes to %f48. */
1263 1.1 christos faligndata %f8, %f10, %f32
1264 1.1 christos cmp %o0, %o5
1265 1.1 christos faligndata %f10, %f12, %f34
1266 1.1 christos faligndata %f12, %f14, %f36
1267 1.1 christos bleu,a,pn %icc, 2f
1268 1.1 christos ldda [%o0] ASI_BLK_P, %f48
1269 1.1 christos membar #Sync
1270 1.1 christos 2:
1271 1.1 christos faligndata %f14, %f16, %f38
1272 1.1 christos dec BLOCK_SIZE, %o2
1273 1.1 christos faligndata %f16, %f18, %f40
1274 1.1 christos inc BLOCK_SIZE, %o0
1275 1.1 christos faligndata %f18, %f20, %f42
1276 1.1 christos faligndata %f20, %f22, %f44
1277 1.1 christos brlez,pn %o2, Lmemcpy_blockdone
1278 1.1 christos faligndata %f22, %f24, %f46
1279 1.1 christos
1280 1.1 christos stda %f32, [%o1] ASI_STORE
1281 1.1 christos
/* Stage 2: consume %f24..%f56; next block load goes to %f0. */
1282 1.1 christos faligndata %f24, %f26, %f32
1283 1.1 christos cmp %o0, %o5
1284 1.1 christos faligndata %f26, %f28, %f34
1285 1.1 christos dec BLOCK_SIZE, %o2
1286 1.1 christos faligndata %f28, %f30, %f36
1287 1.1 christos bleu,a,pn %icc, 2f
1288 1.1 christos ldda [%o0] ASI_BLK_P, %f0
1289 1.1 christos membar #Sync
1290 1.1 christos 2:
1291 1.1 christos faligndata %f30, %f48, %f38
1292 1.1 christos inc BLOCK_SIZE, %o1
1293 1.1 christos faligndata %f48, %f50, %f40
1294 1.1 christos inc BLOCK_SIZE, %o0
1295 1.1 christos faligndata %f50, %f52, %f42
1296 1.1 christos faligndata %f52, %f54, %f44
1297 1.1 christos brlez,pn %o2, Lmemcpy_blockdone
1298 1.1 christos faligndata %f54, %f56, %f46
1299 1.1 christos
1300 1.1 christos stda %f32, [%o1] ASI_STORE
1301 1.1 christos
/* Stage 3: consume %f56..%f62 and %f0..%f8; next block load goes to %f16. */
1302 1.1 christos faligndata %f56, %f58, %f32
1303 1.1 christos cmp %o0, %o5
1304 1.1 christos faligndata %f58, %f60, %f34
1305 1.1 christos dec BLOCK_SIZE, %o2
1306 1.1 christos faligndata %f60, %f62, %f36
1307 1.1 christos bleu,a,pn %icc, 2f
1308 1.1 christos ldda [%o0] ASI_BLK_P, %f16
1309 1.1 christos membar #Sync
1310 1.1 christos 2:
1311 1.1 christos faligndata %f62, %f0, %f38
1312 1.1 christos inc BLOCK_SIZE, %o1
1313 1.1 christos faligndata %f0, %f2, %f40
1314 1.1 christos inc BLOCK_SIZE, %o0
1315 1.1 christos faligndata %f2, %f4, %f42
1316 1.1 christos faligndata %f4, %f6, %f44
1317 1.1 christos brlez,pn %o2, Lmemcpy_blockdone
1318 1.1 christos faligndata %f6, %f8, %f46
1319 1.1 christos
/* Store, then loop; the delay slot of the ba advances %o1 for stage 1. */
1320 1.1 christos stda %f32, [%o1] ASI_STORE
1321 1.1 christos ba 3b
1322 1.1 christos inc BLOCK_SIZE, %o1
1323 1.1 christos
1324 1.1 christos
1325 1.1 christos !!
1326 1.1 christos !! Source at BLOCK_ALIGN+48
1327 1.1 christos !!
1328 1.1 christos !! We need to load 2 doubles by hand.
1329 1.1 christos !!
1330 1.1 christos L106:
/*
 * L106 -- copy path for a source that begins 48 bytes past a block
 * boundary.
 *
 * %f0 (read but not written before the loop; presumably loaded before
 * the jump here -- confirm) is moved to %f10, and two doubles are
 * loaded by hand into %f12 and %f14 while %o0 advances by 8 per load.
 *
 * The loop at 3: runs three unrolled stages.  Each stage merges nine
 * consecutive input doubles into %f32..%f46 with faligndata,
 * conditionally starts the next block load (destinations rotate
 * %f48 -> %f0 -> %f16) and then advances %o0 by BLOCK_SIZE, leaves for
 * Lmemcpy_blockdone once %o2 <= 0, and otherwise stores %f32.. to
 * [%o1] and advances %o1 by BLOCK_SIZE.
 *
 * %o0 is the source pointer, %o1 the destination pointer, and %o2 the
 * byte count reduced by BLOCK_SIZE per stage.  %o5 is the bound that
 * %o0 is compared against before each block load (NOTE(review): set up
 * outside this part of the file; confirm its exact meaning).
 *
 * Every "bleu,a,pn" has the annul bit set: the ldda in its delay slot
 * runs only when the branch is taken (%o0 <= %o5, unsigned); otherwise
 * the ldda is skipped and the membar #Sync is executed.  The faligndata
 * in each brlez delay slot always executes.
 */
1331 1.1 christos #ifdef RETURN_NAME
/* With RETURN_NAME defined: %g1 = address of the string "L106"; the ba skips the string. */
1332 1.1 christos sethi %hi(1f), %g1
1333 1.1 christos ba,pt %icc, 2f
1334 1.1 christos or %g1, %lo(1f), %g1
1335 1.1 christos 1:
1336 1.1 christos .asciz "L106"
1337 1.1 christos .align 8
1338 1.1 christos 2:
1339 1.1 christos #endif
/* Place %f0 in %f10, then hand-load two doubles into %f12 and %f14. */
1340 1.1 christos fmovd %f0, %f10
1341 1.1 christos ldd [%o0], %f12
1342 1.1 christos inc 8, %o0
1343 1.1 christos ldd [%o0], %f14
1344 1.1 christos inc 8, %o0
1345 1.1 christos
/* First block load into %f16, issued only if %o0 <= %o5 (unsigned). */
1346 1.1 christos cmp %o0, %o5
1347 1.1 christos bleu,a,pn %icc, 2f
1348 1.1 christos ldda [%o0] ASI_BLK_P, %f16
1349 1.1 christos membar #Sync
1350 1.1 christos 2:
1351 1.1 christos inc BLOCK_SIZE, %o0
1352 1.1 christos 3:
/* Stage 1: consume %f10..%f26; next block load goes to %f48. */
1353 1.1 christos faligndata %f10, %f12, %f32
1354 1.1 christos cmp %o0, %o5
1355 1.1 christos faligndata %f12, %f14, %f34
1356 1.1 christos bleu,a,pn %icc, 2f
1357 1.1 christos ldda [%o0] ASI_BLK_P, %f48
1358 1.1 christos membar #Sync
1359 1.1 christos 2:
1360 1.1 christos faligndata %f14, %f16, %f36
1361 1.1 christos dec BLOCK_SIZE, %o2
1362 1.1 christos faligndata %f16, %f18, %f38
1363 1.1 christos inc BLOCK_SIZE, %o0
1364 1.1 christos faligndata %f18, %f20, %f40
1365 1.1 christos faligndata %f20, %f22, %f42
1366 1.1 christos faligndata %f22, %f24, %f44
1367 1.1 christos brlez,pn %o2, Lmemcpy_blockdone
1368 1.1 christos faligndata %f24, %f26, %f46
1369 1.1 christos
1370 1.1 christos stda %f32, [%o1] ASI_STORE
1371 1.1 christos
/* Stage 2: consume %f26..%f58; next block load goes to %f0. */
1372 1.1 christos faligndata %f26, %f28, %f32
1373 1.1 christos cmp %o0, %o5
1374 1.1 christos faligndata %f28, %f30, %f34
1375 1.1 christos bleu,a,pn %icc, 2f
1376 1.1 christos ldda [%o0] ASI_BLK_P, %f0
1377 1.1 christos membar #Sync
1378 1.1 christos 2:
1379 1.1 christos faligndata %f30, %f48, %f36
1380 1.1 christos dec BLOCK_SIZE, %o2
1381 1.1 christos faligndata %f48, %f50, %f38
1382 1.1 christos inc BLOCK_SIZE, %o1
1383 1.1 christos faligndata %f50, %f52, %f40
1384 1.1 christos faligndata %f52, %f54, %f42
1385 1.1 christos inc BLOCK_SIZE, %o0
1386 1.1 christos faligndata %f54, %f56, %f44
1387 1.1 christos brlez,pn %o2, Lmemcpy_blockdone
1388 1.1 christos faligndata %f56, %f58, %f46
1389 1.1 christos
1390 1.1 christos stda %f32, [%o1] ASI_STORE
1391 1.1 christos
/* Stage 3: consume %f58..%f62 and %f0..%f10; next block load goes to %f16. */
1392 1.1 christos faligndata %f58, %f60, %f32
1393 1.1 christos cmp %o0, %o5
1394 1.1 christos faligndata %f60, %f62, %f34
1395 1.1 christos bleu,a,pn %icc, 2f
1396 1.1 christos ldda [%o0] ASI_BLK_P, %f16
1397 1.1 christos membar #Sync
1398 1.1 christos 2:
1399 1.1 christos faligndata %f62, %f0, %f36
1400 1.1 christos dec BLOCK_SIZE, %o2
1401 1.1 christos faligndata %f0, %f2, %f38
1402 1.1 christos inc BLOCK_SIZE, %o1
1403 1.1 christos faligndata %f2, %f4, %f40
1404 1.1 christos faligndata %f4, %f6, %f42
1405 1.1 christos inc BLOCK_SIZE, %o0
1406 1.1 christos faligndata %f6, %f8, %f44
1407 1.1 christos brlez,pn %o2, Lmemcpy_blockdone
1408 1.1 christos faligndata %f8, %f10, %f46
1409 1.1 christos
/* Store, then loop; the delay slot of the ba advances %o1 for stage 1. */
1410 1.1 christos stda %f32, [%o1] ASI_STORE
1411 1.1 christos ba 3b
1412 1.1 christos inc BLOCK_SIZE, %o1
1413 1.1 christos
1414 1.1 christos
1415 1.1 christos !!
1416 1.1 christos !! Source at BLOCK_ALIGN+56
1417 1.1 christos !!
1418 1.1 christos !! We need to load 1 double by hand.
1419 1.1 christos !!
1420 1.1 christos L107:
/*
 * L107 -- copy path for a source that begins 56 bytes past a block
 * boundary.
 *
 * %f0 (read but not written before the loop; presumably loaded before
 * the jump here -- confirm) is moved to %f12, and one double is loaded
 * by hand into %f14, after which %o0 is advanced by 8.
 *
 * The loop at 3: runs three unrolled stages.  Each stage merges nine
 * consecutive input doubles into %f32..%f46 with faligndata,
 * conditionally starts the next block load (destinations rotate
 * %f48 -> %f0 -> %f16) and then advances %o0 by BLOCK_SIZE, leaves for
 * Lmemcpy_blockdone once %o2 <= 0, and otherwise stores %f32.. to
 * [%o1] and advances %o1 by BLOCK_SIZE.
 *
 * %o0 is the source pointer, %o1 the destination pointer, and %o2 the
 * byte count reduced by BLOCK_SIZE per stage.  %o5 is the bound that
 * %o0 is compared against before each block load (NOTE(review): set up
 * outside this part of the file; confirm its exact meaning).
 *
 * Every "bleu,a,pn" has the annul bit set: the ldda in its delay slot
 * runs only when the branch is taken (%o0 <= %o5, unsigned); otherwise
 * the ldda is skipped and the membar #Sync is executed.  The faligndata
 * in each brlez delay slot always executes.
 */
1421 1.1 christos #ifdef RETURN_NAME
/* With RETURN_NAME defined: %g1 = address of the string "L107"; the ba skips the string. */
1422 1.1 christos sethi %hi(1f), %g1
1423 1.1 christos ba,pt %icc, 2f
1424 1.1 christos or %g1, %lo(1f), %g1
1425 1.1 christos 1:
1426 1.1 christos .asciz "L107"
1427 1.1 christos .align 8
1428 1.1 christos 2:
1429 1.1 christos #endif
/* Place %f0 in %f12, then hand-load one double into %f14. */
1430 1.1 christos fmovd %f0, %f12
1431 1.1 christos ldd [%o0], %f14
1432 1.1 christos inc 8, %o0
1433 1.1 christos
/* First block load into %f16, issued only if %o0 <= %o5 (unsigned). */
1434 1.1 christos cmp %o0, %o5
1435 1.1 christos bleu,a,pn %icc, 2f
1436 1.1 christos ldda [%o0] ASI_BLK_P, %f16
1437 1.1 christos membar #Sync
1438 1.1 christos 2:
1439 1.1 christos inc BLOCK_SIZE, %o0
1440 1.1 christos 3:
/* Stage 1: consume %f12..%f28; next block load goes to %f48. */
1441 1.1 christos faligndata %f12, %f14, %f32
1442 1.1 christos cmp %o0, %o5
1443 1.1 christos bleu,a,pn %icc, 2f
1444 1.1 christos ldda [%o0] ASI_BLK_P, %f48
1445 1.1 christos membar #Sync
1446 1.1 christos 2:
1447 1.1 christos faligndata %f14, %f16, %f34
1448 1.1 christos dec BLOCK_SIZE, %o2
1449 1.1 christos faligndata %f16, %f18, %f36
1450 1.1 christos inc BLOCK_SIZE, %o0
1451 1.1 christos faligndata %f18, %f20, %f38
1452 1.1 christos faligndata %f20, %f22, %f40
1453 1.1 christos faligndata %f22, %f24, %f42
1454 1.1 christos faligndata %f24, %f26, %f44
1455 1.1 christos brlez,pn %o2, Lmemcpy_blockdone
1456 1.1 christos faligndata %f26, %f28, %f46
1457 1.1 christos
1458 1.1 christos stda %f32, [%o1] ASI_STORE
1459 1.1 christos
/* Stage 2: consume %f28..%f60; next block load goes to %f0. */
1460 1.1 christos faligndata %f28, %f30, %f32
1461 1.1 christos cmp %o0, %o5
1462 1.1 christos bleu,a,pn %icc, 2f
1463 1.1 christos ldda [%o0] ASI_BLK_P, %f0
1464 1.1 christos membar #Sync
1465 1.1 christos 2:
1466 1.1 christos faligndata %f30, %f48, %f34
1467 1.1 christos dec BLOCK_SIZE, %o2
1468 1.1 christos faligndata %f48, %f50, %f36
1469 1.1 christos inc BLOCK_SIZE, %o1
1470 1.1 christos faligndata %f50, %f52, %f38
1471 1.1 christos faligndata %f52, %f54, %f40
1472 1.1 christos inc BLOCK_SIZE, %o0
1473 1.1 christos faligndata %f54, %f56, %f42
1474 1.1 christos faligndata %f56, %f58, %f44
1475 1.1 christos brlez,pn %o2, Lmemcpy_blockdone
1476 1.1 christos faligndata %f58, %f60, %f46
1477 1.1 christos
1478 1.1 christos stda %f32, [%o1] ASI_STORE
1479 1.1 christos
/* Stage 3: consume %f60, %f62 and %f0..%f12; next block load goes to %f16. */
1480 1.1 christos faligndata %f60, %f62, %f32
1481 1.1 christos cmp %o0, %o5
1482 1.1 christos bleu,a,pn %icc, 2f
1483 1.1 christos ldda [%o0] ASI_BLK_P, %f16
1484 1.1 christos membar #Sync
1485 1.1 christos 2:
1486 1.1 christos faligndata %f62, %f0, %f34
1487 1.1 christos dec BLOCK_SIZE, %o2
1488 1.1 christos faligndata %f0, %f2, %f36
1489 1.1 christos inc BLOCK_SIZE, %o1
1490 1.1 christos faligndata %f2, %f4, %f38
1491 1.1 christos faligndata %f4, %f6, %f40
1492 1.1 christos inc BLOCK_SIZE, %o0
1493 1.1 christos faligndata %f6, %f8, %f42
1494 1.1 christos faligndata %f8, %f10, %f44
1495 1.1 christos
1496 1.1 christos brlez,pn %o2, Lmemcpy_blockdone
1497 1.1 christos faligndata %f10, %f12, %f46
1498 1.1 christos
/* Store, then loop; the delay slot of the ba advances %o1 for stage 1. */
1499 1.1 christos stda %f32, [%o1] ASI_STORE
1500 1.1 christos ba 3b
1501 1.1 christos inc BLOCK_SIZE, %o1
1502 1.1 christos
1503 1.1 christos Lmemcpy_blockdone:
/*
 * Common exit of the unrolled loops, reached when %o2 <= 0 after the
 * last "dec BLOCK_SIZE, %o2".  %f32..%f46 hold the final, not yet
 * stored, set of aligned output doubles.
 *
 * BLOCK_SIZE is added back to %o2, and the membar #Sync waits for any
 * outstanding loads.  The doubles are then written out one at a time
 * with FINISH_REG until fewer than 8 bytes remain.
 */
1504 1.1 christos inc BLOCK_SIZE, %o2 ! Fixup our overcommit
1505 1.1 christos membar #Sync ! Finish any pending loads
/*
 * FINISH_REG(f): subtract 8 from %o2, setting the condition codes.
 * If the result is negative, branch to Lmemcpy_blockfinish; the
 * annulled delay slot copies f into %f48 only on that taken branch.
 * Otherwise store f to [%o1] and advance %o1 by 8.
 */
1506 1.1 christos #define FINISH_REG(f) \
1507 1.1 christos deccc 8, %o2; \
1508 1.1 christos bl,a Lmemcpy_blockfinish; \
1509 1.1 christos fmovd f, %f48; \
1510 1.1 christos std f, [%o1]; \
1511 1.1 christos inc 8, %o1
1512 1.1 christos
1513 1.1 christos FINISH_REG(%f32)
1514 1.1 christos FINISH_REG(%f34)
1515 1.1 christos FINISH_REG(%f36)
1516 1.1 christos FINISH_REG(%f38)
1517 1.1 christos FINISH_REG(%f40)
1518 1.1 christos FINISH_REG(%f42)
1519 1.1 christos FINISH_REG(%f44)
1520 1.1 christos FINISH_REG(%f46)
1521 1.1 christos FINISH_REG(%f48)
1522 1.1 christos #undef FINISH_REG
1523 1.1 christos !!
1524 1.1 christos !! The low 3 bits have the sub-word bits needed to be
1525 1.1 christos !! stored [because (x-8)&0x7 == x].
1526 1.1 christos !!
1527 1.1 christos Lmemcpy_blockfinish:
/*
 * Store the trailing bytes and return.
 *
 * %f48 is copied to %f4 in the delay slot of the brz; that slot is not
 * annulled, so the copy happens on both paths.  If %o2 is zero nothing
 * is left to store.  If %o2 equals 8 the whole double in %f4 is stored
 * (annulled delay slot of the bz, executed only when it is taken).
 * Otherwise bits 2, 1 and 0 of %o2 select, in turn, a 4-byte st, a
 * 2-byte store through ASI_FL16_P and a 1-byte store through
 * ASI_FL8_P, with %o1 advanced after each store.
 *
 * For the 2- and 1-byte stores the value is first prepared in %f8 with
 * alignaddr and faligndata from %f0 (zeroed by fzero) and %f4.
 */
1528 1.1 christos brz,pn %o2, 2f ! 100% complete?
1529 1.1 christos fmovd %f48, %f4
1530 1.1 christos cmp %o2, 8 ! Exactly 8 bytes?
1531 1.1 christos bz,a,pn CCCR, 2f
1532 1.1 christos std %f4, [%o1]
1533 1.1 christos
1534 1.1 christos btst 4, %o2 ! Word store?
1535 1.1 christos bz CCCR, 1f
1536 1.1 christos nop
1537 1.1 christos st %f4, [%o1]
1538 1.1 christos inc 4, %o1
1539 1.1 christos 1:
1540 1.1 christos btst 2, %o2
1541 1.1 christos fzero %f0
/* The "mov -6, %o4" below sits in the delay slot of this bz and runs on both paths. */
1542 1.1 christos bz 1f
1543 1.1 christos
1544 1.1 christos mov -6, %o4
1545 1.1 christos alignaddr %o1, %o4, %g0
1546 1.1 christos
1547 1.1 christos faligndata %f0, %f4, %f8
1548 1.1 christos
1549 1.1 christos stda %f8, [%o1] ASI_FL16_P ! Store short
1550 1.1 christos inc 2, %o1
1551 1.1 christos 1:
1552 1.1 christos btst 1, %o2 ! Byte aligned?
/* The "mov -7, %o0" below sits in the delay slot of this bz and runs on both paths. */
1553 1.1 christos bz 2f
1554 1.1 christos
1555 1.1 christos mov -7, %o0 ! Calculate dest - 7
1556 1.1 christos alignaddr %o1, %o0, %g0 ! Calculate shift mask and dest.
1557 1.1 christos
1558 1.1 christos faligndata %f0, %f4, %f8 ! Move 1st byte to low part of f8
1559 1.1 christos
1560 1.1 christos stda %f8, [%o1] ASI_FL8_P ! Store 1st byte
1561 1.1 christos inc 1, %o1 ! Update address
1562 1.1 christos 2:
1563 1.1 christos membar #Sync
/*
 * Disabled self-check.  It compares the bytes at %i0 and %i1 one at a
 * time for %i2 bytes.  On the first mismatch it writes to
 * block_disable, calls prom_printf twice and executes "ta 1".
 */
1564 1.1 christos #if 0
1565 1.1 christos !!
1566 1.1 christos !! verify copy success.
1567 1.1 christos !!
1568 1.1 christos
1569 1.1 christos mov %i0, %o2
1570 1.1 christos mov %i1, %o4
1571 1.1 christos mov %i2, %l4
1572 1.1 christos 0:
1573 1.1 christos ldub [%o2], %o1
1574 1.1 christos inc %o2
1575 1.1 christos ldub [%o4], %o3
1576 1.1 christos inc %o4
1577 1.1 christos cmp %o3, %o1
1578 1.1 christos bnz 1f
1579 1.1 christos dec %l4
1580 1.1 christos brnz %l4, 0b
1581 1.1 christos nop
1582 1.1 christos ba 2f
1583 1.1 christos nop
1584 1.1 christos
1585 1.1 christos 1:
1586 1.1 christos set block_disable, %o0
1587 1.1 christos stx %o0, [%o0]
1588 1.1 christos
1589 1.1 christos set 0f, %o0
1590 1.1 christos call prom_printf
1591 1.1 christos sub %i2, %l4, %o5
1592 1.1 christos set 1f, %o0
1593 1.1 christos mov %i0, %o2
1594 1.1 christos mov %i1, %o1
1595 1.1 christos call prom_printf
1596 1.1 christos mov %i2, %o3
1597 1.1 christos ta 1
1598 1.1 christos .data
1599 1.1 christos _ALIGN
1600 1.1 christos 0: .asciz "block memcpy failed: %x@%p != %x@%p byte %d\r\n"
1601 1.1 christos 1: .asciz "memcpy(%p, %p, %lx)\r\n"
1602 1.1 christos _ALIGN
1603 1.1 christos .text
1604 1.1 christos 2:
1605 1.1 christos #endif
/*
 * Return sequences.  Both place %g1 in %o0 as the return value.  %g1 is
 * presumably the saved destination pointer, set up before this part of
 * the file -- confirm.  In kernel (non-rump) builds the ret/restore
 * pair returns first, so the retl below it is only reached in other
 * builds.
 */
1606 1.1 christos #if defined(_KERNEL) && !defined(_RUMPKERNEL)
1607 1.1 christos
1608 1.1 christos /*
1609 1.1 christos * We've saved our possible fpstate, now disable the fpu
1610 1.1 christos * and continue with life.
1611 1.1 christos */
1612 1.1 christos RESTORE_FPU
1613 1.1 christos ret
1614 1.1 christos restore %g1, 0, %o0 ! Return DEST for memcpy
1615 1.1 christos #endif
1616 1.1 christos retl
1617 1.1 christos mov %g1, %o0
1618 1.1 christos /*
1619 1.1 christos * Use block_disable to turn off block insns for
1620 1.1 christos * memcpy/memset
 *
 * block_disable is a global 64-bit word in .data, aligned to 8 bytes
 * and initialised to 1.  The code that tests it is not in this part of
 * the file.  NOTE(review): presumably a non-zero value selects the
 * non-block copy path -- confirm against the code that reads it.
1621 1.1 christos */
1622 1.1 christos .data
1623 1.1 christos .align 8
1624 1.1 christos .globl block_disable
1625 1.1 christos block_disable: .xword 1
1626 1.1 christos .text
1627 1.1 christos #endif /* USE_BLOCK_STORE_LOAD */
1628