Home | History | Annotate | Line # | Download | only in string
memcpy.S revision 1.2.6.2
      1  1.2.6.2  tls /*	$NetBSD: memcpy.S,v 1.2.6.2 2013/06/23 06:26:13 tls Exp $	*/
      2  1.2.6.2  tls 
      3  1.2.6.2  tls /*
      4  1.2.6.2  tls  * Copyright (c) 1996-2002 Eduardo Horvath
      5  1.2.6.2  tls  * All rights reserved.
      6  1.2.6.2  tls  *
      7  1.2.6.2  tls  * Redistribution and use in source and binary forms, with or without
      8  1.2.6.2  tls  * modification, are permitted provided that the following conditions
      9  1.2.6.2  tls  * are met:
     10  1.2.6.2  tls  * 1. Redistributions of source code must retain the above copyright
     11  1.2.6.2  tls  *    notice, this list of conditions and the following disclaimer.
     12  1.2.6.2  tls  *
     13  1.2.6.2  tls  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR  ``AS IS'' AND
     14  1.2.6.2  tls  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  1.2.6.2  tls  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     16  1.2.6.2  tls  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR  BE LIABLE
     17  1.2.6.2  tls  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     18  1.2.6.2  tls  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     19  1.2.6.2  tls  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     20  1.2.6.2  tls  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     21  1.2.6.2  tls  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     22  1.2.6.2  tls  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     23  1.2.6.2  tls  * SUCH DAMAGE.
     24  1.2.6.2  tls  *
     25  1.2.6.2  tls  */
     26  1.2.6.2  tls #include "strmacros.h"
     27  1.2.6.2  tls #if defined(LIBC_SCCS) && !defined(lint)
     28  1.2.6.2  tls RCSID("$NetBSD: memcpy.S,v 1.2.6.2 2013/06/23 06:26:13 tls Exp $")
     29  1.2.6.2  tls #endif  /* LIBC_SCCS and not lint */
     30  1.2.6.2  tls 
     31  1.2.6.2  tls /*
     32  1.2.6.2  tls  * memcpy
     33  1.2.6.2  tls  * Assumes regions do not overlap.
     34  1.2.6.2  tls  *
     35  1.2.6.2  tls  * Must not use %g7 (see copyin/copyout, defined elsewhere).
     36  1.2.6.2  tls  */
     37  1.2.6.2  tls ENTRY(memcpy) /* dest, src, size */
     38  1.2.6.2  tls 	/*
     39  1.2.6.2  tls 	 * Swap args for bcopy.  Gcc generates calls to memcpy for
     40  1.2.6.2  tls 	 * structure assignments.
     41  1.2.6.2  tls 	 */
     42  1.2.6.2  tls 	mov	%o0, %o3
     43  1.2.6.2  tls 	mov	%o1, %o0
     44  1.2.6.2  tls 	mov	%o3, %o1
     45  1.2.6.2  tls #if !defined(_KERNEL) || defined(_RUMPKERNEL)
     46  1.2.6.2  tls ENTRY(bcopy) /* src, dest, size */
     47  1.2.6.2  tls #endif
     48  1.2.6.2  tls #ifdef DEBUG
     49  1.2.6.2  tls #if defined(_KERNEL) && !defined(_RUMPKERNEL)
     50  1.2.6.2  tls 	set	pmapdebug, %o4
     51  1.2.6.2  tls 	ld	[%o4], %o4
     52  1.2.6.2  tls 	btst	0x80, %o4	! PDB_COPY
     53  1.2.6.2  tls 	bz,pt	%icc, 3f
     54  1.2.6.2  tls 	 nop
     55  1.2.6.2  tls #endif
     56  1.2.6.2  tls 	save	%sp, -CC64FSZ, %sp
     57  1.2.6.2  tls 	mov	%i0, %o1
     58  1.2.6.2  tls 	set	2f, %o0
     59  1.2.6.2  tls 	mov	%i1, %o2
     60  1.2.6.2  tls 	call	printf
     61  1.2.6.2  tls 	 mov	%i2, %o3
     62  1.2.6.2  tls !	ta	1; nop
     63  1.2.6.2  tls 	restore
     64  1.2.6.2  tls 	.data
     65  1.2.6.2  tls 2:	.asciz	"memcpy(%p<-%p,%x)\n"
     66  1.2.6.2  tls 	_ALIGN
     67  1.2.6.2  tls 	.text
     68  1.2.6.2  tls 3:
     69  1.2.6.2  tls #endif
     70  1.2.6.2  tls 
     71  1.2.6.2  tls 	cmp	%o2, BCOPY_SMALL
     72  1.2.6.2  tls 
     73  1.2.6.2  tls Lmemcpy_start:
     74  1.2.6.2  tls 	bge,pt	CCCR, 2f	! if >= this many, go be fancy.
     75  1.2.6.2  tls 	 cmp	%o2, 256
     76  1.2.6.2  tls 
     77  1.2.6.2  tls 	mov	%o1, %o5	! Save memcpy return value
     78  1.2.6.2  tls 	/*
     79  1.2.6.2  tls 	 * Not much to copy, just do it a byte at a time.
     80  1.2.6.2  tls 	 */
     81  1.2.6.2  tls 	deccc	%o2		! while (--len >= 0)
     82  1.2.6.2  tls 	bl	1f
     83  1.2.6.2  tls 	 .empty
     84  1.2.6.2  tls 0:
     85  1.2.6.2  tls 	inc	%o0
     86  1.2.6.2  tls 	ldsb	[%o0 - 1], %o4	!	(++dst)[-1] = *src++;
     87  1.2.6.2  tls 	stb	%o4, [%o1]
     88  1.2.6.2  tls 	deccc	%o2
     89  1.2.6.2  tls 	bge	0b
     90  1.2.6.2  tls 	 inc	%o1
     91  1.2.6.2  tls 1:
     92  1.2.6.2  tls 	retl
     93  1.2.6.2  tls 	 mov	%o5, %o0
     94  1.2.6.2  tls 	NOTREACHED
     95  1.2.6.2  tls 
     96  1.2.6.2  tls 	/*
     97  1.2.6.2  tls 	 * Plenty of data to copy, so try to do it optimally.
     98  1.2.6.2  tls 	 */
     99  1.2.6.2  tls 2:
    100  1.2.6.2  tls #ifdef USE_BLOCK_STORE_LOAD
    101  1.2.6.2  tls 	! If it is big enough, use VIS instructions
    102  1.2.6.2  tls 	bge	Lmemcpy_block
    103  1.2.6.2  tls 	 nop
    104  1.2.6.2  tls #endif /* USE_BLOCK_STORE_LOAD */
    105  1.2.6.2  tls Lmemcpy_fancy:
    106  1.2.6.2  tls 
    107  1.2.6.2  tls 	!!
    108  1.2.6.2  tls 	!! First align the output to an 8-byte entity
    109  1.2.6.2  tls 	!!
    110  1.2.6.2  tls 
    111  1.2.6.2  tls 	save	%sp, -CC64FSZ, %sp
    112  1.2.6.2  tls 
    113  1.2.6.2  tls 	mov	%i0, %l0
    114  1.2.6.2  tls 	mov	%i1, %l1
    115  1.2.6.2  tls 
    116  1.2.6.2  tls 	mov	%i2, %l2
    117  1.2.6.2  tls 	btst	1, %l1
    118  1.2.6.2  tls 
    119  1.2.6.2  tls 	bz,pt	%icc, 4f
    120  1.2.6.2  tls 	 btst	2, %l1
    121  1.2.6.2  tls 	ldub	[%l0], %l4				! Load 1st byte
    122  1.2.6.2  tls 
    123  1.2.6.2  tls 	deccc	1, %l2
    124  1.2.6.2  tls 	ble,pn	CCCR, Lmemcpy_finish			! XXXX
    125  1.2.6.2  tls 	 inc	1, %l0
    126  1.2.6.2  tls 
    127  1.2.6.2  tls 	stb	%l4, [%l1]				! Store 1st byte
    128  1.2.6.2  tls 	inc	1, %l1					! Update address
    129  1.2.6.2  tls 	btst	2, %l1
    130  1.2.6.2  tls 4:
    131  1.2.6.2  tls 	bz,pt	%icc, 4f
    132  1.2.6.2  tls 
    133  1.2.6.2  tls 	 btst	1, %l0
    134  1.2.6.2  tls 	bz,a	1f
    135  1.2.6.2  tls 	 lduh	[%l0], %l4				! Load short
    136  1.2.6.2  tls 
    137  1.2.6.2  tls 	ldub	[%l0], %l4				! Load bytes
    138  1.2.6.2  tls 
    139  1.2.6.2  tls 	ldub	[%l0+1], %l3
    140  1.2.6.2  tls 	sllx	%l4, 8, %l4
    141  1.2.6.2  tls 	or	%l3, %l4, %l4
    142  1.2.6.2  tls 
    143  1.2.6.2  tls 1:
    144  1.2.6.2  tls 	deccc	2, %l2
    145  1.2.6.2  tls 	ble,pn	CCCR, Lmemcpy_finish			! XXXX
    146  1.2.6.2  tls 	 inc	2, %l0
    147  1.2.6.2  tls 	sth	%l4, [%l1]				! Store 1st short
    148  1.2.6.2  tls 
    149  1.2.6.2  tls 	inc	2, %l1
    150  1.2.6.2  tls 4:
    151  1.2.6.2  tls 	btst	4, %l1
    152  1.2.6.2  tls 	bz,pt	CCCR, 4f
    153  1.2.6.2  tls 
    154  1.2.6.2  tls 	 btst	3, %l0
    155  1.2.6.2  tls 	bz,a,pt	CCCR, 1f
    156  1.2.6.2  tls 	 lduw	[%l0], %l4				! Load word -1
    157  1.2.6.2  tls 
    158  1.2.6.2  tls 	btst	1, %l0
    159  1.2.6.2  tls 	bz,a,pt	%icc, 2f
    160  1.2.6.2  tls 	 lduh	[%l0], %l4
    161  1.2.6.2  tls 
    162  1.2.6.2  tls 	ldub	[%l0], %l4
    163  1.2.6.2  tls 
    164  1.2.6.2  tls 	lduh	[%l0+1], %l3
    165  1.2.6.2  tls 	sllx	%l4, 16, %l4
    166  1.2.6.2  tls 	or	%l4, %l3, %l4
    167  1.2.6.2  tls 
    168  1.2.6.2  tls 	ldub	[%l0+3], %l3
    169  1.2.6.2  tls 	sllx	%l4, 8, %l4
    170  1.2.6.2  tls 	ba,pt	%icc, 1f
    171  1.2.6.2  tls 	 or	%l4, %l3, %l4
    172  1.2.6.2  tls 
    173  1.2.6.2  tls 2:
    174  1.2.6.2  tls 	lduh	[%l0+2], %l3
    175  1.2.6.2  tls 	sllx	%l4, 16, %l4
    176  1.2.6.2  tls 	or	%l4, %l3, %l4
    177  1.2.6.2  tls 
    178  1.2.6.2  tls 1:
    179  1.2.6.2  tls 	deccc	4, %l2
    180  1.2.6.2  tls 	ble,pn	CCCR, Lmemcpy_finish		! XXXX
    181  1.2.6.2  tls 	 inc	4, %l0
    182  1.2.6.2  tls 
    183  1.2.6.2  tls 	st	%l4, [%l1]				! Store word
    184  1.2.6.2  tls 	inc	4, %l1
    185  1.2.6.2  tls 4:
    186  1.2.6.2  tls 	!!
    187  1.2.6.2  tls 	!! We are now 64-bit aligned in the dest.
    188  1.2.6.2  tls 	!!
    189  1.2.6.2  tls Lmemcpy_common:
    190  1.2.6.2  tls 
    191  1.2.6.2  tls 	and	%l0, 7, %l4				! Shift amount
    192  1.2.6.2  tls 	andn	%l0, 7, %l0				! Source addr
    193  1.2.6.2  tls 
    194  1.2.6.2  tls 	brz,pt	%l4, Lmemcpy_noshift8			! No shift version...
    195  1.2.6.2  tls 
    196  1.2.6.2  tls 	 sllx	%l4, 3, %l4				! In bits
    197  1.2.6.2  tls 	mov	8<<3, %l3
    198  1.2.6.2  tls 
    199  1.2.6.2  tls 	ldx	[%l0], %o0				! Load word -1
    200  1.2.6.2  tls 	sub	%l3, %l4, %l3				! Reverse shift
    201  1.2.6.2  tls 	deccc	12*8, %l2				! Have enough room?
    202  1.2.6.2  tls 
    203  1.2.6.2  tls 	sllx	%o0, %l4, %o0
    204  1.2.6.2  tls 	bl,pn	CCCR, 2f
    205  1.2.6.2  tls 	 and	%l3, 0x38, %l3
    206  1.2.6.2  tls Lmemcpy_unrolled8:
    207  1.2.6.2  tls 
    208  1.2.6.2  tls 	/*
    209  1.2.6.2  tls 	 * This is about as close to optimal as you can get, since
    210  1.2.6.2  tls 	 * the shifts require EU0 and cannot be paired, and you have
    211  1.2.6.2  tls 	 * 3 dependent operations on the data.
    212  1.2.6.2  tls 	 */
    213  1.2.6.2  tls 
    214  1.2.6.2  tls !	ldx	[%l0+0*8], %o0				! Already done
    215  1.2.6.2  tls !	sllx	%o0, %l4, %o0				! Already done
    216  1.2.6.2  tls 	ldx	[%l0+1*8], %o1
    217  1.2.6.2  tls 	ldx	[%l0+2*8], %o2
    218  1.2.6.2  tls 	ldx	[%l0+3*8], %o3
    219  1.2.6.2  tls 	ldx	[%l0+4*8], %o4
    220  1.2.6.2  tls 	ba,pt	%icc, 1f
    221  1.2.6.2  tls 	 ldx	[%l0+5*8], %o5
    222  1.2.6.2  tls 	.align	8
    223  1.2.6.2  tls 1:
    224  1.2.6.2  tls 	srlx	%o1, %l3, %g1
    225  1.2.6.2  tls 	inc	6*8, %l0
    226  1.2.6.2  tls 
    227  1.2.6.2  tls 	sllx	%o1, %l4, %o1
    228  1.2.6.2  tls 	or	%g1, %o0, %g6
    229  1.2.6.2  tls 	ldx	[%l0+0*8], %o0
    230  1.2.6.2  tls 
    231  1.2.6.2  tls 	stx	%g6, [%l1+0*8]
    232  1.2.6.2  tls 	srlx	%o2, %l3, %g1
    233  1.2.6.2  tls 
    234  1.2.6.2  tls 	sllx	%o2, %l4, %o2
    235  1.2.6.2  tls 	or	%g1, %o1, %g6
    236  1.2.6.2  tls 	ldx	[%l0+1*8], %o1
    237  1.2.6.2  tls 
    238  1.2.6.2  tls 	stx	%g6, [%l1+1*8]
    239  1.2.6.2  tls 	srlx	%o3, %l3, %g1
    240  1.2.6.2  tls 
    241  1.2.6.2  tls 	sllx	%o3, %l4, %o3
    242  1.2.6.2  tls 	or	%g1, %o2, %g6
    243  1.2.6.2  tls 	ldx	[%l0+2*8], %o2
    244  1.2.6.2  tls 
    245  1.2.6.2  tls 	stx	%g6, [%l1+2*8]
    246  1.2.6.2  tls 	srlx	%o4, %l3, %g1
    247  1.2.6.2  tls 
    248  1.2.6.2  tls 	sllx	%o4, %l4, %o4
    249  1.2.6.2  tls 	or	%g1, %o3, %g6
    250  1.2.6.2  tls 	ldx	[%l0+3*8], %o3
    251  1.2.6.2  tls 
    252  1.2.6.2  tls 	stx	%g6, [%l1+3*8]
    253  1.2.6.2  tls 	srlx	%o5, %l3, %g1
    254  1.2.6.2  tls 
    255  1.2.6.2  tls 	sllx	%o5, %l4, %o5
    256  1.2.6.2  tls 	or	%g1, %o4, %g6
    257  1.2.6.2  tls 	ldx	[%l0+4*8], %o4
    258  1.2.6.2  tls 
    259  1.2.6.2  tls 	stx	%g6, [%l1+4*8]
    260  1.2.6.2  tls 	srlx	%o0, %l3, %g1
    261  1.2.6.2  tls 	deccc	6*8, %l2				! Have enough room?
    262  1.2.6.2  tls 
    263  1.2.6.2  tls 	sllx	%o0, %l4, %o0				! Next loop
    264  1.2.6.2  tls 	or	%g1, %o5, %g6
    265  1.2.6.2  tls 	ldx	[%l0+5*8], %o5
    266  1.2.6.2  tls 
    267  1.2.6.2  tls 	stx	%g6, [%l1+5*8]
    268  1.2.6.2  tls 	bge,pt	CCCR, 1b
    269  1.2.6.2  tls 	 inc	6*8, %l1
    270  1.2.6.2  tls 
    271  1.2.6.2  tls Lmemcpy_unrolled8_cleanup:
    272  1.2.6.2  tls 	!!
    273  1.2.6.2  tls 	!! Finished 8 byte block, unload the regs.
    274  1.2.6.2  tls 	!!
    275  1.2.6.2  tls 	srlx	%o1, %l3, %g1
    276  1.2.6.2  tls 	inc	5*8, %l0
    277  1.2.6.2  tls 
    278  1.2.6.2  tls 	sllx	%o1, %l4, %o1
    279  1.2.6.2  tls 	or	%g1, %o0, %g6
    280  1.2.6.2  tls 
    281  1.2.6.2  tls 	stx	%g6, [%l1+0*8]
    282  1.2.6.2  tls 	srlx	%o2, %l3, %g1
    283  1.2.6.2  tls 
    284  1.2.6.2  tls 	sllx	%o2, %l4, %o2
    285  1.2.6.2  tls 	or	%g1, %o1, %g6
    286  1.2.6.2  tls 
    287  1.2.6.2  tls 	stx	%g6, [%l1+1*8]
    288  1.2.6.2  tls 	srlx	%o3, %l3, %g1
    289  1.2.6.2  tls 
    290  1.2.6.2  tls 	sllx	%o3, %l4, %o3
    291  1.2.6.2  tls 	or	%g1, %o2, %g6
    292  1.2.6.2  tls 
    293  1.2.6.2  tls 	stx	%g6, [%l1+2*8]
    294  1.2.6.2  tls 	srlx	%o4, %l3, %g1
    295  1.2.6.2  tls 
    296  1.2.6.2  tls 	sllx	%o4, %l4, %o4
    297  1.2.6.2  tls 	or	%g1, %o3, %g6
    298  1.2.6.2  tls 
    299  1.2.6.2  tls 	stx	%g6, [%l1+3*8]
    300  1.2.6.2  tls 	srlx	%o5, %l3, %g1
    301  1.2.6.2  tls 
    302  1.2.6.2  tls 	sllx	%o5, %l4, %o5
    303  1.2.6.2  tls 	or	%g1, %o4, %g6
    304  1.2.6.2  tls 
    305  1.2.6.2  tls 	stx	%g6, [%l1+4*8]
    306  1.2.6.2  tls 	inc	5*8, %l1
    307  1.2.6.2  tls 
    308  1.2.6.2  tls 	mov	%o5, %o0				! Save our unused data
    309  1.2.6.2  tls 	dec	5*8, %l2
    310  1.2.6.2  tls 2:
    311  1.2.6.2  tls 	inccc	12*8, %l2
    312  1.2.6.2  tls 	bz,pn	%icc, Lmemcpy_complete
    313  1.2.6.2  tls 
    314  1.2.6.2  tls 	!! Not unrolled: one 8-byte word per iteration
    315  1.2.6.2  tls Lmemcpy_aligned8:
    316  1.2.6.2  tls !	ldx	[%l0], %o0				! Already done
    317  1.2.6.2  tls !	sllx	%o0, %l4, %o0				! Shift high word
    318  1.2.6.2  tls 
    319  1.2.6.2  tls 	 deccc	8, %l2					! Pre-decrement
    320  1.2.6.2  tls 	bl,pn	CCCR, Lmemcpy_finish
    321  1.2.6.2  tls 1:
    322  1.2.6.2  tls 	ldx	[%l0+8], %o1				! Load word 0
    323  1.2.6.2  tls 	inc	8, %l0
    324  1.2.6.2  tls 
    325  1.2.6.2  tls 	srlx	%o1, %l3, %g6
    326  1.2.6.2  tls 	or	%g6, %o0, %g6				! Combine
    327  1.2.6.2  tls 
    328  1.2.6.2  tls 	stx	%g6, [%l1]				! Store result
    329  1.2.6.2  tls 	 inc	8, %l1
    330  1.2.6.2  tls 
    331  1.2.6.2  tls 	deccc	8, %l2
    332  1.2.6.2  tls 	bge,pn	CCCR, 1b
    333  1.2.6.2  tls 	 sllx	%o1, %l4, %o0
    334  1.2.6.2  tls 
    335  1.2.6.2  tls 	btst	7, %l2					! Done?
    336  1.2.6.2  tls 	bz,pt	CCCR, Lmemcpy_complete
    337  1.2.6.2  tls 
    338  1.2.6.2  tls 	!!
    339  1.2.6.2  tls 	!! Loadup the last dregs into %o0 and shift it into place
    340  1.2.6.2  tls 	!!
    341  1.2.6.2  tls 	 srlx	%l3, 3, %g6				! # bytes in %o0
    342  1.2.6.2  tls 	dec	8, %g6					!  - 8
    343  1.2.6.2  tls 	!! n-8 - (by - 8) -> n - by
    344  1.2.6.2  tls 	subcc	%l2, %g6, %g0				! # bytes we need
    345  1.2.6.2  tls 	ble,pt	%icc, Lmemcpy_finish
    346  1.2.6.2  tls 	 nop
    347  1.2.6.2  tls 	ldx	[%l0+8], %o1				! Need another word
    348  1.2.6.2  tls 	srlx	%o1, %l3, %o1
    349  1.2.6.2  tls 	ba,pt	%icc, Lmemcpy_finish
    350  1.2.6.2  tls 	 or	%o0, %o1, %o0				! All loaded up.
    351  1.2.6.2  tls 
    352  1.2.6.2  tls Lmemcpy_noshift8:
    353  1.2.6.2  tls 	deccc	6*8, %l2				! Have enough room?
    354  1.2.6.2  tls 	bl,pn	CCCR, 2f
    355  1.2.6.2  tls 	 nop
    356  1.2.6.2  tls 	ba,pt	%icc, 1f
    357  1.2.6.2  tls 	 nop
    358  1.2.6.2  tls 	.align	32
    359  1.2.6.2  tls 1:
    360  1.2.6.2  tls 	ldx	[%l0+0*8], %o0
    361  1.2.6.2  tls 	ldx	[%l0+1*8], %o1
    362  1.2.6.2  tls 	ldx	[%l0+2*8], %o2
    363  1.2.6.2  tls 	stx	%o0, [%l1+0*8]
    364  1.2.6.2  tls 	stx	%o1, [%l1+1*8]
    365  1.2.6.2  tls 	stx	%o2, [%l1+2*8]
    366  1.2.6.2  tls 
    367  1.2.6.2  tls 
    368  1.2.6.2  tls 	ldx	[%l0+3*8], %o3
    369  1.2.6.2  tls 	ldx	[%l0+4*8], %o4
    370  1.2.6.2  tls 	ldx	[%l0+5*8], %o5
    371  1.2.6.2  tls 	inc	6*8, %l0
    372  1.2.6.2  tls 	stx	%o3, [%l1+3*8]
    373  1.2.6.2  tls 	deccc	6*8, %l2
    374  1.2.6.2  tls 	stx	%o4, [%l1+4*8]
    375  1.2.6.2  tls 	stx	%o5, [%l1+5*8]
    376  1.2.6.2  tls 	bge,pt	CCCR, 1b
    377  1.2.6.2  tls 	 inc	6*8, %l1
    378  1.2.6.2  tls 2:
    379  1.2.6.2  tls 	inc	6*8, %l2
    380  1.2.6.2  tls 1:
    381  1.2.6.2  tls 	deccc	8, %l2
    382  1.2.6.2  tls 	bl,pn	%icc, 1f				! < 0 --> sub word
    383  1.2.6.2  tls 	 nop
    384  1.2.6.2  tls 	ldx	[%l0], %g6
    385  1.2.6.2  tls 	inc	8, %l0
    386  1.2.6.2  tls 	stx	%g6, [%l1]
    387  1.2.6.2  tls 	bg,pt	%icc, 1b				! Exactly 0 --> done
    388  1.2.6.2  tls 	 inc	8, %l1
    389  1.2.6.2  tls 1:
    390  1.2.6.2  tls 	btst	7, %l2					! Done?
    391  1.2.6.2  tls 	bz,pt	CCCR, Lmemcpy_complete
    392  1.2.6.2  tls 	 clr	%l4
    393  1.2.6.2  tls 	ldx	[%l0], %o0
    394  1.2.6.2  tls Lmemcpy_finish:
    395  1.2.6.2  tls 
    396  1.2.6.2  tls 	brz,pn	%l2, 2f					! 100% complete?
    397  1.2.6.2  tls 	 cmp	%l2, 8					! Exactly 8 bytes?
    398  1.2.6.2  tls 	bz,a,pn	CCCR, 2f
    399  1.2.6.2  tls 	 stx	%o0, [%l1]
    400  1.2.6.2  tls 
    401  1.2.6.2  tls 	btst	4, %l2					! Word store?
    402  1.2.6.2  tls 	bz	CCCR, 1f
    403  1.2.6.2  tls 	 srlx	%o0, 32, %g6				! Shift high word down
    404  1.2.6.2  tls 	stw	%g6, [%l1]
    405  1.2.6.2  tls 	inc	4, %l1
    406  1.2.6.2  tls 	mov	%o0, %g6				! Operate on the low bits
    407  1.2.6.2  tls 1:
    408  1.2.6.2  tls 	btst	2, %l2
    409  1.2.6.2  tls 	mov	%g6, %o0
    410  1.2.6.2  tls 	bz	1f
    411  1.2.6.2  tls 	 srlx	%o0, 16, %g6
    412  1.2.6.2  tls 
    413  1.2.6.2  tls 	sth	%g6, [%l1]				! Store short
    414  1.2.6.2  tls 	inc	2, %l1
    415  1.2.6.2  tls 	mov	%o0, %g6				! Operate on low bytes
    416  1.2.6.2  tls 1:
    417  1.2.6.2  tls 	mov	%g6, %o0
    418  1.2.6.2  tls 	btst	1, %l2					! Byte aligned?
    419  1.2.6.2  tls 	bz	2f
    420  1.2.6.2  tls 	 srlx	%o0, 8, %g6
    421  1.2.6.2  tls 
    422  1.2.6.2  tls 	stb	%g6, [%l1]				! Store last byte
    423  1.2.6.2  tls 	inc	1, %l1					! Update address
    424  1.2.6.2  tls 2:
    425  1.2.6.2  tls Lmemcpy_complete:
    426  1.2.6.2  tls #if 0
    427  1.2.6.2  tls 	!!
    428  1.2.6.2  tls 	!! verify copy success.
    429  1.2.6.2  tls 	!!
    430  1.2.6.2  tls 
    431  1.2.6.2  tls 	mov	%i0, %o2
    432  1.2.6.2  tls 	mov	%i1, %o4
    433  1.2.6.2  tls 	mov	%i2, %l4
    434  1.2.6.2  tls 0:
    435  1.2.6.2  tls 	ldub	[%o2], %o1
    436  1.2.6.2  tls 	inc	%o2
    437  1.2.6.2  tls 	ldub	[%o4], %o3
    438  1.2.6.2  tls 	inc	%o4
    439  1.2.6.2  tls 	cmp	%o3, %o1
    440  1.2.6.2  tls 	bnz	1f
    441  1.2.6.2  tls 	 dec	%l4
    442  1.2.6.2  tls 	brnz	%l4, 0b
    443  1.2.6.2  tls 	 nop
    444  1.2.6.2  tls 	ba	2f
    445  1.2.6.2  tls 	 nop
    446  1.2.6.2  tls 
    447  1.2.6.2  tls 1:
    448  1.2.6.2  tls 	set	0f, %o0
    449  1.2.6.2  tls 	call	printf
    450  1.2.6.2  tls 	 sub	%i2, %l4, %o5
    451  1.2.6.2  tls 	set	1f, %o0
    452  1.2.6.2  tls 	mov	%i0, %o2
    453  1.2.6.2  tls 	mov	%i1, %o1
    454  1.2.6.2  tls 	call	printf
    455  1.2.6.2  tls 	 mov	%i2, %o3
    456  1.2.6.2  tls 	ta	1
    457  1.2.6.2  tls 	.data
    458  1.2.6.2  tls 0:	.asciz	"memcpy failed: %x@%p != %x@%p byte %d\n"
    459  1.2.6.2  tls 1:	.asciz	"memcpy(%p, %p, %lx)\n"
    460  1.2.6.2  tls 	.align 8
    461  1.2.6.2  tls 	.text
    462  1.2.6.2  tls 2:
    463  1.2.6.2  tls #endif
    464  1.2.6.2  tls 	ret
    465  1.2.6.2  tls 	 restore %i1, %g0, %o0
    466  1.2.6.2  tls 
    467  1.2.6.2  tls #ifdef USE_BLOCK_STORE_LOAD
    468  1.2.6.2  tls 
    469  1.2.6.2  tls /*
    470  1.2.6.2  tls  * Block copy.  Useful for >256 byte copies.
    471  1.2.6.2  tls  *
    472  1.2.6.2  tls  * Benchmarking has shown this always seems to be slower than
    473  1.2.6.2  tls  * the integer version, so this is disabled.  Maybe someone will
    474  1.2.6.2  tls  * figure out why sometime.
    475  1.2.6.2  tls  */
    476  1.2.6.2  tls 
    477  1.2.6.2  tls Lmemcpy_block:
    478  1.2.6.2  tls 	sethi	%hi(block_disable), %o3
    479  1.2.6.2  tls 	ldx	[ %o3 + %lo(block_disable) ], %o3
    480  1.2.6.2  tls 	brnz,pn	%o3, Lmemcpy_fancy
    481  1.2.6.2  tls 	!! Make sure our trap table is installed
    482  1.2.6.2  tls 	set	_C_LABEL(trapbase), %o5
    483  1.2.6.2  tls 	rdpr	%tba, %o3
    484  1.2.6.2  tls 	sub	%o3, %o5, %o3
    485  1.2.6.2  tls 	brnz,pn	%o3, Lmemcpy_fancy	! No, then don't use block load/store
    486  1.2.6.2  tls 	 nop
    487  1.2.6.2  tls #if defined(_KERNEL) && !defined(_RUMPKERNEL)
    488  1.2.6.2  tls /*
    489  1.2.6.2  tls  * Kernel:
    490  1.2.6.2  tls  *
    491  1.2.6.2  tls  * Here we use VIS instructions to do a block copy.
    492  1.2.6.2  tls  * But before we can do that we need to save and enable the FPU.
    493  1.2.6.2  tls  * The last owner of the FPU registers is fplwp, and
    494  1.2.6.2  tls  * fplwp->l_md.md_fpstate is the current fpstate.  If that's not
    495  1.2.6.2  tls  * null, call savefpstate() with it to store our current fp state.
    496  1.2.6.2  tls  *
    497  1.2.6.2  tls  * Next, allocate an aligned fpstate on the stack.  We will properly
    498  1.2.6.2  tls  * nest calls on a particular stack so this should not be a problem.
    499  1.2.6.2  tls  *
    500  1.2.6.2  tls  * Now we grab curlwp (or, if we're on the interrupt stack,
    501  1.2.6.2  tls  * lwp0).  We stash its existing fpstate in a local register and
    502  1.2.6.2  tls  * put our new fpstate in curlwp->l_md.md_fpstate.  We point
    503  1.2.6.2  tls  * fplwp at curlwp (or lwp0) and enable the FPU.
    504  1.2.6.2  tls  *
    505  1.2.6.2  tls  * If we are ever preempted, our FPU state will be saved in our
    506  1.2.6.2  tls  * fpstate.  Then, when we're resumed and we take an FPDISABLED
    507  1.2.6.2  tls  * trap, the trap handler will be able to fish our FPU state out
    508  1.2.6.2  tls  * of curlwp (or lwp0).
    509  1.2.6.2  tls  *
    510  1.2.6.2  tls  * On exiting this routine we undo the damage: restore the original
    511  1.2.6.2  tls  * pointer to curlwp->l_md.md_fpstate, clear our fplwp, and disable
    512  1.2.6.2  tls  * the FPU.
    513  1.2.6.2  tls  *
    514  1.2.6.2  tls  *
    515  1.2.6.2  tls  * Register usage, Kernel only (after save):
    516  1.2.6.2  tls  *
    517  1.2.6.2  tls  * %i0		src
    518  1.2.6.2  tls  * %i1		dest
    519  1.2.6.2  tls  * %i2		size
    520  1.2.6.2  tls  *
    521  1.2.6.2  tls  * %l0		XXXX DEBUG old fpstate
    522  1.2.6.2  tls  * %l1		fplwp (hi bits only)
    523  1.2.6.2  tls  * %l2		orig fplwp
    524  1.2.6.2  tls  * %l3		orig fpstate
    525  1.2.6.2  tls  * %l5		curlwp
    526  1.2.6.2  tls  * %l6		old fpstate
    527  1.2.6.2  tls  *
    528  1.2.6.2  tls  * Register usage, Kernel and user:
    529  1.2.6.2  tls  *
    530  1.2.6.2  tls  * %g1		dest (retval for memcpy)
    531  1.2.6.2  tls  *
    532  1.2.6.2  tls  * %o0		src
    533  1.2.6.2  tls  * %o1		dest
    534  1.2.6.2  tls  * %o2		bytes remaining
    535  1.2.6.2  tls  * %o5		last safe fetchable address
    536  1.2.6.2  tls  */
    537  1.2.6.2  tls 
    538  1.2.6.2  tls 	ENABLE_FPU(0)
    539  1.2.6.2  tls 
    540  1.2.6.2  tls 	mov	%i0, %o0				! Src addr.
    541  1.2.6.2  tls 	mov	%i1, %o1				! Store our dest ptr here.
    542  1.2.6.2  tls 	mov	%i2, %o2				! Len counter
    543  1.2.6.2  tls #endif	/* _KERNEL */
    544  1.2.6.2  tls 
    545  1.2.6.2  tls 	!!
    546  1.2.6.2  tls 	!! First align the output to a 64-bit entity
    547  1.2.6.2  tls 	!!
    548  1.2.6.2  tls 
    549  1.2.6.2  tls 	mov	%o1, %g1				! memcpy retval
    550  1.2.6.2  tls 	add	%o0, %o2, %o5				! End of source block
    551  1.2.6.2  tls 
    552  1.2.6.2  tls 	andn	%o0, 7, %o3				! Start of block
    553  1.2.6.2  tls 	dec	%o5
    554  1.2.6.2  tls 	fzero	%f0
    555  1.2.6.2  tls 
    556  1.2.6.2  tls 	andn	%o5, BLOCK_ALIGN, %o5			! Last safe addr.
    557  1.2.6.2  tls 	ldd	[%o3], %f2				! Load 1st word
    558  1.2.6.2  tls 
    559  1.2.6.2  tls 	dec	8, %o3					! Move %o3 1 word back
    560  1.2.6.2  tls 	btst	1, %o1
    561  1.2.6.2  tls 	bz	4f
    562  1.2.6.2  tls 
    563  1.2.6.2  tls 	 mov	-7, %o4					! Lowest src addr possible
    564  1.2.6.2  tls 	alignaddr %o0, %o4, %o4				! Base addr for load.
    565  1.2.6.2  tls 
    566  1.2.6.2  tls 	cmp	%o3, %o4
    567  1.2.6.2  tls 	be,pt	CCCR, 1f				! Already loaded?
    568  1.2.6.2  tls 	 mov	%o4, %o3
    569  1.2.6.2  tls 	fmovd	%f2, %f0				! No. Shift
    570  1.2.6.2  tls 	ldd	[%o3+8], %f2				! And load
    571  1.2.6.2  tls 1:
    572  1.2.6.2  tls 
    573  1.2.6.2  tls 	faligndata	%f0, %f2, %f4			! Isolate 1st byte
    574  1.2.6.2  tls 
    575  1.2.6.2  tls 	stda	%f4, [%o1] ASI_FL8_P			! Store 1st byte
    576  1.2.6.2  tls 	inc	1, %o1					! Update address
    577  1.2.6.2  tls 	inc	1, %o0
    578  1.2.6.2  tls 	dec	1, %o2
    579  1.2.6.2  tls 4:
    580  1.2.6.2  tls 	btst	2, %o1
    581  1.2.6.2  tls 	bz	4f
    582  1.2.6.2  tls 
    583  1.2.6.2  tls 	 mov	-6, %o4					! Calculate src - 6
    584  1.2.6.2  tls 	alignaddr %o0, %o4, %o4				! calculate shift mask and dest.
    585  1.2.6.2  tls 
    586  1.2.6.2  tls 	cmp	%o3, %o4				! Addresses same?
    587  1.2.6.2  tls 	be,pt	CCCR, 1f
    588  1.2.6.2  tls 	 mov	%o4, %o3
    589  1.2.6.2  tls 	fmovd	%f2, %f0				! Shuffle data
    590  1.2.6.2  tls 	ldd	[%o3+8], %f2				! Load word 0
    591  1.2.6.2  tls 1:
    592  1.2.6.2  tls 	faligndata %f0, %f2, %f4			! Move 1st short low part of f8
    593  1.2.6.2  tls 
    594  1.2.6.2  tls 	stda	%f4, [%o1] ASI_FL16_P			! Store 1st short
    595  1.2.6.2  tls 	dec	2, %o2
    596  1.2.6.2  tls 	inc	2, %o1
    597  1.2.6.2  tls 	inc	2, %o0
    598  1.2.6.2  tls 4:
    599  1.2.6.2  tls 	brz,pn	%o2, Lmemcpy_blockfinish			! XXXX
    600  1.2.6.2  tls 
    601  1.2.6.2  tls 	 btst	4, %o1
    602  1.2.6.2  tls 	bz	4f
    603  1.2.6.2  tls 
    604  1.2.6.2  tls 	mov	-4, %o4
    605  1.2.6.2  tls 	alignaddr %o0, %o4, %o4				! calculate shift mask and dest.
    606  1.2.6.2  tls 
    607  1.2.6.2  tls 	cmp	%o3, %o4				! Addresses same?
    608  1.2.6.2  tls 	beq,pt	CCCR, 1f
    609  1.2.6.2  tls 	 mov	%o4, %o3
    610  1.2.6.2  tls 	fmovd	%f2, %f0				! Shuffle data
    611  1.2.6.2  tls 	ldd	[%o3+8], %f2				! Load word 0
    612  1.2.6.2  tls 1:
    613  1.2.6.2  tls 	faligndata %f0, %f2, %f4			! Move 1st short low part of f8
    614  1.2.6.2  tls 
    615  1.2.6.2  tls 	st	%f5, [%o1]				! Store word
    616  1.2.6.2  tls 	dec	4, %o2
    617  1.2.6.2  tls 	inc	4, %o1
    618  1.2.6.2  tls 	inc	4, %o0
    619  1.2.6.2  tls 4:
    620  1.2.6.2  tls 	brz,pn	%o2, Lmemcpy_blockfinish			! XXXX
    621  1.2.6.2  tls 	!!
    622  1.2.6.2  tls 	!! We are now 64-bit aligned in the dest.
    623  1.2.6.2  tls 	!!
    624  1.2.6.2  tls Lmemcpy_block_common:
    625  1.2.6.2  tls 
    626  1.2.6.2  tls 	 mov	-0, %o4
    627  1.2.6.2  tls 	alignaddr %o0, %o4, %o4				! base - shift
    628  1.2.6.2  tls 
    629  1.2.6.2  tls 	cmp	%o3, %o4				! Addresses same?
    630  1.2.6.2  tls 	beq,pt	CCCR, 1f
    631  1.2.6.2  tls 	 mov	%o4, %o3
    632  1.2.6.2  tls 	fmovd	%f2, %f0				! Shuffle data
    633  1.2.6.2  tls 	ldd	[%o3+8], %f2				! Load word 0
    634  1.2.6.2  tls 1:
    635  1.2.6.2  tls 	add	%o3, 8, %o0				! now use %o0 for src
    636  1.2.6.2  tls 
    637  1.2.6.2  tls 	!!
    638  1.2.6.2  tls 	!! Continue until our dest is block aligned
    639  1.2.6.2  tls 	!!
    640  1.2.6.2  tls Lmemcpy_block_aligned8:
    641  1.2.6.2  tls 1:
    642  1.2.6.2  tls 	brz	%o2, Lmemcpy_blockfinish
    643  1.2.6.2  tls 	 btst	BLOCK_ALIGN, %o1			! Block aligned?
    644  1.2.6.2  tls 	bz	1f
    645  1.2.6.2  tls 
    646  1.2.6.2  tls 	 faligndata %f0, %f2, %f4			! Generate result
    647  1.2.6.2  tls 	deccc	8, %o2
    648  1.2.6.2  tls 	ble,pn	%icc, Lmemcpy_blockfinish		! Should never happen
    649  1.2.6.2  tls 	 fmovd	%f4, %f48
    650  1.2.6.2  tls 
    651  1.2.6.2  tls 	std	%f4, [%o1]				! Store result
    652  1.2.6.2  tls 	inc	8, %o1
    653  1.2.6.2  tls 
    654  1.2.6.2  tls 	fmovd	%f2, %f0
    655  1.2.6.2  tls 	inc	8, %o0
    656  1.2.6.2  tls 	ba,pt	%xcc, 1b				! Not yet.
    657  1.2.6.2  tls 	 ldd	[%o0], %f2				! Load next part
    658  1.2.6.2  tls Lmemcpy_block_aligned64:
    659  1.2.6.2  tls 1:
    660  1.2.6.2  tls 
    661  1.2.6.2  tls /*
    662  1.2.6.2  tls  * 64-byte aligned -- ready for block operations.
    663  1.2.6.2  tls  *
    664  1.2.6.2  tls  * Here we have the destination block aligned, but the
    665  1.2.6.2  tls  * source pointer may not be.  Sub-word alignment will
    666  1.2.6.2  tls  * be handled by faligndata instructions.  But the source
    667  1.2.6.2  tls  * can still be potentially aligned to 8 different words
    668  1.2.6.2  tls  * in our 64-byte block, so we have 8 different copy routines.
    669  1.2.6.2  tls  *
    670  1.2.6.2  tls  * Once we figure out our source alignment, we branch
    671  1.2.6.2  tls  * to the appropriate copy routine, which sets up the
    672  1.2.6.2  tls  * alignment for faligndata and loads (sets) the values
    673  1.2.6.2  tls  * into the source registers and does the copy loop.
    674  1.2.6.2  tls  *
    675  1.2.6.2  tls  * When we're down to less than 1 block to store, we
    676  1.2.6.2  tls  * exit the copy loop and execute cleanup code.
    677  1.2.6.2  tls  *
    678  1.2.6.2  tls  * Block loads and stores are not properly interlocked.
    679  1.2.6.2  tls  * Stores save one reg/cycle, so you can start overwriting
    680  1.2.6.2  tls  * registers the cycle after the store is issued.
    681  1.2.6.2  tls  *
    682  1.2.6.2  tls  * Block loads require a block load to a different register
    683  1.2.6.2  tls  * block or a membar #Sync before accessing the loaded
    684  1.2.6.2  tls  * data.
    685  1.2.6.2  tls  *
    686  1.2.6.2  tls  * Since the faligndata instructions may be offset as far
    687  1.2.6.2  tls  * as 7 registers into a block (if you are shifting source
    688  1.2.6.2  tls  * 7 -> dest 0), you need 3 source register blocks for full
    689  1.2.6.2  tls  * performance: one you are copying, one you are loading,
    690  1.2.6.2  tls  * and one for interlocking.  Otherwise, we would need to
    691  1.2.6.2  tls  * sprinkle the code with membar #Sync and lose the advantage
    692  1.2.6.2  tls  * of running faligndata in parallel with block stores.  This
    693  1.2.6.2  tls  * means we are fetching a full 128 bytes ahead of the stores.
    694  1.2.6.2  tls  * We need to make sure the prefetch does not inadvertently
    695  1.2.6.2  tls  * cross a page boundary and fault on data that we will never
    696  1.2.6.2  tls  * store.
    697  1.2.6.2  tls  *
    698  1.2.6.2  tls  */
    699  1.2.6.2  tls #if 1
    700  1.2.6.2  tls 	and	%o0, BLOCK_ALIGN, %o3
    701  1.2.6.2  tls 	srax	%o3, 3, %o3				! Isolate the offset
    702  1.2.6.2  tls 
    703  1.2.6.2  tls 	brz	%o3, L100				! 0->0
    704  1.2.6.2  tls 	 btst	4, %o3
    705  1.2.6.2  tls 	bnz	%xcc, 4f
    706  1.2.6.2  tls 	 btst	2, %o3
    707  1.2.6.2  tls 	bnz	%xcc, 2f
    708  1.2.6.2  tls 	 btst	1, %o3
    709  1.2.6.2  tls 	ba,pt	%xcc, L101				! 0->1
    710  1.2.6.2  tls 	 nop	/* XXX spitfire bug */
    711  1.2.6.2  tls 2:
    712  1.2.6.2  tls 	bz	%xcc, L102				! 0->2
    713  1.2.6.2  tls 	 nop
    714  1.2.6.2  tls 	ba,pt	%xcc, L103				! 0->3
    715  1.2.6.2  tls 	 nop	/* XXX spitfire bug */
    716  1.2.6.2  tls 4:
    717  1.2.6.2  tls 	bnz	%xcc, 2f
    718  1.2.6.2  tls 	 btst	1, %o3
    719  1.2.6.2  tls 	bz	%xcc, L104				! 0->4
    720  1.2.6.2  tls 	 nop
    721  1.2.6.2  tls 	ba,pt	%xcc, L105				! 0->5
    722  1.2.6.2  tls 	 nop	/* XXX spitfire bug */
    723  1.2.6.2  tls 2:
    724  1.2.6.2  tls 	bz	%xcc, L106				! 0->6
    725  1.2.6.2  tls 	 nop
    726  1.2.6.2  tls 	ba,pt	%xcc, L107				! 0->7
    727  1.2.6.2  tls 	 nop	/* XXX spitfire bug */
    728  1.2.6.2  tls #else
    729  1.2.6.2  tls 
    730  1.2.6.2  tls 	!!
    731  1.2.6.2  tls 	!! Isolate the word offset, which just happens to be
    732  1.2.6.2  tls 	!! the slot in our jump table.
    733  1.2.6.2  tls 	!!
    734  1.2.6.2  tls 	!! This is 6 insns, most of which cannot be paired,
    735  1.2.6.2  tls 	!! which is about the same as the above version.
    736  1.2.6.2  tls 	!!
    737  1.2.6.2  tls 	rd	%pc, %o4
    738  1.2.6.2  tls 1:
    739  1.2.6.2  tls 	and	%o0, 0x31, %o3
    740  1.2.6.2  tls 	add	%o3, (Lmemcpy_block_jmp - 1b), %o3
    741  1.2.6.2  tls 	jmpl	%o4 + %o3, %g0
    742  1.2.6.2  tls 	 nop
    743  1.2.6.2  tls 
    744  1.2.6.2  tls 	!!
    745  1.2.6.2  tls 	!! Jump table
    746  1.2.6.2  tls 	!!
    747  1.2.6.2  tls 
    748  1.2.6.2  tls Lmemcpy_block_jmp:
    749  1.2.6.2  tls 	ba,a,pt	%xcc, L100
    750  1.2.6.2  tls 	 nop
    751  1.2.6.2  tls 	ba,a,pt	%xcc, L101
    752  1.2.6.2  tls 	 nop
    753  1.2.6.2  tls 	ba,a,pt	%xcc, L102
    754  1.2.6.2  tls 	 nop
    755  1.2.6.2  tls 	ba,a,pt	%xcc, L103
    756  1.2.6.2  tls 	 nop
    757  1.2.6.2  tls 	ba,a,pt	%xcc, L104
    758  1.2.6.2  tls 	 nop
    759  1.2.6.2  tls 	ba,a,pt	%xcc, L105
    760  1.2.6.2  tls 	 nop
    761  1.2.6.2  tls 	ba,a,pt	%xcc, L106
    762  1.2.6.2  tls 	 nop
    763  1.2.6.2  tls 	ba,a,pt	%xcc, L107
    764  1.2.6.2  tls 	 nop
    765  1.2.6.2  tls #endif
    766  1.2.6.2  tls 
    767  1.2.6.2  tls 	!! L100 -- copy loop entry for source offset 0 within the block.
    768  1.2.6.2  tls 	!! Source is block aligned.
    769  1.2.6.2  tls 	!! Loaded data rotates through %f0-%f14, %f16-%f30 and %f48-%f62 in turn.
    770  1.2.6.2  tls 	!! Just load a block and go.
    771  1.2.6.2  tls 	!! Each stage builds %f32-%f46 with faligndata and block-stores it at [%o1].
    772  1.2.6.2  tls L100:
    773  1.2.6.2  tls #ifdef RETURN_NAME
    774  1.2.6.2  tls 	sethi	%hi(1f), %g1				! %g1 = address of the name string below
    775  1.2.6.2  tls 	ba,pt	%icc, 2f				! Jump over the inline string
    776  1.2.6.2  tls 	 or	%g1, %lo(1f), %g1
    777  1.2.6.2  tls 1:
    778  1.2.6.2  tls 	.asciz	"L100"
    779  1.2.6.2  tls 	.align	8
    780  1.2.6.2  tls 2:
    781  1.2.6.2  tls #endif
    782  1.2.6.2  tls 	fmovd	%f0 , %f62				! Stage 1 reads its leading double from %f62
    783  1.2.6.2  tls 	ldda	[%o0] ASI_BLK_P, %f0			! First block into %f0-%f14
    784  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
    785  1.2.6.2  tls 	cmp	%o0, %o5				! %o5 is set before this code and bounds every block load
    786  1.2.6.2  tls 	bleu,a,pn	%icc, 3f
    787  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f16			! Annulled unless taken: next block into %f16-%f30
    788  1.2.6.2  tls 	ba,pt	%icc, 3f
    789  1.2.6.2  tls 	 membar #Sync					! No second load issued: sync on the first
    790  1.2.6.2  tls 
    791  1.2.6.2  tls 	.align	32					! ICache align.
    792  1.2.6.2  tls 3:							! Stage 1: consume %f62 and %f0-%f14
    793  1.2.6.2  tls 	faligndata	%f62, %f0, %f32
    794  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
    795  1.2.6.2  tls 	faligndata	%f0, %f2, %f34
    796  1.2.6.2  tls 	dec	BLOCK_SIZE, %o2
    797  1.2.6.2  tls 	faligndata	%f2, %f4, %f36
    798  1.2.6.2  tls 	cmp	%o0, %o5
    799  1.2.6.2  tls 	faligndata	%f4, %f6, %f38
    800  1.2.6.2  tls 	faligndata	%f6, %f8, %f40
    801  1.2.6.2  tls 	faligndata	%f8, %f10, %f42
    802  1.2.6.2  tls 	faligndata	%f10, %f12, %f44
    803  1.2.6.2  tls 	brlez,pn	%o2, Lmemcpy_blockdone		! %o2 <= 0 after this block: finish up
    804  1.2.6.2  tls 	 faligndata	%f12, %f14, %f46
    805  1.2.6.2  tls 	! Fetch a block into %f48-%f62 if %o0 is still within the %o5 limit, else just sync.
    806  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
    807  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f48
    808  1.2.6.2  tls 	membar	#Sync
    809  1.2.6.2  tls 2:
    810  1.2.6.2  tls 	stda	%f32, [%o1] ASI_STORE			! Store the stage 1 output block
    811  1.2.6.2  tls 	faligndata	%f14, %f16, %f32		! Stage 2: consume %f14 and %f16-%f30
    812  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
    813  1.2.6.2  tls 	faligndata	%f16, %f18, %f34
    814  1.2.6.2  tls 	inc	BLOCK_SIZE, %o1
    815  1.2.6.2  tls 	faligndata	%f18, %f20, %f36
    816  1.2.6.2  tls 	dec	BLOCK_SIZE, %o2
    817  1.2.6.2  tls 	faligndata	%f20, %f22, %f38
    818  1.2.6.2  tls 	cmp	%o0, %o5
    819  1.2.6.2  tls 	faligndata	%f22, %f24, %f40
    820  1.2.6.2  tls 	faligndata	%f24, %f26, %f42
    821  1.2.6.2  tls 	faligndata	%f26, %f28, %f44
    822  1.2.6.2  tls 	brlez,pn	%o2, Lmemcpy_blockdone
    823  1.2.6.2  tls 	 faligndata	%f28, %f30, %f46
    824  1.2.6.2  tls 	! Refill %f0-%f14 for the next pass through stage 1 if within the limit.
    825  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
    826  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f0
    827  1.2.6.2  tls 	membar	#Sync
    828  1.2.6.2  tls 2:
    829  1.2.6.2  tls 	stda	%f32, [%o1] ASI_STORE			! Store the stage 2 output block
    830  1.2.6.2  tls 	faligndata	%f30, %f48, %f32		! Stage 3: consume %f30 and %f48-%f62
    831  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
    832  1.2.6.2  tls 	faligndata	%f48, %f50, %f34
    833  1.2.6.2  tls 	inc	BLOCK_SIZE, %o1
    834  1.2.6.2  tls 	faligndata	%f50, %f52, %f36
    835  1.2.6.2  tls 	dec	BLOCK_SIZE, %o2
    836  1.2.6.2  tls 	faligndata	%f52, %f54, %f38
    837  1.2.6.2  tls 	cmp	%o0, %o5
    838  1.2.6.2  tls 	faligndata	%f54, %f56, %f40
    839  1.2.6.2  tls 	faligndata	%f56, %f58, %f42
    840  1.2.6.2  tls 	faligndata	%f58, %f60, %f44
    841  1.2.6.2  tls 	brlez,pn	%o2, Lmemcpy_blockdone
    842  1.2.6.2  tls 	 faligndata	%f60, %f62, %f46
    843  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
    844  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f16			! Increment is at top
    845  1.2.6.2  tls 	membar	#Sync
    846  1.2.6.2  tls 2:
    847  1.2.6.2  tls 	stda	%f32, [%o1] ASI_STORE			! Store the stage 3 output block
    848  1.2.6.2  tls 	ba	3b					! Back to stage 1
    849  1.2.6.2  tls 	 inc	BLOCK_SIZE, %o1
    850  1.2.6.2  tls 
    851  1.2.6.2  tls 	!! L101 -- copy loop entry for doubleword offset 1.
    852  1.2.6.2  tls 	!! Source at BLOCK_ALIGN+8
    853  1.2.6.2  tls 	!! %f2-%f14 are filled with ldd, later data comes from block loads.
    854  1.2.6.2  tls 	!! We need to load almost 1 complete block by hand.
    855  1.2.6.2  tls 	!! Leaves through Lmemcpy_blockdone once %o2 is zero or negative.
    856  1.2.6.2  tls L101:
    857  1.2.6.2  tls #ifdef RETURN_NAME
    858  1.2.6.2  tls 	sethi	%hi(1f), %g1				! %g1 = address of the name string below
    859  1.2.6.2  tls 	ba,pt	%icc, 2f
    860  1.2.6.2  tls 	 or	%g1, %lo(1f), %g1
    861  1.2.6.2  tls 1:
    862  1.2.6.2  tls 	.asciz	"L101"
    863  1.2.6.2  tls 	.align	8
    864  1.2.6.2  tls 2:
    865  1.2.6.2  tls #endif
    866  1.2.6.2  tls !	fmovd	%f0, %f0				! Hoist fmovd
    867  1.2.6.2  tls 	ldd	[%o0], %f2				! Seven doubles by hand: %f2-%f14
    868  1.2.6.2  tls 	inc	8, %o0
    869  1.2.6.2  tls 	ldd	[%o0], %f4
    870  1.2.6.2  tls 	inc	8, %o0
    871  1.2.6.2  tls 	ldd	[%o0], %f6
    872  1.2.6.2  tls 	inc	8, %o0
    873  1.2.6.2  tls 	ldd	[%o0], %f8
    874  1.2.6.2  tls 	inc	8, %o0
    875  1.2.6.2  tls 	ldd	[%o0], %f10
    876  1.2.6.2  tls 	inc	8, %o0
    877  1.2.6.2  tls 	ldd	[%o0], %f12
    878  1.2.6.2  tls 	inc	8, %o0
    879  1.2.6.2  tls 	ldd	[%o0], %f14
    880  1.2.6.2  tls 	inc	8, %o0
    881  1.2.6.2  tls 	! Block-load the following block if %o0 is within the %o5 limit, else sync.
    882  1.2.6.2  tls 	cmp	%o0, %o5
    883  1.2.6.2  tls 	bleu,a,pn	%icc, 3f
    884  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f16			! Annulled unless the branch is taken
    885  1.2.6.2  tls 	membar #Sync
    886  1.2.6.2  tls 3:							! Stage 1: consume %f0-%f16
    887  1.2.6.2  tls 	faligndata	%f0, %f2, %f32			! %f0 is not loaded here, it comes from earlier code
    888  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
    889  1.2.6.2  tls 	faligndata	%f2, %f4, %f34
    890  1.2.6.2  tls 	cmp	%o0, %o5
    891  1.2.6.2  tls 	faligndata	%f4, %f6, %f36
    892  1.2.6.2  tls 	dec	BLOCK_SIZE, %o2
    893  1.2.6.2  tls 	faligndata	%f6, %f8, %f38
    894  1.2.6.2  tls 	faligndata	%f8, %f10, %f40
    895  1.2.6.2  tls 	faligndata	%f10, %f12, %f42
    896  1.2.6.2  tls 	faligndata	%f12, %f14, %f44
    897  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
    898  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f48
    899  1.2.6.2  tls 	membar	#Sync
    900  1.2.6.2  tls 2:
    901  1.2.6.2  tls 	brlez,pn	%o2, Lmemcpy_blockdone		! %o2 <= 0 after this block: finish up
    902  1.2.6.2  tls 	 faligndata	%f14, %f16, %f46
    903  1.2.6.2  tls 
    904  1.2.6.2  tls 	stda	%f32, [%o1] ASI_STORE			! Store the stage 1 output block
    905  1.2.6.2  tls 	! Stage 2: consume %f16-%f30 and %f48
    906  1.2.6.2  tls 	faligndata	%f16, %f18, %f32
    907  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
    908  1.2.6.2  tls 	faligndata	%f18, %f20, %f34
    909  1.2.6.2  tls 	inc	BLOCK_SIZE, %o1
    910  1.2.6.2  tls 	faligndata	%f20, %f22, %f36
    911  1.2.6.2  tls 	cmp	%o0, %o5
    912  1.2.6.2  tls 	faligndata	%f22, %f24, %f38
    913  1.2.6.2  tls 	dec	BLOCK_SIZE, %o2
    914  1.2.6.2  tls 	faligndata	%f24, %f26, %f40
    915  1.2.6.2  tls 	faligndata	%f26, %f28, %f42
    916  1.2.6.2  tls 	faligndata	%f28, %f30, %f44
    917  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
    918  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f0
    919  1.2.6.2  tls 	membar	#Sync
    920  1.2.6.2  tls 2:
    921  1.2.6.2  tls 	brlez,pn	%o2, Lmemcpy_blockdone
    922  1.2.6.2  tls 	 faligndata	%f30, %f48, %f46
    923  1.2.6.2  tls 
    924  1.2.6.2  tls 	stda	%f32, [%o1] ASI_STORE			! Store the stage 2 output block
    925  1.2.6.2  tls 	! Stage 3: consume %f48-%f62 and %f0
    926  1.2.6.2  tls 	faligndata	%f48, %f50, %f32
    927  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
    928  1.2.6.2  tls 	faligndata	%f50, %f52, %f34
    929  1.2.6.2  tls 	inc	BLOCK_SIZE, %o1
    930  1.2.6.2  tls 	faligndata	%f52, %f54, %f36
    931  1.2.6.2  tls 	cmp	%o0, %o5
    932  1.2.6.2  tls 	faligndata	%f54, %f56, %f38
    933  1.2.6.2  tls 	dec	BLOCK_SIZE, %o2
    934  1.2.6.2  tls 	faligndata	%f56, %f58, %f40
    935  1.2.6.2  tls 	faligndata	%f58, %f60, %f42
    936  1.2.6.2  tls 	faligndata	%f60, %f62, %f44
    937  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
    938  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f16
    939  1.2.6.2  tls 	membar	#Sync
    940  1.2.6.2  tls 2:
    941  1.2.6.2  tls 	brlez,pn	%o2, Lmemcpy_blockdone
    942  1.2.6.2  tls 	 faligndata	%f62, %f0, %f46
    943  1.2.6.2  tls 
    944  1.2.6.2  tls 	stda	%f32, [%o1] ASI_STORE			! Store the stage 3 output block
    945  1.2.6.2  tls 	ba	3b					! Back to stage 1
    946  1.2.6.2  tls 	 inc	BLOCK_SIZE, %o1
    947  1.2.6.2  tls 
    948  1.2.6.2  tls 	!! L102 -- copy loop entry for doubleword offset 2.
    949  1.2.6.2  tls 	!! Source at BLOCK_ALIGN+16
    950  1.2.6.2  tls 	!! %f0 is moved to %f2 and %f4-%f14 are filled with ldd.
    951  1.2.6.2  tls 	!! We need to load 6 doubles by hand.
    952  1.2.6.2  tls 	!! Leaves through Lmemcpy_blockdone once %o2 is zero or negative.
    953  1.2.6.2  tls L102:
    954  1.2.6.2  tls #ifdef RETURN_NAME
    955  1.2.6.2  tls 	sethi	%hi(1f), %g1				! %g1 = address of the name string below
    956  1.2.6.2  tls 	ba,pt	%icc, 2f
    957  1.2.6.2  tls 	 or	%g1, %lo(1f), %g1
    958  1.2.6.2  tls 1:
    959  1.2.6.2  tls 	.asciz	"L102"
    960  1.2.6.2  tls 	.align	8
    961  1.2.6.2  tls 2:
    962  1.2.6.2  tls #endif
    963  1.2.6.2  tls 	ldd	[%o0], %f4				! Six doubles by hand: %f4-%f14
    964  1.2.6.2  tls 	inc	8, %o0
    965  1.2.6.2  tls 	fmovd	%f0, %f2				! Hoist fmovd
    966  1.2.6.2  tls 	ldd	[%o0], %f6
    967  1.2.6.2  tls 	inc	8, %o0
    968  1.2.6.2  tls 
    969  1.2.6.2  tls 	ldd	[%o0], %f8
    970  1.2.6.2  tls 	inc	8, %o0
    971  1.2.6.2  tls 	ldd	[%o0], %f10
    972  1.2.6.2  tls 	inc	8, %o0
    973  1.2.6.2  tls 	ldd	[%o0], %f12
    974  1.2.6.2  tls 	inc	8, %o0
    975  1.2.6.2  tls 	ldd	[%o0], %f14
    976  1.2.6.2  tls 	inc	8, %o0
    977  1.2.6.2  tls 	! Block-load the following block if %o0 is within the %o5 limit, else sync.
    978  1.2.6.2  tls 	cmp	%o0, %o5
    979  1.2.6.2  tls 	bleu,a,pn	%icc, 3f
    980  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f16			! Annulled unless the branch is taken
    981  1.2.6.2  tls 	membar #Sync
    982  1.2.6.2  tls 3:							! Stage 1: consume %f2-%f18
    983  1.2.6.2  tls 	faligndata	%f2, %f4, %f32
    984  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
    985  1.2.6.2  tls 	faligndata	%f4, %f6, %f34
    986  1.2.6.2  tls 	cmp	%o0, %o5
    987  1.2.6.2  tls 	faligndata	%f6, %f8, %f36
    988  1.2.6.2  tls 	dec	BLOCK_SIZE, %o2
    989  1.2.6.2  tls 	faligndata	%f8, %f10, %f38
    990  1.2.6.2  tls 	faligndata	%f10, %f12, %f40
    991  1.2.6.2  tls 	faligndata	%f12, %f14, %f42
    992  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
    993  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f48
    994  1.2.6.2  tls 	membar	#Sync
    995  1.2.6.2  tls 2:
    996  1.2.6.2  tls 	faligndata	%f14, %f16, %f44
    997  1.2.6.2  tls 
    998  1.2.6.2  tls 	brlez,pn	%o2, Lmemcpy_blockdone		! %o2 <= 0 after this block: finish up
    999  1.2.6.2  tls 	 faligndata	%f16, %f18, %f46
   1000  1.2.6.2  tls 
   1001  1.2.6.2  tls 	stda	%f32, [%o1] ASI_STORE			! Store the stage 1 output block
   1002  1.2.6.2  tls 	! Stage 2: consume %f18-%f30 and %f48-%f50
   1003  1.2.6.2  tls 	faligndata	%f18, %f20, %f32
   1004  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
   1005  1.2.6.2  tls 	faligndata	%f20, %f22, %f34
   1006  1.2.6.2  tls 	inc	BLOCK_SIZE, %o1
   1007  1.2.6.2  tls 	faligndata	%f22, %f24, %f36
   1008  1.2.6.2  tls 	cmp	%o0, %o5
   1009  1.2.6.2  tls 	faligndata	%f24, %f26, %f38
   1010  1.2.6.2  tls 	dec	BLOCK_SIZE, %o2
   1011  1.2.6.2  tls 	faligndata	%f26, %f28, %f40
   1012  1.2.6.2  tls 	faligndata	%f28, %f30, %f42
   1013  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
   1014  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f0
   1015  1.2.6.2  tls 	membar	#Sync
   1016  1.2.6.2  tls 2:
   1017  1.2.6.2  tls 	faligndata	%f30, %f48, %f44
   1018  1.2.6.2  tls 	brlez,pn	%o2, Lmemcpy_blockdone
   1019  1.2.6.2  tls 	 faligndata	%f48, %f50, %f46
   1020  1.2.6.2  tls 
   1021  1.2.6.2  tls 	stda	%f32, [%o1] ASI_STORE			! Store the stage 2 output block
   1022  1.2.6.2  tls 	! Stage 3: consume %f50-%f62 and %f0-%f2
   1023  1.2.6.2  tls 	faligndata	%f50, %f52, %f32
   1024  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
   1025  1.2.6.2  tls 	faligndata	%f52, %f54, %f34
   1026  1.2.6.2  tls 	inc	BLOCK_SIZE, %o1
   1027  1.2.6.2  tls 	faligndata	%f54, %f56, %f36
   1028  1.2.6.2  tls 	cmp	%o0, %o5
   1029  1.2.6.2  tls 	faligndata	%f56, %f58, %f38
   1030  1.2.6.2  tls 	dec	BLOCK_SIZE, %o2
   1031  1.2.6.2  tls 	faligndata	%f58, %f60, %f40
   1032  1.2.6.2  tls 	faligndata	%f60, %f62, %f42
   1033  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
   1034  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f16
   1035  1.2.6.2  tls 	membar	#Sync
   1036  1.2.6.2  tls 2:
   1037  1.2.6.2  tls 	faligndata	%f62, %f0, %f44
   1038  1.2.6.2  tls 	brlez,pn	%o2, Lmemcpy_blockdone
   1039  1.2.6.2  tls 	 faligndata	%f0, %f2, %f46
   1040  1.2.6.2  tls 
   1041  1.2.6.2  tls 	stda	%f32, [%o1] ASI_STORE			! Store the stage 3 output block
   1042  1.2.6.2  tls 	ba	3b					! Back to stage 1
   1043  1.2.6.2  tls 	 inc	BLOCK_SIZE, %o1
   1044  1.2.6.2  tls 
   1045  1.2.6.2  tls 	!!
   1046  1.2.6.2  tls 	!! Source at BLOCK_ALIGN+24
   1047  1.2.6.2  tls 	!!
   1048  1.2.6.2  tls 	!! We need to load 5 doubles by hand.
   1049  1.2.6.2  tls 	!!
   1050  1.2.6.2  tls L103:
   1051  1.2.6.2  tls #ifdef RETURN_NAME
   1052  1.2.6.2  tls 	sethi	%hi(1f), %g1
   1053  1.2.6.2  tls 	ba,pt	%icc, 2f
   1054  1.2.6.2  tls 	 or	%g1, %lo(1f), %g1
   1055  1.2.6.2  tls 1:
   1056  1.2.6.2  tls 	.asciz	"L103"
   1057  1.2.6.2  tls 	.align	8
   1058  1.2.6.2  tls 2:
   1059  1.2.6.2  tls #endif
   1060  1.2.6.2  tls 	fmovd	%f0, %f4
   1061  1.2.6.2  tls 	ldd	[%o0], %f6
   1062  1.2.6.2  tls 	inc	8, %o0
   1063  1.2.6.2  tls 	ldd	[%o0], %f8
   1064  1.2.6.2  tls 	inc	8, %o0
   1065  1.2.6.2  tls 	ldd	[%o0], %f10
   1066  1.2.6.2  tls 	inc	8, %o0
   1067  1.2.6.2  tls 	ldd	[%o0], %f12
   1068  1.2.6.2  tls 	inc	8, %o0
   1069  1.2.6.2  tls 	ldd	[%o0], %f14
   1070  1.2.6.2  tls 	inc	8, %o0
   1071  1.2.6.2  tls 
   1072  1.2.6.2  tls 	cmp	%o0, %o5
   1073  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
   1074  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f16
   1075  1.2.6.2  tls 	membar #Sync
   1076  1.2.6.2  tls 2:
   1077  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
   1078  1.2.6.2  tls 3:
   1079  1.2.6.2  tls 	faligndata	%f4, %f6, %f32
   1080  1.2.6.2  tls 	cmp	%o0, %o5
   1081  1.2.6.2  tls 	faligndata	%f6, %f8, %f34
   1082  1.2.6.2  tls 	dec	BLOCK_SIZE, %o2
   1083  1.2.6.2  tls 	faligndata	%f8, %f10, %f36
   1084  1.2.6.2  tls 	faligndata	%f10, %f12, %f38
   1085  1.2.6.2  tls 	faligndata	%f12, %f14, %f40
   1086  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
   1087  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f48
   1088  1.2.6.2  tls 	membar	#Sync
   1089  1.2.6.2  tls 2:
   1090  1.2.6.2  tls 	faligndata	%f14, %f16, %f42
   1091  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
   1092  1.2.6.2  tls 	faligndata	%f16, %f18, %f44
   1093  1.2.6.2  tls 	brlez,pn	%o2, Lmemcpy_blockdone
   1094  1.2.6.2  tls 	 faligndata	%f18, %f20, %f46
   1095  1.2.6.2  tls 
   1096  1.2.6.2  tls 	stda	%f32, [%o1] ASI_STORE
   1097  1.2.6.2  tls 
   1098  1.2.6.2  tls 	faligndata	%f20, %f22, %f32
   1099  1.2.6.2  tls 	cmp	%o0, %o5
   1100  1.2.6.2  tls 	faligndata	%f22, %f24, %f34
   1101  1.2.6.2  tls 	dec	BLOCK_SIZE, %o2
   1102  1.2.6.2  tls 	faligndata	%f24, %f26, %f36
   1103  1.2.6.2  tls 	inc	BLOCK_SIZE, %o1
   1104  1.2.6.2  tls 	faligndata	%f26, %f28, %f38
   1105  1.2.6.2  tls 	faligndata	%f28, %f30, %f40
   1106  1.2.6.2  tls 	ble,a,pn	%icc, 2f
   1107  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f0
   1108  1.2.6.2  tls 	membar	#Sync
   1109  1.2.6.2  tls 2:
   1110  1.2.6.2  tls 	faligndata	%f30, %f48, %f42
   1111  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
   1112  1.2.6.2  tls 	faligndata	%f48, %f50, %f44
   1113  1.2.6.2  tls 	brlez,pn	%o2, Lmemcpy_blockdone
   1114  1.2.6.2  tls 	 faligndata	%f50, %f52, %f46
   1115  1.2.6.2  tls 
   1116  1.2.6.2  tls 	stda	%f32, [%o1] ASI_STORE
   1117  1.2.6.2  tls 
   1118  1.2.6.2  tls 	faligndata	%f52, %f54, %f32
   1119  1.2.6.2  tls 	cmp	%o0, %o5
   1120  1.2.6.2  tls 	faligndata	%f54, %f56, %f34
   1121  1.2.6.2  tls 	dec	BLOCK_SIZE, %o2
   1122  1.2.6.2  tls 	faligndata	%f56, %f58, %f36
   1123  1.2.6.2  tls 	faligndata	%f58, %f60, %f38
   1124  1.2.6.2  tls 	inc	BLOCK_SIZE, %o1
   1125  1.2.6.2  tls 	faligndata	%f60, %f62, %f40
   1126  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
   1127  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f16
   1128  1.2.6.2  tls 	membar	#Sync
   1129  1.2.6.2  tls 2:
   1130  1.2.6.2  tls 	faligndata	%f62, %f0, %f42
   1131  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
   1132  1.2.6.2  tls 	faligndata	%f0, %f2, %f44
   1133  1.2.6.2  tls 	brlez,pn	%o2, Lmemcpy_blockdone
   1134  1.2.6.2  tls 	 faligndata	%f2, %f4, %f46
   1135  1.2.6.2  tls 
   1136  1.2.6.2  tls 	stda	%f32, [%o1] ASI_STORE
   1137  1.2.6.2  tls 	ba	3b
   1138  1.2.6.2  tls 	 inc	BLOCK_SIZE, %o1
   1139  1.2.6.2  tls 
   1140  1.2.6.2  tls 	!! L104 -- copy loop entry for doubleword offset 4.
   1141  1.2.6.2  tls 	!! Source at BLOCK_ALIGN+32
   1142  1.2.6.2  tls 	!! %f0 is moved to %f6 and %f8-%f14 are filled with ldd.
   1143  1.2.6.2  tls 	!! We need to load 4 doubles by hand.
   1144  1.2.6.2  tls 	!! Leaves through Lmemcpy_blockdone once %o2 is zero or negative.
   1145  1.2.6.2  tls L104:
   1146  1.2.6.2  tls #ifdef RETURN_NAME
   1147  1.2.6.2  tls 	sethi	%hi(1f), %g1				! %g1 = address of the name string below
   1148  1.2.6.2  tls 	ba,pt	%icc, 2f
   1149  1.2.6.2  tls 	 or	%g1, %lo(1f), %g1
   1150  1.2.6.2  tls 1:
   1151  1.2.6.2  tls 	.asciz	"L104"
   1152  1.2.6.2  tls 	.align	8
   1153  1.2.6.2  tls 2:
   1154  1.2.6.2  tls #endif
   1155  1.2.6.2  tls 	fmovd	%f0, %f6				! Stage 1 reads its leading double from %f6
   1156  1.2.6.2  tls 	ldd	[%o0], %f8				! Four doubles by hand: %f8-%f14
   1157  1.2.6.2  tls 	inc	8, %o0
   1158  1.2.6.2  tls 	ldd	[%o0], %f10
   1159  1.2.6.2  tls 	inc	8, %o0
   1160  1.2.6.2  tls 	ldd	[%o0], %f12
   1161  1.2.6.2  tls 	inc	8, %o0
   1162  1.2.6.2  tls 	ldd	[%o0], %f14
   1163  1.2.6.2  tls 	inc	8, %o0
   1164  1.2.6.2  tls 	! Block-load the following block if %o0 is within the %o5 limit, else sync.
   1165  1.2.6.2  tls 	cmp	%o0, %o5
   1166  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
   1167  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f16			! Annulled unless the branch is taken
   1168  1.2.6.2  tls 	membar #Sync
   1169  1.2.6.2  tls 2:
   1170  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
   1171  1.2.6.2  tls 3:							! Stage 1: consume %f6-%f22
   1172  1.2.6.2  tls 	faligndata	%f6, %f8, %f32
   1173  1.2.6.2  tls 	cmp	%o0, %o5
   1174  1.2.6.2  tls 	faligndata	%f8, %f10, %f34
   1175  1.2.6.2  tls 	dec	BLOCK_SIZE, %o2
   1176  1.2.6.2  tls 	faligndata	%f10, %f12, %f36
   1177  1.2.6.2  tls 	faligndata	%f12, %f14, %f38
   1178  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
   1179  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f48
   1180  1.2.6.2  tls 	membar	#Sync
   1181  1.2.6.2  tls 2:
   1182  1.2.6.2  tls 	faligndata	%f14, %f16, %f40
   1183  1.2.6.2  tls 	faligndata	%f16, %f18, %f42
   1184  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
   1185  1.2.6.2  tls 	faligndata	%f18, %f20, %f44
   1186  1.2.6.2  tls 	brlez,pn	%o2, Lmemcpy_blockdone		! %o2 <= 0 after this block: finish up
   1187  1.2.6.2  tls 	 faligndata	%f20, %f22, %f46
   1188  1.2.6.2  tls 
   1189  1.2.6.2  tls 	stda	%f32, [%o1] ASI_STORE			! Store the stage 1 output block
   1190  1.2.6.2  tls 	! Stage 2: consume %f22-%f30 and %f48-%f54
   1191  1.2.6.2  tls 	faligndata	%f22, %f24, %f32
   1192  1.2.6.2  tls 	cmp	%o0, %o5
   1193  1.2.6.2  tls 	faligndata	%f24, %f26, %f34
   1194  1.2.6.2  tls 	faligndata	%f26, %f28, %f36
   1195  1.2.6.2  tls 	inc	BLOCK_SIZE, %o1
   1196  1.2.6.2  tls 	faligndata	%f28, %f30, %f38
   1197  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
   1198  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f0
   1199  1.2.6.2  tls 	membar	#Sync
   1200  1.2.6.2  tls 2:
   1201  1.2.6.2  tls 	faligndata	%f30, %f48, %f40
   1202  1.2.6.2  tls 	dec	BLOCK_SIZE, %o2
   1203  1.2.6.2  tls 	faligndata	%f48, %f50, %f42
   1204  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
   1205  1.2.6.2  tls 	faligndata	%f50, %f52, %f44
   1206  1.2.6.2  tls 	brlez,pn	%o2, Lmemcpy_blockdone
   1207  1.2.6.2  tls 	 faligndata	%f52, %f54, %f46
   1208  1.2.6.2  tls 
   1209  1.2.6.2  tls 	stda	%f32, [%o1] ASI_STORE			! Store the stage 2 output block
   1210  1.2.6.2  tls 	! Stage 3: consume %f54-%f62 and %f0-%f6
   1211  1.2.6.2  tls 	faligndata	%f54, %f56, %f32
   1212  1.2.6.2  tls 	cmp	%o0, %o5
   1213  1.2.6.2  tls 	faligndata	%f56, %f58, %f34
   1214  1.2.6.2  tls 	faligndata	%f58, %f60, %f36
   1215  1.2.6.2  tls 	inc	BLOCK_SIZE, %o1
   1216  1.2.6.2  tls 	faligndata	%f60, %f62, %f38
   1217  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
   1218  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f16
   1219  1.2.6.2  tls 	membar	#Sync
   1220  1.2.6.2  tls 2:
   1221  1.2.6.2  tls 	faligndata	%f62, %f0, %f40
   1222  1.2.6.2  tls 	dec	BLOCK_SIZE, %o2
   1223  1.2.6.2  tls 	faligndata	%f0, %f2, %f42
   1224  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
   1225  1.2.6.2  tls 	faligndata	%f2, %f4, %f44
   1226  1.2.6.2  tls 	brlez,pn	%o2, Lmemcpy_blockdone
   1227  1.2.6.2  tls 	 faligndata	%f4, %f6, %f46
   1228  1.2.6.2  tls 
   1229  1.2.6.2  tls 	stda	%f32, [%o1] ASI_STORE			! Store the stage 3 output block
   1230  1.2.6.2  tls 	ba	3b					! Back to stage 1
   1231  1.2.6.2  tls 	 inc	BLOCK_SIZE, %o1
   1232  1.2.6.2  tls 
   1233  1.2.6.2  tls 	!! L105 -- copy loop entry for doubleword offset 5.
   1234  1.2.6.2  tls 	!! Source at BLOCK_ALIGN+40
   1235  1.2.6.2  tls 	!! %f0 is moved to %f8 and %f10-%f14 are filled with ldd.
   1236  1.2.6.2  tls 	!! We need to load 3 doubles by hand.
   1237  1.2.6.2  tls 	!! Leaves through Lmemcpy_blockdone once %o2 is zero or negative.
   1238  1.2.6.2  tls L105:
   1239  1.2.6.2  tls #ifdef RETURN_NAME
   1240  1.2.6.2  tls 	sethi	%hi(1f), %g1				! %g1 = address of the name string below
   1241  1.2.6.2  tls 	ba,pt	%icc, 2f
   1242  1.2.6.2  tls 	 or	%g1, %lo(1f), %g1
   1243  1.2.6.2  tls 1:
   1244  1.2.6.2  tls 	.asciz	"L105"
   1245  1.2.6.2  tls 	.align	8
   1246  1.2.6.2  tls 2:
   1247  1.2.6.2  tls #endif
   1248  1.2.6.2  tls 	fmovd	%f0, %f8				! Stage 1 reads its leading double from %f8
   1249  1.2.6.2  tls 	ldd	[%o0], %f10				! Three doubles by hand: %f10-%f14
   1250  1.2.6.2  tls 	inc	8, %o0
   1251  1.2.6.2  tls 	ldd	[%o0], %f12
   1252  1.2.6.2  tls 	inc	8, %o0
   1253  1.2.6.2  tls 	ldd	[%o0], %f14
   1254  1.2.6.2  tls 	inc	8, %o0
   1255  1.2.6.2  tls 	! Block-load the following block if %o0 is within the %o5 limit, else sync.
   1256  1.2.6.2  tls 	cmp	%o0, %o5
   1257  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
   1258  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f16			! Annulled unless the branch is taken
   1259  1.2.6.2  tls 	membar #Sync
   1260  1.2.6.2  tls 2:
   1261  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
   1262  1.2.6.2  tls 3:							! Stage 1: consume %f8-%f24
   1263  1.2.6.2  tls 	faligndata	%f8, %f10, %f32
   1264  1.2.6.2  tls 	cmp	%o0, %o5
   1265  1.2.6.2  tls 	faligndata	%f10, %f12, %f34
   1266  1.2.6.2  tls 	faligndata	%f12, %f14, %f36
   1267  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
   1268  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f48
   1269  1.2.6.2  tls 	membar	#Sync
   1270  1.2.6.2  tls 2:
   1271  1.2.6.2  tls 	faligndata	%f14, %f16, %f38
   1272  1.2.6.2  tls 	dec	BLOCK_SIZE, %o2
   1273  1.2.6.2  tls 	faligndata	%f16, %f18, %f40
   1274  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
   1275  1.2.6.2  tls 	faligndata	%f18, %f20, %f42
   1276  1.2.6.2  tls 	faligndata	%f20, %f22, %f44
   1277  1.2.6.2  tls 	brlez,pn	%o2, Lmemcpy_blockdone		! %o2 <= 0 after this block: finish up
   1278  1.2.6.2  tls 	 faligndata	%f22, %f24, %f46
   1279  1.2.6.2  tls 
   1280  1.2.6.2  tls 	stda	%f32, [%o1] ASI_STORE			! Store the stage 1 output block
   1281  1.2.6.2  tls 	! Stage 2: consume %f24-%f30 and %f48-%f56
   1282  1.2.6.2  tls 	faligndata	%f24, %f26, %f32
   1283  1.2.6.2  tls 	cmp	%o0, %o5
   1284  1.2.6.2  tls 	faligndata	%f26, %f28, %f34
   1285  1.2.6.2  tls 	dec	BLOCK_SIZE, %o2
   1286  1.2.6.2  tls 	faligndata	%f28, %f30, %f36
   1287  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
   1288  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f0
   1289  1.2.6.2  tls 	membar	#Sync
   1290  1.2.6.2  tls 2:
   1291  1.2.6.2  tls 	faligndata	%f30, %f48, %f38
   1292  1.2.6.2  tls 	inc	BLOCK_SIZE, %o1
   1293  1.2.6.2  tls 	faligndata	%f48, %f50, %f40
   1294  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
   1295  1.2.6.2  tls 	faligndata	%f50, %f52, %f42
   1296  1.2.6.2  tls 	faligndata	%f52, %f54, %f44
   1297  1.2.6.2  tls 	brlez,pn	%o2, Lmemcpy_blockdone
   1298  1.2.6.2  tls 	 faligndata	%f54, %f56, %f46
   1299  1.2.6.2  tls 
   1300  1.2.6.2  tls 	stda	%f32, [%o1] ASI_STORE			! Store the stage 2 output block
   1301  1.2.6.2  tls 	! Stage 3: consume %f56-%f62 and %f0-%f8
   1302  1.2.6.2  tls 	faligndata	%f56, %f58, %f32
   1303  1.2.6.2  tls 	cmp	%o0, %o5
   1304  1.2.6.2  tls 	faligndata	%f58, %f60, %f34
   1305  1.2.6.2  tls 	dec	BLOCK_SIZE, %o2
   1306  1.2.6.2  tls 	faligndata	%f60, %f62, %f36
   1307  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
   1308  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f16
   1309  1.2.6.2  tls 	membar	#Sync
   1310  1.2.6.2  tls 2:
   1311  1.2.6.2  tls 	faligndata	%f62, %f0, %f38
   1312  1.2.6.2  tls 	inc	BLOCK_SIZE, %o1
   1313  1.2.6.2  tls 	faligndata	%f0, %f2, %f40
   1314  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
   1315  1.2.6.2  tls 	faligndata	%f2, %f4, %f42
   1316  1.2.6.2  tls 	faligndata	%f4, %f6, %f44
   1317  1.2.6.2  tls 	brlez,pn	%o2, Lmemcpy_blockdone
   1318  1.2.6.2  tls 	 faligndata	%f6, %f8, %f46
   1319  1.2.6.2  tls 
   1320  1.2.6.2  tls 	stda	%f32, [%o1] ASI_STORE			! Store the stage 3 output block
   1321  1.2.6.2  tls 	ba	3b					! Back to stage 1
   1322  1.2.6.2  tls 	 inc	BLOCK_SIZE, %o1
   1323  1.2.6.2  tls 
   1324  1.2.6.2  tls 
   1325  1.2.6.2  tls 	!! L106 -- copy loop entry for doubleword offset 6.
   1326  1.2.6.2  tls 	!! Source at BLOCK_ALIGN+48
   1327  1.2.6.2  tls 	!! %f0 is moved to %f10 and %f12-%f14 are filled with ldd.
   1328  1.2.6.2  tls 	!! We need to load 2 doubles by hand.
   1329  1.2.6.2  tls 	!! Leaves through Lmemcpy_blockdone once %o2 is zero or negative.
   1330  1.2.6.2  tls L106:
   1331  1.2.6.2  tls #ifdef RETURN_NAME
   1332  1.2.6.2  tls 	sethi	%hi(1f), %g1				! %g1 = address of the name string below
   1333  1.2.6.2  tls 	ba,pt	%icc, 2f
   1334  1.2.6.2  tls 	 or	%g1, %lo(1f), %g1
   1335  1.2.6.2  tls 1:
   1336  1.2.6.2  tls 	.asciz	"L106"
   1337  1.2.6.2  tls 	.align	8
   1338  1.2.6.2  tls 2:
   1339  1.2.6.2  tls #endif
   1340  1.2.6.2  tls 	fmovd	%f0, %f10				! Stage 1 reads its leading double from %f10
   1341  1.2.6.2  tls 	ldd	[%o0], %f12				! Two doubles by hand: %f12-%f14
   1342  1.2.6.2  tls 	inc	8, %o0
   1343  1.2.6.2  tls 	ldd	[%o0], %f14
   1344  1.2.6.2  tls 	inc	8, %o0
   1345  1.2.6.2  tls 	! Block-load the following block if %o0 is within the %o5 limit, else sync.
   1346  1.2.6.2  tls 	cmp	%o0, %o5
   1347  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
   1348  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f16			! Annulled unless the branch is taken
   1349  1.2.6.2  tls 	membar #Sync
   1350  1.2.6.2  tls 2:
   1351  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
   1352  1.2.6.2  tls 3:							! Stage 1: consume %f10-%f26
   1353  1.2.6.2  tls 	faligndata	%f10, %f12, %f32
   1354  1.2.6.2  tls 	cmp	%o0, %o5
   1355  1.2.6.2  tls 	faligndata	%f12, %f14, %f34
   1356  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
   1357  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f48
   1358  1.2.6.2  tls 	membar	#Sync
   1359  1.2.6.2  tls 2:
   1360  1.2.6.2  tls 	faligndata	%f14, %f16, %f36
   1361  1.2.6.2  tls 	dec	BLOCK_SIZE, %o2
   1362  1.2.6.2  tls 	faligndata	%f16, %f18, %f38
   1363  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
   1364  1.2.6.2  tls 	faligndata	%f18, %f20, %f40
   1365  1.2.6.2  tls 	faligndata	%f20, %f22, %f42
   1366  1.2.6.2  tls 	faligndata	%f22, %f24, %f44
   1367  1.2.6.2  tls 	brlez,pn	%o2, Lmemcpy_blockdone		! %o2 <= 0 after this block: finish up
   1368  1.2.6.2  tls 	 faligndata	%f24, %f26, %f46
   1369  1.2.6.2  tls 
   1370  1.2.6.2  tls 	stda	%f32, [%o1] ASI_STORE			! Store the stage 1 output block
   1371  1.2.6.2  tls 	! Stage 2: consume %f26-%f30 and %f48-%f58
   1372  1.2.6.2  tls 	faligndata	%f26, %f28, %f32
   1373  1.2.6.2  tls 	cmp	%o0, %o5
   1374  1.2.6.2  tls 	faligndata	%f28, %f30, %f34
   1375  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
   1376  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f0
   1377  1.2.6.2  tls 	membar	#Sync
   1378  1.2.6.2  tls 2:
   1379  1.2.6.2  tls 	faligndata	%f30, %f48, %f36
   1380  1.2.6.2  tls 	dec	BLOCK_SIZE, %o2
   1381  1.2.6.2  tls 	faligndata	%f48, %f50, %f38
   1382  1.2.6.2  tls 	inc	BLOCK_SIZE, %o1
   1383  1.2.6.2  tls 	faligndata	%f50, %f52, %f40
   1384  1.2.6.2  tls 	faligndata	%f52, %f54, %f42
   1385  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
   1386  1.2.6.2  tls 	faligndata	%f54, %f56, %f44
   1387  1.2.6.2  tls 	brlez,pn	%o2, Lmemcpy_blockdone
   1388  1.2.6.2  tls 	 faligndata	%f56, %f58, %f46
   1389  1.2.6.2  tls 
   1390  1.2.6.2  tls 	stda	%f32, [%o1] ASI_STORE			! Store the stage 2 output block
   1391  1.2.6.2  tls 	! Stage 3: consume %f58-%f62 and %f0-%f10
   1392  1.2.6.2  tls 	faligndata	%f58, %f60, %f32
   1393  1.2.6.2  tls 	cmp	%o0, %o5
   1394  1.2.6.2  tls 	faligndata	%f60, %f62, %f34
   1395  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
   1396  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f16
   1397  1.2.6.2  tls 	membar	#Sync
   1398  1.2.6.2  tls 2:
   1399  1.2.6.2  tls 	faligndata	%f62, %f0, %f36
   1400  1.2.6.2  tls 	dec	BLOCK_SIZE, %o2
   1401  1.2.6.2  tls 	faligndata	%f0, %f2, %f38
   1402  1.2.6.2  tls 	inc	BLOCK_SIZE, %o1
   1403  1.2.6.2  tls 	faligndata	%f2, %f4, %f40
   1404  1.2.6.2  tls 	faligndata	%f4, %f6, %f42
   1405  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
   1406  1.2.6.2  tls 	faligndata	%f6, %f8, %f44
   1407  1.2.6.2  tls 	brlez,pn	%o2, Lmemcpy_blockdone
   1408  1.2.6.2  tls 	 faligndata	%f8, %f10, %f46
   1409  1.2.6.2  tls 
   1410  1.2.6.2  tls 	stda	%f32, [%o1] ASI_STORE			! Store the stage 3 output block
   1411  1.2.6.2  tls 	ba	3b					! Back to stage 1
   1412  1.2.6.2  tls 	 inc	BLOCK_SIZE, %o1
   1413  1.2.6.2  tls 
   1414  1.2.6.2  tls 
   1415  1.2.6.2  tls 	!! L107 -- copy loop entry for doubleword offset 7.
   1416  1.2.6.2  tls 	!! Source at BLOCK_ALIGN+56
   1417  1.2.6.2  tls 	!! %f0 is moved to %f12 and %f14 is filled with ldd.
   1418  1.2.6.2  tls 	!! We need to load 1 double by hand.
   1419  1.2.6.2  tls 	!! Leaves through Lmemcpy_blockdone once %o2 is zero or negative.
   1420  1.2.6.2  tls L107:
   1421  1.2.6.2  tls #ifdef RETURN_NAME
   1422  1.2.6.2  tls 	sethi	%hi(1f), %g1				! %g1 = address of the name string below
   1423  1.2.6.2  tls 	ba,pt	%icc, 2f
   1424  1.2.6.2  tls 	 or	%g1, %lo(1f), %g1
   1425  1.2.6.2  tls 1:
   1426  1.2.6.2  tls 	.asciz	"L107"
   1427  1.2.6.2  tls 	.align	8
   1428  1.2.6.2  tls 2:
   1429  1.2.6.2  tls #endif
   1430  1.2.6.2  tls 	fmovd	%f0, %f12				! Stage 1 reads its leading double from %f12
   1431  1.2.6.2  tls 	ldd	[%o0], %f14				! The single double loaded by hand
   1432  1.2.6.2  tls 	inc	8, %o0
   1433  1.2.6.2  tls 	! Block-load the following block if %o0 is within the %o5 limit, else sync.
   1434  1.2.6.2  tls 	cmp	%o0, %o5
   1435  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
   1436  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f16			! Annulled unless the branch is taken
   1437  1.2.6.2  tls 	membar #Sync
   1438  1.2.6.2  tls 2:
   1439  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
   1440  1.2.6.2  tls 3:							! Stage 1: consume %f12-%f28
   1441  1.2.6.2  tls 	faligndata	%f12, %f14, %f32
   1442  1.2.6.2  tls 	cmp	%o0, %o5
   1443  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
   1444  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f48
   1445  1.2.6.2  tls 	membar	#Sync
   1446  1.2.6.2  tls 2:
   1447  1.2.6.2  tls 	faligndata	%f14, %f16, %f34
   1448  1.2.6.2  tls 	dec	BLOCK_SIZE, %o2
   1449  1.2.6.2  tls 	faligndata	%f16, %f18, %f36
   1450  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
   1451  1.2.6.2  tls 	faligndata	%f18, %f20, %f38
   1452  1.2.6.2  tls 	faligndata	%f20, %f22, %f40
   1453  1.2.6.2  tls 	faligndata	%f22, %f24, %f42
   1454  1.2.6.2  tls 	faligndata	%f24, %f26, %f44
   1455  1.2.6.2  tls 	brlez,pn	%o2, Lmemcpy_blockdone		! %o2 <= 0 after this block: finish up
   1456  1.2.6.2  tls 	 faligndata	%f26, %f28, %f46
   1457  1.2.6.2  tls 
   1458  1.2.6.2  tls 	stda	%f32, [%o1] ASI_STORE			! Store the stage 1 output block
   1459  1.2.6.2  tls 	! Stage 2: consume %f28-%f30 and %f48-%f60
   1460  1.2.6.2  tls 	faligndata	%f28, %f30, %f32
   1461  1.2.6.2  tls 	cmp	%o0, %o5
   1462  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
   1463  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f0
   1464  1.2.6.2  tls 	membar	#Sync
   1465  1.2.6.2  tls 2:
   1466  1.2.6.2  tls 	faligndata	%f30, %f48, %f34
   1467  1.2.6.2  tls 	dec	BLOCK_SIZE, %o2
   1468  1.2.6.2  tls 	faligndata	%f48, %f50, %f36
   1469  1.2.6.2  tls 	inc	BLOCK_SIZE, %o1
   1470  1.2.6.2  tls 	faligndata	%f50, %f52, %f38
   1471  1.2.6.2  tls 	faligndata	%f52, %f54, %f40
   1472  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
   1473  1.2.6.2  tls 	faligndata	%f54, %f56, %f42
   1474  1.2.6.2  tls 	faligndata	%f56, %f58, %f44
   1475  1.2.6.2  tls 	brlez,pn	%o2, Lmemcpy_blockdone
   1476  1.2.6.2  tls 	 faligndata	%f58, %f60, %f46
   1477  1.2.6.2  tls 
   1478  1.2.6.2  tls 	stda	%f32, [%o1] ASI_STORE			! Store the stage 2 output block
   1479  1.2.6.2  tls 	! Stage 3: consume %f60-%f62 and %f0-%f12
   1480  1.2.6.2  tls 	faligndata	%f60, %f62, %f32
   1481  1.2.6.2  tls 	cmp	%o0, %o5
   1482  1.2.6.2  tls 	bleu,a,pn	%icc, 2f
   1483  1.2.6.2  tls 	 ldda	[%o0] ASI_BLK_P, %f16
   1484  1.2.6.2  tls 	membar	#Sync
   1485  1.2.6.2  tls 2:
   1486  1.2.6.2  tls 	faligndata	%f62, %f0, %f34
   1487  1.2.6.2  tls 	dec	BLOCK_SIZE, %o2
   1488  1.2.6.2  tls 	faligndata	%f0, %f2, %f36
   1489  1.2.6.2  tls 	inc	BLOCK_SIZE, %o1
   1490  1.2.6.2  tls 	faligndata	%f2, %f4, %f38
   1491  1.2.6.2  tls 	faligndata	%f4, %f6, %f40
   1492  1.2.6.2  tls 	inc	BLOCK_SIZE, %o0
   1493  1.2.6.2  tls 	faligndata	%f6, %f8, %f42
   1494  1.2.6.2  tls 	faligndata	%f8, %f10, %f44
   1495  1.2.6.2  tls 
   1496  1.2.6.2  tls 	brlez,pn	%o2, Lmemcpy_blockdone
   1497  1.2.6.2  tls 	 faligndata	%f10, %f12, %f46
   1498  1.2.6.2  tls 
   1499  1.2.6.2  tls 	stda	%f32, [%o1] ASI_STORE			! Store the stage 3 output block
   1500  1.2.6.2  tls 	ba	3b					! Back to stage 1
   1501  1.2.6.2  tls 	 inc	BLOCK_SIZE, %o1
   1502  1.2.6.2  tls 
   1503  1.2.6.2  tls Lmemcpy_blockdone:					! Entered with %f32-%f46 merged but not yet stored
   1504  1.2.6.2  tls 	inc	BLOCK_SIZE, %o2				! Fixup our overcommit
   1505  1.2.6.2  tls 	membar	#Sync					! Finish any pending loads
   1506  1.2.6.2  tls #define	FINISH_REG(f)				\
   1507  1.2.6.2  tls 	deccc	8, %o2;				\
   1508  1.2.6.2  tls 	bl,a	Lmemcpy_blockfinish;		\
   1509  1.2.6.2  tls 	 fmovd	f, %f48;			\
   1510  1.2.6.2  tls 	std	f, [%o1];			\
   1511  1.2.6.2  tls 	inc	8, %o1
   1512  1.2.6.2  tls 	! Per register: take 8 off %o2.  If it goes negative, copy the register to %f48 and leave, else store it and advance %o1.
   1513  1.2.6.2  tls 	FINISH_REG(%f32)
   1514  1.2.6.2  tls 	FINISH_REG(%f34)
   1515  1.2.6.2  tls 	FINISH_REG(%f36)
   1516  1.2.6.2  tls 	FINISH_REG(%f38)
   1517  1.2.6.2  tls 	FINISH_REG(%f40)
   1518  1.2.6.2  tls 	FINISH_REG(%f42)
   1519  1.2.6.2  tls 	FINISH_REG(%f44)
   1520  1.2.6.2  tls 	FINISH_REG(%f46)
   1521  1.2.6.2  tls 	FINISH_REG(%f48)
   1522  1.2.6.2  tls #undef FINISH_REG
   1523  1.2.6.2  tls 	!! Lmemcpy_blockfinish -- store the final partial doubleword held in %f48.
   1524  1.2.6.2  tls 	!! The low 3 bits have the sub-word bits needed to be
   1525  1.2.6.2  tls 	!! stored [because (x-8)&0x7 == x].
   1526  1.2.6.2  tls 	!! Bits 4, 2 and 1 of %o2 select a word, halfword and byte store in that order.
   1527  1.2.6.2  tls Lmemcpy_blockfinish:
   1528  1.2.6.2  tls 	brz,pn	%o2, 2f					! 100% complete?
   1529  1.2.6.2  tls 	 fmovd	%f48, %f4				! Delay slot, always executed: tail data into %f4
   1530  1.2.6.2  tls 	cmp	%o2, 8					! Exactly 8 bytes?
   1531  1.2.6.2  tls 	bz,a,pn	CCCR, 2f
   1532  1.2.6.2  tls 	 std	%f4, [%o1]				! Annulled unless taken: store the whole double
   1533  1.2.6.2  tls 
   1534  1.2.6.2  tls 	btst	4, %o2					! Word store?
   1535  1.2.6.2  tls 	bz	CCCR, 1f
   1536  1.2.6.2  tls 	 nop
   1537  1.2.6.2  tls 	st	%f4, [%o1]				! Single-word store from %f4
   1538  1.2.6.2  tls 	inc	4, %o1
   1539  1.2.6.2  tls 1:
   1540  1.2.6.2  tls 	btst	2, %o2					! Halfword store?
   1541  1.2.6.2  tls 	fzero	%f0					! Zero %f0 for use as the other faligndata operand
   1542  1.2.6.2  tls 	bz	1f
   1543  1.2.6.2  tls 
   1544  1.2.6.2  tls 	 mov	-6, %o4					! Delay slot, always executed
   1545  1.2.6.2  tls 	alignaddr %o1, %o4, %g0				! Result discarded to %g0, used for its alignment side effect
   1546  1.2.6.2  tls 
   1547  1.2.6.2  tls 	faligndata %f0, %f4, %f8			! Extract the bytes to store into %f8
   1548  1.2.6.2  tls 
   1549  1.2.6.2  tls 	stda	%f8, [%o1] ASI_FL16_P			! Store short
   1550  1.2.6.2  tls 	inc	2, %o1
   1551  1.2.6.2  tls 1:
   1552  1.2.6.2  tls 	btst	1, %o2					! Byte aligned?
   1553  1.2.6.2  tls 	bz	2f
   1554  1.2.6.2  tls 
   1555  1.2.6.2  tls 	 mov	-7, %o0					! Calculate dest - 7
   1556  1.2.6.2  tls 	alignaddr %o1, %o0, %g0				! Calculate shift mask and dest.
   1557  1.2.6.2  tls 
   1558  1.2.6.2  tls 	faligndata %f0, %f4, %f8			! Move 1st byte to low part of f8
   1559  1.2.6.2  tls 
   1560  1.2.6.2  tls 	stda	%f8, [%o1] ASI_FL8_P			! Store 1st byte
   1561  1.2.6.2  tls 	inc	1, %o1					! Update address
   1562  1.2.6.2  tls 2:
   1563  1.2.6.2  tls 	membar	#Sync					! Common exit for all tail cases
   1564  1.2.6.2  tls #if 0
/*
 * Disabled debug check.  Compares the byte at [%i0 + n] with the byte at
 * [%i1 + n] for a count taken from %i2.  On the first mismatch it writes
 * a nonzero value into block_disable, prints two messages through
 * prom_printf and then executes "ta 1".
 * NOTE(review): the count is tested only after the first compare, so a
 * zero %i2 would not end the loop promptly.  Confirm before enabling.
 * NOTE(review): both cursors are already incremented when the mismatch
 * is reported, so the printed addresses are one past the differing byte.
 */
   1565  1.2.6.2  tls 	!!
   1566  1.2.6.2  tls 	!! verify copy success.
   1567  1.2.6.2  tls 	!!
   1568  1.2.6.2  tls 
   1569  1.2.6.2  tls 	mov	%i0, %o2	/* %o2 = cursor into the first buffer */
   1570  1.2.6.2  tls 	mov	%i1, %o4	/* %o4 = cursor into the second buffer */
   1571  1.2.6.2  tls 	mov	%i2, %l4	/* %l4 = bytes left to compare */
   1572  1.2.6.2  tls 0:
   1573  1.2.6.2  tls 	ldub	[%o2], %o1
   1574  1.2.6.2  tls 	inc	%o2
   1575  1.2.6.2  tls 	ldub	[%o4], %o3
   1576  1.2.6.2  tls 	inc	%o4
   1577  1.2.6.2  tls 	cmp	%o3, %o1
   1578  1.2.6.2  tls 	bnz	1f	/* mismatch: go report it */
   1579  1.2.6.2  tls 	 dec	%l4	/* delay slot, not annulled: counts this byte on both paths */
   1580  1.2.6.2  tls 	brnz	%l4, 0b
   1581  1.2.6.2  tls 	 nop
   1582  1.2.6.2  tls 	ba	2f	/* all bytes matched: skip the report */
   1583  1.2.6.2  tls 	 nop
   1584  1.2.6.2  tls 
   1585  1.2.6.2  tls 1:
   1586  1.2.6.2  tls 	set	block_disable, %o0
   1587  1.2.6.2  tls 	stx	%o0, [%o0]	/* block_disable = its own address, i.e. nonzero */
   1588  1.2.6.2  tls 
   1589  1.2.6.2  tls 	set	0f, %o0	/* first format string in .data below */
   1590  1.2.6.2  tls 	call	prom_printf
   1591  1.2.6.2  tls 	 sub	%i2, %l4, %o5	/* delay slot: %o5 = %i2 - %l4, passed for the "byte %d" field */
   1592  1.2.6.2  tls 	set	1f, %o0	/* second format string in .data below */
   1593  1.2.6.2  tls 	mov	%i0, %o2
   1594  1.2.6.2  tls 	mov	%i1, %o1
   1595  1.2.6.2  tls 	call	prom_printf
   1596  1.2.6.2  tls 	 mov	%i2, %o3	/* delay slot: third printf argument */
   1597  1.2.6.2  tls 	ta	1	/* software trap 1 */
   1598  1.2.6.2  tls 	.data
   1599  1.2.6.2  tls 	_ALIGN
   1600  1.2.6.2  tls 0:	.asciz	"block memcpy failed: %x@%p != %x@%p byte %d\r\n"
   1601  1.2.6.2  tls 1:	.asciz	"memcpy(%p, %p, %lx)\r\n"
   1602  1.2.6.2  tls 	_ALIGN
   1603  1.2.6.2  tls 	.text
   1604  1.2.6.2  tls 2:
   1605  1.2.6.2  tls #endif
   1606  1.2.6.2  tls #if defined(_KERNEL) && !defined(_RUMPKERNEL)
   1607  1.2.6.2  tls 
   1608  1.2.6.2  tls /*
   1609  1.2.6.2  tls  * We have saved our possible fpstate, now disable the fpu
   1610  1.2.6.2  tls  * and continue with life.
   1611  1.2.6.2  tls  */
   1612  1.2.6.2  tls 	RESTORE_FPU
   1613  1.2.6.2  tls 	ret
   1614  1.2.6.2  tls 	 restore	%g1, 0, %o0			! Return DEST for memcpy
   1615  1.2.6.2  tls #endif
/*
 * Return used when the kernel-only sequence above is not compiled in.
 * When it is compiled in, the unconditional ret above comes first and no
 * label sits in between, so the two instructions below are dead code.
 */
   1616  1.2.6.2  tls  	retl
   1617  1.2.6.2  tls 	 mov	%g1, %o0	/* delay slot: return value is taken from %g1 */
   1618  1.2.6.2  tls /*
   1619  1.2.6.2  tls  * Use block_disable to turn off block instructions for
   1620  1.2.6.2  tls  * memcpy/memset.
   1621  1.2.6.2  tls  *
   1622  1.2.6.2  tls  * Exported 64-bit variable, 8-byte aligned, initial value 1.
   1623  1.2.6.2  tls  * NOTE(review): presumably a nonzero value means "disabled" (the
   1624  1.2.6.2  tls  * disabled self-check above stores a nonzero value on failure);
   1625  1.2.6.2  tls  * confirm against the code that reads this variable.
   1626  1.2.6.2  tls  */
   1622  1.2.6.2  tls 	.data
   1623  1.2.6.2  tls 	.align	8
   1624  1.2.6.2  tls 	.globl	block_disable
   1625  1.2.6.2  tls block_disable:	.xword	1
   1626  1.2.6.2  tls 	.text	/* switch back to the code section */
   1627  1.2.6.2  tls #endif	/* USE_BLOCK_STORE_LOAD */
   1628