1dbbd9e4bSmacallan/*
2dbbd9e4bSmacallan * VISmoveImage.S: High speed moveImage operations utilizing the UltraSPARC
3dbbd9e4bSmacallan *                 Visual Instruction Set.
4dbbd9e4bSmacallan *
5dbbd9e4bSmacallan * Copyright (C) 1998,1999 Jakub Jelinek (jakub@redhat.com)
6dbbd9e4bSmacallan *
7dbbd9e4bSmacallan * Permission is hereby granted, free of charge, to any person obtaining a copy
8dbbd9e4bSmacallan * of this software and associated documentation files (the "Software"), to deal
9dbbd9e4bSmacallan * in the Software without restriction, including without limitation the rights
10dbbd9e4bSmacallan * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11dbbd9e4bSmacallan * copies of the Software, and to permit persons to whom the Software is
12dbbd9e4bSmacallan * furnished to do so, subject to the following conditions:
13dbbd9e4bSmacallan *
14dbbd9e4bSmacallan * The above copyright notice and this permission notice shall be included in
15dbbd9e4bSmacallan * all copies or substantial portions of the Software.
16dbbd9e4bSmacallan *
17dbbd9e4bSmacallan * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18dbbd9e4bSmacallan * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19dbbd9e4bSmacallan * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20dbbd9e4bSmacallan * JAKUB JELINEK BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
21dbbd9e4bSmacallan * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22dbbd9e4bSmacallan * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23dbbd9e4bSmacallan */
24dbbd9e4bSmacallan
25dbbd9e4bSmacallan
26dbbd9e4bSmacallan/* NOTE NOTE NOTE: All loads in these routines _MUST_ be 64-byte block
27dbbd9e4bSmacallan *                 loads.  If any non-block loads are done to the frame-
28dbbd9e4bSmacallan *                 buffer when prefetching is enabled (which we use, when
29dbbd9e4bSmacallan *		   available) this will cause lockups on FFB2 due to a
30dbbd9e4bSmacallan *		   hardware bug. -DaveM
31dbbd9e4bSmacallan */
32dbbd9e4bSmacallan
33dbbd9e4bSmacallan/*
34dbbd9e4bSmacallan   void
35dbbd9e4bSmacallan   VISmoveImageLR(unsigned char *src, unsigned char *dst, long w, long h, long skind, long dkind)
36dbbd9e4bSmacallan   {
37dbbd9e4bSmacallan     int i;
38dbbd9e4bSmacallan
39dbbd9e4bSmacallan     // Handles copying of non-overlapping images and, for overlapping images,
40dbbd9e4bSmacallan     // copying where dstx <= srcx.
41dbbd9e4bSmacallan     // To copy overlapping images where dsty > srcy, set src and dst
42dbbd9e4bSmacallan     // to start of the last scanline and negate both skind and dkind arguments.
43dbbd9e4bSmacallan
44dbbd9e4bSmacallan     assert(abs(skind) >= w && abs(dkind) >= w);
45dbbd9e4bSmacallan
46dbbd9e4bSmacallan     while (h--) {
47dbbd9e4bSmacallan       for (i = 0; i < w; i++)
48dbbd9e4bSmacallan         *dst++ = *src++;
49dbbd9e4bSmacallan       src += skind - w;
50dbbd9e4bSmacallan       dst += dkind - w;
51dbbd9e4bSmacallan     }
52dbbd9e4bSmacallan   }
53dbbd9e4bSmacallan
54dbbd9e4bSmacallan   void
55dbbd9e4bSmacallan   VISmoveImageRL(unsigned char *src, unsigned char *dst, long w, long h, long skind, long dkind)
56dbbd9e4bSmacallan   {
57dbbd9e4bSmacallan     int i;
58dbbd9e4bSmacallan
59dbbd9e4bSmacallan     // Handles copying of non-overlapping images and, for overlapping images,
60dbbd9e4bSmacallan     // copying where dstx >= srcx.
61dbbd9e4bSmacallan     // To copy overlapping images where dsty > srcy, set src and dst
62dbbd9e4bSmacallan     // to start of the last scanline and negate both skind and dkind arguments.
63dbbd9e4bSmacallan
64dbbd9e4bSmacallan     assert(abs(skind) >= w && abs(dkind) >= w);
65dbbd9e4bSmacallan
66dbbd9e4bSmacallan     src += w;
67dbbd9e4bSmacallan     dst += w;
68dbbd9e4bSmacallan     while (h--) {
69dbbd9e4bSmacallan       for (i = 0; i < w; i++)
70dbbd9e4bSmacallan         *--dst = *--src;
71dbbd9e4bSmacallan       src += skind + w;
72dbbd9e4bSmacallan       dst += dkind + w;
73dbbd9e4bSmacallan     }
74dbbd9e4bSmacallan   }
75dbbd9e4bSmacallan
76dbbd9e4bSmacallan */
77dbbd9e4bSmacallan
/* Word-size configuration: choose the multiply mnemonic (SMUL) and the */
/* condition-code register (ICC) used by the conditional branches and moves. */
/* 64-bit builds get mulx and xcc, every other build gets smul and icc. */
/* The 64-bit branch also declares g2 and g3 as scratch registers, which */
/* the register map uses for srcstop and srcnotdone. */
/* NOTE(review): the two register declarations appear twice in this branch, */
/* the second pair repeats the first. */
78dbbd9e4bSmacallan#if defined(__sparc_v9__) || defined(__sparcv9) || defined(__arch64__)
79731e20d8Smacallan
80731e20d8Smacallan .register %g2, #scratch
81731e20d8Smacallan .register %g3, #scratch
82731e20d8Smacallan
83dbbd9e4bSmacallan#define SMUL			mulx
84dbbd9e4bSmacallan#define ICC			xcc
857a5333bcSmrg.register %g2,#scratch
867a5333bcSmrg.register %g3,#scratch
87dbbd9e4bSmacallan#else
88dbbd9e4bSmacallan#define SMUL			smul
89dbbd9e4bSmacallan#define ICC			icc
90dbbd9e4bSmacallan#endif
91dbbd9e4bSmacallan
/* Numeric constants for alternate-space accesses. */
/* ASI_BLK_P is the address space passed to ldda and stda for the 64-byte */
/* block loads and block stores in the macros below. */
/* ASI_PST8_P and SYNC are not referenced in the part of the file reviewed */
/* here. NOTE(review): by name they look like the 8-bit partial-store */
/* address space and the membar Sync mask, confirm against the CPU manual. */
92dbbd9e4bSmacallan#define ASI_PST8_P		0xc0
93dbbd9e4bSmacallan#define ASI_BLK_P		0xf0
94dbbd9e4bSmacallan#define SYNC			0x40
95dbbd9e4bSmacallan
/* Symbolic register names. Each name expands to a bare register name, so */
/* the code writes a percent sign in front of the symbol at every use. */
/* src, dst, w, h, skind and dkind are the six incoming arguments in i0-i5. */
/* leftw2 is an alias of rightw2, both expand to o4. */
96dbbd9e4bSmacallan/* Register usage:								*/
97dbbd9e4bSmacallan/*   integer registers:								*/
98dbbd9e4bSmacallan#define src			i0
99dbbd9e4bSmacallan#define dst			i1
100dbbd9e4bSmacallan#define w			i2
101dbbd9e4bSmacallan#define h			i3
102dbbd9e4bSmacallan#define skind			i4
103dbbd9e4bSmacallan#define dkind			i5
104dbbd9e4bSmacallan#define branchbase		l0
105dbbd9e4bSmacallan#define tmp1			l1
106dbbd9e4bSmacallan#define tmp4			l2
107dbbd9e4bSmacallan#define tmp5			l3
108dbbd9e4bSmacallan#define leftw			l4
109dbbd9e4bSmacallan#define rightw			l5
110dbbd9e4bSmacallan#define srcstart		l6
111dbbd9e4bSmacallan#define mode			l7
112dbbd9e4bSmacallan#define fregset			o0		/* Must be [og]? for v8plusa */
113dbbd9e4bSmacallan#define srcend			o1
114dbbd9e4bSmacallan#define srcn			o2
115dbbd9e4bSmacallan#define srcnext			o3
116dbbd9e4bSmacallan#define rightw2			o4
117dbbd9e4bSmacallan#define tmp2			o5		/* Must be [og]? for v8plusa */
118dbbd9e4bSmacallan#define narrow			o7
119dbbd9e4bSmacallan#define prepw			g1		/* Must be [og]? for v8plusa */
120dbbd9e4bSmacallan#define srcstop			g2
121dbbd9e4bSmacallan#define srcnotdone		g3
122dbbd9e4bSmacallan#define curw			g4		/* Must be [og]? for v8plusa */
123dbbd9e4bSmacallan#define leftw2			rightw2
124dbbd9e4bSmacallan#define tmp3			g5		/* Must be [og]? for v8plusa */
125dbbd9e4bSmacallan/*   floating point registers:							*/
/* ftmp1 to ftmp8 are the even-numbered registers f0 to f14. */
126dbbd9e4bSmacallan#define ftmp1			f0
127dbbd9e4bSmacallan#define ftmp2			f2
128dbbd9e4bSmacallan#define ftmp3			f4
129dbbd9e4bSmacallan#define ftmp4			f6
130dbbd9e4bSmacallan#define ftmp5			f8
131dbbd9e4bSmacallan#define ftmp6			f10
132dbbd9e4bSmacallan#define ftmp7			f12
133dbbd9e4bSmacallan#define ftmp8			f14
/* The next three names describe register ranges: the staging registers */
/* written by faligndata and stored by STORE, and the two banks filled */
/* alternately by the block loads. The code reviewed here spells out the */
/* individual registers rather than using these three names. */
134dbbd9e4bSmacallan#define store_regs		f16-f31
135dbbd9e4bSmacallan#define load_bank1		f32-f46
136dbbd9e4bSmacallan#define load_bank2		f48-f62
/* fnop moves ftmp2 onto itself, so it changes no data. */
137dbbd9e4bSmacallan#define fnop			fmovd	%ftmp2, %ftmp2
138dbbd9e4bSmacallan
/* LOAD(first, last, tgt, tgtr): refill one load bank inside the copy loop. */
/* The first two parameter names shadow register names: the preprocessor */
/* replaces them with the first and the last register of the bank to refill. */
/*   1. curw is decreased by 64 and the condition codes are set. */
/*   2. Branch to tgt when curw was at most 64 (unsigned) before the */
/*      subtraction. */
/*   3. Delay slot, executed whether or not the branch is taken: copy the */
/*      last bank register to ftmp1 before the bank is overwritten. */
/*   4. Fall-through only: block-load the bank from src, then advance src */
/*      by 64. */
/* tgtr is emitted as a label directly after the sequence. */
/* NOTE(review): presumably the code at tgt jumps back to tgtr, confirm in */
/* the part of the file that defines the tgt labels. */
139dbbd9e4bSmacallan#define LOAD(f32,f46,tgt,tgtr)							\
140dbbd9e4bSmacallan	subcc			%curw, 64, %curw;				\
141dbbd9e4bSmacallan	bleu,pn			%ICC, tgt;					\
142dbbd9e4bSmacallan	 fmovd			%f46, %ftmp1;					\
143dbbd9e4bSmacallan	ldda			[%src] ASI_BLK_P, %f32;				\
144dbbd9e4bSmacallan	add			%src, 64, %src;					\
145dbbd9e4bSmacallantgtr:
146dbbd9e4bSmacallan
/* PREPLOAD(first, last, tgt, tgtr): bank refill used by the prepl entries. */
/* Same shape as LOAD, but the decision is a register test on prepw and */
/* neither prepw nor the condition codes are modified. */
/*   1. Branch to tgt when prepw is zero or negative. */
/*   2. Delay slot, executed whether or not the branch is taken: copy the */
/*      last bank register to ftmp1 before the bank is overwritten. */
/*   3. Fall-through only: block-load the bank from src, then advance src */
/*      by 64. */
/* tgtr is emitted as a label directly after the sequence. */
/* NOTE(review): presumably the code at tgt jumps back to tgtr, confirm in */
/* the part of the file that defines the tgt labels. */
147dbbd9e4bSmacallan#define PREPLOAD(f32,f46,tgt,tgtr)						\
148dbbd9e4bSmacallan	brlez,pn		%prepw, tgt;					\
149dbbd9e4bSmacallan	 fmovd			%f46, %ftmp1;					\
150dbbd9e4bSmacallan	ldda			[%src] ASI_BLK_P, %f32;				\
151dbbd9e4bSmacallan	add			%src, 64, %src;					\
152dbbd9e4bSmacallantgtr:
153dbbd9e4bSmacallan
/* STORE: block-store the staging registers, starting at f16, to the */
/* address in dst. dst is not advanced here, the callers add 64 themselves. */
154dbbd9e4bSmacallan#define STORE									\
155dbbd9e4bSmacallan	stda			%f16, [%dst] ASI_BLK_P;
156dbbd9e4bSmacallan
/* FREG_FROB: emit eight faligndata instructions whose results land in the */
/* staging registers f16, f18, f20, f22, f24, f26, f28 and f30, in that order. */
/* The 24 parameters form eight triples, one per output register: */
/*   lower-case name  = first source register of the faligndata */
/*   A0 to A7         = statement fragment emitted just before that */
/*                      faligndata, may be empty */
/*   upper-case name  = second source register of the faligndata */
/* The parameter names shadow register names, so the preprocessor replaces */
/* them with whatever registers the caller passes. */
/* The callers reviewed here leave seven fragments empty and pass one LOAD */
/* or PREPLOAD, placed at the point where the sequence crosses from one */
/* load bank to the other. The faligndata after that fragment reads ftmp1, */
/* which the fragment has just filled with the last register of the bank. */
157dbbd9e4bSmacallan#define FREG_FROB(f0,A0,F2,f2,A1,F4,f4,A2,F6,f6,A3,F8,f8,A4,F10,f10,A5,F12,f12,A6,F14,f14,A7,F16) 	\
158dbbd9e4bSmacallan	A0									\
159dbbd9e4bSmacallan	faligndata		%f0,%F2,%f16;					\
160dbbd9e4bSmacallan	A1									\
161dbbd9e4bSmacallan	faligndata		%f2,%F4,%f18;					\
162dbbd9e4bSmacallan	A2									\
163dbbd9e4bSmacallan	faligndata		%f4,%F6,%f20;					\
164dbbd9e4bSmacallan	A3									\
165dbbd9e4bSmacallan	faligndata		%f6,%F8,%f22;					\
166dbbd9e4bSmacallan	A4									\
167dbbd9e4bSmacallan	faligndata		%f8,%F10,%f24;					\
168dbbd9e4bSmacallan	A5									\
169dbbd9e4bSmacallan	faligndata		%f10,%F12,%f26;					\
170dbbd9e4bSmacallan	A6									\
171dbbd9e4bSmacallan	faligndata		%f12,%F14,%f28;					\
172dbbd9e4bSmacallan	A7									\
173dbbd9e4bSmacallan	faligndata		%f14,%F16,%f30;
174dbbd9e4bSmacallan
/* Copyright tag, emitted as an unlabeled NUL-terminated string in the */
/* read-only data section. No label is attached to it. */
175dbbd9e4bSmacallan	.section		".rodata"
176dbbd9e4bSmacallan	.asciz			"VISmoveImage (C) 1998,1999 Jakub Jelinek"
177dbbd9e4bSmacallan
178dbbd9e4bSmacallan/* The code might not be self-explanatory, but it was written to be processed
179dbbd9e4bSmacallan * by machines, not humans.  Comments are deliberately left as an exercise
180dbbd9e4bSmacallan * to the occasional reader.  */
181dbbd9e4bSmacallan
182dbbd9e4bSmacallan	.text
183dbbd9e4bSmacallan	.globl			VISmoveImageLR
184dbbd9e4bSmacallan	.align			32
185dbbd9e4bSmacallanVISmoveImageLR:
186731e20d8Smacallan#ifdef __arch64__
187731e20d8Smacallan	save			%sp, -192, %sp				! Group 0
188731e20d8Smacallan#else
189731e20d8Smacallan	save			%sp, -96, %sp				! Group 0
190731e20d8Smacallan#endif
191dbbd9e4bSmacallan0:	rd			%pc, %tmp3				! Group 1
192dbbd9e4bSmacallan	sub			%src, %dst, %mode			! Group 7
193dbbd9e4bSmacallan	brz,pn			%h, return
194dbbd9e4bSmacallan	 neg			%dst, %leftw				! Group 8
195dbbd9e4bSmacallan	mov			%src, %srcstart
196dbbd9e4bSmacallan	andn			%src, 63, %src				! Group 9
197dbbd9e4bSmacallan	cmp			%w, 128
198dbbd9e4bSmacallan	blu,pn			%ICC, prepare_narrow
199dbbd9e4bSmacallan	 and			%mode, 63, %mode			! Group 10
200dbbd9e4bSmacallan	add			%dst, %w, %rightw
201dbbd9e4bSmacallan	ldda			[%src] ASI_BLK_P, %f32			! Group 11
202dbbd9e4bSmacallan	add			%src, 64, %src				! Group 12
203dbbd9e4bSmacallan	clr			%narrow
204dbbd9e4bSmacallan	ldda			[%src] ASI_BLK_P, %f48			! Group 13
205dbbd9e4bSmacallan	and			%leftw, 63, %leftw			! Group 14
206dbbd9e4bSmacallan	sub			%rightw, 1, %rightw
207dbbd9e4bSmacallan	andn			%dst, 63, %dst				! Group 15
208dbbd9e4bSmacallan	and			%rightw, 63, %rightw
209dbbd9e4bSmacallan	alignaddr		%mode, %g0, %g0				! Group 16
210dbbd9e4bSmacallan	add			%rightw, 1, %rightw			! Group 17
211dbbd9e4bSmacallan	clr			%fregset
212dbbd9e4bSmacallan	add			%src, 64, %src				! Group 18
213dbbd9e4bSmacallan	add			%tmp3, (BranchBase - 0b), %branchbase
214dbbd9e4bSmacallan	mov			64, %prepw				! Group 19
215dbbd9e4bSmacallan	ba,pt			%xcc, roll_wide
216dbbd9e4bSmacallan	 sub			%h, 1, %srcnotdone
217dbbd9e4bSmacallanprepare_narrow:
218dbbd9e4bSmacallan#if defined(__sparc_v9__) || defined(__sparcv9) || defined(__arch64__)
219dbbd9e4bSmacallan	brlez,pn		%w, return
220dbbd9e4bSmacallan#else
221dbbd9e4bSmacallan	tst			%w
222dbbd9e4bSmacallan	ble,pn			%icc, return
223dbbd9e4bSmacallan#endif
224dbbd9e4bSmacallan	 add			%dst, %w, %rightw			! Group 11
225dbbd9e4bSmacallan	and			%leftw, 63, %leftw
226dbbd9e4bSmacallan	ldda			[%src] ASI_BLK_P, %f32			! Group 12
227dbbd9e4bSmacallan	sub			%rightw, 1, %rightw			! Group 13
228dbbd9e4bSmacallan	andn			%dst, 63, %dst
229dbbd9e4bSmacallan	and			%rightw, 63, %rightw			! Group 14
230dbbd9e4bSmacallan	sub			%w, %leftw, %tmp2
231dbbd9e4bSmacallan	add			%rightw, 1, %rightw			! Group 15
232dbbd9e4bSmacallan	clr			%fregset
233dbbd9e4bSmacallan	subcc			%tmp2, %rightw, %curw			! Group 16
234dbbd9e4bSmacallan	and			%srcstart, 63, %tmp1
235dbbd9e4bSmacallan	mov			%curw, %prepw				! Group 17
236dbbd9e4bSmacallan	add			%tmp1, %w, %tmp1
237dbbd9e4bSmacallan	bg,pt			%ICC, 1f
238dbbd9e4bSmacallan	 add			%src, 64, %src				! Group 18
239dbbd9e4bSmacallan	cmp			%tmp1, 128
240dbbd9e4bSmacallan	movg			%icc, 64, %prepw			! Group 19
241dbbd9e4bSmacallan1:	add			%srcstart, %w, %srcend
242dbbd9e4bSmacallan	sub			%h, 1, %srcnotdone			! Group 20
243dbbd9e4bSmacallan	add			%srcstart, %skind, %srcnext
244dbbd9e4bSmacallan	add			%srcend, 63, %tmp4			! Group 21
245dbbd9e4bSmacallan	andn			%srcnext, 63, %srcnext
246dbbd9e4bSmacallan	add			%w, 64, %tmp2				! Group 22
247dbbd9e4bSmacallan	cmp			%skind, 0
248dbbd9e4bSmacallan	bl,pn			%ICC, 1f
249dbbd9e4bSmacallan	 clr			%srcstop				! Group 23
250dbbd9e4bSmacallan	cmp			%skind, %tmp2
251dbbd9e4bSmacallan	bgeu,pt			%ICC, 2f
252dbbd9e4bSmacallan	 andn			%tmp4, 63, %tmp2			! Group 24
253dbbd9e4bSmacallan	SMUL			%skind, %srcnotdone, %srcstop
254dbbd9e4bSmacallan	ba,pt			%xcc, 2f
255dbbd9e4bSmacallan	 add			%tmp4, %srcstop, %srcstop
256dbbd9e4bSmacallan1:	addcc			%tmp2, %skind, %g0
257dbbd9e4bSmacallan	ble,pt			%ICC, 2f
258dbbd9e4bSmacallan	 sub			%srcnext, 64, %tmp2
259dbbd9e4bSmacallan	SMUL			%skind, %srcnotdone, %srcstop
260dbbd9e4bSmacallan	sub			%srcstart, 64, %narrow
261dbbd9e4bSmacallan	add			%srcstop, %narrow, %srcstop
262dbbd9e4bSmacallan2:	add			%srcnext, 64, %narrow
263dbbd9e4bSmacallan	andn			%tmp4, 63, %srcend			! Group 25
264dbbd9e4bSmacallan	mov			%srcnext, %srcn
265dbbd9e4bSmacallan	cmp			%narrow, %srcend			! Group 26
266dbbd9e4bSmacallan	andn			%srcstop, 63, %srcstop
267dbbd9e4bSmacallan	move			%ICC, %tmp2, %srcnext			! Group 27
268dbbd9e4bSmacallan	alignaddr		%mode, %g0, %g0				! Group 28
269dbbd9e4bSmacallan	add			%tmp3, (BranchBase - 0b), %branchbase
270dbbd9e4bSmacallan	mov			1, %narrow				! Group 29
271dbbd9e4bSmacallan	cmp			%srcnext, %srcstop
272dbbd9e4bSmacallan	move			%ICC, 0, %srcnotdone			! Group 30
273dbbd9e4bSmacallan	cmp			%tmp1, 64
274dbbd9e4bSmacallan	bleu,pn			%ICC, 1f
275dbbd9e4bSmacallan	 dec			%h					! Group 31
276dbbd9e4bSmacallan	ldda			[%src] ASI_BLK_P, %f48			! Group 32
277dbbd9e4bSmacallan	ba,pt			%xcc, roll_narrow			! Group 33
278dbbd9e4bSmacallan	 addcc			%src, 64, %src
279dbbd9e4bSmacallan1:	brz,a,pn		%srcnotdone, roll_narrow
280dbbd9e4bSmacallan	 membar			#Sync
281dbbd9e4bSmacallan	ldda			[%srcnext] ASI_BLK_P, %f48
282dbbd9e4bSmacallan	ba,pt			%xcc, roll_narrow
283dbbd9e4bSmacallan	 addcc			%srcnext, 64, %src
284dbbd9e4bSmacallan
285dbbd9e4bSmacallan	.align			32
286dbbd9e4bSmacallanprepr0:	faligndata		%f32, %f34, %f16
287dbbd9e4bSmacallan	faligndata		%f34, %f36, %f18
288dbbd9e4bSmacallan	faligndata		%f36, %f38, %f20
289dbbd9e4bSmacallan	faligndata		%f38, %f40, %f22
290dbbd9e4bSmacallan	faligndata		%f40, %f42, %f24
291dbbd9e4bSmacallan	faligndata		%f42, %f44, %f26
292dbbd9e4bSmacallan	ba,pt			%xcc, 4f
293dbbd9e4bSmacallan	 faligndata		%f44, %f46, %f28
294dbbd9e4bSmacallan	.align			32
295dbbd9e4bSmacallanprepr1:	faligndata		%f48, %f50, %f16
296dbbd9e4bSmacallan	faligndata		%f50, %f52, %f18
297dbbd9e4bSmacallan	faligndata		%f52, %f54, %f20
298dbbd9e4bSmacallan	faligndata		%f54, %f56, %f22
299dbbd9e4bSmacallan	faligndata		%f56, %f58, %f24
300dbbd9e4bSmacallan	faligndata		%f58, %f60, %f26
301dbbd9e4bSmacallan	ba,pt			%xcc, 5f
302dbbd9e4bSmacallan	 faligndata		%f60, %f62, %f28
303dbbd9e4bSmacallan	.align			32
304dbbd9e4bSmacallanprepr2:	faligndata		%f34, %f36, %f16
305dbbd9e4bSmacallan	faligndata		%f36, %f38, %f18
306dbbd9e4bSmacallan	faligndata		%f38, %f40, %f20
307dbbd9e4bSmacallan	faligndata		%f40, %f42, %f22
308dbbd9e4bSmacallan	faligndata		%f42, %f44, %f24
309dbbd9e4bSmacallan	faligndata		%f44, %f46, %f26
310dbbd9e4bSmacallan	ba,pt			%xcc, narrowst
311dbbd9e4bSmacallan	 faligndata		%f46, %f0, %f28
312dbbd9e4bSmacallan	.align			32
313dbbd9e4bSmacallanprepr3:	faligndata		%f50, %f52, %f16
314dbbd9e4bSmacallan	faligndata		%f52, %f54, %f18
315dbbd9e4bSmacallan	faligndata		%f54, %f56, %f20
316dbbd9e4bSmacallan	faligndata		%f56, %f58, %f22
317dbbd9e4bSmacallan	faligndata		%f58, %f60, %f24
318dbbd9e4bSmacallan	faligndata		%f60, %f62, %f26
319dbbd9e4bSmacallan	ba,pt			%xcc, narrowst
320dbbd9e4bSmacallan	 faligndata		%f62, %f0, %f28
321dbbd9e4bSmacallan	.align			32
322dbbd9e4bSmacallanprepr4:	faligndata		%f36, %f38, %f16
323dbbd9e4bSmacallan	faligndata		%f38, %f40, %f18
324dbbd9e4bSmacallan	faligndata		%f40, %f42, %f20
325dbbd9e4bSmacallan	faligndata		%f42, %f44, %f22
326dbbd9e4bSmacallan	faligndata		%f44, %f46, %f24
327dbbd9e4bSmacallan	ba,pt			%xcc, narrowst
328dbbd9e4bSmacallan	 faligndata		%f46, %f0, %f26
329dbbd9e4bSmacallan	.align			32
330dbbd9e4bSmacallanprepr5:	faligndata		%f52, %f54, %f16
331dbbd9e4bSmacallan	faligndata		%f54, %f56, %f18
332dbbd9e4bSmacallan	faligndata		%f56, %f58, %f20
333dbbd9e4bSmacallan	faligndata		%f58, %f60, %f22
334dbbd9e4bSmacallan	faligndata		%f60, %f62, %f24
335dbbd9e4bSmacallan	ba,pt			%xcc, narrowst
336dbbd9e4bSmacallan	 faligndata		%f62, %f0, %f26
337dbbd9e4bSmacallan	.align			32
338dbbd9e4bSmacallanprepr6:	faligndata		%f38, %f40, %f16
339dbbd9e4bSmacallan	faligndata		%f40, %f42, %f18
340dbbd9e4bSmacallan	faligndata		%f42, %f44, %f20
341dbbd9e4bSmacallan	faligndata		%f44, %f46, %f22
342dbbd9e4bSmacallan	ba,pt			%xcc, narrowst
343dbbd9e4bSmacallan	 faligndata		%f46, %f0, %f24
344dbbd9e4bSmacallan	.align			32
345dbbd9e4bSmacallanprepr7:	faligndata		%f54, %f56, %f16
346dbbd9e4bSmacallan	faligndata		%f56, %f58, %f18
347dbbd9e4bSmacallan	faligndata		%f58, %f60, %f20
348dbbd9e4bSmacallan	faligndata		%f60, %f62, %f22
349dbbd9e4bSmacallan	ba,pt			%xcc, narrowst
350dbbd9e4bSmacallan	 faligndata		%f62, %f0, %f24
351dbbd9e4bSmacallan	.align			32
352dbbd9e4bSmacallanprepr8:	faligndata		%f40, %f42, %f16
353dbbd9e4bSmacallan	faligndata		%f42, %f44, %f18
354dbbd9e4bSmacallan	faligndata		%f44, %f46, %f20
355dbbd9e4bSmacallan	ba,pt			%xcc, narrowst
356dbbd9e4bSmacallan	 faligndata		%f46, %f0, %f22
357dbbd9e4bSmacallan	.align			32
358dbbd9e4bSmacallanprepr9:	faligndata		%f56, %f58, %f16
359dbbd9e4bSmacallan	faligndata		%f58, %f60, %f18
360dbbd9e4bSmacallan	faligndata		%f60, %f62, %f20
361dbbd9e4bSmacallan	ba,pt			%xcc, narrowst
362dbbd9e4bSmacallan	 faligndata		%f62, %f0, %f22
363dbbd9e4bSmacallan	.align			32
364dbbd9e4bSmacallanprepr10:faligndata		%f42, %f44, %f16
365dbbd9e4bSmacallan	faligndata		%f44, %f46, %f18
366dbbd9e4bSmacallan	ba,pt			%xcc, narrowst
367dbbd9e4bSmacallan	 faligndata		%f46, %f0, %f20
368dbbd9e4bSmacallan	.align			32
369dbbd9e4bSmacallanprepr11:faligndata		%f58, %f60, %f16
370dbbd9e4bSmacallan	faligndata		%f60, %f62, %f18
371dbbd9e4bSmacallan	ba,pt			%xcc, narrowst
372dbbd9e4bSmacallan	 faligndata		%f62, %f0, %f20
373dbbd9e4bSmacallan	.align			32
374dbbd9e4bSmacallanprepr12:faligndata		%f44, %f46, %f16
375dbbd9e4bSmacallan	ba,pt			%xcc, narrowst
376dbbd9e4bSmacallan	 faligndata		%f46, %f0, %f18
377dbbd9e4bSmacallan	.align			32
378dbbd9e4bSmacallanprepr13:faligndata		%f60, %f62, %f16
379dbbd9e4bSmacallan	ba,pt			%xcc, narrowst
380dbbd9e4bSmacallan	 faligndata		%f62, %f0, %f18
381dbbd9e4bSmacallan	.align			32
382dbbd9e4bSmacallanprepr14:ba,pt			%xcc, narrowst
383dbbd9e4bSmacallan	 faligndata		%f46, %f0, %f16
384dbbd9e4bSmacallan	nop
385dbbd9e4bSmacallan	nop
386dbbd9e4bSmacallan4:	ba,pt			%xcc, narrowst
387dbbd9e4bSmacallan	 faligndata		%f46, %f0, %f30
388dbbd9e4bSmacallan	.align			32
389dbbd9e4bSmacallanprepr15:ba,pt			%xcc, narrowst
390dbbd9e4bSmacallan	 faligndata		%f62, %f0, %f16
391dbbd9e4bSmacallan	nop
392dbbd9e4bSmacallan	nop
393dbbd9e4bSmacallan5:	ba,pt			%xcc, narrowst
394dbbd9e4bSmacallan	 faligndata		%f62, %f0, %f30
395dbbd9e4bSmacallan
396dbbd9e4bSmacallan	.align			32
397dbbd9e4bSmacallanprepn0:	faligndata		%ftmp1, %f32, %f30
398dbbd9e4bSmacallan	ba,pt			%xcc, leftst
399dbbd9e4bSmacallan	 mov			(vis0 - BranchBase), %tmp3
400dbbd9e4bSmacallan	nop
401dbbd9e4bSmacallan1:	ba,pt			%xcc, leftst
402dbbd9e4bSmacallan	 mov			(vis12 - BranchBase), %tmp3
403dbbd9e4bSmacallan	.align			32
404dbbd9e4bSmacallanprepn1:	faligndata		%ftmp1, %f48, %f30
405dbbd9e4bSmacallan	ba,pt			%xcc, leftst
406dbbd9e4bSmacallan	 mov			(vis1 - BranchBase), %tmp3
407dbbd9e4bSmacallan	nop
408dbbd9e4bSmacallan2:	ba,pt			%xcc, leftst
409dbbd9e4bSmacallan	 mov			(vis13 - BranchBase), %tmp3
410dbbd9e4bSmacallan	.align			32
411dbbd9e4bSmacallanprepn2: faligndata		%ftmp1, %f32, %f28
412dbbd9e4bSmacallan	faligndata		%f32, %f34, %f30
413dbbd9e4bSmacallan	ba,pt			%xcc, leftst
414dbbd9e4bSmacallan	 mov			(vis2 - BranchBase), %tmp3
415dbbd9e4bSmacallan3:	faligndata		%f44, %f46, %f30
416dbbd9e4bSmacallan	ba,pt			%xcc, leftst
417dbbd9e4bSmacallan	 mov			(vis14 - BranchBase), %tmp3
418dbbd9e4bSmacallan	.align			32
419dbbd9e4bSmacallanprepn3: faligndata		%ftmp1, %f48, %f28
420dbbd9e4bSmacallan	faligndata		%f48, %f50, %f30
421dbbd9e4bSmacallan	ba,pt			%xcc, leftst
422dbbd9e4bSmacallan	 mov			(vis3 - BranchBase), %tmp3
423dbbd9e4bSmacallan	.align			32
424dbbd9e4bSmacallanprepn4: faligndata		%ftmp1, %f32, %f26
425dbbd9e4bSmacallan	faligndata		%f32, %f34, %f28
426dbbd9e4bSmacallan	faligndata		%f34, %f36, %f30
427dbbd9e4bSmacallan	ba,pt			%xcc, leftst
428dbbd9e4bSmacallan	 mov			(vis4 - BranchBase), %tmp3
429dbbd9e4bSmacallan	.align			32
430dbbd9e4bSmacallanprepn5:	faligndata		%ftmp1, %f48, %f26
431dbbd9e4bSmacallan	faligndata		%f48, %f50, %f28
432dbbd9e4bSmacallan	faligndata		%f50, %f52, %f30
433dbbd9e4bSmacallan	ba,pt			%xcc, leftst
434dbbd9e4bSmacallan	 mov			(vis5 - BranchBase), %tmp3
435dbbd9e4bSmacallan	.align			32
436dbbd9e4bSmacallanprepn6: faligndata		%ftmp1, %f32, %f24
437dbbd9e4bSmacallan	faligndata		%f32, %f34, %f26
438dbbd9e4bSmacallan	faligndata		%f34, %f36, %f28
439dbbd9e4bSmacallan	faligndata		%f36, %f38, %f30
440dbbd9e4bSmacallan	ba,pt			%xcc, leftst
441dbbd9e4bSmacallan	 mov			(vis6 - BranchBase), %tmp3
442dbbd9e4bSmacallan	.align			32
443dbbd9e4bSmacallanprepn7:	faligndata		%ftmp1, %f48, %f24
444dbbd9e4bSmacallan	faligndata		%f48, %f50, %f26
445dbbd9e4bSmacallan	faligndata		%f50, %f52, %f28
446dbbd9e4bSmacallan	faligndata		%f52, %f54, %f30
447dbbd9e4bSmacallan	ba,pt			%xcc, leftst
448dbbd9e4bSmacallan	 mov			(vis7 - BranchBase), %tmp3
449dbbd9e4bSmacallan	.align			32
450dbbd9e4bSmacallanprepn8:	faligndata		%ftmp1, %f32, %f22
451dbbd9e4bSmacallan	faligndata		%f32, %f34, %f24
452dbbd9e4bSmacallan	faligndata		%f34, %f36, %f26
453dbbd9e4bSmacallan	faligndata		%f36, %f38, %f28
454dbbd9e4bSmacallan	faligndata		%f38, %f40, %f30
455dbbd9e4bSmacallan	ba,pt			%xcc, leftst
456dbbd9e4bSmacallan	 mov			(vis8 - BranchBase), %tmp3
457dbbd9e4bSmacallan	.align			32
458dbbd9e4bSmacallanprepn9:	faligndata		%ftmp1, %f48, %f22
459dbbd9e4bSmacallan	faligndata		%f48, %f50, %f24
460dbbd9e4bSmacallan	faligndata		%f50, %f52, %f26
461dbbd9e4bSmacallan	faligndata		%f52, %f54, %f28
462dbbd9e4bSmacallan	faligndata		%f54, %f56, %f30
463dbbd9e4bSmacallan	ba,pt			%xcc, leftst
464dbbd9e4bSmacallan	 mov			(vis9 - BranchBase), %tmp3
465dbbd9e4bSmacallan	.align			32
466dbbd9e4bSmacallanprepn10:faligndata		%ftmp1, %f32, %f20
467dbbd9e4bSmacallan	faligndata		%f32, %f34, %f22
468dbbd9e4bSmacallan	faligndata		%f34, %f36, %f24
469dbbd9e4bSmacallan	faligndata		%f36, %f38, %f26
470dbbd9e4bSmacallan	faligndata		%f38, %f40, %f28
471dbbd9e4bSmacallan	faligndata		%f40, %f42, %f30
472dbbd9e4bSmacallan	ba,pt			%xcc, leftst
473dbbd9e4bSmacallan	 mov			(vis10 - BranchBase), %tmp3
474dbbd9e4bSmacallan	.align			32
475dbbd9e4bSmacallanprepn11:faligndata		%ftmp1, %f48, %f20
476dbbd9e4bSmacallan	faligndata		%f48, %f50, %f22
477dbbd9e4bSmacallan	faligndata		%f50, %f52, %f24
478dbbd9e4bSmacallan	faligndata		%f52, %f54, %f26
479dbbd9e4bSmacallan	faligndata		%f54, %f56, %f28
480dbbd9e4bSmacallan	faligndata		%f56, %f58, %f30
481dbbd9e4bSmacallan	ba,pt			%xcc, leftst
482dbbd9e4bSmacallan	 mov			(vis11 - BranchBase), %tmp3
483dbbd9e4bSmacallan	.align			32
484dbbd9e4bSmacallanprepn12:faligndata		%ftmp1, %f32, %f18
485dbbd9e4bSmacallan	faligndata		%f32, %f34, %f20
486dbbd9e4bSmacallan	faligndata		%f34, %f36, %f22
487dbbd9e4bSmacallan	faligndata		%f36, %f38, %f24
488dbbd9e4bSmacallan	faligndata		%f38, %f40, %f26
489dbbd9e4bSmacallan	faligndata		%f40, %f42, %f28
490dbbd9e4bSmacallan	ba,pt			%xcc, 1b
491dbbd9e4bSmacallan	 faligndata		%f42, %f44, %f30
492dbbd9e4bSmacallan	.align			32
493dbbd9e4bSmacallanprepn13:faligndata		%ftmp1, %f48, %f18
494dbbd9e4bSmacallan	faligndata		%f48, %f50, %f20
495dbbd9e4bSmacallan	faligndata		%f50, %f52, %f22
496dbbd9e4bSmacallan	faligndata		%f52, %f54, %f24
497dbbd9e4bSmacallan	faligndata		%f54, %f56, %f26
498dbbd9e4bSmacallan	faligndata		%f56, %f58, %f28
499dbbd9e4bSmacallan	ba,pt			%xcc, 2b
500dbbd9e4bSmacallan	 faligndata		%f58, %f60, %f30
501dbbd9e4bSmacallan	.align			32
502dbbd9e4bSmacallanprepn14:faligndata		%ftmp1, %f32, %f16
503dbbd9e4bSmacallan	faligndata		%f32, %f34, %f18
504dbbd9e4bSmacallan	faligndata		%f34, %f36, %f20
505dbbd9e4bSmacallan	faligndata		%f36, %f38, %f22
506dbbd9e4bSmacallan	faligndata		%f38, %f40, %f24
507dbbd9e4bSmacallan	faligndata		%f40, %f42, %f26
508dbbd9e4bSmacallan	ba,pt			%xcc, 3b
509dbbd9e4bSmacallan	 faligndata		%f42, %f44, %f28
510dbbd9e4bSmacallan	.align			32
511dbbd9e4bSmacallanprepn15:faligndata		%ftmp1, %f48, %f16
512dbbd9e4bSmacallan	faligndata		%f48, %f50, %f18
513dbbd9e4bSmacallan	faligndata		%f50, %f52, %f20
514dbbd9e4bSmacallan	faligndata		%f52, %f54, %f22
515dbbd9e4bSmacallan	faligndata		%f54, %f56, %f24
516dbbd9e4bSmacallan	faligndata		%f56, %f58, %f26
517dbbd9e4bSmacallan	faligndata		%f58, %f60, %f28
518dbbd9e4bSmacallan	faligndata		%f60, %f62, %f30
519dbbd9e4bSmacallan	ba,pt			%xcc, leftst
520dbbd9e4bSmacallan	 mov			(vis15 - BranchBase), %tmp3
521dbbd9e4bSmacallan
522dbbd9e4bSmacallan	.align			64
523dbbd9e4bSmacallanBranchBase:
524dbbd9e4bSmacallanprepl0:	FREG_FROB(f32,,f34,f34,,f36,f36,,f38,f38,,f40,f40,,f42,f42,,f44,f44,,f46,ftmp1,PREPLOAD(f32,f46,prep0e,prep0r),f48)
525dbbd9e4bSmacallan	ba,pt			%xcc, leftst
526dbbd9e4bSmacallan	 mov			(vis1 - BranchBase), %tmp3
527dbbd9e4bSmacallan	.align			64
528dbbd9e4bSmacallanprepl1:	FREG_FROB(f48,,f50,f50,,f52,f52,,f54,f54,,f56,f56,,f58,f58,,f60,f60,,f62,ftmp1,PREPLOAD(f48,f62,prep1e,prep1r),f32)
529dbbd9e4bSmacallan	ba,pt			%xcc, leftst
530dbbd9e4bSmacallan	 mov			(vis0 - BranchBase), %tmp3
531dbbd9e4bSmacallan	.align			64
532dbbd9e4bSmacallanprepl2:	FREG_FROB(f34,,f36,f36,,f38,f38,,f40,f40,,f42,f42,,f44,f44,,f46,ftmp1,PREPLOAD(f32,f46,prep2e,prep2r),f48,f48,,f50)
533dbbd9e4bSmacallan	ba,pt			%xcc, leftst
534dbbd9e4bSmacallan	 mov			(vis3 - BranchBase), %tmp3
535dbbd9e4bSmacallan	.align			64
536dbbd9e4bSmacallanprepl3:	FREG_FROB(f50,,f52,f52,,f54,f54,,f56,f56,,f58,f58,,f60,f60,,f62,ftmp1,PREPLOAD(f48,f62,prep3e,prep3r),f32,f32,,f34)
537dbbd9e4bSmacallan	ba,pt			%xcc, leftst
538dbbd9e4bSmacallan	 mov			(vis2 - BranchBase), %tmp3
539dbbd9e4bSmacallan	.align			64
540dbbd9e4bSmacallanprepl4:	FREG_FROB(f36,,f38,f38,,f40,f40,,f42,f42,,f44,f44,,f46,ftmp1,PREPLOAD(f32,f46,prep4e,prep4r),f48,f48,,f50,f50,,f52)
541dbbd9e4bSmacallan	ba,pt			%xcc, leftst
542dbbd9e4bSmacallan	 mov			(vis5 - BranchBase), %tmp3
543dbbd9e4bSmacallan	.align			64
544dbbd9e4bSmacallanprepl5:	FREG_FROB(f52,,f54,f54,,f56,f56,,f58,f58,,f60,f60,,f62,ftmp1,PREPLOAD(f48,f62,prep5e,prep5r),f32,f32,,f34,f34,,f36)
545dbbd9e4bSmacallan	ba,pt			%xcc, leftst
546dbbd9e4bSmacallan	 mov			(vis4 - BranchBase), %tmp3
547dbbd9e4bSmacallan	.align			64
548dbbd9e4bSmacallanprepl6:	FREG_FROB(f38,,f40,f40,,f42,f42,,f44,f44,,f46,ftmp1,PREPLOAD(f32,f46,prep6e,prep6r),f48,f48,,f50,f50,,f52,f52,,f54)
549dbbd9e4bSmacallan	ba,pt			%xcc, leftst
550dbbd9e4bSmacallan	 mov			(vis7 - BranchBase), %tmp3
551dbbd9e4bSmacallan	.align			64
552dbbd9e4bSmacallanprepl7:	FREG_FROB(f54,,f56,f56,,f58,f58,,f60,f60,,f62,ftmp1,PREPLOAD(f48,f62,prep7e,prep7r),f32,f32,,f34,f34,,f36,f36,,f38)
553dbbd9e4bSmacallan	ba,pt			%xcc, leftst
554dbbd9e4bSmacallan	 mov			(vis6 - BranchBase), %tmp3
555dbbd9e4bSmacallan	.align			64
556dbbd9e4bSmacallanprepl8:	FREG_FROB(f40,,f42,f42,,f44,f44,,f46,ftmp1,PREPLOAD(f32,f46,prep8e,prep8r),f48,f48,,f50,f50,,f52,f52,,f54,f54,,f56)
557dbbd9e4bSmacallan	ba,pt			%xcc, leftst
558dbbd9e4bSmacallan	 mov			(vis9 - BranchBase), %tmp3
559dbbd9e4bSmacallan	.align			64
560dbbd9e4bSmacallanprepl9:	FREG_FROB(f56,,f58,f58,,f60,f60,,f62,ftmp1,PREPLOAD(f48,f62,prep9e,prep9r),f32,f32,,f34,f34,,f36,f36,,f38,f38,,f40)
561dbbd9e4bSmacallan	ba,pt			%xcc, leftst
562dbbd9e4bSmacallan	 mov			(vis8 - BranchBase), %tmp3
563dbbd9e4bSmacallan	.align			64
564dbbd9e4bSmacallanprepl10:FREG_FROB(f42,,f44,f44,,f46,ftmp1,PREPLOAD(f32,f46,prep10e,prep10r),f48,f48,,f50,f50,,f52,f52,,f54,f54,,f56,f56,,f58)
565dbbd9e4bSmacallan	ba,pt			%xcc, leftst
566dbbd9e4bSmacallan	 mov			(vis11 - BranchBase), %tmp3
567dbbd9e4bSmacallan	.align			64
568dbbd9e4bSmacallanprepl11:FREG_FROB(f58,,f60,f60,,f62,ftmp1,PREPLOAD(f48,f62,prep11e,prep11r),f32,f32,,f34,f34,,f36,f36,,f38,f38,,f40,f40,,f42)
569dbbd9e4bSmacallan	ba,pt			%xcc, leftst
570dbbd9e4bSmacallan	 mov			(vis10 - BranchBase), %tmp3
571dbbd9e4bSmacallan	.align			64
572dbbd9e4bSmacallanprepl12:FREG_FROB(f44,,f46,ftmp1,PREPLOAD(f32,f46,prep12e,prep12r),f48,f48,,f50,f50,,f52,f52,,f54,f54,,f56,f56,,f58,f58,,f60)
573dbbd9e4bSmacallan	ba,pt			%xcc, leftst
574dbbd9e4bSmacallan	 mov			(vis13 - BranchBase), %tmp3
575dbbd9e4bSmacallan	.align			64
576dbbd9e4bSmacallanprepl13:FREG_FROB(f60,,f62,ftmp1,PREPLOAD(f48,f62,prep13e,prep13r),f32,f32,,f34,f34,,f36,f36,,f38,f38,,f40,f40,,f42,f42,,f44)
577dbbd9e4bSmacallan	ba,pt			%xcc, leftst
578dbbd9e4bSmacallan	 mov			(vis12 - BranchBase), %tmp3
579dbbd9e4bSmacallan	.align			64
580dbbd9e4bSmacallanprepl14:FREG_FROB(ftmp1,PREPLOAD(f32,f46,prep14e,prep14r),f48,f48,,f50,f50,,f52,f52,,f54,f54,,f56,f56,,f58,f58,,f60,f60,,f62)
581dbbd9e4bSmacallan	ba,pt			%xcc, leftst
582dbbd9e4bSmacallan	 mov			(vis15 - BranchBase), %tmp3
583dbbd9e4bSmacallan	.align			64
584dbbd9e4bSmacallanprepl15:FREG_FROB(ftmp1,PREPLOAD(f48,f62,prep15e,prep15r),f32,f32,,f34,f34,,f36,f36,,f38,f38,,f40,f40,,f42,f42,,f44,f44,,f46)
585dbbd9e4bSmacallan	ba,pt			%xcc, leftst
586dbbd9e4bSmacallan	 mov			(vis14 - BranchBase), %tmp3
587dbbd9e4bSmacallan
588dbbd9e4bSmacallan	.align			128
589dbbd9e4bSmacallanvis0:	FREG_FROB(f32,,f34,f34,,f36,f36,,f38,f38,,f40,f40,,f42,f42,,f44,f44,,f46,ftmp1,LOAD(f32,f46,vis0e,vis0r),f48)
590dbbd9e4bSmacallan	STORE
591dbbd9e4bSmacallan	add			%dst, 64, %dst
592dbbd9e4bSmacallanvis1:	FREG_FROB(f48,,f50,f50,,f52,f52,,f54,f54,,f56,f56,,f58,f58,,f60,f60,,f62,ftmp1,LOAD(f48,f62,vis1e,vis1r),f32)
593dbbd9e4bSmacallan	STORE
594dbbd9e4bSmacallan	ba,pt			%xcc, vis0
595dbbd9e4bSmacallan	 add			%dst, 64, %dst
596dbbd9e4bSmacallan	.align			128
/*
 * vis2 .. vis15: remaining entries of the unrolled copy-loop table (the
 * table starts with vis0/vis1 above this point).  Entries come in pairs,
 * each pair starting on a 128-byte boundary: the even entry falls through
 * into the odd one and the odd entry branches back to the even one, so a
 * pair forms a loop that advances %dst by 64 after every STORE.
 * FREG_FROB, LOAD and STORE are macros defined earlier in the file; the
 * third LOAD argument (visNe) is the label defined by the matching VISEND
 * expansion, the fourth (visNr) is the label VISEND branches back to.
 * NOTE(review): the 128-byte alignment looks tied to the dispatch offset
 * computed in roll_wide/roll_narrow, ((%mode << 4) & ~127) + %fregset --
 * confirm against BranchBase and the LOAD macro before moving code here.
 */
597dbbd9e4bSmacallanvis2:	FREG_FROB(f34,,f36,f36,,f38,f38,,f40,f40,,f42,f42,,f44,f44,,f46,ftmp1,LOAD(f32,f46,vis2e,vis2r),f48,f48,,f50)
598dbbd9e4bSmacallan	STORE
599dbbd9e4bSmacallan	add			%dst, 64, %dst
600dbbd9e4bSmacallanvis3:	FREG_FROB(f50,,f52,f52,,f54,f54,,f56,f56,,f58,f58,,f60,f60,,f62,ftmp1,LOAD(f48,f62,vis3e,vis3r),f32,f32,,f34)
601dbbd9e4bSmacallan	STORE
602dbbd9e4bSmacallan	ba,pt			%xcc, vis2
603dbbd9e4bSmacallan	 add			%dst, 64, %dst
604dbbd9e4bSmacallan	.align			128
605dbbd9e4bSmacallanvis4:	FREG_FROB(f36,,f38,f38,,f40,f40,,f42,f42,,f44,f44,,f46,ftmp1,LOAD(f32,f46,vis4e,vis4r),f48,f48,,f50,f50,,f52)
606dbbd9e4bSmacallan	STORE
607dbbd9e4bSmacallan	add			%dst, 64, %dst
608dbbd9e4bSmacallanvis5:	FREG_FROB(f52,,f54,f54,,f56,f56,,f58,f58,,f60,f60,,f62,ftmp1,LOAD(f48,f62,vis5e,vis5r),f32,f32,,f34,f34,,f36)
609dbbd9e4bSmacallan	STORE
610dbbd9e4bSmacallan	ba,pt			%xcc, vis4
611dbbd9e4bSmacallan	 add			%dst, 64, %dst
612dbbd9e4bSmacallan	.align			128
613dbbd9e4bSmacallanvis6:	FREG_FROB(f38,,f40,f40,,f42,f42,,f44,f44,,f46,ftmp1,LOAD(f32,f46,vis6e,vis6r),f48,f48,,f50,f50,,f52,f52,,f54)
614dbbd9e4bSmacallan	STORE
615dbbd9e4bSmacallan	add			%dst, 64, %dst
616dbbd9e4bSmacallanvis7:	FREG_FROB(f54,,f56,f56,,f58,f58,,f60,f60,,f62,ftmp1,LOAD(f48,f62,vis7e,vis7r),f32,f32,,f34,f34,,f36,f36,,f38)
617dbbd9e4bSmacallan	STORE
618dbbd9e4bSmacallan	ba,pt			%xcc, vis6
619dbbd9e4bSmacallan	 add			%dst, 64, %dst
620dbbd9e4bSmacallan	.align			128
621dbbd9e4bSmacallanvis8:	FREG_FROB(f40,,f42,f42,,f44,f44,,f46,ftmp1,LOAD(f32,f46,vis8e,vis8r),f48,f48,,f50,f50,,f52,f52,,f54,f54,,f56)
622dbbd9e4bSmacallan	STORE
623dbbd9e4bSmacallan	add			%dst, 64, %dst
624dbbd9e4bSmacallanvis9:	FREG_FROB(f56,,f58,f58,,f60,f60,,f62,ftmp1,LOAD(f48,f62,vis9e,vis9r),f32,f32,,f34,f34,,f36,f36,,f38,f38,,f40)
625dbbd9e4bSmacallan	STORE
626dbbd9e4bSmacallan	ba,pt			%xcc, vis8
627dbbd9e4bSmacallan	 add			%dst, 64, %dst
628dbbd9e4bSmacallan	.align			128
629dbbd9e4bSmacallanvis10:	FREG_FROB(f42,,f44,f44,,f46,ftmp1,LOAD(f32,f46,vis10e,vis10r),f48,f48,,f50,f50,,f52,f52,,f54,f54,,f56,f56,,f58)
630dbbd9e4bSmacallan	STORE
631dbbd9e4bSmacallan	add			%dst, 64, %dst
632dbbd9e4bSmacallanvis11:	FREG_FROB(f58,,f60,f60,,f62,ftmp1,LOAD(f48,f62,vis11e,vis11r),f32,f32,,f34,f34,,f36,f36,,f38,f38,,f40,f40,,f42)
633dbbd9e4bSmacallan	STORE
634dbbd9e4bSmacallan	ba,pt			%xcc, vis10
635dbbd9e4bSmacallan	 add			%dst, 64, %dst
636dbbd9e4bSmacallan	.align			128
637dbbd9e4bSmacallanvis12:	FREG_FROB(f44,,f46,ftmp1,LOAD(f32,f46,vis12e,vis12r),f48,f48,,f50,f50,,f52,f52,,f54,f54,,f56,f56,,f58,f58,,f60)
638dbbd9e4bSmacallan	STORE
639dbbd9e4bSmacallan	add			%dst, 64, %dst
640dbbd9e4bSmacallanvis13:	FREG_FROB(f60,,f62,ftmp1,LOAD(f48,f62,vis13e,vis13r),f32,f32,,f34,f34,,f36,f36,,f38,f38,,f40,f40,,f42,f42,,f44)
641dbbd9e4bSmacallan	STORE
642dbbd9e4bSmacallan	ba,pt			%xcc, vis12
643dbbd9e4bSmacallan	 add			%dst, 64, %dst
644dbbd9e4bSmacallan	.align			128
645dbbd9e4bSmacallanvis14:	FREG_FROB(ftmp1,LOAD(f32,f46,vis14e,vis14r),f48,f48,,f50,f50,,f52,f52,,f54,f54,,f56,f56,,f58,f58,,f60,f60,,f62)
646dbbd9e4bSmacallan	STORE
647dbbd9e4bSmacallan	add			%dst, 64, %dst
648dbbd9e4bSmacallanvis15:	FREG_FROB(ftmp1,LOAD(f48,f62,vis15e,vis15r),f32,f32,,f34,f34,,f36,f36,,f38,f38,,f40,f40,,f42,f42,,f44,f44,,f46)
649dbbd9e4bSmacallan	STORE
650dbbd9e4bSmacallan	ba,pt			%xcc, vis14
651dbbd9e4bSmacallan	 add			%dst, 64, %dst
652dbbd9e4bSmacallan
/*
 * leftst: store the left-edge part of the current row, i.e. the %leftw
 * bytes that end at %dst, taken from the tail of %f16-%f30.
 *
 * If %curw is negative, control goes to narrowst + 4 instead; the delay
 * slot below repeats the first instruction of narrowst, which is why that
 * instruction can be skipped.
 *
 * Otherwise the store sequence is entered through a computed jump:
 *   - %leftw not a multiple of 8: jump into the ba/stda table (8 bytes per
 *     entry) at (2: - 8) - (%leftw & 0x38).  The selected entry does one
 *     partial store (ASI_PST8_P, byte mask in %tmp5) to the 8-byte aligned
 *     address in %tmp4 and then branches into the std run at 2: for the
 *     remaining whole doublewords.
 *   - %leftw a multiple of 8: jump (%leftw / 8) std instructions before 3:.
 * Both paths finish with "jmpl %branchbase + %tmp3", so %tmp3 must hold the
 * continuation offset on entry (it is not set in this routine).
 */
653dbbd9e4bSmacallanleftst:	brlz,pn			%curw, narrowst + 4
654dbbd9e4bSmacallan	 and			%leftw, 0x38, %tmp1
655dbbd9e4bSmacallan	andcc			%leftw, 7, %g0
656dbbd9e4bSmacallan	be,pn			%icc, 1f
657dbbd9e4bSmacallan	 neg			%tmp1
/* Unaligned start: %tmp4 = first byte to write, %tmp5 = its edge mask. */
658dbbd9e4bSmacallan	sub			%dst, %leftw, %tmp4
659dbbd9e4bSmacallan	add			%branchbase, (2f - BranchBase - 8), %tmp2
660dbbd9e4bSmacallan	edge8			%tmp4, %g0, %tmp5
661dbbd9e4bSmacallan	jmpl			%tmp2 + %tmp1, %g0
662dbbd9e4bSmacallan	 andn			%tmp4, 7, %tmp4
/* Aligned start: %tmp1 is halved because std entries are 4 bytes each. */
663dbbd9e4bSmacallan1:	addcc			%branchbase, (3f - BranchBase), %tmp2
664dbbd9e4bSmacallan	sra			%tmp1, 1, %tmp1
665dbbd9e4bSmacallan	jmpl			%tmp2 + %tmp1, %g0
666dbbd9e4bSmacallan	 nop
/* Partial-store table; each entry continues at the std after its register. */
667dbbd9e4bSmacallan	ba,pt			%xcc, 2f+0x00
668dbbd9e4bSmacallan	 stda			%f16, [%tmp4 + %tmp5] ASI_PST8_P
669dbbd9e4bSmacallan	ba,pt			%xcc, 2f+0x04
670dbbd9e4bSmacallan	 stda			%f18, [%tmp4 + %tmp5] ASI_PST8_P
671dbbd9e4bSmacallan	ba,pt			%xcc, 2f+0x08
672dbbd9e4bSmacallan	 stda			%f20, [%tmp4 + %tmp5] ASI_PST8_P
673dbbd9e4bSmacallan	ba,pt			%xcc, 2f+0x0c
674dbbd9e4bSmacallan	 stda			%f22, [%tmp4 + %tmp5] ASI_PST8_P
675dbbd9e4bSmacallan	ba,pt			%xcc, 2f+0x10
676dbbd9e4bSmacallan	 stda			%f24, [%tmp4 + %tmp5] ASI_PST8_P
677dbbd9e4bSmacallan	ba,pt			%xcc, 2f+0x14
678dbbd9e4bSmacallan	 stda			%f26, [%tmp4 + %tmp5] ASI_PST8_P
679dbbd9e4bSmacallan	ba,pt			%xcc, 2f+0x18
680dbbd9e4bSmacallan	 stda			%f28, [%tmp4 + %tmp5] ASI_PST8_P
681dbbd9e4bSmacallan	jmpl			%branchbase + %tmp3, %g0
682dbbd9e4bSmacallan	 stda			%f30, [%tmp4 + %tmp5] ASI_PST8_P
/* Whole-doubleword run, ending at %dst - 8. */
683dbbd9e4bSmacallan2:	std			%f18, [%dst - 0x38]
684dbbd9e4bSmacallan	std			%f20, [%dst - 0x30]
685dbbd9e4bSmacallan	std			%f22, [%dst - 0x28]
686dbbd9e4bSmacallan	std			%f24, [%dst - 0x20]
687dbbd9e4bSmacallan	std			%f26, [%dst - 0x18]
688dbbd9e4bSmacallan	std			%f28, [%dst - 0x10]
689dbbd9e4bSmacallan	std			%f30, [%dst - 0x08]
690dbbd9e4bSmacallan3:	jmpl			%branchbase + %tmp3, %g0
691dbbd9e4bSmacallan	 nop
692dbbd9e4bSmacallan
/*
 * rightst: store the right-edge part of the current row (%rightw bytes
 * starting at %dst, from the head of %f16-%f30), then step to the next row.
 *
 *   %rightw == 64      one 64-byte block store (ASI_BLK_P), reached via 2:.
 *   %rightw & 7 != 0   computed jump into the ba/stda table (8 bytes per
 *                      entry): one partial store with byte mask %tmp5 at
 *                      %dst + (%rightw & 0x38), then the std run at 3: for
 *                      the whole doublewords below it.
 *   otherwise          computed jump (%rightw / 8) std instructions
 *                      before 4:.
 * Every path adds %skind to %mode before reaching 4:.
 */
693dbbd9e4bSmacallanrightst:cmp			%rightw, 64
694dbbd9e4bSmacallan	be,pn			%icc, 2f
695dbbd9e4bSmacallan	 and			%rightw, 0x38, %tmp1
696dbbd9e4bSmacallan	andcc			%rightw, 7, %g0
697dbbd9e4bSmacallan	be,pn			%icc, 1f
698dbbd9e4bSmacallan	 neg			%tmp1
699dbbd9e4bSmacallan	sub			%g0, %rightw, %tmp5
700dbbd9e4bSmacallan	add			%branchbase, (3f - BranchBase - 8), %tmp2
701dbbd9e4bSmacallan	edge8l			%tmp5, %g0, %tmp5
702dbbd9e4bSmacallan	add			%mode, %skind, %mode
703dbbd9e4bSmacallan	jmpl			%tmp1 + %tmp2, %g0
704dbbd9e4bSmacallan	 sub			%dst, %tmp1, %tmp4
/* Multiple-of-8 width: %tmp1 is halved because std entries are 4 bytes. */
705dbbd9e4bSmacallan1:	addcc			%branchbase, (4f - BranchBase), %tmp2
706dbbd9e4bSmacallan	sra			%tmp1, 1, %tmp1
707dbbd9e4bSmacallan	jmpl			%tmp1 + %tmp2, %g0
/* 2: is both the delay slot of the jmpl above and the full-block entry. */
708dbbd9e4bSmacallan2:	 add			%mode, %skind, %mode
709dbbd9e4bSmacallan	ba,pt			%xcc, 4f
710dbbd9e4bSmacallan	 stda			%f16, [%dst] ASI_BLK_P
711dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x00
712dbbd9e4bSmacallan	 stda			%f30, [%tmp4 + %tmp5] ASI_PST8_P
713dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x04
714dbbd9e4bSmacallan	 stda			%f28, [%tmp4 + %tmp5] ASI_PST8_P
715dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x08
716dbbd9e4bSmacallan	 stda			%f26, [%tmp4 + %tmp5] ASI_PST8_P
717dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x0c
718dbbd9e4bSmacallan	 stda			%f24, [%tmp4 + %tmp5] ASI_PST8_P
719dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x10
720dbbd9e4bSmacallan	 stda			%f22, [%tmp4 + %tmp5] ASI_PST8_P
721dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x14
722dbbd9e4bSmacallan	 stda			%f20, [%tmp4 + %tmp5] ASI_PST8_P
723dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x18
724dbbd9e4bSmacallan	 stda			%f18, [%tmp4 + %tmp5] ASI_PST8_P
725dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x1c
726dbbd9e4bSmacallan	 stda			%f16, [%tmp4 + %tmp5] ASI_PST8_P
727dbbd9e4bSmacallan3:	std			%f28, [%dst + 0x30]
728dbbd9e4bSmacallan	std			%f26, [%dst + 0x28]
729dbbd9e4bSmacallan	std			%f24, [%dst + 0x20]
730dbbd9e4bSmacallan	std			%f22, [%dst + 0x18]
731dbbd9e4bSmacallan	std			%f20, [%dst + 0x10]
732dbbd9e4bSmacallan	std			%f18, [%dst + 0x08]
733dbbd9e4bSmacallan	std			%f16, [%dst + 0x00]
/*
 * 4: row finished.  Narrow rows (%narrow != 0) continue at narrow_cont.
 * For wide rows, return once %srcnotdone underflows; otherwise re-derive
 * the alignment for the next row (%mode -= %dkind, alignaddr, %mode &= 63).
 * If %srcnext differs from %srcn only %fregset is toggled (annulled delay
 * slot); if they are equal, one more 64-byte block is loaded from %src into
 * %f32.. (%fregset != 0) or %f48.. (%fregset == 0) and %src advances by 64.
 */
734dbbd9e4bSmacallan4:	brnz,pn			%narrow, narrow_cont
735dbbd9e4bSmacallan	 deccc			%srcnotdone
736dbbd9e4bSmacallan	blu,pn			%ICC, return
737dbbd9e4bSmacallan	 sub			%mode, %dkind, %mode
738dbbd9e4bSmacallan	alignaddr		%mode, %g0, %g0
739dbbd9e4bSmacallan	and			%mode, 63, %mode
740dbbd9e4bSmacallan	cmp			%srcnext, %srcn
741dbbd9e4bSmacallan	bne,a,pn		%ICC, 6f
742dbbd9e4bSmacallan	 xor			%fregset, 64, %fregset
743dbbd9e4bSmacallan	brnz,a,pn		%fregset, 5f
744dbbd9e4bSmacallan	 ldda			[%src] ASI_BLK_P, %f32
745dbbd9e4bSmacallan	ldda			[%src] ASI_BLK_P, %f48
746dbbd9e4bSmacallan5:	add			%src, 64, %src
/*
 * 6: move %dst and %srcstart to the next row and recompute the edge widths:
 * %rightw = ((%rightw + %dkind - 1) & 63) + 1, %leftw = (%leftw - %dkind) & 63,
 * %dst rounded down to 64.  Execution then falls through into roll_wide.
 */
747dbbd9e4bSmacallan6:	add			%dst, %dkind, %dst
748dbbd9e4bSmacallan	sub			%w, %rightw, %tmp1
749dbbd9e4bSmacallan	add			%srcstart, %skind, %srcstart
750dbbd9e4bSmacallan	sub			%dst, %tmp1, %dst
751dbbd9e4bSmacallan	add			%rightw, %dkind, %rightw
752dbbd9e4bSmacallan	andn			%dst, 63, %dst
753dbbd9e4bSmacallan	sub			%rightw, 1, %rightw
754dbbd9e4bSmacallan	sub			%leftw, %dkind, %leftw
755dbbd9e4bSmacallan	and			%rightw, 63, %rightw
756dbbd9e4bSmacallan	and			%leftw, 63, %leftw
757dbbd9e4bSmacallan	add			%rightw, 1, %rightw
/*
 * roll_wide: set up one wide row and dispatch relative to BranchBase.
 *
 *   %srcend  = (%srcstart + %w + 63) & ~63
 *   %srcn    = (%srcstart + %skind) & ~63
 *   %srcnext = %srcn, or %srcend when %srcn + 64 == %srcend
 *   %rightw2 = (%mode & 7) + %rightw
 *   %tmp3    = %w - %leftw
 *   %curw    = %w - %leftw - %rightw		(jmpl delay slot)
 *   %dst    += 64 when %leftw != 0
 *
 * Dispatch offset: ((%mode << 4) & ~127) + %fregset, replaced by half of
 * that plus (prepn0 - BranchBase) when %mode >= %leftw (unsigned compare).
 * NOTE(review): the interleaved fnops look like instruction-grouping
 * padding -- confirm before removing or reordering.
 */
758dbbd9e4bSmacallanroll_wide:
759dbbd9e4bSmacallan	add			%srcstart, %w, %srcend
760dbbd9e4bSmacallan	add			%srcstart, %skind, %srcnext
761dbbd9e4bSmacallan	fnop
762dbbd9e4bSmacallan	add			%srcend, 63, %srcend
763dbbd9e4bSmacallan	andncc			%srcnext, 63, %srcnext
764dbbd9e4bSmacallan	fnop
765dbbd9e4bSmacallan	sll			%mode, 4, %tmp1
766dbbd9e4bSmacallan	andn			%srcend, 63, %srcend
767dbbd9e4bSmacallan	fnop
768dbbd9e4bSmacallan	mov			%srcnext, %srcn
769dbbd9e4bSmacallan	and			%mode, 7, %rightw2
770dbbd9e4bSmacallan	fnop
771dbbd9e4bSmacallan	andn			%tmp1, 127, %tmp1
772dbbd9e4bSmacallan	add			%rightw2, %rightw, %rightw2
773dbbd9e4bSmacallan	fnop
774dbbd9e4bSmacallan	add			%tmp1, %fregset, %tmp1
775dbbd9e4bSmacallan	subcc			%w, %leftw, %tmp3
776dbbd9e4bSmacallan	fnop
777dbbd9e4bSmacallan	srl			%tmp1, 1, %tmp2
778dbbd9e4bSmacallan	add			%srcnext, 64, %tmp4
779dbbd9e4bSmacallan	fnop
780dbbd9e4bSmacallan	add			%tmp2, (prepn0 - BranchBase), %tmp2
781dbbd9e4bSmacallan	cmp			%mode, %leftw
782dbbd9e4bSmacallan	movgeu			%icc, %tmp2, %tmp1
783dbbd9e4bSmacallan	cmp			%tmp4, %srcend
784dbbd9e4bSmacallan	add			%dst, 64, %tmp4
785dbbd9e4bSmacallan	move			%ICC, %srcend, %srcnext
786dbbd9e4bSmacallan	movrnz			%leftw, %tmp4, %dst
787dbbd9e4bSmacallan	jmpl			%branchbase + %tmp1, %g0
788dbbd9e4bSmacallan	 sub			%tmp3, %rightw, %curw
789dbbd9e4bSmacallan
/*
 * return: common exit.  "return %i7+8" returns to the caller and restores
 * its register window; the delay slot rewrites %fprs -- 4 on NetBSD, 0
 * elsewhere.
 * NOTE(review): per SPARC V9, bit 2 of %fprs is FEF (FPU enable) and bits
 * 0-1 are the dirty flags, so the NetBSD value appears to leave the FPU
 * enabled with the dirty bits cleared while the other value disables it --
 * confirm against the platform requirements.
 */
790dbbd9e4bSmacallanreturn: return			%i7+8
791731e20d8Smacallan#if defined(__NetBSD__)
792731e20d8Smacallan	 wr			%g0, 4, %fprs
793731e20d8Smacallan#else
794dbbd9e4bSmacallan	 wr			%g0, 0, %fprs
795731e20d8Smacallan#endif
796dbbd9e4bSmacallan
/*
 * narrowst: store a whole narrow row (%w bytes starting at %dst - %leftw)
 * from %f16-%f30.  leftst branches to narrowst + 4 after executing the
 * first instruction below in its own delay slot.
 *
 *   %tmp5    = address of the first byte, later rounded down to 8
 *   %fregset = address of the last byte (%tmp5 + %w - 1), later rounded
 *              down to 8
 *   %srcend  = edge8 mask for the last, possibly partial, doubleword
 *   %tmp2    = edge8 mask for the first doubleword (unaligned start only)
 *   %tmp4    = remaining byte count for the chain at 2:
 *              (%w - (%leftw & 7) for an unaligned start, %w otherwise)
 *
 * Unaligned start (%leftw & 7 != 0): computed jump into the ba/stda table
 * (8 bytes per entry): one partial store, then into the chain at 2:.
 * Aligned start: computed jump to (3: - 8) - 16 * (%leftw / 8), i.e. an
 * entry of the chain at 2: (16 bytes per entry), or the ba/nop just before
 * 3: when %leftw & 0x38 is 0.
 * Each chain entry at 2: stores a full doubleword while at least 8 bytes
 * remain; otherwise it branches to the matching entry at 3:, which does a
 * final partial store at %fregset with mask %srcend when %tmp4 > -8.
 */
797dbbd9e4bSmacallannarrowst:
798dbbd9e4bSmacallan	and			%leftw, 0x38, %tmp1
799dbbd9e4bSmacallan	sub			%dst, %leftw, %tmp5
800dbbd9e4bSmacallan	sub			%w, 1, %fregset
801dbbd9e4bSmacallan	andcc			%leftw, 7, %tmp4
802dbbd9e4bSmacallan	add			%fregset, %tmp5, %fregset
803dbbd9e4bSmacallan	neg			%tmp1
804dbbd9e4bSmacallan	and			%fregset, 7, %srcend
805dbbd9e4bSmacallan	be,pn			%icc, 1f
806dbbd9e4bSmacallan	 edge8			%g0, %srcend, %srcend
807dbbd9e4bSmacallan	add			%tmp1, (2f - BranchBase - 8), %tmp1
808dbbd9e4bSmacallan	edge8			%tmp5, %fregset, %tmp2
809dbbd9e4bSmacallan	andn			%tmp5, 7, %tmp5
810dbbd9e4bSmacallan	andn			%fregset, 7, %fregset
811dbbd9e4bSmacallan	jmpl			%branchbase + %tmp1, %g0
812dbbd9e4bSmacallan	 sub			%w, %tmp4, %tmp4
813dbbd9e4bSmacallan1:	addcc			%branchbase, (3f - BranchBase - 8), %tmp2
814dbbd9e4bSmacallan	sll			%tmp1, 1, %tmp1
815dbbd9e4bSmacallan	andn			%fregset, 7, %fregset
816dbbd9e4bSmacallan	jmpl			%tmp2 + %tmp1, %g0
817dbbd9e4bSmacallan	 mov			%w, %tmp4
/* Partial first store; each entry continues at the chain slot after it. */
818dbbd9e4bSmacallan	ba,pt			%xcc, 2f+0x00
819dbbd9e4bSmacallan	 stda			%f16, [%tmp5 + %tmp2] ASI_PST8_P
820dbbd9e4bSmacallan	ba,pt			%xcc, 2f+0x10
821dbbd9e4bSmacallan	 stda			%f18, [%tmp5 + %tmp2] ASI_PST8_P
822dbbd9e4bSmacallan	ba,pt			%xcc, 2f+0x20
823dbbd9e4bSmacallan	 stda			%f20, [%tmp5 + %tmp2] ASI_PST8_P
824dbbd9e4bSmacallan	ba,pt			%xcc, 2f+0x30
825dbbd9e4bSmacallan	 stda			%f22, [%tmp5 + %tmp2] ASI_PST8_P
826dbbd9e4bSmacallan	ba,pt			%xcc, 2f+0x40
827dbbd9e4bSmacallan	 stda			%f24, [%tmp5 + %tmp2] ASI_PST8_P
828dbbd9e4bSmacallan	ba,pt			%xcc, 2f+0x50
829dbbd9e4bSmacallan	 stda			%f26, [%tmp5 + %tmp2] ASI_PST8_P
830dbbd9e4bSmacallan	ba,pt			%xcc, 2f+0x60
831dbbd9e4bSmacallan	 stda			%f28, [%tmp5 + %tmp2] ASI_PST8_P
832dbbd9e4bSmacallan	ba,pt			%xcc, 4f
833dbbd9e4bSmacallan	 stda			%f30, [%tmp5 + %tmp2] ASI_PST8_P
/* Whole-doubleword chain: 16 bytes per entry, exits to 3: + same offset. */
834dbbd9e4bSmacallan2:	subcc			%tmp4, 8, %tmp4
835dbbd9e4bSmacallan	bl,pn			%icc, 3f+0x00
836dbbd9e4bSmacallan	 fnop
837dbbd9e4bSmacallan	std			%f18, [%dst - 0x38]
838dbbd9e4bSmacallan	subcc			%tmp4, 8, %tmp4
839dbbd9e4bSmacallan	bl,pn			%icc, 3f+0x10
840dbbd9e4bSmacallan	 fnop
841dbbd9e4bSmacallan	std			%f20, [%dst - 0x30]
842dbbd9e4bSmacallan	subcc			%tmp4, 8, %tmp4
843dbbd9e4bSmacallan	bl,pn			%icc, 3f+0x20
844dbbd9e4bSmacallan	 fnop
845dbbd9e4bSmacallan	std			%f22, [%dst - 0x28]
846dbbd9e4bSmacallan	subcc			%tmp4, 8, %tmp4
847dbbd9e4bSmacallan	bl,pn			%icc, 3f+0x30
848dbbd9e4bSmacallan	 fnop
849dbbd9e4bSmacallan	std			%f24, [%dst - 0x20]
850dbbd9e4bSmacallan	subcc			%tmp4, 8, %tmp4
851dbbd9e4bSmacallan	bl,pn			%icc, 3f+0x40
852dbbd9e4bSmacallan	 fnop
853dbbd9e4bSmacallan	std			%f26, [%dst - 0x18]
854dbbd9e4bSmacallan	subcc			%tmp4, 8, %tmp4
855dbbd9e4bSmacallan	bl,pn			%icc, 3f+0x50
856dbbd9e4bSmacallan	 fnop
857dbbd9e4bSmacallan	std			%f28, [%dst - 0x10]
858dbbd9e4bSmacallan	subcc			%tmp4, 8, %tmp4
859dbbd9e4bSmacallan	bl,pn			%icc, 3f+0x60
860dbbd9e4bSmacallan	 fnop
861dbbd9e4bSmacallan	std			%f30, [%dst - 0x08]
862dbbd9e4bSmacallan	ba,pt			%xcc, 4f
863dbbd9e4bSmacallan	 nop
/* Tail table: the stda runs only when its annulling branch is taken. */
864dbbd9e4bSmacallan3:	cmp			%tmp4, -8
865dbbd9e4bSmacallan	bg,a,pn			%ICC, 4f
866dbbd9e4bSmacallan	 stda			%f18, [%fregset + %srcend] ASI_PST8_P
867dbbd9e4bSmacallan	ba,a,pt			%xcc, 4f
868dbbd9e4bSmacallan	cmp			%tmp4, -8
869dbbd9e4bSmacallan	bg,a,pn			%ICC, 4f
870dbbd9e4bSmacallan	 stda			%f20, [%fregset + %srcend] ASI_PST8_P
871dbbd9e4bSmacallan	ba,a,pt			%xcc, 4f
872dbbd9e4bSmacallan	cmp			%tmp4, -8
873dbbd9e4bSmacallan	bg,a,pn			%ICC, 4f
874dbbd9e4bSmacallan	 stda			%f22, [%fregset + %srcend] ASI_PST8_P
875dbbd9e4bSmacallan	ba,a,pt			%xcc, 4f
876dbbd9e4bSmacallan	cmp			%tmp4, -8
877dbbd9e4bSmacallan	bg,a,pn			%ICC, 4f
878dbbd9e4bSmacallan	 stda			%f24, [%fregset + %srcend] ASI_PST8_P
879dbbd9e4bSmacallan	ba,a,pt			%xcc, 4f
880dbbd9e4bSmacallan	cmp			%tmp4, -8
881dbbd9e4bSmacallan	bg,a,pn			%ICC, 4f
882dbbd9e4bSmacallan	 stda			%f26, [%fregset + %srcend] ASI_PST8_P
883dbbd9e4bSmacallan	ba,a,pt			%xcc, 4f
884dbbd9e4bSmacallan	cmp			%tmp4, -8
885dbbd9e4bSmacallan	bg,a,pn			%ICC, 4f
886dbbd9e4bSmacallan	 stda			%f28, [%fregset + %srcend] ASI_PST8_P
887dbbd9e4bSmacallan	ba,a,pt			%xcc, 4f
888dbbd9e4bSmacallan	cmp			%tmp4, -8
889dbbd9e4bSmacallan	bg,a,pn			%ICC, 4f
890dbbd9e4bSmacallan	 stda			%f30, [%fregset + %srcend] ASI_PST8_P
/*
 * 4: common tail: %dst -= 64, %mode += %skind and
 * %fregset = ((%tmp3 & 60) + 96) & 64.  Returns when %h is 0, otherwise
 * falls through into narrow_cont.
 */
891dbbd9e4bSmacallan4:	and			%tmp3, 60, %tmp3
892dbbd9e4bSmacallan	sub			%dst, 64, %dst
893dbbd9e4bSmacallan	add			%tmp3, 96, %tmp3
894dbbd9e4bSmacallan	add			%mode, %skind, %mode
895dbbd9e4bSmacallan	brz,pn			%h, return
896dbbd9e4bSmacallan	 and			%tmp3, 64, %fregset
/*
 * narrow_cont: advance to the next narrow row, then fall into roll_narrow.
 *
 * Returns when %h underflows.  Otherwise:
 *   - %mode -= %dkind; alignaddr reloads the alignment from %mode;
 *     %mode &= 63
 *   - %dst += %dkind - (%w - %rightw), rounded down to 64 further below;
 *     %srcstart += %skind
 *   - %rightw = ((%rightw + %dkind - 1) & 63) + 1
 *     %leftw  = (%leftw - %dkind) & 63
 *   - %srcend = (%srcstart + %w + 63) & ~63
 *   - %srcn   = (%srcstart + %skind) & ~63; %srcnext starts as %srcn and is
 *     replaced when %srcn + 64 == %srcend (by %srcend if %skind > 0, else
 *     by %srcn - 64)
 *   - %curw   = %w - %leftw - %rightw; %prepw is the same value, or 64
 *     when (%srcstart & 63) + %w > 128
 *   - %tmp1   = (%srcstart & 63) + %w (still live in roll_narrow)
 *   - %srcnotdone = %h, forced to 0 when %srcnext == %srcstop
 *
 * %tmp4 holds (%srcnext - %srcn) from before the update.  If it is
 * non-zero only %fregset is toggled.  If it is zero, a 64-byte block is
 * loaded from %src into %f32.. (%fregset != 0) or %f48.. (%fregset == 0);
 * when %tmp1 <= 64 the load is replaced by a membar if %srcnotdone is 0,
 * and otherwise reads from %srcnext instead.
 */
897dbbd9e4bSmacallannarrow_cont:
898dbbd9e4bSmacallan	deccc			%h
899dbbd9e4bSmacallan	blu,pn			%ICC, return
900dbbd9e4bSmacallan	 sub			%mode, %dkind, %mode
901dbbd9e4bSmacallan	alignaddr		%mode, %g0, %g0
902dbbd9e4bSmacallan	and			%mode, 63, %mode
903dbbd9e4bSmacallan	sub			%srcnext, %srcn, %tmp4
904dbbd9e4bSmacallan	add			%dst, %dkind, %dst
905dbbd9e4bSmacallan	sub			%w, %rightw, %tmp1
906dbbd9e4bSmacallan	add			%srcstart, %skind, %srcstart
907dbbd9e4bSmacallan	sub			%dst, %tmp1, %dst
908dbbd9e4bSmacallan	add			%rightw, %dkind, %rightw
909dbbd9e4bSmacallan	sub			%leftw, %dkind, %leftw
910dbbd9e4bSmacallan	sub			%rightw, 1, %rightw
911dbbd9e4bSmacallan	and			%leftw, 63, %leftw
912dbbd9e4bSmacallan	and			%rightw, 63, %rightw
913dbbd9e4bSmacallan	add			%srcstart, %w, %srcend
914dbbd9e4bSmacallan	add			%rightw, 1, %rightw
915dbbd9e4bSmacallan	add			%srcstart, %skind, %srcnext
916dbbd9e4bSmacallan	add			%srcend, 63, %srcend
917dbbd9e4bSmacallan	andn			%srcnext, 63, %srcnext
918dbbd9e4bSmacallan	andn			%srcend, 63, %srcend
919dbbd9e4bSmacallan	sub			%w, %leftw, %prepw
920dbbd9e4bSmacallan	mov			%srcnext, %srcn
921dbbd9e4bSmacallan	add			%srcnext, 64, %tmp2
922dbbd9e4bSmacallan	sub			%srcnext, 64, %tmp3
923dbbd9e4bSmacallan	and			%srcstart, 63, %tmp1
924dbbd9e4bSmacallan	cmp			%skind, 0
925dbbd9e4bSmacallan	movg			%ICC, %srcend, %tmp3
926dbbd9e4bSmacallan	sub			%prepw, %rightw, %prepw
927dbbd9e4bSmacallan	add			%tmp1, %w, %tmp1
928dbbd9e4bSmacallan	cmp			%tmp2, %srcend
929dbbd9e4bSmacallan	mov			%prepw, %curw
930dbbd9e4bSmacallan	move			%ICC, %tmp3, %srcnext
931dbbd9e4bSmacallan	mov			%h, %srcnotdone
932dbbd9e4bSmacallan	cmp			%tmp1, 128
933dbbd9e4bSmacallan	movg			%ICC, 64, %prepw
934dbbd9e4bSmacallan	andn			%dst, 63, %dst
935dbbd9e4bSmacallan	cmp			%srcnext, %srcstop
936dbbd9e4bSmacallan	move			%ICC, 0, %srcnotdone
937dbbd9e4bSmacallan	brnz,a,pn		%tmp4, roll_narrow
938dbbd9e4bSmacallan	 xor			%fregset, 64, %fregset
939dbbd9e4bSmacallan	cmp			%tmp1, 64
940dbbd9e4bSmacallan	bg,pt			%ICC, 5f
941dbbd9e4bSmacallan	 tst			%srcnotdone
942dbbd9e4bSmacallan	be,a,pn			%ICC, roll_narrow
943dbbd9e4bSmacallan	 membar			SYNC
944dbbd9e4bSmacallan	mov			%srcnext, %src
945dbbd9e4bSmacallan5:	brnz,a,pn		%fregset, 6f
946dbbd9e4bSmacallan	 ldda			[%src] ASI_BLK_P, %f32
947dbbd9e4bSmacallan	ldda			[%src] ASI_BLK_P, %f48
948dbbd9e4bSmacallan6:	addcc			%src, 64, %src
/*
 * roll_narrow: dispatch for a narrow row (offsets relative to BranchBase).
 *
 *   %rightw2 = (%mode & 7) + %rightw
 *   %dst    += 64 when %leftw != 0
 *   base     = ((%mode << 4) & ~127) + %fregset
 *   offset   = base
 *   if %tmp1 <= 64 (unsigned):     offset = base / 2 + (prepr0 - BranchBase)
 *   if %mode >= %leftw (unsigned): offset = base / 2 + (prepn0 - BranchBase)
 *
 * The second test overrides the first.  The jmpl delay slot leaves
 * %tmp3 = (base / 2) & 32.
 * %tmp1 is set by the code that enters here; narrow_cont leaves
 * (%srcstart & 63) + %w in it.
 * NOTE(review): the interleaved fnops look like instruction-grouping
 * padding -- confirm before removing or reordering.
 */
949dbbd9e4bSmacallanroll_narrow:
950dbbd9e4bSmacallan	fnop
951dbbd9e4bSmacallan	sll			%mode, 4, %tmp2
952dbbd9e4bSmacallan	and			%mode, 7, %rightw2
953dbbd9e4bSmacallan	fnop
954dbbd9e4bSmacallan	andn			%tmp2, 127, %tmp2
955dbbd9e4bSmacallan	add			%dst, 64, %tmp4
956dbbd9e4bSmacallan	fnop
957dbbd9e4bSmacallan	add			%tmp2, %fregset, %tmp2
958dbbd9e4bSmacallan	addcc			%rightw2, %rightw, %rightw2
959dbbd9e4bSmacallan	fnop
960dbbd9e4bSmacallan	srl			%tmp2, 1, %tmp3
961dbbd9e4bSmacallan	movrnz			%leftw, %tmp4, %dst
962dbbd9e4bSmacallan	fnop
963dbbd9e4bSmacallan	add			%tmp3, (prepr0 - BranchBase), %tmp4
964dbbd9e4bSmacallan	cmp			%tmp1, 64
965dbbd9e4bSmacallan	movleu			%icc, %tmp4, %tmp2
966dbbd9e4bSmacallan	fnop
967dbbd9e4bSmacallan	add			%tmp3, (prepn0 - BranchBase), %tmp4
968dbbd9e4bSmacallan	cmp			%mode, %leftw
969dbbd9e4bSmacallan	fnop
970dbbd9e4bSmacallan	movgeu			%icc, %tmp4, %tmp2
971dbbd9e4bSmacallan	jmpl			%branchbase + %tmp2, %g0
972dbbd9e4bSmacallan	 and			%tmp3, 32, %tmp3
973dbbd9e4bSmacallan
/*
 * VISEND(i, tgt, tgtr, fi, fset): end-of-run handler for copy-loop entry i.
 * Defines label tgt.  The invocation is followed by the faligndata sequence
 * and the branch to rightst that finish the last 64-byte block.
 *
 *   %curw == 0:
 *     %rightw2 > 64 - (i & 14) * 4 (unsigned): branch to tgtr - 8.
 *     else if %srcnotdone == 0: membar, then tgtr.
 *     else: %src = %srcnext, then tgtr - 8.
 *   %curw != 0:
 *     %fregset = ((i & 1) ^ 1) * 64 and %fi = faligndata(%ftmp1, %ftmp2).
 *     %rightw2 <= the same threshold (unsigned): go to rightst.
 *     else, if %srcnotdone != 0, load the next block from %srcnext into
 *     fset and set %src = %srcnext + 64 (if it is 0 only a membar is
 *     issued); then %fregset = (i & 1) * 64 and execution falls into the
 *     code that follows the invocation.
 *
 * NOTE(review): "tgtr - 8" presumably addresses the two instructions that
 * the LOAD macro places right before tgtr -- confirm against LOAD.
 */
974dbbd9e4bSmacallan#define VISEND(i,tgt,tgtr,fi,fset)				\
975dbbd9e4bSmacallantgt:								\
976dbbd9e4bSmacallan	brnz,pt			%curw, 1f;			\
977dbbd9e4bSmacallan	 cmp			%rightw2, (64 - (i&14)*4);	\
978dbbd9e4bSmacallan	bgu,pn			%icc, tgtr - 8;			\
979dbbd9e4bSmacallan	 tst			%srcnotdone;			\
980dbbd9e4bSmacallan	be,a,pn			%ICC, tgtr;			\
981dbbd9e4bSmacallan	 membar			SYNC;				\
982dbbd9e4bSmacallan	ba,pt			%xcc, tgtr - 8;			\
983dbbd9e4bSmacallan	 mov			%srcnext, %src;			\
984dbbd9e4bSmacallan1:	mov			((i & 1) ^ 1) * 64, %fregset;	\
985dbbd9e4bSmacallan	bleu,pn			%icc, rightst;			\
986dbbd9e4bSmacallan	 faligndata		%ftmp1, %ftmp2, %fi;		\
987dbbd9e4bSmacallan	brz,a,pn		%srcnotdone, 3f;		\
988dbbd9e4bSmacallan	 membar			SYNC;				\
989dbbd9e4bSmacallan	ldda			[%srcnext] ASI_BLK_P, %fset;	\
990dbbd9e4bSmacallan	add			%srcnext, 64, %src;		\
991dbbd9e4bSmacallan3:	mov			(i & 1) * 64, %fregset;
992dbbd9e4bSmacallan
/*
 * End handlers vis0e .. vis15e for the sixteen copy-loop entries.  Each
 * VISEND expansion is followed by the faligndata sequence that fills the
 * remaining output registers up to %f30 from the other register set
 * (%f48.. for even entries, %f32.. for odd ones).  The sequence ends with a
 * branch to rightst whose delay slot holds the last faligndata.
 */
993dbbd9e4bSmacallanVISEND(0,vis0e,vis0r,f30,f32)
994dbbd9e4bSmacallan	ba,pt			%xcc, rightst
995dbbd9e4bSmacallan	 faligndata		%ftmp1, %f48, %f30
996dbbd9e4bSmacallanVISEND(1,vis1e,vis1r,f30,f48)
997dbbd9e4bSmacallan	ba,pt			%xcc, rightst
998dbbd9e4bSmacallan	 faligndata		%ftmp1, %f32, %f30
999dbbd9e4bSmacallanVISEND(2,vis2e,vis2r,f28,f32)
1000dbbd9e4bSmacallan	faligndata		%ftmp1, %f48, %f28
1001dbbd9e4bSmacallan	ba,pt			%xcc, rightst
1002dbbd9e4bSmacallan	 faligndata		%f48, %f50, %f30
1003dbbd9e4bSmacallanVISEND(3,vis3e,vis3r,f28,f48)
1004dbbd9e4bSmacallan	faligndata		%ftmp1, %f32, %f28
1005dbbd9e4bSmacallan	ba,pt			%xcc, rightst
1006dbbd9e4bSmacallan	 faligndata		%f32, %f34, %f30
1007dbbd9e4bSmacallanVISEND(4,vis4e,vis4r,f26,f32)
1008dbbd9e4bSmacallan	faligndata		%ftmp1, %f48, %f26
1009dbbd9e4bSmacallan	faligndata		%f48, %f50, %f28
1010dbbd9e4bSmacallan	ba,pt			%xcc, rightst
1011dbbd9e4bSmacallan	 faligndata		%f50, %f52, %f30
1012dbbd9e4bSmacallanVISEND(5,vis5e,vis5r,f26,f48)
1013dbbd9e4bSmacallan	faligndata		%ftmp1, %f32, %f26
1014dbbd9e4bSmacallan	faligndata		%f32, %f34, %f28
1015dbbd9e4bSmacallan	ba,pt			%xcc, rightst
1016dbbd9e4bSmacallan	 faligndata		%f34, %f36, %f30
1017dbbd9e4bSmacallanVISEND(6,vis6e,vis6r,f24,f32)
1018dbbd9e4bSmacallan	faligndata		%ftmp1, %f48, %f24
1019dbbd9e4bSmacallan	faligndata		%f48, %f50, %f26
1020dbbd9e4bSmacallan	faligndata		%f50, %f52, %f28
1021dbbd9e4bSmacallan	ba,pt			%xcc, rightst
1022dbbd9e4bSmacallan	 faligndata		%f52, %f54, %f30
1023dbbd9e4bSmacallanVISEND(7,vis7e,vis7r,f24,f48)
1024dbbd9e4bSmacallan	faligndata		%ftmp1, %f32, %f24
1025dbbd9e4bSmacallan	faligndata		%f32, %f34, %f26
1026dbbd9e4bSmacallan	faligndata		%f34, %f36, %f28
1027dbbd9e4bSmacallan	ba,pt			%xcc, rightst
1028dbbd9e4bSmacallan	 faligndata		%f36, %f38, %f30
1029dbbd9e4bSmacallanVISEND(8,vis8e,vis8r,f22,f32)
1030dbbd9e4bSmacallan	faligndata		%ftmp1, %f48, %f22
1031dbbd9e4bSmacallan	faligndata		%f48, %f50, %f24
1032dbbd9e4bSmacallan	faligndata		%f50, %f52, %f26
1033dbbd9e4bSmacallan	faligndata		%f52, %f54, %f28
1034dbbd9e4bSmacallan	ba,pt			%xcc, rightst
1035dbbd9e4bSmacallan	 faligndata		%f54, %f56, %f30
1036dbbd9e4bSmacallanVISEND(9,vis9e,vis9r,f22,f48)
1037dbbd9e4bSmacallan	faligndata		%ftmp1, %f32, %f22
1038dbbd9e4bSmacallan	faligndata		%f32, %f34, %f24
1039dbbd9e4bSmacallan	faligndata		%f34, %f36, %f26
1040dbbd9e4bSmacallan	faligndata		%f36, %f38, %f28
1041dbbd9e4bSmacallan	ba,pt			%xcc, rightst
1042dbbd9e4bSmacallan	 faligndata		%f38, %f40, %f30
1043dbbd9e4bSmacallanVISEND(10,vis10e,vis10r,f20,f32)
1044dbbd9e4bSmacallan	faligndata		%ftmp1, %f48, %f20
1045dbbd9e4bSmacallan	faligndata		%f48, %f50, %f22
1046dbbd9e4bSmacallan	faligndata		%f50, %f52, %f24
1047dbbd9e4bSmacallan	faligndata		%f52, %f54, %f26
1048dbbd9e4bSmacallan	faligndata		%f54, %f56, %f28
1049dbbd9e4bSmacallan	ba,pt			%xcc, rightst
1050dbbd9e4bSmacallan	 faligndata		%f56, %f58, %f30
1051dbbd9e4bSmacallanVISEND(11,vis11e,vis11r,f20,f48)
1052dbbd9e4bSmacallan	faligndata		%ftmp1, %f32, %f20
1053dbbd9e4bSmacallan	faligndata		%f32, %f34, %f22
1054dbbd9e4bSmacallan	faligndata		%f34, %f36, %f24
1055dbbd9e4bSmacallan	faligndata		%f36, %f38, %f26
1056dbbd9e4bSmacallan	faligndata		%f38, %f40, %f28
1057dbbd9e4bSmacallan	ba,pt			%xcc, rightst
1058dbbd9e4bSmacallan	 faligndata		%f40, %f42, %f30
1059dbbd9e4bSmacallanVISEND(12,vis12e,vis12r,f18,f32)
1060dbbd9e4bSmacallan	faligndata		%ftmp1, %f48, %f18
1061dbbd9e4bSmacallan	faligndata		%f48, %f50, %f20
1062dbbd9e4bSmacallan	faligndata		%f50, %f52, %f22
1063dbbd9e4bSmacallan	faligndata		%f52, %f54, %f24
1064dbbd9e4bSmacallan	faligndata		%f54, %f56, %f26
1065dbbd9e4bSmacallan	faligndata		%f56, %f58, %f28
1066dbbd9e4bSmacallan	ba,pt			%xcc, rightst
1067dbbd9e4bSmacallan	 faligndata		%f58, %f60, %f30
1068dbbd9e4bSmacallanVISEND(13,vis13e,vis13r,f18,f48)
1069dbbd9e4bSmacallan	faligndata		%ftmp1, %f32, %f18
1070dbbd9e4bSmacallan	faligndata		%f32, %f34, %f20
1071dbbd9e4bSmacallan	faligndata		%f34, %f36, %f22
1072dbbd9e4bSmacallan	faligndata		%f36, %f38, %f24
1073dbbd9e4bSmacallan	faligndata		%f38, %f40, %f26
1074dbbd9e4bSmacallan	faligndata		%f40, %f42, %f28
1075dbbd9e4bSmacallan	ba,pt			%xcc, rightst
1076dbbd9e4bSmacallan	 faligndata		%f42, %f44, %f30
1077dbbd9e4bSmacallanVISEND(14,vis14e,vis14r,f16,f32)
1078dbbd9e4bSmacallan	faligndata		%ftmp1, %f48, %f16
1079dbbd9e4bSmacallan	faligndata		%f48, %f50, %f18
1080dbbd9e4bSmacallan	faligndata		%f50, %f52, %f20
1081dbbd9e4bSmacallan	faligndata		%f52, %f54, %f22
1082dbbd9e4bSmacallan	faligndata		%f54, %f56, %f24
1083dbbd9e4bSmacallan	faligndata		%f56, %f58, %f26
1084dbbd9e4bSmacallan	faligndata		%f58, %f60, %f28
1085dbbd9e4bSmacallan	ba,pt			%xcc, rightst
1086dbbd9e4bSmacallan	 faligndata		%f60, %f62, %f30
1087dbbd9e4bSmacallanVISEND(15,vis15e,vis15r,f16,f48)
1088dbbd9e4bSmacallan	faligndata		%ftmp1, %f32, %f16
1089dbbd9e4bSmacallan	faligndata		%f32, %f34, %f18
1090dbbd9e4bSmacallan	faligndata		%f34, %f36, %f20
1091dbbd9e4bSmacallan	faligndata		%f36, %f38, %f22
1092dbbd9e4bSmacallan	faligndata		%f38, %f40, %f24
1093dbbd9e4bSmacallan	faligndata		%f40, %f42, %f26
1094dbbd9e4bSmacallan	faligndata		%f42, %f44, %f28
1095dbbd9e4bSmacallan	ba,pt			%xcc, rightst
1096dbbd9e4bSmacallan	 faligndata		%f44, %f46, %f30
1097dbbd9e4bSmacallan
/*
 * PREPEND(tgt, tgtr): defines label tgt.  If %srcnotdone is non-zero,
 * execution continues at tgtr - 8; otherwise a membar is issued and
 * execution continues at tgtr.  %src = %srcnext is done in the (not
 * annulled) delay slot, so it happens on both paths.
 */
1098dbbd9e4bSmacallan#define PREPEND(tgt,tgtr)					\
1099dbbd9e4bSmacallantgt:								\
1100dbbd9e4bSmacallan	brnz,pt			%srcnotdone, tgtr - 8;		\
1101dbbd9e4bSmacallan	 mov			%srcnext, %src;			\
1102dbbd9e4bSmacallan	ba,pt			%xcc, tgtr;			\
1103dbbd9e4bSmacallan	 membar			SYNC;
1104dbbd9e4bSmacallan
/* prep0e .. prep15e: one four-instruction PREPEND stub per prep entry. */
1105dbbd9e4bSmacallan	.align			16
1106dbbd9e4bSmacallanPREPEND(prep0e,prep0r)
1107dbbd9e4bSmacallanPREPEND(prep1e,prep1r)
1108dbbd9e4bSmacallanPREPEND(prep2e,prep2r)
1109dbbd9e4bSmacallanPREPEND(prep3e,prep3r)
1110dbbd9e4bSmacallanPREPEND(prep4e,prep4r)
1111dbbd9e4bSmacallanPREPEND(prep5e,prep5r)
1112dbbd9e4bSmacallanPREPEND(prep6e,prep6r)
1113dbbd9e4bSmacallanPREPEND(prep7e,prep7r)
1114dbbd9e4bSmacallanPREPEND(prep8e,prep8r)
1115dbbd9e4bSmacallanPREPEND(prep9e,prep9r)
1116dbbd9e4bSmacallanPREPEND(prep10e,prep10r)
1117dbbd9e4bSmacallanPREPEND(prep11e,prep11r)
1118dbbd9e4bSmacallanPREPEND(prep12e,prep12r)
1119dbbd9e4bSmacallanPREPEND(prep13e,prep13r)
1120dbbd9e4bSmacallanPREPEND(prep14e,prep14r)
1121dbbd9e4bSmacallanPREPEND(prep15e,prep15r)
1122dbbd9e4bSmacallan
1123dbbd9e4bSmacallan/* The other way is even more complicated, as the hardware prefers
1124dbbd9e4bSmacallan * going from lower addresses up.  */
1125dbbd9e4bSmacallan
/*
 * RLOAD(f32, tgt, tgtr): load step of the right-to-left copy loop.
 * The first parameter is itself named f32, so every "%f32" in the body
 * expands to the register passed by the invocation.
 * Subtracts 64 from %curw and branches to tgt when the old %curw was
 * <= 64 (unsigned compare); the delay slot always copies that register
 * to %ftmp1.  Otherwise loads the 64-byte block at %src into the register
 * set starting there, moves %src down by 64 and defines label tgtr.
 */
1126dbbd9e4bSmacallan#define RLOAD(f32,tgt,tgtr)							\
1127dbbd9e4bSmacallan	subcc			%curw, 64, %curw;				\
1128dbbd9e4bSmacallan	bleu,pn			%ICC, tgt;					\
1129dbbd9e4bSmacallan	 fmovd			%f32, %ftmp1;					\
1130dbbd9e4bSmacallan	ldda			[%src] ASI_BLK_P, %f32;				\
1131dbbd9e4bSmacallan	sub			%src, 64, %src;					\
1132dbbd9e4bSmacallantgtr:
1133dbbd9e4bSmacallan
/*
 * RPREPLOAD(f32, tgt, tgtr): load step keyed on %prepw.  Branches to tgt
 * when %prepw <= 0 (signed); the delay slot always copies the given
 * register to %ftmp1.  Otherwise loads the 64-byte block at %src into the
 * register set starting there, moves %src down by 64 and defines tgtr.
 * As in RLOAD, the first parameter is named f32, so "%f32" in the body is
 * the invocation's register.
 *
 * Two variants: a register branch (brlez) for V9/64-bit builds or when
 * SIGNAL_CLOBBERS_OG_REGS is not defined, and a tst + condition-code
 * branch otherwise.
 * NOTE(review): the second form presumably exists because brlez tests the
 * full 64-bit register, whose upper half cannot be trusted on systems that
 * define SIGNAL_CLOBBERS_OG_REGS -- confirm.
 */
1134dbbd9e4bSmacallan#if defined(__sparc_v9__) || defined(__sparcv9) || defined(__arch64__) || !defined(SIGNAL_CLOBBERS_OG_REGS)
1135dbbd9e4bSmacallan
1136dbbd9e4bSmacallan#define RPREPLOAD(f32,tgt,tgtr)							\
1137dbbd9e4bSmacallan	brlez,pn		%prepw, tgt;					\
1138dbbd9e4bSmacallan	 fmovd			%f32, %ftmp1;					\
1139dbbd9e4bSmacallan	ldda			[%src] ASI_BLK_P, %f32;				\
1140dbbd9e4bSmacallan	sub			%src, 64, %src;					\
1141dbbd9e4bSmacallantgtr:
1142dbbd9e4bSmacallan
1143dbbd9e4bSmacallan#else
1144dbbd9e4bSmacallan
1145dbbd9e4bSmacallan#define RPREPLOAD(f32,tgt,tgtr)							\
1146dbbd9e4bSmacallan	tst			%prepw;						\
1147dbbd9e4bSmacallan	ble,pn			%ICC, tgt;					\
1148dbbd9e4bSmacallan	 fmovd			%f32, %ftmp1;					\
1149dbbd9e4bSmacallan	ldda			[%src] ASI_BLK_P, %f32;				\
1150dbbd9e4bSmacallan	sub			%src, 64, %src;					\
1151dbbd9e4bSmacallantgtr:
1152dbbd9e4bSmacallan
1153dbbd9e4bSmacallan#endif
1154dbbd9e4bSmacallan
/*
 * VISmoveImageRL: entry point of the right-to-left variant.
 * NOTE(review): %src, %dst, %w, %h, %skind and %dkind are register aliases
 * defined earlier in the file; they are presumably the incoming arguments
 * (source, destination, width, height, source and destination strides) --
 * confirm against the C prototype.
 *
 * The routine works from the end of each row: %src and %dst are first
 * advanced by %w, and %mode = (%src - %dst) & 63.  "rd %pc" captures the
 * address of label 0 so that %branchbase can be formed as a PC-relative
 * pointer to BranchBaseR.
 * %h == 0 returns at once via rreturn.  Rows with %w >= 128 (unsigned)
 * take the wide set-up below and enter rroll_wide; shorter rows go through
 * rprepare_narrow and enter rroll_narrow.
 * The register-window frame is 192 bytes when __arch64__ is defined and
 * 96 bytes otherwise.
 */
1155dbbd9e4bSmacallan	.globl			VISmoveImageRL
1156dbbd9e4bSmacallan	.align			32
1157dbbd9e4bSmacallanVISmoveImageRL:
1158731e20d8Smacallan#ifdef __arch64__
1159731e20d8Smacallan	save			%sp, -192, %sp				! Group 0
1160731e20d8Smacallan#else
1161731e20d8Smacallan	save			%sp, -96, %sp				! Group 0
1162731e20d8Smacallan#endif
1163dbbd9e4bSmacallan0:	rd			%pc, %tmp3				! Group 1
1164dbbd9e4bSmacallan	and			%dst, 63, %leftw			! Group 7
1165dbbd9e4bSmacallan	mov			64, %tmp1
1166dbbd9e4bSmacallan	add			%src, %w, %src				! Group 8
1167dbbd9e4bSmacallan	add			%dst, %w, %dst
1168dbbd9e4bSmacallan	sub			%src, %dst, %mode			! Group 9
1169dbbd9e4bSmacallan	brz,pn			%h, rreturn
1170dbbd9e4bSmacallan	 sub			%dst, 1, %rightw			! Group 10
1171dbbd9e4bSmacallan	mov			%src, %srcstart
1172dbbd9e4bSmacallan	dec			%src					! Group 11
1173dbbd9e4bSmacallan	cmp			%w, 128
1174dbbd9e4bSmacallan	blu,pn			%ICC, rprepare_narrow
1175dbbd9e4bSmacallan	 and			%mode, 63, %mode			! Group 12
/*
 * Wide set-up: preload the 64-byte block containing the last source byte
 * into %f32.. and the block below it into %f48.., leaving %src two blocks
 * further down.  %leftw = 64 - (dst & 63) for the original dst,
 * %rightw = ((end of dst - 1) & 63) + 1, %dst = block of the last
 * destination byte, %narrow = 0, %fregset = 0, %prepw = 64,
 * %srcnotdone = %h - 1.
 */
1176dbbd9e4bSmacallan	andn			%src, 63, %src
1177dbbd9e4bSmacallan	ldda			[%src] ASI_BLK_P, %f32			! Group 13
1178dbbd9e4bSmacallan	sub			%src, 64, %src				! Group 14
1179dbbd9e4bSmacallan	clr			%narrow
1180dbbd9e4bSmacallan	ldda			[%src] ASI_BLK_P, %f48			! Group 15
1181dbbd9e4bSmacallan	sub			%tmp1, %leftw, %leftw			! Group 16
1182dbbd9e4bSmacallan	and			%rightw, 63, %rightw
1183dbbd9e4bSmacallan	dec			%dst					! Group 17
1184dbbd9e4bSmacallan	add			%rightw, 1, %rightw
1185dbbd9e4bSmacallan	alignaddr		%mode, %g0, %g0				! Group 18
1186dbbd9e4bSmacallan	andn			%dst, 63, %dst				! Group 19
1187dbbd9e4bSmacallan	clr			%fregset
1188dbbd9e4bSmacallan	sub			%src, 64, %src				! Group 20
1189dbbd9e4bSmacallan	add			%tmp3, (BranchBaseR - 0b), %branchbase
1190dbbd9e4bSmacallan	mov			64, %prepw				! Group 21
1191dbbd9e4bSmacallan	ba,pt			%xcc, rroll_wide
1192dbbd9e4bSmacallan	 sub			%h, 1, %srcnotdone
/*
 * rprepare_narrow: set-up for rows narrower than 128 bytes.  Returns via
 * rreturn when %w <= 0.  The "andn %src" after the #endif is the delay
 * slot of whichever branch form was selected.
 * Loads the block containing the last source byte into %f32.., computes
 * %curw = %w - %leftw - %rightw and %prepw (same value, or 64 when
 * %curw <= 0 and ((%srcstart - 1) & 63) - %w < -65), then derives %srcnext,
 * %srcn, %srcend and %srcstop for the row stepping; %srcstop depends on the
 * sign of %skind and on how %skind compares with %w + 64.
 */
1193dbbd9e4bSmacallanrprepare_narrow:
1194dbbd9e4bSmacallan#if defined(__sparc_v9__) || defined(__sparcv9) || defined(__arch64__)
1195dbbd9e4bSmacallan	brlez,pn		%w, rreturn
1196dbbd9e4bSmacallan#else
1197dbbd9e4bSmacallan	tst			%w
1198dbbd9e4bSmacallan	ble,pn			%icc, rreturn
1199dbbd9e4bSmacallan#endif
1200dbbd9e4bSmacallan	 andn			%src, 63, %src				! Group 12
1201dbbd9e4bSmacallan	sub			%tmp1, %leftw, %leftw
1202dbbd9e4bSmacallan	ldda			[%src] ASI_BLK_P, %f32			! Group 13
1203dbbd9e4bSmacallan	and			%rightw, 63, %rightw			! Group 14
1204dbbd9e4bSmacallan	dec			%dst
1205dbbd9e4bSmacallan	add			%rightw, 1, %rightw			! Group 15
1206dbbd9e4bSmacallan	andn			%dst, 63, %dst
1207dbbd9e4bSmacallan	sub			%w, %leftw, %tmp2			! Group 16
1208dbbd9e4bSmacallan	sub			%srcstart, 1, %tmp1
1209dbbd9e4bSmacallan	subcc			%tmp2, %rightw, %curw			! Group 17
1210dbbd9e4bSmacallan	and			%tmp1, 63, %tmp1
1211dbbd9e4bSmacallan	mov			%curw, %prepw				! Group 18
1212dbbd9e4bSmacallan	sub			%tmp1, %w, %tmp1
1213dbbd9e4bSmacallan	bg,pt			%ICC, 1f
1214dbbd9e4bSmacallan	 sub			%src, 64, %src				! Group 19
1215dbbd9e4bSmacallan	cmp			%tmp1, -65
1216dbbd9e4bSmacallan	movl			%ICC, 64, %prepw			! Group 20
1217dbbd9e4bSmacallan1:	add			%srcstart, %skind, %srcnext
1218dbbd9e4bSmacallan	sub			%h, 1, %srcnotdone			! Group 21
1219dbbd9e4bSmacallan	sub			%srcstart, %w, %tmp4
1220dbbd9e4bSmacallan	sub			%srcnext, 1, %srcnext			! Group 22
1221dbbd9e4bSmacallan	andn			%tmp4, 63, %srcend
1222dbbd9e4bSmacallan	andn			%srcnext, 63, %srcnext			! Group 23
1223dbbd9e4bSmacallan	cmp			%skind, 0
1224dbbd9e4bSmacallan	add			%w, 64, %tmp2				! Group 24
1225dbbd9e4bSmacallan	mov			%srcnext, %srcn
1226dbbd9e4bSmacallan	bl,pn			%ICC, 1f
1227dbbd9e4bSmacallan	 clr			%srcstop				! Group 25
/* %skind >= 0: %srcstop stays 0 when %skind >= %w + 64 (unsigned). */
1228dbbd9e4bSmacallan	cmp			%skind, %tmp2
1229dbbd9e4bSmacallan	bgeu,pt			%ICC, 3f
1230dbbd9e4bSmacallan	 add			%srcnext, 64, %fregset
1231dbbd9e4bSmacallan	SMUL			%skind, %srcnotdone, %srcstop
1232dbbd9e4bSmacallan	ba,pt			%xcc, 2f
1233dbbd9e4bSmacallan	 add			%srcstart, 63, %tmp4
/* %skind < 0: %srcstop stays 0 when %w + 64 + %skind <= 0. */
1234dbbd9e4bSmacallan1:	addcc			%tmp2, %skind, %g0
1235dbbd9e4bSmacallan	ble,pt			%ICC, 3f
1236dbbd9e4bSmacallan	 sub			%srcnext, 64, %fregset
1237dbbd9e4bSmacallan	SMUL			%skind, %srcnotdone, %srcstop
1238dbbd9e4bSmacallan	sub			%tmp4, 64, %tmp4
1239dbbd9e4bSmacallan2:	add			%srcstop, %tmp4, %srcstop
/*
 * 3: when %srcnext == %srcend it is replaced by the value staged in
 * %fregset above; %srcnotdone is cleared when %srcnext == %srcstop.
 * Then %fregset = 0, %narrow = 1, %h is decremented, and the second block
 * is loaded into %f48..: from %src when ((%srcstart - 1) & 63) - %w < -1,
 * otherwise from %srcnext (skipped, with a membar, when %srcnotdone is 0).
 */
1240dbbd9e4bSmacallan3:	cmp			%srcnext, %srcend
1241dbbd9e4bSmacallan	andn			%srcstop, 63, %srcstop
1242dbbd9e4bSmacallan	move			%ICC, %fregset, %srcnext
1243dbbd9e4bSmacallan	cmp			%srcnext, %srcstop
1244dbbd9e4bSmacallan	clr			%fregset
1245dbbd9e4bSmacallan	move			%ICC, 0, %srcnotdone
1246dbbd9e4bSmacallan	alignaddr		%mode, %g0, %g0
1247dbbd9e4bSmacallan	add			%tmp3, (BranchBaseR - 0b), %branchbase
1248dbbd9e4bSmacallan	mov			1, %narrow
1249dbbd9e4bSmacallan	cmp			%tmp1, -1
1250dbbd9e4bSmacallan	bge,pn			%ICC, 1f
1251dbbd9e4bSmacallan	 dec			%h
1252dbbd9e4bSmacallan	ldda			[%src] ASI_BLK_P, %f48
1253dbbd9e4bSmacallan	ba,pt			%xcc, rroll_narrow
1254dbbd9e4bSmacallan	 subcc			%src, 64, %src
1255dbbd9e4bSmacallan1:	brz,a,pn		%srcnotdone, rroll_narrow
1256dbbd9e4bSmacallan	 membar			#Sync
1257dbbd9e4bSmacallan	ldda			[%srcnext] ASI_BLK_P, %f48
1258dbbd9e4bSmacallan	ba,pt			%xcc, rroll_narrow
1259dbbd9e4bSmacallan	 subcc			%srcnext, 64, %src
1260dbbd9e4bSmacallan
1261dbbd9e4bSmacallan	.align			32
1262dbbd9e4bSmacallanrrepr0:	ba,pt			%xcc, narrowstr
1263dbbd9e4bSmacallan	 faligndata		%ftmp1, %f32, %f30
1264dbbd9e4bSmacallan	nop
1265dbbd9e4bSmacallan	nop
1266dbbd9e4bSmacallan1:	ba,pt			%xcc, narrowstr
1267dbbd9e4bSmacallan	 faligndata		%ftmp1, %f32, %f16
1268dbbd9e4bSmacallan	.align			32
1269dbbd9e4bSmacallanrrepr1:	ba,pt			%xcc, narrowstr
1270dbbd9e4bSmacallan	 faligndata		%ftmp1, %f48, %f30
1271dbbd9e4bSmacallan	nop
1272dbbd9e4bSmacallan	nop
1273dbbd9e4bSmacallan2:	ba,pt			%xcc, narrowstr
1274dbbd9e4bSmacallan	 faligndata		%ftmp1, %f48, %f16
1275dbbd9e4bSmacallan	.align			32
1276dbbd9e4bSmacallanrrepr2:	faligndata		%f32, %f34, %f30
1277dbbd9e4bSmacallan	ba,pt			%xcc, narrowstr
1278dbbd9e4bSmacallan	 faligndata		%ftmp1, %f32, %f28
1279dbbd9e4bSmacallan	.align			32
1280dbbd9e4bSmacallanrrepr3:	faligndata		%f48, %f50, %f30
1281dbbd9e4bSmacallan	ba,pt			%xcc, narrowstr
1282dbbd9e4bSmacallan	 faligndata		%ftmp1, %f48, %f28
1283dbbd9e4bSmacallan	.align			32
1284dbbd9e4bSmacallanrrepr4:	faligndata		%f34, %f36, %f30
1285dbbd9e4bSmacallan	faligndata		%f32, %f34, %f28
1286dbbd9e4bSmacallan	ba,pt			%xcc, narrowstr
1287dbbd9e4bSmacallan	 faligndata		%ftmp1, %f32, %f26
1288dbbd9e4bSmacallan	.align			32
1289dbbd9e4bSmacallanrrepr5: faligndata		%f50, %f52, %f30
1290dbbd9e4bSmacallan	faligndata		%f48, %f50, %f28
1291dbbd9e4bSmacallan	ba,pt			%xcc, narrowstr
1292dbbd9e4bSmacallan	 faligndata		%ftmp1, %f48, %f26
1293dbbd9e4bSmacallan	.align			32
1294dbbd9e4bSmacallanrrepr6:	faligndata		%f36, %f38, %f30
1295dbbd9e4bSmacallan	faligndata		%f34, %f36, %f28
1296dbbd9e4bSmacallan	faligndata		%f32, %f34, %f26
1297dbbd9e4bSmacallan	ba,pt			%xcc, narrowstr
1298dbbd9e4bSmacallan	 faligndata		%ftmp1, %f32, %f24
1299dbbd9e4bSmacallan	.align			32
1300dbbd9e4bSmacallanrrepr7: faligndata		%f52, %f54, %f30
1301dbbd9e4bSmacallan	faligndata		%f50, %f52, %f28
1302dbbd9e4bSmacallan	faligndata		%f48, %f50, %f26
1303dbbd9e4bSmacallan	ba,pt			%xcc, narrowstr
1304dbbd9e4bSmacallan	 faligndata		%ftmp1, %f48, %f24
/*
 * rrepr8 / rrepr9: four faligndata from the bank (%f32.. / %f48..)
 * filling %f30 down to %f24, then branch to narrowstr with the %ftmp1
 * merge into %f22 in the delay slot.
 */
1305dbbd9e4bSmacallan	.align			32
1306dbbd9e4bSmacallanrrepr8:	faligndata		%f38, %f40, %f30
1307dbbd9e4bSmacallan	faligndata		%f36, %f38, %f28
1308dbbd9e4bSmacallan	faligndata		%f34, %f36, %f26
1309dbbd9e4bSmacallan	faligndata		%f32, %f34, %f24
1310dbbd9e4bSmacallan	ba,pt			%xcc, narrowstr
1311dbbd9e4bSmacallan	 faligndata		%ftmp1, %f32, %f22
1312dbbd9e4bSmacallan	.align			32
1313dbbd9e4bSmacallanrrepr9: faligndata		%f54, %f56, %f30
1314dbbd9e4bSmacallan	faligndata		%f52, %f54, %f28
1315dbbd9e4bSmacallan	faligndata		%f50, %f52, %f26
1316dbbd9e4bSmacallan	faligndata		%f48, %f50, %f24
1317dbbd9e4bSmacallan	ba,pt			%xcc, narrowstr
1318dbbd9e4bSmacallan	 faligndata		%ftmp1, %f48, %f22
/*
 * rrepr10 / rrepr11: five faligndata from the bank (%f32.. / %f48..)
 * filling %f30 down to %f22, then branch to narrowstr with the %ftmp1
 * merge into %f20 in the delay slot.
 */
1319dbbd9e4bSmacallan	.align			32
1320dbbd9e4bSmacallanrrepr10:faligndata		%f40, %f42, %f30
1321dbbd9e4bSmacallan	faligndata		%f38, %f40, %f28
1322dbbd9e4bSmacallan	faligndata		%f36, %f38, %f26
1323dbbd9e4bSmacallan	faligndata		%f34, %f36, %f24
1324dbbd9e4bSmacallan	faligndata		%f32, %f34, %f22
1325dbbd9e4bSmacallan	ba,pt			%xcc, narrowstr
1326dbbd9e4bSmacallan	 faligndata		%ftmp1, %f32, %f20
1327dbbd9e4bSmacallan	.align			32
1328dbbd9e4bSmacallanrrepr11:faligndata		%f56, %f58, %f30
1329dbbd9e4bSmacallan	faligndata		%f54, %f56, %f28
1330dbbd9e4bSmacallan	faligndata		%f52, %f54, %f26
1331dbbd9e4bSmacallan	faligndata		%f50, %f52, %f24
1332dbbd9e4bSmacallan	faligndata		%f48, %f50, %f22
1333dbbd9e4bSmacallan	ba,pt			%xcc, narrowstr
1334dbbd9e4bSmacallan	 faligndata		%ftmp1, %f48, %f20
/*
 * rrepr12 / rrepr13: six faligndata from the bank (%f32.. / %f48..)
 * filling %f30 down to %f20, then branch to narrowstr with the %ftmp1
 * merge into %f18 in the delay slot.  Eight instructions: the 32-byte
 * slot is completely full.
 */
1335dbbd9e4bSmacallan	.align			32
1336dbbd9e4bSmacallanrrepr12:faligndata		%f42, %f44, %f30
1337dbbd9e4bSmacallan	faligndata		%f40, %f42, %f28
1338dbbd9e4bSmacallan	faligndata		%f38, %f40, %f26
1339dbbd9e4bSmacallan	faligndata		%f36, %f38, %f24
1340dbbd9e4bSmacallan	faligndata		%f34, %f36, %f22
1341dbbd9e4bSmacallan	faligndata		%f32, %f34, %f20
1342dbbd9e4bSmacallan	ba,pt			%xcc, narrowstr
1343dbbd9e4bSmacallan	 faligndata		%ftmp1, %f32, %f18
1344dbbd9e4bSmacallan	.align			32
1345dbbd9e4bSmacallanrrepr13:faligndata		%f58, %f60, %f30
1346dbbd9e4bSmacallan	faligndata		%f56, %f58, %f28
1347dbbd9e4bSmacallan	faligndata		%f54, %f56, %f26
1348dbbd9e4bSmacallan	faligndata		%f52, %f54, %f24
1349dbbd9e4bSmacallan	faligndata		%f50, %f52, %f22
1350dbbd9e4bSmacallan	faligndata		%f48, %f50, %f20
1351dbbd9e4bSmacallan	ba,pt			%xcc, narrowstr
1352dbbd9e4bSmacallan	 faligndata		%ftmp1, %f48, %f18
/*
 * rrepr14 / rrepr15: seven faligndata from the bank (%f32.. / %f48..)
 * filling %f30 down to %f18; the seventh sits in the delay slot of a
 * backward branch to local label 1 / 2.  Those labels are the shared
 * tails placed earlier in the file (2 is in the padding of the rrepr1
 * slot); they do the final %ftmp1 merge into %f16 and jump to narrowstr.
 * The full sequence would need nine instructions, one more than a
 * 32-byte slot holds, which is presumably why the tail lives elsewhere.
 */
1353dbbd9e4bSmacallan	.align			32
1354dbbd9e4bSmacallanrrepr14:faligndata		%f44, %f46, %f30
1355dbbd9e4bSmacallan	faligndata		%f42, %f44, %f28
1356dbbd9e4bSmacallan	faligndata		%f40, %f42, %f26
1357dbbd9e4bSmacallan	faligndata		%f38, %f40, %f24
1358dbbd9e4bSmacallan	faligndata		%f36, %f38, %f22
1359dbbd9e4bSmacallan	faligndata		%f34, %f36, %f20
1360dbbd9e4bSmacallan	ba,pt			%xcc, 1b
1361dbbd9e4bSmacallan	 faligndata		%f32, %f34, %f18
1362dbbd9e4bSmacallan	.align			32
1363dbbd9e4bSmacallanrrepr15:faligndata		%f60, %f62, %f30
1364dbbd9e4bSmacallan	faligndata		%f58, %f60, %f28
1365dbbd9e4bSmacallan	faligndata		%f56, %f58, %f26
1366dbbd9e4bSmacallan	faligndata		%f54, %f56, %f24
1367dbbd9e4bSmacallan	faligndata		%f52, %f54, %f22
1368dbbd9e4bSmacallan	faligndata		%f50, %f52, %f20
1369dbbd9e4bSmacallan	ba,pt			%xcc, 2b
1370dbbd9e4bSmacallan	 faligndata		%f48, %f50, %f18
1371dbbd9e4bSmacallan
/*
 * rrepn0 / rrepn1: 32-byte aligned slots.  Seven faligndata fill %f30
 * down to %f18 from the bank (%f32.. for rrepn0, %f48.. for rrepn1); the
 * first one merges the last register of the bank with %ftmp1 and the
 * seventh sits in the delay slot of the forward branch to local label
 * 1 / 2.  Those tails (in the padding of the rrepn14 / rrepn15 slots)
 * produce %f16, set %tmp3 to the offset of rvis0 / rvis1 from BranchBaseR
 * and jump to rightstr.
 */
1372dbbd9e4bSmacallan	.align			32
1373dbbd9e4bSmacallanrrepn0:	faligndata		%f46, %ftmp1, %f30
1374dbbd9e4bSmacallan	faligndata		%f44, %f46, %f28
1375dbbd9e4bSmacallan	faligndata		%f42, %f44, %f26
1376dbbd9e4bSmacallan	faligndata		%f40, %f42, %f24
1377dbbd9e4bSmacallan	faligndata		%f38, %f40, %f22
1378dbbd9e4bSmacallan	faligndata		%f36, %f38, %f20
1379dbbd9e4bSmacallan	ba,pt			%xcc, 1f
1380dbbd9e4bSmacallan	 faligndata		%f34, %f36, %f18
1381dbbd9e4bSmacallan	.align			32
1382dbbd9e4bSmacallanrrepn1:	faligndata		%f62, %ftmp1, %f30
1383dbbd9e4bSmacallan	faligndata		%f60, %f62, %f28
1384dbbd9e4bSmacallan	faligndata		%f58, %f60, %f26
1385dbbd9e4bSmacallan	faligndata		%f56, %f58, %f24
1386dbbd9e4bSmacallan	faligndata		%f54, %f56, %f22
1387dbbd9e4bSmacallan	faligndata		%f52, %f54, %f20
1388dbbd9e4bSmacallan	ba,pt			%xcc, 2f
1389dbbd9e4bSmacallan	 faligndata		%f50, %f52, %f18
/*
 * rrepn2 / rrepn3: seven faligndata fill %f28 down to %f16 from the bank
 * (%f32.. / %f48..), starting with the merge of the bank's last register
 * with %ftmp1; %f30 is not written here.  The seventh faligndata is the
 * delay slot of the forward branch to local label 3 / 4 (in the padding
 * of the rrepn12 / rrepn13 slots), which sets %tmp3 to the offset of
 * rvis2 / rvis3 from BranchBaseR and jumps to rightstr.
 */
1390dbbd9e4bSmacallan	.align			32
1391dbbd9e4bSmacallanrrepn2:	faligndata		%f46, %ftmp1, %f28
1392dbbd9e4bSmacallan	faligndata		%f44, %f46, %f26
1393dbbd9e4bSmacallan	faligndata		%f42, %f44, %f24
1394dbbd9e4bSmacallan	faligndata		%f40, %f42, %f22
1395dbbd9e4bSmacallan	faligndata		%f38, %f40, %f20
1396dbbd9e4bSmacallan	faligndata		%f36, %f38, %f18
1397dbbd9e4bSmacallan	ba,pt			%xcc, 3f
1398dbbd9e4bSmacallan	 faligndata		%f34, %f36, %f16
1399dbbd9e4bSmacallan	.align			32
1400dbbd9e4bSmacallanrrepn3:	faligndata		%f62, %ftmp1, %f28
1401dbbd9e4bSmacallan	faligndata		%f60, %f62, %f26
1402dbbd9e4bSmacallan	faligndata		%f58, %f60, %f24
1403dbbd9e4bSmacallan	faligndata		%f56, %f58, %f22
1404dbbd9e4bSmacallan	faligndata		%f54, %f56, %f20
1405dbbd9e4bSmacallan	faligndata		%f52, %f54, %f18
1406dbbd9e4bSmacallan	ba,pt			%xcc, 4f
1407dbbd9e4bSmacallan	 faligndata		%f50, %f52, %f16
/*
 * rrepn4 / rrepn5: six faligndata fill %f26 down to %f16 from the bank
 * (%f32.. / %f48..), starting with the merge of the bank's last register
 * with %ftmp1.  Then branch to rightstr; the delay slot loads %tmp3 with
 * the offset of rvis4 / rvis5 from BranchBaseR.
 */
1408dbbd9e4bSmacallan	.align			32
1409dbbd9e4bSmacallanrrepn4:	faligndata		%f46, %ftmp1, %f26
1410dbbd9e4bSmacallan	faligndata		%f44, %f46, %f24
1411dbbd9e4bSmacallan	faligndata		%f42, %f44, %f22
1412dbbd9e4bSmacallan	faligndata		%f40, %f42, %f20
1413dbbd9e4bSmacallan	faligndata		%f38, %f40, %f18
1414dbbd9e4bSmacallan	faligndata		%f36, %f38, %f16
1415dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1416dbbd9e4bSmacallan	 mov			(rvis4 - BranchBaseR), %tmp3
1417dbbd9e4bSmacallan	.align			32
1418dbbd9e4bSmacallanrrepn5:	faligndata		%f62, %ftmp1, %f26
1419dbbd9e4bSmacallan	faligndata		%f60, %f62, %f24
1420dbbd9e4bSmacallan	faligndata		%f58, %f60, %f22
1421dbbd9e4bSmacallan	faligndata		%f56, %f58, %f20
1422dbbd9e4bSmacallan	faligndata		%f54, %f56, %f18
1423dbbd9e4bSmacallan	faligndata		%f52, %f54, %f16
1424dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1425dbbd9e4bSmacallan	 mov			(rvis5 - BranchBaseR), %tmp3
/*
 * rrepn6 / rrepn7: five faligndata fill %f24 down to %f16 from the bank
 * (%f32.. / %f48..), starting with the merge of the bank's last register
 * with %ftmp1.  Then branch to rightstr; the delay slot loads %tmp3 with
 * the offset of rvis6 / rvis7 from BranchBaseR.
 */
1426dbbd9e4bSmacallan	.align			32
1427dbbd9e4bSmacallanrrepn6:	faligndata		%f46, %ftmp1, %f24
1428dbbd9e4bSmacallan	faligndata		%f44, %f46, %f22
1429dbbd9e4bSmacallan	faligndata		%f42, %f44, %f20
1430dbbd9e4bSmacallan	faligndata		%f40, %f42, %f18
1431dbbd9e4bSmacallan	faligndata		%f38, %f40, %f16
1432dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1433dbbd9e4bSmacallan	 mov			(rvis6 - BranchBaseR), %tmp3
1434dbbd9e4bSmacallan	.align			32
1435dbbd9e4bSmacallanrrepn7:	faligndata		%f62, %ftmp1, %f24
1436dbbd9e4bSmacallan	faligndata		%f60, %f62, %f22
1437dbbd9e4bSmacallan	faligndata		%f58, %f60, %f20
1438dbbd9e4bSmacallan	faligndata		%f56, %f58, %f18
1439dbbd9e4bSmacallan	faligndata		%f54, %f56, %f16
1440dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1441dbbd9e4bSmacallan	 mov			(rvis7 - BranchBaseR), %tmp3
/*
 * rrepn8 / rrepn9: four faligndata fill %f22 down to %f16 from the bank
 * (%f32.. / %f48..), starting with the merge of the bank's last register
 * with %ftmp1.  Then branch to rightstr; the delay slot loads %tmp3 with
 * the offset of rvis8 / rvis9 from BranchBaseR.
 */
1442dbbd9e4bSmacallan	.align			32
1443dbbd9e4bSmacallanrrepn8:	faligndata		%f46, %ftmp1, %f22
1444dbbd9e4bSmacallan	faligndata		%f44, %f46, %f20
1445dbbd9e4bSmacallan	faligndata		%f42, %f44, %f18
1446dbbd9e4bSmacallan	faligndata		%f40, %f42, %f16
1447dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1448dbbd9e4bSmacallan	 mov			(rvis8 - BranchBaseR), %tmp3
1449dbbd9e4bSmacallan	.align			32
1450dbbd9e4bSmacallanrrepn9:	faligndata		%f62, %ftmp1, %f22
1451dbbd9e4bSmacallan	faligndata		%f60, %f62, %f20
1452dbbd9e4bSmacallan	faligndata		%f58, %f60, %f18
1453dbbd9e4bSmacallan	faligndata		%f56, %f58, %f16
1454dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1455dbbd9e4bSmacallan	 mov			(rvis9 - BranchBaseR), %tmp3
/*
 * rrepn10 / rrepn11: three faligndata fill %f20, %f18, %f16 from the
 * bank (%f32.. / %f48..), starting with the merge of the bank's last
 * register with %ftmp1.  Then branch to rightstr; the delay slot loads
 * %tmp3 with the offset of rvis10 / rvis11 from BranchBaseR.
 */
1456dbbd9e4bSmacallan	.align			32
1457dbbd9e4bSmacallanrrepn10:faligndata		%f46, %ftmp1, %f20
1458dbbd9e4bSmacallan	faligndata		%f44, %f46, %f18
1459dbbd9e4bSmacallan	faligndata		%f42, %f44, %f16
1460dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1461dbbd9e4bSmacallan	 mov			(rvis10 - BranchBaseR), %tmp3
1462dbbd9e4bSmacallan	.align			32
1463dbbd9e4bSmacallanrrepn11:faligndata		%f62, %ftmp1, %f20
1464dbbd9e4bSmacallan	faligndata		%f60, %f62, %f18
1465dbbd9e4bSmacallan	faligndata		%f58, %f60, %f16
1466dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1467dbbd9e4bSmacallan	 mov			(rvis11 - BranchBaseR), %tmp3
/*
 * rrepn12 / rrepn13: two faligndata fill %f18 and %f16 from the bank
 * (%f32.. / %f48..), then branch to rightstr with %tmp3 set (delay slot)
 * to the offset of rvis12 / rvis13 from BranchBaseR.
 * The spare room of each slot, after two padding nops, hosts local label
 * 3 / 4: the tails reached from rrepn2 / rrepn3, which jump to rightstr
 * with %tmp3 = offset of rvis2 / rvis3 from BranchBaseR.
 */
1468dbbd9e4bSmacallan	.align			32
1469dbbd9e4bSmacallanrrepn12:faligndata		%f46, %ftmp1, %f18
1470dbbd9e4bSmacallan	faligndata		%f44, %f46, %f16
1471dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1472dbbd9e4bSmacallan	 mov			(rvis12 - BranchBaseR), %tmp3
1473dbbd9e4bSmacallan	nop
1474dbbd9e4bSmacallan	nop
1475dbbd9e4bSmacallan3:	ba,pt			%xcc, rightstr
1476dbbd9e4bSmacallan	 mov			(rvis2 - BranchBaseR), %tmp3
1477dbbd9e4bSmacallan	.align			32
1478dbbd9e4bSmacallanrrepn13:faligndata		%f62, %ftmp1, %f18
1479dbbd9e4bSmacallan	faligndata		%f60, %f62, %f16
1480dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1481dbbd9e4bSmacallan	 mov			(rvis13 - BranchBaseR), %tmp3
1482dbbd9e4bSmacallan	nop
1483dbbd9e4bSmacallan	nop
1484dbbd9e4bSmacallan4:	ba,pt			%xcc, rightstr
1485dbbd9e4bSmacallan	 mov			(rvis3 - BranchBaseR), %tmp3
/*
 * rrepn14 / rrepn15: a single faligndata merges the bank's last register
 * (%f46 / %f62) with %ftmp1 into %f16, then branch to rightstr with
 * %tmp3 set (delay slot) to the offset of rvis14 / rvis15 from
 * BranchBaseR.
 * After one padding nop each slot hosts local label 1 / 2: the tails
 * reached from rrepn0 / rrepn1.  They perform the remaining faligndata
 * into %f16 and jump to rightstr with %tmp3 = offset of rvis0 / rvis1
 * from BranchBaseR.
 */
1486dbbd9e4bSmacallan	.align			32
1487dbbd9e4bSmacallanrrepn14:faligndata		%f46, %ftmp1, %f16
1488dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1489dbbd9e4bSmacallan	 mov			(rvis14 - BranchBaseR), %tmp3
1490dbbd9e4bSmacallan	nop
1491dbbd9e4bSmacallan1:	faligndata		%f32, %f34, %f16
1492dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1493dbbd9e4bSmacallan	 mov			(rvis0 - BranchBaseR), %tmp3
1494dbbd9e4bSmacallan	.align			32
1495dbbd9e4bSmacallanrrepn15:faligndata		%f62, %ftmp1, %f16
1496dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1497dbbd9e4bSmacallan	 mov			(rvis15 - BranchBaseR), %tmp3
1498dbbd9e4bSmacallan	nop
1499dbbd9e4bSmacallan2:	faligndata		%f48, %f50, %f16
1500dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1501dbbd9e4bSmacallan	 mov			(rvis1 - BranchBaseR), %tmp3
1502dbbd9e4bSmacallan
/*
 * BranchBaseR: 64-byte aligned reference point.  The rrepn and rrepl
 * entries and the rightstr code express code addresses as offsets from
 * this label (e.g. "rvisN - BranchBaseR" placed in %tmp3).
 *
 * rrepl0 / rrepl1: invoke RPREPLOAD for one bank (%f32 for rrepl0, %f48
 * for rrepl1), then run eight faligndata over the other bank filling
 * %f16 up to %f30 (the last one merges the bank's last register with
 * %ftmp1).  Finally branch to rightstr with %tmp3 set (delay slot) to
 * the offset of rvis1 / rvis0 from BranchBaseR, i.e. the loop half that
 * names the opposite bank in its RLOAD.
 * NOTE(review): RPREPLOAD is a macro defined outside this chunk; it
 * presumably loads the named bank and uses the two label arguments as
 * exit/return points -- confirm against the macro definition.  The fnop
 * and trailing nops look like scheduling/padding only.
 */
1503dbbd9e4bSmacallan	.align			64
1504dbbd9e4bSmacallanBranchBaseR:
1505dbbd9e4bSmacallanrrepl0:	RPREPLOAD(f32,rrep0e,rrep0r)
1506dbbd9e4bSmacallan	fnop
1507dbbd9e4bSmacallan	faligndata		%f48, %f50, %f16
1508dbbd9e4bSmacallan	faligndata		%f50, %f52, %f18
1509dbbd9e4bSmacallan	faligndata		%f52, %f54, %f20
1510dbbd9e4bSmacallan	faligndata		%f54, %f56, %f22
1511dbbd9e4bSmacallan	faligndata		%f56, %f58, %f24
1512dbbd9e4bSmacallan	faligndata		%f58, %f60, %f26
1513dbbd9e4bSmacallan	faligndata		%f60, %f62, %f28
1514dbbd9e4bSmacallan	faligndata		%f62, %ftmp1, %f30
1515dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1516dbbd9e4bSmacallan	 mov			(rvis1 - BranchBaseR), %tmp3
1517dbbd9e4bSmacallan	nop
1518dbbd9e4bSmacallan	nop
1519dbbd9e4bSmacallan	.align			32
1520dbbd9e4bSmacallanrrepl1:	RPREPLOAD(f48,rrep1e,rrep1r)
1521dbbd9e4bSmacallan	fnop
1522dbbd9e4bSmacallan	faligndata		%f32, %f34, %f16
1523dbbd9e4bSmacallan	faligndata		%f34, %f36, %f18
1524dbbd9e4bSmacallan	faligndata		%f36, %f38, %f20
1525dbbd9e4bSmacallan	faligndata		%f38, %f40, %f22
1526dbbd9e4bSmacallan	faligndata		%f40, %f42, %f24
1527dbbd9e4bSmacallan	faligndata		%f42, %f44, %f26
1528dbbd9e4bSmacallan	faligndata		%f44, %f46, %f28
1529dbbd9e4bSmacallan	faligndata		%f46, %ftmp1, %f30
1530dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1531dbbd9e4bSmacallan	 mov			(rvis0 - BranchBaseR), %tmp3
1532dbbd9e4bSmacallan	nop
1533dbbd9e4bSmacallan	nop
/*
 * rrepl2 / rrepl3: one faligndata of the bank about to be named in
 * RPREPLOAD (%f32 / %f48) produces %f30 first; after the macro and two
 * fnops, seven faligndata over the other bank fill %f16 up to %f28 (the
 * last one merges with %ftmp1).  Branch to rightstr with %tmp3 set
 * (delay slot) to the offset of rvis3 / rvis2 from BranchBaseR.
 * NOTE(review): RPREPLOAD is defined outside this chunk; the early
 * faligndata presumably has to read the bank before the macro reloads it.
 */
1534dbbd9e4bSmacallan	.align			32
1535dbbd9e4bSmacallanrrepl2:	faligndata		%f32, %f34, %f30
1536dbbd9e4bSmacallan	RPREPLOAD(f32,rrep2e,rrep2r)
1537dbbd9e4bSmacallan	fnop
1538dbbd9e4bSmacallan	fnop
1539dbbd9e4bSmacallan	faligndata		%f50, %f52, %f16
1540dbbd9e4bSmacallan	faligndata		%f52, %f54, %f18
1541dbbd9e4bSmacallan	faligndata		%f54, %f56, %f20
1542dbbd9e4bSmacallan	faligndata		%f56, %f58, %f22
1543dbbd9e4bSmacallan	faligndata		%f58, %f60, %f24
1544dbbd9e4bSmacallan	faligndata		%f60, %f62, %f26
1545dbbd9e4bSmacallan	faligndata		%f62, %ftmp1, %f28
1546dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1547dbbd9e4bSmacallan	 mov			(rvis3 - BranchBaseR), %tmp3
1548dbbd9e4bSmacallan	nop
1549dbbd9e4bSmacallan	.align			32
1550dbbd9e4bSmacallanrrepl3:	faligndata		%f48, %f50, %f30
1551dbbd9e4bSmacallan	RPREPLOAD(f48,rrep3e,rrep3r)
1552dbbd9e4bSmacallan	fnop
1553dbbd9e4bSmacallan	fnop
1554dbbd9e4bSmacallan	faligndata		%f34, %f36, %f16
1555dbbd9e4bSmacallan	faligndata		%f36, %f38, %f18
1556dbbd9e4bSmacallan	faligndata		%f38, %f40, %f20
1557dbbd9e4bSmacallan	faligndata		%f40, %f42, %f22
1558dbbd9e4bSmacallan	faligndata		%f42, %f44, %f24
1559dbbd9e4bSmacallan	faligndata		%f44, %f46, %f26
1560dbbd9e4bSmacallan	faligndata		%f46, %ftmp1, %f28
1561dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1562dbbd9e4bSmacallan	 mov			(rvis2 - BranchBaseR), %tmp3
1563dbbd9e4bSmacallan	nop
/*
 * rrepl4 / rrepl5: two faligndata of the bank about to be named in
 * RPREPLOAD (%f32 / %f48) produce %f28 and %f30; after the macro and
 * three fnops, six faligndata over the other bank fill %f16 up to %f26
 * (the last one merges with %ftmp1).  Branch to rightstr with %tmp3 set
 * (delay slot) to the offset of rvis5 / rvis4 from BranchBaseR.
 * NOTE(review): RPREPLOAD is defined outside this chunk -- see its
 * definition for the meaning of the two label arguments.
 */
1564dbbd9e4bSmacallan	.align			32
1565dbbd9e4bSmacallanrrepl4:	faligndata		%f32, %f34, %f28
1566dbbd9e4bSmacallan	faligndata		%f34, %f36, %f30
1567dbbd9e4bSmacallan	RPREPLOAD(f32,rrep4e,rrep4r)
1568dbbd9e4bSmacallan	fnop
1569dbbd9e4bSmacallan	fnop
1570dbbd9e4bSmacallan	fnop
1571dbbd9e4bSmacallan	faligndata		%f52, %f54, %f16
1572dbbd9e4bSmacallan	faligndata		%f54, %f56, %f18
1573dbbd9e4bSmacallan	faligndata		%f56, %f58, %f20
1574dbbd9e4bSmacallan	faligndata		%f58, %f60, %f22
1575dbbd9e4bSmacallan	faligndata		%f60, %f62, %f24
1576dbbd9e4bSmacallan	faligndata		%f62, %ftmp1, %f26
1577dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1578dbbd9e4bSmacallan	 mov			(rvis5 - BranchBaseR), %tmp3
1579dbbd9e4bSmacallan	.align			32
1580dbbd9e4bSmacallanrrepl5:	faligndata		%f48, %f50, %f28
1581dbbd9e4bSmacallan	faligndata		%f50, %f52, %f30
1582dbbd9e4bSmacallan	RPREPLOAD(f48,rrep5e,rrep5r)
1583dbbd9e4bSmacallan	fnop
1584dbbd9e4bSmacallan	fnop
1585dbbd9e4bSmacallan	fnop
1586dbbd9e4bSmacallan	faligndata		%f36, %f38, %f16
1587dbbd9e4bSmacallan	faligndata		%f38, %f40, %f18
1588dbbd9e4bSmacallan	faligndata		%f40, %f42, %f20
1589dbbd9e4bSmacallan	faligndata		%f42, %f44, %f22
1590dbbd9e4bSmacallan	faligndata		%f44, %f46, %f24
1591dbbd9e4bSmacallan	faligndata		%f46, %ftmp1, %f26
1592dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1593dbbd9e4bSmacallan	 mov			(rvis4 - BranchBaseR), %tmp3
/*
 * rrepl6 / rrepl7: three faligndata of the bank about to be named in
 * RPREPLOAD (%f32 / %f48) produce %f26, %f28, %f30; after the macro and
 * four fnops, five faligndata over the other bank fill %f16 up to %f24
 * (the last one merges with %ftmp1).  Branch to rightstr with %tmp3 set
 * (delay slot) to the offset of rvis7 / rvis6 from BranchBaseR.
 * NOTE(review): RPREPLOAD is defined outside this chunk -- see its
 * definition for the meaning of the two label arguments.
 */
1594dbbd9e4bSmacallan	.align			32
1595dbbd9e4bSmacallanrrepl6:	faligndata		%f32, %f34, %f26
1596dbbd9e4bSmacallan	faligndata		%f34, %f36, %f28
1597dbbd9e4bSmacallan	faligndata		%f36, %f38, %f30
1598dbbd9e4bSmacallan	RPREPLOAD(f32,rrep6e,rrep6r)
1599dbbd9e4bSmacallan	fnop
1600dbbd9e4bSmacallan	fnop
1601dbbd9e4bSmacallan	fnop
1602dbbd9e4bSmacallan	fnop
1603dbbd9e4bSmacallan	faligndata		%f54, %f56, %f16
1604dbbd9e4bSmacallan	faligndata		%f56, %f58, %f18
1605dbbd9e4bSmacallan	faligndata		%f58, %f60, %f20
1606dbbd9e4bSmacallan	faligndata		%f60, %f62, %f22
1607dbbd9e4bSmacallan	faligndata		%f62, %ftmp1, %f24
1608dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1609dbbd9e4bSmacallan	 mov			(rvis7 - BranchBaseR), %tmp3
1610dbbd9e4bSmacallan	.align			32
1611dbbd9e4bSmacallanrrepl7:	faligndata		%f48, %f50, %f26
1612dbbd9e4bSmacallan	faligndata		%f50, %f52, %f28
1613dbbd9e4bSmacallan	faligndata		%f52, %f54, %f30
1614dbbd9e4bSmacallan	RPREPLOAD(f48,rrep7e,rrep7r)
1615dbbd9e4bSmacallan	fnop
1616dbbd9e4bSmacallan	fnop
1617dbbd9e4bSmacallan	fnop
1618dbbd9e4bSmacallan	fnop
1619dbbd9e4bSmacallan	faligndata		%f38, %f40, %f16
1620dbbd9e4bSmacallan	faligndata		%f40, %f42, %f18
1621dbbd9e4bSmacallan	faligndata		%f42, %f44, %f20
1622dbbd9e4bSmacallan	faligndata		%f44, %f46, %f22
1623dbbd9e4bSmacallan	faligndata		%f46, %ftmp1, %f24
1624dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1625dbbd9e4bSmacallan	 mov			(rvis6 - BranchBaseR), %tmp3
/*
 * rrepl8 / rrepl9: four faligndata of the bank about to be named in
 * RPREPLOAD (%f32 / %f48) produce %f24 up to %f30; after the macro and
 * five fnops, four faligndata over the other bank fill %f16 up to %f22
 * (the last one merges with %ftmp1).  Branch to rightstr with %tmp3 set
 * (delay slot) to the offset of rvis9 / rvis8 from BranchBaseR.
 * NOTE(review): RPREPLOAD is defined outside this chunk -- see its
 * definition for the meaning of the two label arguments.
 */
1626dbbd9e4bSmacallan	.align			32
1627dbbd9e4bSmacallanrrepl8:	faligndata		%f32, %f34, %f24
1628dbbd9e4bSmacallan	faligndata		%f34, %f36, %f26
1629dbbd9e4bSmacallan	faligndata		%f36, %f38, %f28
1630dbbd9e4bSmacallan	faligndata		%f38, %f40, %f30
1631dbbd9e4bSmacallan	RPREPLOAD(f32,rrep8e,rrep8r)
1632dbbd9e4bSmacallan	fnop
1633dbbd9e4bSmacallan	fnop
1634dbbd9e4bSmacallan	fnop
1635dbbd9e4bSmacallan	fnop
1636dbbd9e4bSmacallan	fnop
1637dbbd9e4bSmacallan	faligndata		%f56, %f58, %f16
1638dbbd9e4bSmacallan	faligndata		%f58, %f60, %f18
1639dbbd9e4bSmacallan	faligndata		%f60, %f62, %f20
1640dbbd9e4bSmacallan	faligndata		%f62, %ftmp1, %f22
1641dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1642dbbd9e4bSmacallan	 mov			(rvis9 - BranchBaseR), %tmp3
1643dbbd9e4bSmacallan	.align			32
1644dbbd9e4bSmacallanrrepl9:	faligndata		%f48, %f50, %f24
1645dbbd9e4bSmacallan	faligndata		%f50, %f52, %f26
1646dbbd9e4bSmacallan	faligndata		%f52, %f54, %f28
1647dbbd9e4bSmacallan	faligndata		%f54, %f56, %f30
1648dbbd9e4bSmacallan	RPREPLOAD(f48,rrep9e,rrep9r)
1649dbbd9e4bSmacallan	fnop
1650dbbd9e4bSmacallan	fnop
1651dbbd9e4bSmacallan	fnop
1652dbbd9e4bSmacallan	fnop
1653dbbd9e4bSmacallan	fnop
1654dbbd9e4bSmacallan	faligndata		%f40, %f42, %f16
1655dbbd9e4bSmacallan	faligndata		%f42, %f44, %f18
1656dbbd9e4bSmacallan	faligndata		%f44, %f46, %f20
1657dbbd9e4bSmacallan	faligndata		%f46, %ftmp1, %f22
1658dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1659dbbd9e4bSmacallan	 mov			(rvis8 - BranchBaseR), %tmp3
/*
 * rrepl10 / rrepl11: five faligndata of the bank about to be named in
 * RPREPLOAD (%f32 / %f48) produce %f22 up to %f30; after the macro and
 * six fnops, three faligndata over the other bank fill %f16, %f18, %f20
 * (the last one merges with %ftmp1).  Branch to rightstr with %tmp3 set
 * (delay slot) to the offset of rvis11 / rvis10 from BranchBaseR.
 * NOTE(review): RPREPLOAD is defined outside this chunk -- see its
 * definition for the meaning of the two label arguments.
 */
1660dbbd9e4bSmacallan	.align			32
1661dbbd9e4bSmacallanrrepl10:faligndata		%f32, %f34, %f22
1662dbbd9e4bSmacallan	faligndata		%f34, %f36, %f24
1663dbbd9e4bSmacallan	faligndata		%f36, %f38, %f26
1664dbbd9e4bSmacallan	faligndata		%f38, %f40, %f28
1665dbbd9e4bSmacallan	faligndata		%f40, %f42, %f30
1666dbbd9e4bSmacallan	RPREPLOAD(f32,rrep10e,rrep10r)
1667dbbd9e4bSmacallan	fnop
1668dbbd9e4bSmacallan	fnop
1669dbbd9e4bSmacallan	fnop
1670dbbd9e4bSmacallan	fnop
1671dbbd9e4bSmacallan	fnop
1672dbbd9e4bSmacallan	fnop
1673dbbd9e4bSmacallan	faligndata		%f58, %f60, %f16
1674dbbd9e4bSmacallan	faligndata		%f60, %f62, %f18
1675dbbd9e4bSmacallan	faligndata		%f62, %ftmp1, %f20
1676dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1677dbbd9e4bSmacallan	 mov			(rvis11 - BranchBaseR), %tmp3
1678dbbd9e4bSmacallan	.align			32
1679dbbd9e4bSmacallanrrepl11:faligndata		%f48, %f50, %f22
1680dbbd9e4bSmacallan	faligndata		%f50, %f52, %f24
1681dbbd9e4bSmacallan	faligndata		%f52, %f54, %f26
1682dbbd9e4bSmacallan	faligndata		%f54, %f56, %f28
1683dbbd9e4bSmacallan	faligndata		%f56, %f58, %f30
1684dbbd9e4bSmacallan	RPREPLOAD(f48,rrep11e,rrep11r)
1685dbbd9e4bSmacallan	fnop
1686dbbd9e4bSmacallan	fnop
1687dbbd9e4bSmacallan	fnop
1688dbbd9e4bSmacallan	fnop
1689dbbd9e4bSmacallan	fnop
1690dbbd9e4bSmacallan	fnop
1691dbbd9e4bSmacallan	faligndata		%f42, %f44, %f16
1692dbbd9e4bSmacallan	faligndata		%f44, %f46, %f18
1693dbbd9e4bSmacallan	faligndata		%f46, %ftmp1, %f20
1694dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1695dbbd9e4bSmacallan	 mov			(rvis10 - BranchBaseR), %tmp3
/*
 * rrepl12 / rrepl13: six faligndata of the bank about to be named in
 * RPREPLOAD (%f32 / %f48) produce %f20 up to %f30; after the macro and
 * seven fnops, two faligndata over the other bank fill %f16 and %f18
 * (the last one merges with %ftmp1).  Branch to rightstr with %tmp3 set
 * (delay slot) to the offset of rvis13 / rvis12 from BranchBaseR.
 * NOTE(review): RPREPLOAD is defined outside this chunk -- see its
 * definition for the meaning of the two label arguments.
 */
1696dbbd9e4bSmacallan	.align			32
1697dbbd9e4bSmacallanrrepl12:faligndata		%f32, %f34, %f20
1698dbbd9e4bSmacallan	faligndata		%f34, %f36, %f22
1699dbbd9e4bSmacallan	faligndata		%f36, %f38, %f24
1700dbbd9e4bSmacallan	faligndata		%f38, %f40, %f26
1701dbbd9e4bSmacallan	faligndata		%f40, %f42, %f28
1702dbbd9e4bSmacallan	faligndata		%f42, %f44, %f30
1703dbbd9e4bSmacallan	RPREPLOAD(f32,rrep12e,rrep12r)
1704dbbd9e4bSmacallan	fnop
1705dbbd9e4bSmacallan	fnop
1706dbbd9e4bSmacallan	fnop
1707dbbd9e4bSmacallan	fnop
1708dbbd9e4bSmacallan	fnop
1709dbbd9e4bSmacallan	fnop
1710dbbd9e4bSmacallan	fnop
1711dbbd9e4bSmacallan	faligndata		%f60, %f62, %f16
1712dbbd9e4bSmacallan	faligndata		%f62, %ftmp1, %f18
1713dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1714dbbd9e4bSmacallan	 mov			(rvis13 - BranchBaseR), %tmp3
1715dbbd9e4bSmacallan	.align			32
1716dbbd9e4bSmacallanrrepl13:faligndata		%f48, %f50, %f20
1717dbbd9e4bSmacallan	faligndata		%f50, %f52, %f22
1718dbbd9e4bSmacallan	faligndata		%f52, %f54, %f24
1719dbbd9e4bSmacallan	faligndata		%f54, %f56, %f26
1720dbbd9e4bSmacallan	faligndata		%f56, %f58, %f28
1721dbbd9e4bSmacallan	faligndata		%f58, %f60, %f30
1722dbbd9e4bSmacallan	RPREPLOAD(f48,rrep13e,rrep13r)
1723dbbd9e4bSmacallan	fnop
1724dbbd9e4bSmacallan	fnop
1725dbbd9e4bSmacallan	fnop
1726dbbd9e4bSmacallan	fnop
1727dbbd9e4bSmacallan	fnop
1728dbbd9e4bSmacallan	fnop
1729dbbd9e4bSmacallan	fnop
1730dbbd9e4bSmacallan	faligndata		%f44, %f46, %f16
1731dbbd9e4bSmacallan	faligndata		%f46, %ftmp1, %f18
1732dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1733dbbd9e4bSmacallan	 mov			(rvis12 - BranchBaseR), %tmp3
/*
 * rrepl14 / rrepl15: seven faligndata of the bank about to be named in
 * RPREPLOAD (%f32 / %f48) produce %f18 up to %f30; after the macro and
 * seven fnops, a single faligndata merges the other bank's last register
 * (%f62 / %f46) with %ftmp1 into %f16.  Branch to rightstr with %tmp3
 * set (delay slot) to the offset of rvis15 / rvis14 from BranchBaseR.
 * NOTE(review): RPREPLOAD is defined outside this chunk -- see its
 * definition for the meaning of the two label arguments.
 */
1734dbbd9e4bSmacallan	.align			32
1735dbbd9e4bSmacallanrrepl14:faligndata		%f32, %f34, %f18
1736dbbd9e4bSmacallan	faligndata		%f34, %f36, %f20
1737dbbd9e4bSmacallan	faligndata		%f36, %f38, %f22
1738dbbd9e4bSmacallan	faligndata		%f38, %f40, %f24
1739dbbd9e4bSmacallan	faligndata		%f40, %f42, %f26
1740dbbd9e4bSmacallan	faligndata		%f42, %f44, %f28
1741dbbd9e4bSmacallan	faligndata		%f44, %f46, %f30
1742dbbd9e4bSmacallan	RPREPLOAD(f32,rrep14e,rrep14r)
1743dbbd9e4bSmacallan	fnop
1744dbbd9e4bSmacallan	fnop
1745dbbd9e4bSmacallan	fnop
1746dbbd9e4bSmacallan	fnop
1747dbbd9e4bSmacallan	fnop
1748dbbd9e4bSmacallan	fnop
1749dbbd9e4bSmacallan	fnop
1750dbbd9e4bSmacallan	faligndata		%f62, %ftmp1, %f16
1751dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1752dbbd9e4bSmacallan	 mov			(rvis15 - BranchBaseR), %tmp3
1753dbbd9e4bSmacallan	.align			32
1754dbbd9e4bSmacallanrrepl15:faligndata		%f48, %f50, %f18
1755dbbd9e4bSmacallan	faligndata		%f50, %f52, %f20
1756dbbd9e4bSmacallan	faligndata		%f52, %f54, %f22
1757dbbd9e4bSmacallan	faligndata		%f54, %f56, %f24
1758dbbd9e4bSmacallan	faligndata		%f56, %f58, %f26
1759dbbd9e4bSmacallan	faligndata		%f58, %f60, %f28
1760dbbd9e4bSmacallan	faligndata		%f60, %f62, %f30
1761dbbd9e4bSmacallan	RPREPLOAD(f48,rrep15e,rrep15r)
1762dbbd9e4bSmacallan	fnop
1763dbbd9e4bSmacallan	fnop
1764dbbd9e4bSmacallan	fnop
1765dbbd9e4bSmacallan	fnop
1766dbbd9e4bSmacallan	fnop
1767dbbd9e4bSmacallan	fnop
1768dbbd9e4bSmacallan	fnop
1769dbbd9e4bSmacallan	faligndata		%f46, %ftmp1, %f16
1770dbbd9e4bSmacallan	ba,pt			%xcc, rightstr
1771dbbd9e4bSmacallan	 mov			(rvis14 - BranchBaseR), %tmp3
1772dbbd9e4bSmacallan
/*
 * rvis0 / rvis1: two-way unrolled loop, 64-byte aligned.  rvis0 names
 * the %f32 bank in RLOAD and aligns the %f48 bank; it falls through into
 * rvis1, which names %f48 and aligns the %f32 bank, then branches back
 * to rvis0+0x04 (skipping the leading nop).  Each half produces eight
 * doublewords %f16..%f30 (the last merges the bank's last register with
 * %ftmp1), invokes STORE and moves %dst down by 64.
 * NOTE(review): RLOAD and STORE are macros defined outside this chunk;
 * the loop exit is not visible here and presumably lives inside RLOAD
 * via the rvisNe / rvisNr labels passed to it.
 */
1773dbbd9e4bSmacallan	.align			64
1774dbbd9e4bSmacallanrvis0:	nop
1775dbbd9e4bSmacallan	RLOAD(f32,rvis0e,rvis0r)
1776dbbd9e4bSmacallan	fnop
1777dbbd9e4bSmacallan	faligndata		%f48, %f50, %f16
1778dbbd9e4bSmacallan	faligndata		%f50, %f52, %f18
1779dbbd9e4bSmacallan	faligndata		%f52, %f54, %f20
1780dbbd9e4bSmacallan	faligndata		%f54, %f56, %f22
1781dbbd9e4bSmacallan	faligndata		%f56, %f58, %f24
1782dbbd9e4bSmacallan	faligndata		%f58, %f60, %f26
1783dbbd9e4bSmacallan	faligndata		%f60, %f62, %f28
1784dbbd9e4bSmacallan	faligndata		%f62, %ftmp1, %f30
1785dbbd9e4bSmacallan	STORE
1786dbbd9e4bSmacallan	sub			%dst, 64, %dst
1787dbbd9e4bSmacallanrvis1:	RLOAD(f48,rvis1e,rvis1r)
1788dbbd9e4bSmacallan	fnop
1789dbbd9e4bSmacallan	faligndata		%f32, %f34, %f16
1790dbbd9e4bSmacallan	faligndata		%f34, %f36, %f18
1791dbbd9e4bSmacallan	faligndata		%f36, %f38, %f20
1792dbbd9e4bSmacallan	faligndata		%f38, %f40, %f22
1793dbbd9e4bSmacallan	faligndata		%f40, %f42, %f24
1794dbbd9e4bSmacallan	faligndata		%f42, %f44, %f26
1795dbbd9e4bSmacallan	faligndata		%f44, %f46, %f28
1796dbbd9e4bSmacallan	faligndata		%f46, %ftmp1, %f30
1797dbbd9e4bSmacallan	STORE
1798dbbd9e4bSmacallan	ba,pt			%xcc, rvis0+0x04
1799dbbd9e4bSmacallan	 sub			%dst, 64, %dst
/*
 * rvis2 / rvis3: two-way unrolled loop (rvis2 falls through into rvis3,
 * which branches back to rvis2).  Each half first aligns the first pair
 * of the bank it is about to name in RLOAD into %ftmp8, then after RLOAD
 * fills %f16..%f28 from the other bank (last merge uses %ftmp1) and
 * copies %ftmp8 into %f30 before STORE; %dst then moves down by 64.
 * NOTE(review): RLOAD / STORE are defined outside this chunk; the value
 * is presumably parked in %ftmp8 so that %f30 is not overwritten before
 * an earlier STORE has consumed it -- confirm against the macros.
 */
1800dbbd9e4bSmacallan	.align			64
1801dbbd9e4bSmacallanrvis2:	faligndata		%f32, %f34, %ftmp8
1802dbbd9e4bSmacallan	RLOAD(f32,rvis2e,rvis2r)
1803dbbd9e4bSmacallan	fnop
1804dbbd9e4bSmacallan	fnop
1805dbbd9e4bSmacallan	faligndata		%f50, %f52, %f16
1806dbbd9e4bSmacallan	faligndata		%f52, %f54, %f18
1807dbbd9e4bSmacallan	faligndata		%f54, %f56, %f20
1808dbbd9e4bSmacallan	faligndata		%f56, %f58, %f22
1809dbbd9e4bSmacallan	faligndata		%f58, %f60, %f24
1810dbbd9e4bSmacallan	faligndata		%f60, %f62, %f26
1811dbbd9e4bSmacallan	faligndata		%f62, %ftmp1, %f28
1812dbbd9e4bSmacallan	fmovd			%ftmp8, %f30
1813dbbd9e4bSmacallan	STORE
1814dbbd9e4bSmacallan	sub			%dst, 64, %dst
1815dbbd9e4bSmacallanrvis3:	faligndata		%f48, %f50, %ftmp8
1816dbbd9e4bSmacallan	RLOAD(f48,rvis3e,rvis3r)
1817dbbd9e4bSmacallan	fnop
1818dbbd9e4bSmacallan	fnop
1819dbbd9e4bSmacallan	faligndata		%f34, %f36, %f16
1820dbbd9e4bSmacallan	faligndata		%f36, %f38, %f18
1821dbbd9e4bSmacallan	faligndata		%f38, %f40, %f20
1822dbbd9e4bSmacallan	faligndata		%f40, %f42, %f22
1823dbbd9e4bSmacallan	faligndata		%f42, %f44, %f24
1824dbbd9e4bSmacallan	faligndata		%f44, %f46, %f26
1825dbbd9e4bSmacallan	faligndata		%f46, %ftmp1, %f28
1826dbbd9e4bSmacallan	fmovd			%ftmp8, %f30
1827dbbd9e4bSmacallan	STORE
1828dbbd9e4bSmacallan	ba,pt			%xcc, rvis2
1829dbbd9e4bSmacallan	 sub			%dst, 64, %dst
/*
 * rvis4 / rvis5: two-way unrolled loop (rvis4 falls through into rvis5,
 * which branches back to rvis4).  Each half aligns the first two pairs
 * of the bank it is about to name in RLOAD into %ftmp7 / %ftmp8, then
 * after RLOAD fills %f16..%f26 from the other bank (last merge uses
 * %ftmp1) and copies the temporaries into %f28 / %f30 before STORE;
 * %dst then moves down by 64.
 * NOTE(review): RLOAD / STORE are defined outside this chunk.
 */
1830dbbd9e4bSmacallan	.align			64
1831dbbd9e4bSmacallanrvis4:	faligndata		%f32, %f34, %ftmp7
1832dbbd9e4bSmacallan	faligndata		%f34, %f36, %ftmp8
1833dbbd9e4bSmacallan	RLOAD(f32,rvis4e,rvis4r)
1834dbbd9e4bSmacallan	fnop
1835dbbd9e4bSmacallan	fnop
1836dbbd9e4bSmacallan	fnop
1837dbbd9e4bSmacallan	faligndata		%f52, %f54, %f16
1838dbbd9e4bSmacallan	faligndata		%f54, %f56, %f18
1839dbbd9e4bSmacallan	faligndata		%f56, %f58, %f20
1840dbbd9e4bSmacallan	faligndata		%f58, %f60, %f22
1841dbbd9e4bSmacallan	faligndata		%f60, %f62, %f24
1842dbbd9e4bSmacallan	faligndata		%f62, %ftmp1, %f26
1843dbbd9e4bSmacallan	fmovd			%ftmp7, %f28
1844dbbd9e4bSmacallan	fmovd			%ftmp8, %f30
1845dbbd9e4bSmacallan	STORE
1846dbbd9e4bSmacallan	sub			%dst, 64, %dst
1847dbbd9e4bSmacallanrvis5:	faligndata		%f48, %f50, %ftmp7
1848dbbd9e4bSmacallan	faligndata		%f50, %f52, %ftmp8
1849dbbd9e4bSmacallan	RLOAD(f48,rvis5e,rvis5r)
1850dbbd9e4bSmacallan	fnop
1851dbbd9e4bSmacallan	fnop
1852dbbd9e4bSmacallan	fnop
1853dbbd9e4bSmacallan	faligndata		%f36, %f38, %f16
1854dbbd9e4bSmacallan	faligndata		%f38, %f40, %f18
1855dbbd9e4bSmacallan	faligndata		%f40, %f42, %f20
1856dbbd9e4bSmacallan	faligndata		%f42, %f44, %f22
1857dbbd9e4bSmacallan	faligndata		%f44, %f46, %f24
1858dbbd9e4bSmacallan	faligndata		%f46, %ftmp1, %f26
1859dbbd9e4bSmacallan	fmovd			%ftmp7, %f28
1860dbbd9e4bSmacallan	fmovd			%ftmp8, %f30
1861dbbd9e4bSmacallan	STORE
1862dbbd9e4bSmacallan	ba,pt			%xcc, rvis4
1863dbbd9e4bSmacallan	 sub			%dst, 64, %dst
/*
 * rvis6 / rvis7: two-way unrolled loop (rvis6 falls through into rvis7,
 * which branches back to rvis6).  Each half aligns the first three pairs
 * of the bank it is about to name in RLOAD into %ftmp6..%ftmp8; after
 * RLOAD the temporaries are copied to %f26..%f30 and five faligndata
 * over the other bank fill %f16..%f24 (last merge uses %ftmp1).  STORE
 * follows and %dst moves down by 64.
 * Unlike rvis2..rvis5, the fmovd copies here come right after RLOAD
 * instead of after the faligndata run.
 * NOTE(review): RLOAD / STORE are defined outside this chunk.
 */
1864dbbd9e4bSmacallan	.align			64
1865dbbd9e4bSmacallanrvis6:	faligndata		%f32, %f34, %ftmp6
1866dbbd9e4bSmacallan	faligndata		%f34, %f36, %ftmp7
1867dbbd9e4bSmacallan	faligndata		%f36, %f38, %ftmp8
1868dbbd9e4bSmacallan	RLOAD(f32,rvis6e,rvis6r)
1869dbbd9e4bSmacallan	fnop
1870dbbd9e4bSmacallan	fmovd			%ftmp6, %f26
1871dbbd9e4bSmacallan	fmovd			%ftmp7, %f28
1872dbbd9e4bSmacallan	fmovd			%ftmp8, %f30
1873dbbd9e4bSmacallan	faligndata		%f54, %f56, %f16
1874dbbd9e4bSmacallan	faligndata		%f56, %f58, %f18
1875dbbd9e4bSmacallan	faligndata		%f58, %f60, %f20
1876dbbd9e4bSmacallan	faligndata		%f60, %f62, %f22
1877dbbd9e4bSmacallan	faligndata		%f62, %ftmp1, %f24
1878dbbd9e4bSmacallan	STORE
1879dbbd9e4bSmacallan	sub			%dst, 64, %dst
1880dbbd9e4bSmacallanrvis7:	faligndata		%f48, %f50, %ftmp6
1881dbbd9e4bSmacallan	faligndata		%f50, %f52, %ftmp7
1882dbbd9e4bSmacallan	faligndata		%f52, %f54, %ftmp8
1883dbbd9e4bSmacallan	RLOAD(f48,rvis7e,rvis7r)
1884dbbd9e4bSmacallan	fnop
1885dbbd9e4bSmacallan	fmovd			%ftmp6, %f26
1886dbbd9e4bSmacallan	fmovd			%ftmp7, %f28
1887dbbd9e4bSmacallan	fmovd			%ftmp8, %f30
1888dbbd9e4bSmacallan	faligndata		%f38, %f40, %f16
1889dbbd9e4bSmacallan	faligndata		%f40, %f42, %f18
1890dbbd9e4bSmacallan	faligndata		%f42, %f44, %f20
1891dbbd9e4bSmacallan	faligndata		%f44, %f46, %f22
1892dbbd9e4bSmacallan	faligndata		%f46, %ftmp1, %f24
1893dbbd9e4bSmacallan	STORE
1894dbbd9e4bSmacallan	ba,pt			%xcc, rvis6
1895dbbd9e4bSmacallan	 sub			%dst, 64, %dst
/*
 * rvis8 / rvis9: two-way unrolled loop (rvis8 falls through into rvis9,
 * which branches back to rvis8).  Each half aligns the first four pairs
 * of the bank it is about to name in RLOAD into %ftmp5..%ftmp8; after
 * RLOAD the temporaries are copied to %f24..%f30 and four faligndata
 * over the other bank fill %f16..%f22 (last merge uses %ftmp1).  STORE
 * follows and %dst moves down by 64.
 * NOTE(review): RLOAD / STORE are defined outside this chunk.
 */
1896dbbd9e4bSmacallan	.align			64
1897dbbd9e4bSmacallanrvis8:	faligndata		%f32, %f34, %ftmp5
1898dbbd9e4bSmacallan	faligndata		%f34, %f36, %ftmp6
1899dbbd9e4bSmacallan	faligndata		%f36, %f38, %ftmp7
1900dbbd9e4bSmacallan	faligndata		%f38, %f40, %ftmp8
1901dbbd9e4bSmacallan	RLOAD(f32,rvis8e,rvis8r)
1902dbbd9e4bSmacallan	fnop
1903dbbd9e4bSmacallan	fmovd			%ftmp5, %f24
1904dbbd9e4bSmacallan	fmovd			%ftmp6, %f26
1905dbbd9e4bSmacallan	fmovd			%ftmp7, %f28
1906dbbd9e4bSmacallan	fmovd			%ftmp8, %f30
1907dbbd9e4bSmacallan	faligndata		%f56, %f58, %f16
1908dbbd9e4bSmacallan	faligndata		%f58, %f60, %f18
1909dbbd9e4bSmacallan	faligndata		%f60, %f62, %f20
1910dbbd9e4bSmacallan	faligndata		%f62, %ftmp1, %f22
1911dbbd9e4bSmacallan	STORE
1912dbbd9e4bSmacallan	sub			%dst, 64, %dst
1913dbbd9e4bSmacallanrvis9:	faligndata		%f48, %f50, %ftmp5
1914dbbd9e4bSmacallan	faligndata		%f50, %f52, %ftmp6
1915dbbd9e4bSmacallan	faligndata		%f52, %f54, %ftmp7
1916dbbd9e4bSmacallan	faligndata		%f54, %f56, %ftmp8
1917dbbd9e4bSmacallan	RLOAD(f48,rvis9e,rvis9r)
1918dbbd9e4bSmacallan	fnop
1919dbbd9e4bSmacallan	fmovd			%ftmp5, %f24
1920dbbd9e4bSmacallan	fmovd			%ftmp6, %f26
1921dbbd9e4bSmacallan	fmovd			%ftmp7, %f28
1922dbbd9e4bSmacallan	fmovd			%ftmp8, %f30
1923dbbd9e4bSmacallan	faligndata		%f40, %f42, %f16
1924dbbd9e4bSmacallan	faligndata		%f42, %f44, %f18
1925dbbd9e4bSmacallan	faligndata		%f44, %f46, %f20
1926dbbd9e4bSmacallan	faligndata		%f46, %ftmp1, %f22
1927dbbd9e4bSmacallan	STORE
1928dbbd9e4bSmacallan	ba,pt			%xcc, rvis8
1929dbbd9e4bSmacallan	 sub			%dst, 64, %dst
/*
 * rvis10 / rvis11: two-way unrolled loop (rvis10 falls through into
 * rvis11, which branches back to rvis10).  Each half aligns the first
 * five pairs of the bank it is about to name in RLOAD into
 * %ftmp4..%ftmp8; after RLOAD the temporaries are copied to %f22..%f30
 * and three faligndata over the other bank fill %f16..%f20 (last merge
 * uses %ftmp1).  STORE follows and %dst moves down by 64.
 * NOTE(review): RLOAD / STORE are defined outside this chunk.
 */
1930dbbd9e4bSmacallan	.align			64
1931dbbd9e4bSmacallanrvis10:	faligndata		%f32, %f34, %ftmp4
1932dbbd9e4bSmacallan	faligndata		%f34, %f36, %ftmp5
1933dbbd9e4bSmacallan	faligndata		%f36, %f38, %ftmp6
1934dbbd9e4bSmacallan	faligndata		%f38, %f40, %ftmp7
1935dbbd9e4bSmacallan	faligndata		%f40, %f42, %ftmp8
1936dbbd9e4bSmacallan	RLOAD(f32,rvis10e,rvis10r)
1937dbbd9e4bSmacallan	fnop
1938dbbd9e4bSmacallan	fmovd			%ftmp4, %f22
1939dbbd9e4bSmacallan	fmovd			%ftmp5, %f24
1940dbbd9e4bSmacallan	fmovd			%ftmp6, %f26
1941dbbd9e4bSmacallan	fmovd			%ftmp7, %f28
1942dbbd9e4bSmacallan	fmovd			%ftmp8, %f30
1943dbbd9e4bSmacallan	faligndata		%f58, %f60, %f16
1944dbbd9e4bSmacallan	faligndata		%f60, %f62, %f18
1945dbbd9e4bSmacallan	faligndata		%f62, %ftmp1, %f20
1946dbbd9e4bSmacallan	STORE
1947dbbd9e4bSmacallan	sub			%dst, 64, %dst
1948dbbd9e4bSmacallanrvis11:	faligndata		%f48, %f50, %ftmp4
1949dbbd9e4bSmacallan	faligndata		%f50, %f52, %ftmp5
1950dbbd9e4bSmacallan	faligndata		%f52, %f54, %ftmp6
1951dbbd9e4bSmacallan	faligndata		%f54, %f56, %ftmp7
1952dbbd9e4bSmacallan	faligndata		%f56, %f58, %ftmp8
1953dbbd9e4bSmacallan	RLOAD(f48,rvis11e,rvis11r)
1954dbbd9e4bSmacallan	fnop
1955dbbd9e4bSmacallan	fmovd			%ftmp4, %f22
1956dbbd9e4bSmacallan	fmovd			%ftmp5, %f24
1957dbbd9e4bSmacallan	fmovd			%ftmp6, %f26
1958dbbd9e4bSmacallan	fmovd			%ftmp7, %f28
1959dbbd9e4bSmacallan	fmovd			%ftmp8, %f30
1960dbbd9e4bSmacallan	faligndata		%f42, %f44, %f16
1961dbbd9e4bSmacallan	faligndata		%f44, %f46, %f18
1962dbbd9e4bSmacallan	faligndata		%f46, %ftmp1, %f20
1963dbbd9e4bSmacallan	STORE
1964dbbd9e4bSmacallan	ba,pt			%xcc, rvis10
1965dbbd9e4bSmacallan	 sub			%dst, 64, %dst
/*
 * rvis12 / rvis13: two-way unrolled loop (rvis12 falls through into
 * rvis13, which branches back to rvis12).  Each half aligns the first
 * six pairs of the bank it is about to name in RLOAD into
 * %ftmp3..%ftmp8; after RLOAD the temporaries are copied to %f20..%f30
 * and two faligndata over the other bank fill %f16 and %f18 (last merge
 * uses %ftmp1).  STORE follows and %dst moves down by 64.
 * NOTE(review): RLOAD / STORE are defined outside this chunk.
 */
1966dbbd9e4bSmacallan	.align			64
1967dbbd9e4bSmacallanrvis12:	faligndata		%f32, %f34, %ftmp3
1968dbbd9e4bSmacallan	faligndata		%f34, %f36, %ftmp4
1969dbbd9e4bSmacallan	faligndata		%f36, %f38, %ftmp5
1970dbbd9e4bSmacallan	faligndata		%f38, %f40, %ftmp6
1971dbbd9e4bSmacallan	faligndata		%f40, %f42, %ftmp7
1972dbbd9e4bSmacallan	faligndata		%f42, %f44, %ftmp8
1973dbbd9e4bSmacallan	RLOAD(f32,rvis12e,rvis12r)
1974dbbd9e4bSmacallan	fnop
1975dbbd9e4bSmacallan	fmovd			%ftmp3, %f20
1976dbbd9e4bSmacallan	fmovd			%ftmp4, %f22
1977dbbd9e4bSmacallan	fmovd			%ftmp5, %f24
1978dbbd9e4bSmacallan	fmovd			%ftmp6, %f26
1979dbbd9e4bSmacallan	fmovd			%ftmp7, %f28
1980dbbd9e4bSmacallan	fmovd			%ftmp8, %f30
1981dbbd9e4bSmacallan	faligndata		%f60, %f62, %f16
1982dbbd9e4bSmacallan	faligndata		%f62, %ftmp1, %f18
1983dbbd9e4bSmacallan	STORE
1984dbbd9e4bSmacallan	sub			%dst, 64, %dst
1985dbbd9e4bSmacallanrvis13:	faligndata		%f48, %f50, %ftmp3
1986dbbd9e4bSmacallan	faligndata		%f50, %f52, %ftmp4
1987dbbd9e4bSmacallan	faligndata		%f52, %f54, %ftmp5
1988dbbd9e4bSmacallan	faligndata		%f54, %f56, %ftmp6
1989dbbd9e4bSmacallan	faligndata		%f56, %f58, %ftmp7
1990dbbd9e4bSmacallan	faligndata		%f58, %f60, %ftmp8
1991dbbd9e4bSmacallan	RLOAD(f48,rvis13e,rvis13r)
1992dbbd9e4bSmacallan	fnop
1993dbbd9e4bSmacallan	fmovd			%ftmp3, %f20
1994dbbd9e4bSmacallan	fmovd			%ftmp4, %f22
1995dbbd9e4bSmacallan	fmovd			%ftmp5, %f24
1996dbbd9e4bSmacallan	fmovd			%ftmp6, %f26
1997dbbd9e4bSmacallan	fmovd			%ftmp7, %f28
1998dbbd9e4bSmacallan	fmovd			%ftmp8, %f30
1999dbbd9e4bSmacallan	faligndata		%f44, %f46, %f16
2000dbbd9e4bSmacallan	faligndata		%f46, %ftmp1, %f18
2001dbbd9e4bSmacallan	STORE
2002dbbd9e4bSmacallan	ba,pt			%xcc, rvis12
2003dbbd9e4bSmacallan	 sub			%dst, 64, %dst
/*
 * rvis14 / rvis15: two-way unrolled loop (rvis14 falls through into
 * rvis15, which branches back to rvis14).  Unlike the other rvis pairs
 * no %ftmp staging registers are used: seven faligndata of the bank
 * about to be named in RLOAD write %f18..%f30 directly, then RLOAD runs,
 * and after seven fnops a single faligndata merges the other bank's last
 * register (%f62 / %f46) with %ftmp1 into %f16.  STORE follows and %dst
 * moves down by 64.
 * NOTE(review): RLOAD / STORE are defined outside this chunk; the fnop
 * run is presumably there to cover the load latency -- confirm.
 */
2004dbbd9e4bSmacallan	.align			64
2005dbbd9e4bSmacallanrvis14:	fnop
2006dbbd9e4bSmacallan	faligndata		%f32, %f34, %f18
2007dbbd9e4bSmacallan	faligndata		%f34, %f36, %f20
2008dbbd9e4bSmacallan	faligndata		%f36, %f38, %f22
2009dbbd9e4bSmacallan	faligndata		%f38, %f40, %f24
2010dbbd9e4bSmacallan	faligndata		%f40, %f42, %f26
2011dbbd9e4bSmacallan	faligndata		%f42, %f44, %f28
2012dbbd9e4bSmacallan	faligndata		%f44, %f46, %f30
2013dbbd9e4bSmacallan	RLOAD(f32,rvis14e,rvis14r)
2014dbbd9e4bSmacallan	fnop
2015dbbd9e4bSmacallan	fnop
2016dbbd9e4bSmacallan	fnop
2017dbbd9e4bSmacallan	fnop
2018dbbd9e4bSmacallan	fnop
2019dbbd9e4bSmacallan	fnop
2020dbbd9e4bSmacallan	fnop
2021dbbd9e4bSmacallan	faligndata		%f62, %ftmp1, %f16
2022dbbd9e4bSmacallan	STORE
2023dbbd9e4bSmacallan	sub			%dst, 64, %dst
2024dbbd9e4bSmacallanrvis15:	fnop
2025dbbd9e4bSmacallan	faligndata		%f48, %f50, %f18
2026dbbd9e4bSmacallan	faligndata		%f50, %f52, %f20
2027dbbd9e4bSmacallan	faligndata		%f52, %f54, %f22
2028dbbd9e4bSmacallan	faligndata		%f54, %f56, %f24
2029dbbd9e4bSmacallan	faligndata		%f56, %f58, %f26
2030dbbd9e4bSmacallan	faligndata		%f58, %f60, %f28
2031dbbd9e4bSmacallan	faligndata		%f60, %f62, %f30
2032dbbd9e4bSmacallan	RLOAD(f48,rvis15e,rvis15r)
2033dbbd9e4bSmacallan	fnop
2034dbbd9e4bSmacallan	fnop
2035dbbd9e4bSmacallan	fnop
2036dbbd9e4bSmacallan	fnop
2037dbbd9e4bSmacallan	fnop
2038dbbd9e4bSmacallan	fnop
2039dbbd9e4bSmacallan	fnop
2040dbbd9e4bSmacallan	faligndata		%f46, %ftmp1, %f16
2041dbbd9e4bSmacallan	STORE
2042dbbd9e4bSmacallan	ba,pt			%xcc, rvis14
2043dbbd9e4bSmacallan	 sub			%dst, 64, %dst
2044dbbd9e4bSmacallan
/*
 * rightstr: write the doubles held in %f16-%f30 to the 64-byte block at
 * %dst + 0x40, limited by %rightw.
 *
 *   %curw < 0          enter narrowstr at its second instruction; the cmp
 *                      in the delay slot below repeats narrowstr's first.
 *   %rightw == 64      one block store of %f16.. to %dst + 0x40.
 *   %rightw % 8 == 0   jump to (label 4) - 4 * (%rightw / 8), so only the
 *                      last %rightw / 8 std instructions of the ladder run.
 *   otherwise          jump to (label 3) - 8 - (%rightw & 0x38), i.e. into
 *                      the table of 8-byte branch + partial-store pairs.
 *                      The selected pair stores one double under the
 *                      edge8l byte mask in %tmp5, then enters the std
 *                      ladder for the whole doubles to its left.
 *
 * Every store path finishes with jmpl %branchbase + %tmp3; %tmp3 is not
 * written in this routine.  The table and ladder are addressed by byte
 * offset, so no instruction may be added to or removed from them.
 */
2045dbbd9e4bSmacallanrightstr:
2046dbbd9e4bSmacallan	brlz,pn			%curw, narrowstr + 4
2047dbbd9e4bSmacallan	 cmp			%rightw, 64
2048dbbd9e4bSmacallan	be,pn			%icc, 2f
2049dbbd9e4bSmacallan	 and			%rightw, 0x38, %tmp1
2050dbbd9e4bSmacallan	andcc			%rightw, 7, %g0
2051dbbd9e4bSmacallan	be,pn			%icc, 1f
2052dbbd9e4bSmacallan	 neg			%tmp1
/* Partial last double: %tmp5 = byte mask, %tmp4 = address of that double. */
2053dbbd9e4bSmacallan	sub			%g0, %rightw, %tmp5
2054dbbd9e4bSmacallan	add			%branchbase, (3f - BranchBaseR - 8), %tmp2
2055dbbd9e4bSmacallan	edge8l			%tmp5, %g0, %tmp5
2056dbbd9e4bSmacallan	sub			%dst, %tmp1, %tmp4
2057dbbd9e4bSmacallan	jmpl			%tmp1 + %tmp2, %g0
2058dbbd9e4bSmacallan	 add			%tmp4, 0x40, %tmp4
/* Whole doubles only: 4 bytes of ladder per double, hence the shift by 1. */
2059dbbd9e4bSmacallan1:	addcc			%branchbase, (4f - BranchBaseR), %tmp2
2060dbbd9e4bSmacallan	sra			%tmp1, 1, %tmp1
2061dbbd9e4bSmacallan	jmpl			%tmp1 + %tmp2, %g0
/*
 * Label 2 is both the delay slot of the jmpl above and the target of the
 * %rightw == 64 branch, which continues with the block store below.
 */
2062dbbd9e4bSmacallan2:	 add			%dst, 0x40, %tmp1
2063dbbd9e4bSmacallan	jmpl			%branchbase + %tmp3, %g0
2064dbbd9e4bSmacallan	 stda			%f16, [%tmp1] ASI_BLK_P
/* Partial-store table: eight 8-byte entries, the last one ends at label 3. */
2065dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x00
2066dbbd9e4bSmacallan	 stda			%f30, [%tmp4 + %tmp5] ASI_PST8_P
2067dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x04
2068dbbd9e4bSmacallan	 stda			%f28, [%tmp4 + %tmp5] ASI_PST8_P
2069dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x08
2070dbbd9e4bSmacallan	 stda			%f26, [%tmp4 + %tmp5] ASI_PST8_P
2071dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x0c
2072dbbd9e4bSmacallan	 stda			%f24, [%tmp4 + %tmp5] ASI_PST8_P
2073dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x10
2074dbbd9e4bSmacallan	 stda			%f22, [%tmp4 + %tmp5] ASI_PST8_P
2075dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x14
2076dbbd9e4bSmacallan	 stda			%f20, [%tmp4 + %tmp5] ASI_PST8_P
2077dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x18
2078dbbd9e4bSmacallan	 stda			%f18, [%tmp4 + %tmp5] ASI_PST8_P
2079dbbd9e4bSmacallan	jmpl			%branchbase + %tmp3, %g0
2080dbbd9e4bSmacallan	 stda			%f16, [%tmp4 + %tmp5] ASI_PST8_P
/* Whole-double ladder: one 4-byte std per double, highest address first. */
2081dbbd9e4bSmacallan3:	std			%f28, [%dst + 0x70]
2082dbbd9e4bSmacallan	std			%f26, [%dst + 0x68]
2083dbbd9e4bSmacallan	std			%f24, [%dst + 0x60]
2084dbbd9e4bSmacallan	std			%f22, [%dst + 0x58]
2085dbbd9e4bSmacallan	std			%f20, [%dst + 0x50]
2086dbbd9e4bSmacallan	std			%f18, [%dst + 0x48]
2087dbbd9e4bSmacallan	std			%f16, [%dst + 0x40]
2088dbbd9e4bSmacallan4:	jmpl			%branchbase + %tmp3, %g0
2089dbbd9e4bSmacallan	 nop
2090dbbd9e4bSmacallan
/*
 * leftstr: write the doubles held in %f16-%f30 to the 64-byte block at
 * %dst, limited by %leftw, then set up the next row and fall through into
 * rroll_wide (or branch to rnarrow_cont / rreturn).
 *
 *   %leftw == 64      one block store of %f16.. to %dst.
 *   %leftw % 8 == 0   jump to (label 4) - 4 * (%leftw / 8), so only the
 *                     last %leftw / 8 std instructions of the ladder run.
 *   otherwise         jump to (label 3) - 8 - (%leftw & 0x38), i.e. into
 *                     the table of 8-byte branch + partial-store pairs.
 *                     The selected pair stores one double under the edge8
 *                     byte mask in %tmp5, then enters the std ladder for
 *                     the whole doubles to its right.
 *
 * All three paths add %skind to %mode exactly once before reaching label 4.
 * The table and ladder are addressed by byte offset, so no instruction may
 * be added to or removed from them.  %ICC is not defined in this section
 * (presumably a preprocessor macro selecting the condition-code set).
 */
2091dbbd9e4bSmacallanleftstr:cmp			%leftw, 64
2092dbbd9e4bSmacallan	be,pn			%icc, 2f
2093dbbd9e4bSmacallan	 and			%leftw, 0x38, %tmp1
2094dbbd9e4bSmacallan	andcc			%leftw, 7, %g0
2095dbbd9e4bSmacallan	be,pn			%icc, 1f
2096dbbd9e4bSmacallan	 neg			%tmp1
/* Partial first double: %tmp5 = byte mask, %tmp4 = 8-byte aligned address. */
2097dbbd9e4bSmacallan	sub			%dst, %leftw, %tmp4
2098dbbd9e4bSmacallan	add			%branchbase, (3f - BranchBaseR - 8), %tmp2
2099dbbd9e4bSmacallan	edge8			%tmp4, %g0, %tmp5
2100dbbd9e4bSmacallan	andn			%tmp4, 7, %tmp4
2101dbbd9e4bSmacallan	add			%mode, %skind, %mode
2102dbbd9e4bSmacallan	jmpl			%tmp2 + %tmp1, %g0
2103dbbd9e4bSmacallan	 add			%tmp4, 0x40, %tmp4
/* Whole doubles only: 4 bytes of ladder per double, hence the shift by 1. */
2104dbbd9e4bSmacallan1:	add			%branchbase, (4f - BranchBaseR), %tmp2
2105dbbd9e4bSmacallan	sra			%tmp1, 1, %tmp1
2106dbbd9e4bSmacallan	jmpl			%tmp2 + %tmp1, %g0
/*
 * Label 2 is both the delay slot of the jmpl above and the target of the
 * %leftw == 64 branch, which continues with the block store below.
 */
2107dbbd9e4bSmacallan2:	 add			%mode, %skind, %mode
2108dbbd9e4bSmacallan	ba,pt			%xcc, 4f
2109dbbd9e4bSmacallan	 stda			%f16, [%dst] ASI_BLK_P
/* Partial-store table: eight 8-byte entries, the last one ends at label 3. */
2110dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x00
2111dbbd9e4bSmacallan	 stda			%f16, [%tmp4 + %tmp5] ASI_PST8_P
2112dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x04
2113dbbd9e4bSmacallan	 stda			%f18, [%tmp4 + %tmp5] ASI_PST8_P
2114dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x08
2115dbbd9e4bSmacallan	 stda			%f20, [%tmp4 + %tmp5] ASI_PST8_P
2116dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x0c
2117dbbd9e4bSmacallan	 stda			%f22, [%tmp4 + %tmp5] ASI_PST8_P
2118dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x10
2119dbbd9e4bSmacallan	 stda			%f24, [%tmp4 + %tmp5] ASI_PST8_P
2120dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x14
2121dbbd9e4bSmacallan	 stda			%f26, [%tmp4 + %tmp5] ASI_PST8_P
2122dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x18
2123dbbd9e4bSmacallan	 stda			%f28, [%tmp4 + %tmp5] ASI_PST8_P
2124dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x1c
2125dbbd9e4bSmacallan	 stda			%f30, [%tmp4 + %tmp5] ASI_PST8_P
/* Whole-double ladder: one 4-byte std per double, lowest address first. */
2126dbbd9e4bSmacallan3:	std			%f18, [%dst + 0x08]
2127dbbd9e4bSmacallan	std			%f20, [%dst + 0x10]
2128dbbd9e4bSmacallan	std			%f22, [%dst + 0x18]
2129dbbd9e4bSmacallan	std			%f24, [%dst + 0x20]
2130dbbd9e4bSmacallan	std			%f26, [%dst + 0x28]
2131dbbd9e4bSmacallan	std			%f28, [%dst + 0x30]
2132dbbd9e4bSmacallan	std			%f30, [%dst + 0x38]
/*
 * Row finished.  The deccc in the (non-annulled) delay slot runs whether
 * or not the branch to rnarrow_cont is taken; a borrow from it returns.
 */
2133dbbd9e4bSmacallan4:	brnz,pn			%narrow, rnarrow_cont
2134dbbd9e4bSmacallan	 deccc			%srcnotdone
2135dbbd9e4bSmacallan	blu,pn			%ICC, rreturn
2136dbbd9e4bSmacallan	 sub			%mode, %dkind, %mode
/* Program the faligndata offset from %mode, then keep %mode modulo 64. */
2137dbbd9e4bSmacallan	alignaddr		%mode, %g0, %g0
2138dbbd9e4bSmacallan	and			%mode, 63, %mode
/*
 * %srcnext != %srcn: just toggle %fregset (annulled delay slot) and skip
 * the load.  Otherwise block-load [%src] into %f32 when %fregset is
 * non-zero, else into %f48, and step %src down by 64.
 */
2139dbbd9e4bSmacallan	cmp			%srcnext, %srcn
2140dbbd9e4bSmacallan	bne,a,pn		%ICC, 6f
2141dbbd9e4bSmacallan	 xor			%fregset, 64, %fregset
2142dbbd9e4bSmacallan	brnz,a,pn		%fregset, 5f
2143dbbd9e4bSmacallan	 ldda			[%src] ASI_BLK_P, %f32
2144dbbd9e4bSmacallan	ldda			[%src] ASI_BLK_P, %f48
2145dbbd9e4bSmacallan5:	sub			%src, 64, %src
/*
 * Per-row updates:
 *   %dst      = (%dst + %dkind + (%w - old %leftw) + 63) & ~63
 *   %srcstart = %srcstart + %skind
 *   %rightw   = ((%rightw + %dkind - 1) & 63) + 1
 *   %leftw    = 64 - ((%dkind - %leftw) & 63)
 * %tmp1 is left holding 64 for rroll_wide.
 */
2146dbbd9e4bSmacallan6:	add			%dst, %dkind, %dst
2147dbbd9e4bSmacallan	sub			%w, %leftw, %tmp1
2148dbbd9e4bSmacallan	add			%srcstart, %skind, %srcstart
2149dbbd9e4bSmacallan	add			%dst, %tmp1, %dst
2150dbbd9e4bSmacallan	add			%rightw, %dkind, %rightw
2151dbbd9e4bSmacallan	add			%dst, 63, %dst
2152dbbd9e4bSmacallan	sub			%rightw, 1, %rightw
2153dbbd9e4bSmacallan	sub			%dkind, %leftw, %leftw
2154dbbd9e4bSmacallan	and			%rightw, 63, %rightw
2155dbbd9e4bSmacallan	and			%leftw, 63, %leftw
2156dbbd9e4bSmacallan	add			%rightw, 1, %rightw
2157dbbd9e4bSmacallan	mov			64, %tmp1
2158dbbd9e4bSmacallan	andn			%dst, 63, %dst
2159dbbd9e4bSmacallan	sub			%tmp1, %leftw, %leftw
/*
 * rroll_wide: compute the per-row source bounds and the dispatch offset,
 * then jump to %branchbase + %tmp4.
 *
 *   %srcnext = %srcn = (%srcstart + %skind - 1) & ~63
 *   %srcend  = (%srcstart - %w) & ~63
 *   %leftw2  = %leftw - (%mode & 7)
 *   %curw    = %w - %leftw - %rightw   (last subtraction in the delay slot)
 *   %tmp4    = ((%mode << 4) & ~127) + %fregset
 *   %tmp3    = %tmp4 >> 1
 *   offset   = %tmp4 + %tmp3, or %tmp3 + (rrepn0 - BranchBaseR) when
 *              (%tmp1 - %mode) >= %rightw (unsigned)
 *
 * If %srcnext equals %srcend it is moved down by 64.  %dst is moved down
 * by 64.  %tmp1 is read on entry (the code just above sets it to 64).
 * The interleaved fnops are part of the original instruction stream;
 * NOTE(review): presumably there for instruction grouping -- confirm.
 */
2160dbbd9e4bSmacallanrroll_wide:
2161dbbd9e4bSmacallan	add			%srcstart, %skind, %srcnext
2162dbbd9e4bSmacallan	sub			%srcstart, %w, %srcend
2163dbbd9e4bSmacallan	fnop
2164dbbd9e4bSmacallan	deccc			%srcnext
2165dbbd9e4bSmacallan	sll			%mode, 4, %tmp4
2166dbbd9e4bSmacallan	fnop
2167dbbd9e4bSmacallan	andn			%srcnext, 63, %srcnext
2168dbbd9e4bSmacallan	andn			%srcend, 63, %srcend
2169dbbd9e4bSmacallan	fnop
2170dbbd9e4bSmacallan	mov			%srcnext, %srcn
2171dbbd9e4bSmacallan	and			%mode, 7, %leftw2
2172dbbd9e4bSmacallan	fnop
2173dbbd9e4bSmacallan	andn			%tmp4, 127, %tmp4
2174dbbd9e4bSmacallan	sub			%leftw, %leftw2, %leftw2
2175dbbd9e4bSmacallan	fnop
2176dbbd9e4bSmacallan	add			%tmp4, %fregset, %tmp4
2177dbbd9e4bSmacallan	subcc			%w, %leftw, %curw
2178dbbd9e4bSmacallan	fnop
2179dbbd9e4bSmacallan	srl			%tmp4, 1, %tmp3
2180dbbd9e4bSmacallan	sub			%tmp1, %mode, %tmp1
2181dbbd9e4bSmacallan	fnop
2182dbbd9e4bSmacallan	add			%tmp3, (rrepn0 - BranchBaseR), %tmp2
/* Choose between the two dispatch offsets. */
2183dbbd9e4bSmacallan	cmp			%tmp1, %rightw
2184dbbd9e4bSmacallan	add			%tmp4, %tmp3, %tmp4
2185dbbd9e4bSmacallan	sub			%srcnext, 64, %tmp1
2186dbbd9e4bSmacallan	movgeu			%icc, %tmp2, %tmp4
/* Step %srcnext down one block when it coincides with %srcend. */
2187dbbd9e4bSmacallan	cmp			%srcnext, %srcend
2188dbbd9e4bSmacallan	sub			%dst, 64, %dst
2189dbbd9e4bSmacallan	move			%ICC, %tmp1, %srcnext
2190dbbd9e4bSmacallan	jmpl			%branchbase + %tmp4, %g0
2191dbbd9e4bSmacallan	 sub			%curw, %rightw, %curw
2192dbbd9e4bSmacallan
/*
 * rreturn: common exit.  Returns to %i7 + 8 and, in the delay slot, writes
 * %fprs: 4 when built for NetBSD, 0 otherwise.  (In the SPARC V9 FPRS
 * layout the value 4 is the FEF bit, so the NetBSD build leaves the FPU
 * enabled while clearing the dirty bits.)
 */
2193dbbd9e4bSmacallanrreturn:return			%i7+8
2194731e20d8Smacallan#if defined(__NetBSD__)
2195731e20d8Smacallan	 wr			%g0, 4, %fprs
2196731e20d8Smacallan#else
2197dbbd9e4bSmacallan	 wr			%g0, 0, %fprs
2198731e20d8Smacallan#endif
2199dbbd9e4bSmacallan
/*
 * narrowstr: store %f16-%f30 into the block at %dst + 0x40 when both a
 * right limit (%rightw) and a left limit (%leftw) apply to it.  rightstr
 * also enters at narrowstr + 4, having already executed the first cmp.
 *
 * Scratch use: %fregset = (%dst + 0x80 - %leftw) & ~7 (address of the
 * left-edge double) and %srcend = edge8 mask for that address.  Both are
 * ordinary loop registers elsewhere; %fregset is recomputed at label 5.
 * %tmp4 counts the bytes still to be stored.
 *
 * Layout (all addressed by byte offset -- do not add or remove
 * instructions):
 *   - right-edge table before label 2: eight 8-byte entries, each a
 *     partial store under the edge8l mask in %tmp2 followed by entry into
 *     the ladder at 3f + 0x10 * n;
 *   - ladder from label 2: eight 16-byte steps; each subtracts 8 from
 *     %tmp4, stores one whole double if the result is not negative, and
 *     otherwise leaves for the matching entry after label 4;
 *   - left-edge table from label 4: eight 16-byte entries; each stores its
 *     double under the %srcend mask only if %tmp4 > -8, then goes to 5.
 *
 * Entry selection:
 *   %rightw == 64      start the ladder at label 2 with %tmp4 = %w.
 *   %rightw % 8 == 0   jump to (label 4) - 8 - 16 * (%rightw / 8), with
 *                      %tmp4 = %w.
 *   otherwise          jump to (label 2) - 8 - (%rightw & 0x38), with
 *                      %tmp4 = %w - (%rightw & 7).
 */
2200dbbd9e4bSmacallannarrowstr:
2201dbbd9e4bSmacallan	cmp			%rightw, 64
2202dbbd9e4bSmacallan	add			%dst, 0x80, %fregset
2203dbbd9e4bSmacallan	and			%rightw, 0x38, %tmp1
2204dbbd9e4bSmacallan	sub			%fregset, %leftw, %tmp2
2205dbbd9e4bSmacallan	mov			%w, %tmp4
2206dbbd9e4bSmacallan	andn			%tmp2, 7, %fregset
2207dbbd9e4bSmacallan	be,pn			%icc, 2f
2208dbbd9e4bSmacallan	 edge8			%tmp2, %g0, %srcend
2209dbbd9e4bSmacallan	andcc			%rightw, 7, %g0
2210dbbd9e4bSmacallan	be,pn			%icc, 1f
2211dbbd9e4bSmacallan	 neg			%tmp1
/*
 * Partial right double: %tmp5 = its 8-byte aligned address, %tmp2 = byte
 * mask from edge8l(-%rightw, %leftw - 0x41).
 */
2212dbbd9e4bSmacallan	sub			%g0, %rightw, %tmp4
2213dbbd9e4bSmacallan	add			%tmp1, (2f - BranchBaseR - 8), %tmp1
2214dbbd9e4bSmacallan	add			%dst, 0x40, %tmp5
2215dbbd9e4bSmacallan	sub			%leftw, 0x41, %tmp2
2216dbbd9e4bSmacallan	add			%tmp5, %rightw, %tmp5
2217dbbd9e4bSmacallan	edge8l			%tmp4, %tmp2, %tmp2
2218dbbd9e4bSmacallan	and			%rightw, 7, %tmp4
2219dbbd9e4bSmacallan	andn			%tmp5, 7, %tmp5
2220dbbd9e4bSmacallan	jmpl			%branchbase + %tmp1, %g0
2221dbbd9e4bSmacallan	 sub			%w, %tmp4, %tmp4
/* Whole doubles on the right: 16 bytes of ladder per double (shift by 1). */
2222dbbd9e4bSmacallan1:	addcc			%branchbase, (4f - BranchBaseR - 8), %tmp2
2223dbbd9e4bSmacallan	sll			%tmp1, 1, %tmp1
2224dbbd9e4bSmacallan	jmpl			%tmp2 + %tmp1, %g0
2225dbbd9e4bSmacallan	 nop
/* Right-edge table: eight 8-byte entries, the last one ends at label 2. */
2226dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x00
2227dbbd9e4bSmacallan	 stda			%f30, [%tmp5 + %tmp2] ASI_PST8_P
2228dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x10
2229dbbd9e4bSmacallan	 stda			%f28, [%tmp5 + %tmp2] ASI_PST8_P
2230dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x20
2231dbbd9e4bSmacallan	 stda			%f26, [%tmp5 + %tmp2] ASI_PST8_P
2232dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x30
2233dbbd9e4bSmacallan	 stda			%f24, [%tmp5 + %tmp2] ASI_PST8_P
2234dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x40
2235dbbd9e4bSmacallan	 stda			%f22, [%tmp5 + %tmp2] ASI_PST8_P
2236dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x50
2237dbbd9e4bSmacallan	 stda			%f20, [%tmp5 + %tmp2] ASI_PST8_P
2238dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x60
2239dbbd9e4bSmacallan	 stda			%f18, [%tmp5 + %tmp2] ASI_PST8_P
2240dbbd9e4bSmacallan	ba,pt			%xcc, 3f+0x70
2241dbbd9e4bSmacallan	 stda			%f16, [%tmp5 + %tmp2] ASI_PST8_P
/* Ladder: eight 16-byte steps, highest address first. */
2242dbbd9e4bSmacallan2:	subcc			%tmp4, 8, %tmp4
2243dbbd9e4bSmacallan	bl,pn			%icc, 4f+0x00
2244dbbd9e4bSmacallan	 fnop
2245dbbd9e4bSmacallan	std			%f30, [%dst + 0x78]
2246dbbd9e4bSmacallan3:	subcc			%tmp4, 8, %tmp4
2247dbbd9e4bSmacallan	bl,pn			%icc, 4f+0x10
2248dbbd9e4bSmacallan	 fnop
2249dbbd9e4bSmacallan	std			%f28, [%dst + 0x70]
2250dbbd9e4bSmacallan	subcc			%tmp4, 8, %tmp4
2251dbbd9e4bSmacallan	bl,pn			%icc, 4f+0x20
2252dbbd9e4bSmacallan	 fnop
2253dbbd9e4bSmacallan	std			%f26, [%dst + 0x68]
2254dbbd9e4bSmacallan	subcc			%tmp4, 8, %tmp4
2255dbbd9e4bSmacallan	bl,pn			%icc, 4f+0x30
2256dbbd9e4bSmacallan	 fnop
2257dbbd9e4bSmacallan	std			%f24, [%dst + 0x60]
2258dbbd9e4bSmacallan	subcc			%tmp4, 8, %tmp4
2259dbbd9e4bSmacallan	bl,pn			%icc, 4f+0x40
2260dbbd9e4bSmacallan	 fnop
2261dbbd9e4bSmacallan	std			%f22, [%dst + 0x58]
2262dbbd9e4bSmacallan	subcc			%tmp4, 8, %tmp4
2263dbbd9e4bSmacallan	bl,pn			%icc, 4f+0x50
2264dbbd9e4bSmacallan	 fnop
2265dbbd9e4bSmacallan	std			%f20, [%dst + 0x50]
2266dbbd9e4bSmacallan	subcc			%tmp4, 8, %tmp4
2267dbbd9e4bSmacallan	bl,pn			%icc, 4f+0x60
2268dbbd9e4bSmacallan	 fnop
2269dbbd9e4bSmacallan	std			%f18, [%dst + 0x48]
2270dbbd9e4bSmacallan	subcc			%tmp4, 8, %tmp4
2271dbbd9e4bSmacallan	bl,pn			%icc, 4f+0x70
2272dbbd9e4bSmacallan	 fnop
2273dbbd9e4bSmacallan	std			%f16, [%dst + 0x40]
2274dbbd9e4bSmacallan	ba,pt			%xcc, 5f
2275dbbd9e4bSmacallan	 nop
/*
 * Left-edge table: eight 16-byte entries.  The stda sits in an annulled
 * delay slot, so it executes only when the bg is taken (%tmp4 > -8).
 */
2276dbbd9e4bSmacallan4:	cmp			%tmp4, -8
2277dbbd9e4bSmacallan	bg,a,pn			%ICC, 5f
2278dbbd9e4bSmacallan	 stda			%f30, [%fregset + %srcend] ASI_PST8_P
2279dbbd9e4bSmacallan	ba,a,pt			%xcc, 5f
2280dbbd9e4bSmacallan	cmp			%tmp4, -8
2281dbbd9e4bSmacallan	bg,a,pn			%ICC, 5f
2282dbbd9e4bSmacallan	 stda			%f28, [%fregset + %srcend] ASI_PST8_P
2283dbbd9e4bSmacallan	ba,a,pt			%xcc, 5f
2284dbbd9e4bSmacallan	cmp			%tmp4, -8
2285dbbd9e4bSmacallan	bg,a,pn			%ICC, 5f
2286dbbd9e4bSmacallan	 stda			%f26, [%fregset + %srcend] ASI_PST8_P
2287dbbd9e4bSmacallan	ba,a,pt			%xcc, 5f
2288dbbd9e4bSmacallan	cmp			%tmp4, -8
2289dbbd9e4bSmacallan	bg,a,pn			%ICC, 5f
2290dbbd9e4bSmacallan	 stda			%f24, [%fregset + %srcend] ASI_PST8_P
2291dbbd9e4bSmacallan	ba,a,pt			%xcc, 5f
2292dbbd9e4bSmacallan	cmp			%tmp4, -8
2293dbbd9e4bSmacallan	bg,a,pn			%ICC, 5f
2294dbbd9e4bSmacallan	 stda			%f22, [%fregset + %srcend] ASI_PST8_P
2295dbbd9e4bSmacallan	ba,a,pt			%xcc, 5f
2296dbbd9e4bSmacallan	cmp			%tmp4, -8
2297dbbd9e4bSmacallan	bg,a,pn			%ICC, 5f
2298dbbd9e4bSmacallan	 stda			%f20, [%fregset + %srcend] ASI_PST8_P
2299dbbd9e4bSmacallan	ba,a,pt			%xcc, 5f
2300dbbd9e4bSmacallan	cmp			%tmp4, -8
2301dbbd9e4bSmacallan	bg,a,pn			%ICC, 5f
2302dbbd9e4bSmacallan	 stda			%f18, [%fregset + %srcend] ASI_PST8_P
2303dbbd9e4bSmacallan	ba,a,pt			%xcc, 5f
2304dbbd9e4bSmacallan	cmp			%tmp4, -8
2305dbbd9e4bSmacallan	bg,a,pn			%ICC, 5f
2306dbbd9e4bSmacallan	 stda			%f16, [%fregset + %srcend] ASI_PST8_P
/*
 * Common tail: %dst += 64, %mode += %skind, return when %h is zero.
 * %tmp3 becomes ((%tmp3 & 60) ^ 60) + 4, so %fregset = %tmp3 & 64 is 64
 * exactly when the old %tmp3 had none of bits 2-5 set, and 0 otherwise.
 */
2307dbbd9e4bSmacallan5:	and			%tmp3, 60, %tmp3
2308dbbd9e4bSmacallan	add			%dst, 64, %dst
2309dbbd9e4bSmacallan	xor			%tmp3, 60, %tmp3
2310dbbd9e4bSmacallan	add			%mode, %skind, %mode
2311dbbd9e4bSmacallan	add			%tmp3, 4, %tmp3
2312dbbd9e4bSmacallan	brz,pn			%h, rreturn
2313dbbd9e4bSmacallan	 and			%tmp3, 64, %fregset
/*
 * rnarrow_cont: per-row setup for the narrow path; falls through into
 * rroll_narrow.
 *
 * Decrements %h and returns on borrow.  Otherwise:
 *   %mode     = (%mode - %dkind); alignaddr is programmed from it, then it
 *               is reduced modulo 64
 *   %tmp4     = old %srcnext - old %srcn  (non-zero: skip the load below)
 *   %dst      = (%dst + %dkind + (%w - old %leftw) + 63) & ~63
 *   %srcstart = %srcstart + %skind
 *   %rightw   = ((%rightw + %dkind - 1) & 63) + 1
 *   %leftw    = 64 - ((%dkind - %leftw) & 63)
 *   %srcnext  = %srcn = (%srcstart + %skind - 1) & ~63
 *   %srcend   = (%srcstart - %w) & ~63
 *   %curw     = %w - %leftw - %rightw; %prepw is the same value, replaced
 *               by 64 when %tmp1 < -65
 *   %tmp1     = ((%srcstart - 1) & 63) - %w
 *   %srcnotdone = %h, or 0 when the adjusted %srcnext equals %srcstop
 * %srcnext is adjusted twice while it equals %srcend: first by +64 if
 * %skind > 0 else -64, then by -64.
 * %ICC is not defined in this section (presumably a preprocessor macro
 * selecting the condition-code set).
 */
2314dbbd9e4bSmacallanrnarrow_cont:
2315dbbd9e4bSmacallan	deccc			%h
2316dbbd9e4bSmacallan	blu,pn			%ICC, rreturn
2317dbbd9e4bSmacallan	 sub			%mode, %dkind, %mode
2318dbbd9e4bSmacallan	alignaddr		%mode, %g0, %g0
2319dbbd9e4bSmacallan	and			%mode, 63, %mode
2320dbbd9e4bSmacallan	sub			%srcnext, %srcn, %tmp4
2321dbbd9e4bSmacallan	add			%dst, %dkind, %dst
2322dbbd9e4bSmacallan	sub			%w, %leftw, %tmp1
2323dbbd9e4bSmacallan	add			%srcstart, %skind, %srcstart
2324dbbd9e4bSmacallan	add			%dst, %tmp1, %dst
2325dbbd9e4bSmacallan	add			%rightw, %dkind, %rightw
2326dbbd9e4bSmacallan	sub			%dkind, %leftw, %leftw
2327dbbd9e4bSmacallan	sub			%rightw, 1, %rightw
2328dbbd9e4bSmacallan	and			%leftw, 63, %leftw
2329dbbd9e4bSmacallan	add			%dst, 63, %dst
2330dbbd9e4bSmacallan	mov			64, %tmp1
2331dbbd9e4bSmacallan	and			%rightw, 63, %rightw
2332dbbd9e4bSmacallan	sub			%tmp1, %leftw, %leftw
2333dbbd9e4bSmacallan	add			%srcstart, %skind, %srcnext
2334dbbd9e4bSmacallan	add			%rightw, 1, %rightw
2335dbbd9e4bSmacallan	sub			%srcstart, %w, %srcend
2336dbbd9e4bSmacallan	dec			%srcnext
2337dbbd9e4bSmacallan	andn			%srcend, 63, %srcend
2338dbbd9e4bSmacallan	andn			%srcnext, 63, %srcnext
2339dbbd9e4bSmacallan	sub			%w, %leftw, %prepw
2340dbbd9e4bSmacallan	mov			%srcnext, %srcn
/* First adjustment candidate: %srcnext - 64, or + 64 when %skind > 0. */
2341dbbd9e4bSmacallan	sub			%srcnext, 64, %tmp3
2342dbbd9e4bSmacallan	add			%srcnext, 64, %tmp2
2343dbbd9e4bSmacallan	cmp			%skind, 0
2344dbbd9e4bSmacallan	movg			%ICC, %tmp2, %tmp3
2345dbbd9e4bSmacallan	mov			%h, %srcnotdone
2346dbbd9e4bSmacallan	sub			%prepw, %rightw, %prepw
2347dbbd9e4bSmacallan	sub			%srcstart, 1, %tmp1
2348dbbd9e4bSmacallan	cmp			%srcnext, %srcend
2349dbbd9e4bSmacallan	move			%ICC, %tmp3, %srcnext
2350dbbd9e4bSmacallan	and			%tmp1, 63, %tmp1
/* Second adjustment: down by 64 if it still equals %srcend. */
2351dbbd9e4bSmacallan	sub			%srcnext, 64, %tmp3
2352dbbd9e4bSmacallan	sub			%tmp1, %w, %tmp1
2353dbbd9e4bSmacallan	cmp			%srcnext, %srcend
2354dbbd9e4bSmacallan	move			%ICC, %tmp3, %srcnext
2355dbbd9e4bSmacallan	mov			%prepw, %curw
2356dbbd9e4bSmacallan	cmp			%tmp1, -65
2357dbbd9e4bSmacallan	movl			%ICC, 64, %prepw
2358dbbd9e4bSmacallan	andn			%dst, 63, %dst
2359dbbd9e4bSmacallan	cmp			%srcnext, %srcstop
2360dbbd9e4bSmacallan	move			%ICC, 0, %srcnotdone
/*
 * %tmp4 != 0: toggle %fregset (annulled delay slot) and go straight to
 * rroll_narrow without loading.
 */
2361dbbd9e4bSmacallan	brnz,a,pn		%tmp4, rroll_narrow
2362dbbd9e4bSmacallan	 xor			%fregset, 64, %fregset
/*
 * %tmp1 < -1: load from the current %src (label 6).  Otherwise, with
 * %srcnotdone == 0 issue membar SYNC and skip the load; else load from
 * %srcnext.
 */
2363dbbd9e4bSmacallan	cmp			%tmp1, -1
2364dbbd9e4bSmacallan	bl,pt			%ICC, 6f
2365dbbd9e4bSmacallan	 tst			%srcnotdone
2366dbbd9e4bSmacallan	be,a,pn			%ICC, rroll_narrow
2367dbbd9e4bSmacallan	 membar			SYNC
2368dbbd9e4bSmacallan	mov			%srcnext, %src
/* Block-load into %f32 when %fregset is non-zero, else into %f48. */
2369dbbd9e4bSmacallan6:	brnz,a,pn		%fregset, 7f
2370dbbd9e4bSmacallan	 ldda			[%src] ASI_BLK_P, %f32
2371dbbd9e4bSmacallan	ldda			[%src] ASI_BLK_P, %f48
2372dbbd9e4bSmacallan7:	subcc			%src, 64, %src
/*
 * rroll_narrow: compute the dispatch offset for the narrow path and jump
 * to %branchbase + %tmp2.
 *
 *   %tmp2   = ((%mode << 4) & ~127) + %fregset
 *   %leftw2 = %leftw - (%mode & 7)
 *   %tmp3   = %tmp2 >> 1
 *   offset  = %tmp2 + %tmp3 by default;
 *             %tmp3 + (rrepr0 - BranchBaseR) when the incoming
 *             %tmp1 >= -1 (signed);
 *             %tmp3 + (rrepn0 - BranchBaseR) when (64 - %mode) >= %rightw
 *             (unsigned) -- this last test overrides the previous one.
 *
 * %dst is moved down by 64, and the delay slot reduces %tmp3 to
 * %tmp3 & 32.  %tmp1 is read on entry and then overwritten.  The mov at
 * the point between "cmp %tmp1, -1" and the movge does not alter the
 * condition codes.
 */
2373dbbd9e4bSmacallanrroll_narrow:
2374dbbd9e4bSmacallan	fnop
2375dbbd9e4bSmacallan	sll			%mode, 4, %tmp2
2376dbbd9e4bSmacallan	and			%mode, 7, %leftw2
2377dbbd9e4bSmacallan	fnop
2378dbbd9e4bSmacallan	andn			%tmp2, 127, %tmp2
2379dbbd9e4bSmacallan	sub			%dst, 64, %dst
2380dbbd9e4bSmacallan	fnop
2381dbbd9e4bSmacallan	add			%tmp2, %fregset, %tmp2
2382dbbd9e4bSmacallan	subcc			%leftw, %leftw2, %leftw2
2383dbbd9e4bSmacallan	fnop
2384dbbd9e4bSmacallan	srl			%tmp2, 1, %tmp3
2385dbbd9e4bSmacallan	add			%tmp3, (rrepr0 - BranchBaseR), %tmp4
2386dbbd9e4bSmacallan	fnop
2387dbbd9e4bSmacallan	add			%tmp2, %tmp3, %tmp2
2388dbbd9e4bSmacallan	cmp			%tmp1, -1
2389dbbd9e4bSmacallan	fnop
2390dbbd9e4bSmacallan	mov			64, %tmp1
2391dbbd9e4bSmacallan	movge			%icc, %tmp4, %tmp2
2392dbbd9e4bSmacallan	sub			%tmp1, %mode, %tmp1
2393dbbd9e4bSmacallan	add			%tmp3, (rrepn0 - BranchBaseR), %tmp4
2394dbbd9e4bSmacallan	fnop
2395dbbd9e4bSmacallan	cmp			%tmp1, %rightw
2396dbbd9e4bSmacallan	movgeu			%icc, %tmp4, %tmp2
2397dbbd9e4bSmacallan	jmpl			%branchbase + %tmp2, %g0
2398dbbd9e4bSmacallan	 and			%tmp3, 32, %tmp3
2399dbbd9e4bSmacallan
/*
 * RVISEND1(i, tgt, tgtr): first part of an end handler; defines label tgt.
 *
 * %curw != 0: continue at local label 1, where %fregset is set to 64 for
 *   even i and 0 for odd i.  The delay slot compares %leftw2 with
 *   (i & 14) * 4, and those condition codes are still live for the code
 *   that follows the macro.
 * %curw == 0:
 *   %leftw2 > (i & 14) * 4   branch to tgtr - 8;
 *   %srcnotdone == 0         membar SYNC (annulled delay slot), branch to
 *                            tgtr;
 *   otherwise                %src = %srcnext, branch to tgtr - 8.
 * The "tgtr - 8" targets depend on the instruction layout at tgtr.
 */
2400dbbd9e4bSmacallan#define RVISEND1(i,tgt,tgtr)					\
2401dbbd9e4bSmacallantgt:								\
2402dbbd9e4bSmacallan	brnz,pt			%curw, 1f;			\
2403dbbd9e4bSmacallan	 cmp			%leftw2, ((i&14)*4);		\
2404dbbd9e4bSmacallan	bg,pn			%icc, tgtr - 8;			\
2405dbbd9e4bSmacallan	 tst			%srcnotdone;			\
2406dbbd9e4bSmacallan	be,a,pn			%ICC, tgtr;			\
2407dbbd9e4bSmacallan	 membar			SYNC;				\
2408dbbd9e4bSmacallan	ba,pt			%xcc, tgtr - 8;			\
2409dbbd9e4bSmacallan	 mov			%srcnext, %src;			\
2410dbbd9e4bSmacallan1:	mov			((i & 1) ^ 1) * 64, %fregset;
2411dbbd9e4bSmacallan
/*
 * RVISEND2(i, fset): second part of an end handler.
 *
 * %srcnotdone == 0: membar SYNC (annulled delay slot) and skip the load.
 * Otherwise: block-load [%srcnext] into the register bank starting at
 *   %fset and set %src = %srcnext - 64.
 * Either way %fregset is then set to 0 for even i and 64 for odd i.
 */
2412dbbd9e4bSmacallan#define RVISEND2(i,fset)					\
2413dbbd9e4bSmacallan	brz,a,pn		%srcnotdone, 3f;		\
2414dbbd9e4bSmacallan	 membar			SYNC;				\
2415dbbd9e4bSmacallan	ldda			[%srcnext] ASI_BLK_P, %fset;	\
2416dbbd9e4bSmacallan	sub			%srcnext, 64, %src;		\
2417dbbd9e4bSmacallan3:	mov			(i & 1) * 64, %fregset;
2418dbbd9e4bSmacallan
/*
 * rvis0e: end handler 0.  After RVISEND1, %f30 is built from %ftmp2 and
 * %ftmp1 in the delay slot of the ble (executed whether or not the branch
 * is taken).  The ble uses the RVISEND1 compare: %leftw2 <= 0 goes to
 * leftstr.  Otherwise RVISEND2 runs and %f16-%f30 are realigned from
 * %f48-%f62 and %ftmp1 before going to leftstr.
 */
2419dbbd9e4bSmacallanRVISEND1(0,rvis0e,rvis0r)
2420dbbd9e4bSmacallan	fnop
2421dbbd9e4bSmacallan	fnop
2422dbbd9e4bSmacallan	fnop
2423dbbd9e4bSmacallan	fnop
2424dbbd9e4bSmacallan	fnop
2425dbbd9e4bSmacallan	fnop
2426dbbd9e4bSmacallan	ble,pn			%icc, leftstr
2427dbbd9e4bSmacallan	 faligndata		%ftmp2, %ftmp1, %f30
2428dbbd9e4bSmacallanRVISEND2(0,f32)
2429dbbd9e4bSmacallan	fnop
2430dbbd9e4bSmacallan	faligndata		%f48, %f50, %f16
2431dbbd9e4bSmacallan	faligndata		%f50, %f52, %f18
2432dbbd9e4bSmacallan	faligndata		%f52, %f54, %f20
2433dbbd9e4bSmacallan	faligndata		%f54, %f56, %f22
2434dbbd9e4bSmacallan	faligndata		%f56, %f58, %f24
2435dbbd9e4bSmacallan	faligndata		%f58, %f60, %f26
2436dbbd9e4bSmacallan	faligndata		%f60, %f62, %f28
2437dbbd9e4bSmacallan	ba,pt			%xcc, leftstr
2438dbbd9e4bSmacallan	 faligndata		%f62, %ftmp1, %f30
/*
 * rvis1e: end handler 1.  After RVISEND1, %f30 is built from %ftmp2 and
 * %ftmp1 in the delay slot of the ble (executed whether or not the branch
 * is taken).  The ble uses the RVISEND1 compare: %leftw2 <= 0 goes to
 * leftstr.  Otherwise RVISEND2 runs and %f16-%f30 are realigned from
 * %f32-%f46 and %ftmp1 before going to leftstr.
 */
2439dbbd9e4bSmacallanRVISEND1(1,rvis1e,rvis1r)
2440dbbd9e4bSmacallan	fnop
2441dbbd9e4bSmacallan	fnop
2442dbbd9e4bSmacallan	fnop
2443dbbd9e4bSmacallan	fnop
2444dbbd9e4bSmacallan	fnop
2445dbbd9e4bSmacallan	fnop
2446dbbd9e4bSmacallan	ble,pn			%icc, leftstr
2447dbbd9e4bSmacallan	 faligndata		%ftmp2, %ftmp1, %f30
2448dbbd9e4bSmacallanRVISEND2(1,f48)
2449dbbd9e4bSmacallan	fnop
2450dbbd9e4bSmacallan	faligndata		%f32, %f34, %f16
2451dbbd9e4bSmacallan	faligndata		%f34, %f36, %f18
2452dbbd9e4bSmacallan	faligndata		%f36, %f38, %f20
2453dbbd9e4bSmacallan	faligndata		%f38, %f40, %f22
2454dbbd9e4bSmacallan	faligndata		%f40, %f42, %f24
2455dbbd9e4bSmacallan	faligndata		%f42, %f44, %f26
2456dbbd9e4bSmacallan	faligndata		%f44, %f46, %f28
2457dbbd9e4bSmacallan	ba,pt			%xcc, leftstr
2458dbbd9e4bSmacallan	 faligndata		%f46, %ftmp1, %f30
/*
 * rvis2e: end handler 2.  After RVISEND1, %f28 is built from %ftmp2 and
 * %ftmp1 and %f30 is copied from %ftmp8 (in the ble delay slot, executed
 * either way).  The ble uses the RVISEND1 compare: %leftw2 <= 8 goes to
 * leftstr.  Otherwise RVISEND2 runs and %f16-%f28 are realigned from
 * %f50-%f62 and %ftmp1 before going to leftstr.
 */
2459dbbd9e4bSmacallanRVISEND1(2,rvis2e,rvis2r)
2460dbbd9e4bSmacallan	fnop
2461dbbd9e4bSmacallan	fnop
2462dbbd9e4bSmacallan	fnop
2463dbbd9e4bSmacallan	fnop
2464dbbd9e4bSmacallan	faligndata		%ftmp2, %ftmp1, %f28
2465dbbd9e4bSmacallan	ble,pn			%icc, leftstr
2466dbbd9e4bSmacallan	 fmovd			%ftmp8, %f30
2467dbbd9e4bSmacallanRVISEND2(2,f32)
2468dbbd9e4bSmacallan	fnop
2469dbbd9e4bSmacallan	fnop
2470dbbd9e4bSmacallan	faligndata		%f50, %f52, %f16
2471dbbd9e4bSmacallan	faligndata		%f52, %f54, %f18
2472dbbd9e4bSmacallan	faligndata		%f54, %f56, %f20
2473dbbd9e4bSmacallan	faligndata		%f56, %f58, %f22
2474dbbd9e4bSmacallan	faligndata		%f58, %f60, %f24
2475dbbd9e4bSmacallan	faligndata		%f60, %f62, %f26
2476dbbd9e4bSmacallan	ba,pt			%xcc, leftstr
2477dbbd9e4bSmacallan	 faligndata		%f62, %ftmp1, %f28
/*
 * rvis3e: end handler 3.  After RVISEND1, %f28 is built from %ftmp2 and
 * %ftmp1 and %f30 is copied from %ftmp8 (in the ble delay slot, executed
 * either way).  The ble uses the RVISEND1 compare: %leftw2 <= 8 goes to
 * leftstr.  Otherwise RVISEND2 runs and %f16-%f28 are realigned from
 * %f34-%f46 and %ftmp1 before going to leftstr.
 */
2478dbbd9e4bSmacallanRVISEND1(3,rvis3e,rvis3r)
2479dbbd9e4bSmacallan	fnop
2480dbbd9e4bSmacallan	fnop
2481dbbd9e4bSmacallan	fnop
2482dbbd9e4bSmacallan	fnop
2483dbbd9e4bSmacallan	faligndata		%ftmp2, %ftmp1, %f28
2484dbbd9e4bSmacallan	ble,pn			%icc, leftstr
2485dbbd9e4bSmacallan	 fmovd			%ftmp8, %f30
2486dbbd9e4bSmacallanRVISEND2(3,f48)
2487dbbd9e4bSmacallan	fnop
2488dbbd9e4bSmacallan	fnop
2489dbbd9e4bSmacallan	faligndata		%f34, %f36, %f16
2490dbbd9e4bSmacallan	faligndata		%f36, %f38, %f18
2491dbbd9e4bSmacallan	faligndata		%f38, %f40, %f20
2492dbbd9e4bSmacallan	faligndata		%f40, %f42, %f22
2493dbbd9e4bSmacallan	faligndata		%f42, %f44, %f24
2494dbbd9e4bSmacallan	faligndata		%f44, %f46, %f26
2495dbbd9e4bSmacallan	ba,pt			%xcc, leftstr
2496dbbd9e4bSmacallan	 faligndata		%f46, %ftmp1, %f28
/*
 * rvis4e: end handler 4.  After RVISEND1, %f26 is built from %ftmp2 and
 * %ftmp1, and %f28/%f30 are copied from %ftmp7/%ftmp8 (the last copy is in
 * the ble delay slot, executed either way).  The ble uses the RVISEND1
 * compare: %leftw2 <= 16 goes to leftstr.  Otherwise RVISEND2 runs and
 * %f16-%f26 are realigned from %f52-%f62 and %ftmp1 before going to
 * leftstr.
 */
2497dbbd9e4bSmacallanRVISEND1(4,rvis4e,rvis4r)
2498dbbd9e4bSmacallan	fnop
2499dbbd9e4bSmacallan	fnop
2500dbbd9e4bSmacallan	faligndata		%ftmp2, %ftmp1, %f26
2501dbbd9e4bSmacallan	fmovd			%ftmp7, %f28
2502dbbd9e4bSmacallan	ble,pn			%icc, leftstr
2503dbbd9e4bSmacallan	 fmovd			%ftmp8, %f30
2504dbbd9e4bSmacallanRVISEND2(4,f32)
2505dbbd9e4bSmacallan	fnop
2506dbbd9e4bSmacallan	fnop
2507dbbd9e4bSmacallan	fnop
2508dbbd9e4bSmacallan	faligndata		%f52, %f54, %f16
2509dbbd9e4bSmacallan	faligndata		%f54, %f56, %f18
2510dbbd9e4bSmacallan	faligndata		%f56, %f58, %f20
2511dbbd9e4bSmacallan	faligndata		%f58, %f60, %f22
2512dbbd9e4bSmacallan	faligndata		%f60, %f62, %f24
2513dbbd9e4bSmacallan	ba,pt			%xcc, leftstr
2514dbbd9e4bSmacallan	 faligndata		%f62, %ftmp1, %f26
/*
 * rvis5e: end handler 5.  After RVISEND1, %f26 is built from %ftmp2 and
 * %ftmp1, and %f28/%f30 are copied from %ftmp7/%ftmp8 (the last copy is in
 * the ble delay slot, executed either way).  The ble uses the RVISEND1
 * compare: %leftw2 <= 16 goes to leftstr.  Otherwise RVISEND2 runs and
 * %f16-%f26 are realigned from %f36-%f46 and %ftmp1 before going to
 * leftstr.
 */
2515dbbd9e4bSmacallanRVISEND1(5,rvis5e,rvis5r)
2516dbbd9e4bSmacallan	fnop
2517dbbd9e4bSmacallan	fnop
2518dbbd9e4bSmacallan	faligndata		%ftmp2, %ftmp1, %f26
2519dbbd9e4bSmacallan	fmovd			%ftmp7, %f28
2520dbbd9e4bSmacallan	ble,pn			%icc, leftstr
2521dbbd9e4bSmacallan	 fmovd			%ftmp8, %f30
2522dbbd9e4bSmacallanRVISEND2(5,f48)
2523dbbd9e4bSmacallan	fnop
2524dbbd9e4bSmacallan	fnop
2525dbbd9e4bSmacallan	fnop
2526dbbd9e4bSmacallan	faligndata		%f36, %f38, %f16
2527dbbd9e4bSmacallan	faligndata		%f38, %f40, %f18
2528dbbd9e4bSmacallan	faligndata		%f40, %f42, %f20
2529dbbd9e4bSmacallan	faligndata		%f42, %f44, %f22
2530dbbd9e4bSmacallan	faligndata		%f44, %f46, %f24
2531dbbd9e4bSmacallan	ba,pt			%xcc, leftstr
2532dbbd9e4bSmacallan	 faligndata		%f46, %ftmp1, %f26
/*
 * rvis6e: end handler 6.  After RVISEND1, %f24 is built from %ftmp2 and
 * %ftmp1, and %f26-%f30 are copied from %ftmp6-%ftmp8 (the last copy is in
 * the ble delay slot, executed either way).  The ble uses the RVISEND1
 * compare: %leftw2 <= 24 goes to leftstr.  Otherwise RVISEND2 runs and
 * %f16-%f24 are realigned from %f54-%f62 and %ftmp1 before going to
 * leftstr.
 */
2533dbbd9e4bSmacallanRVISEND1(6,rvis6e,rvis6r)
2534dbbd9e4bSmacallan	faligndata		%ftmp2, %ftmp1, %f24
2535dbbd9e4bSmacallan	fmovd			%ftmp6, %f26
2536dbbd9e4bSmacallan	fmovd			%ftmp7, %f28
2537dbbd9e4bSmacallan	ble,pn			%icc, leftstr
2538dbbd9e4bSmacallan	 fmovd			%ftmp8, %f30
2539dbbd9e4bSmacallanRVISEND2(6,f32)
2540dbbd9e4bSmacallan	fnop
2541dbbd9e4bSmacallan	fnop
2542dbbd9e4bSmacallan	fnop
2543dbbd9e4bSmacallan	fnop
2544dbbd9e4bSmacallan	faligndata		%f54, %f56, %f16
2545dbbd9e4bSmacallan	faligndata		%f56, %f58, %f18
2546dbbd9e4bSmacallan	faligndata		%f58, %f60, %f20
2547dbbd9e4bSmacallan	faligndata		%f60, %f62, %f22
2548dbbd9e4bSmacallan	ba,pt			%xcc, leftstr
2549dbbd9e4bSmacallan	 faligndata		%f62, %ftmp1, %f24
/*
 * rvis7e: end handler 7.  After RVISEND1, %f24 is built from %ftmp2 and
 * %ftmp1, and %f26-%f30 are copied from %ftmp6-%ftmp8 (the last copy is in
 * the ble delay slot, executed either way).  The ble uses the RVISEND1
 * compare: %leftw2 <= 24 goes to leftstr.  Otherwise RVISEND2 runs and
 * %f16-%f24 are realigned from %f38-%f46 and %ftmp1 before going to
 * leftstr.
 */
2550dbbd9e4bSmacallanRVISEND1(7,rvis7e,rvis7r)
2551dbbd9e4bSmacallan	faligndata		%ftmp2, %ftmp1, %f24
2552dbbd9e4bSmacallan	fmovd			%ftmp6, %f26
2553dbbd9e4bSmacallan	fmovd			%ftmp7, %f28
2554dbbd9e4bSmacallan	ble,pn			%icc, leftstr
2555dbbd9e4bSmacallan	 fmovd			%ftmp8, %f30
2556dbbd9e4bSmacallanRVISEND2(7,f48)
2557dbbd9e4bSmacallan	fnop
2558dbbd9e4bSmacallan	fnop
2559dbbd9e4bSmacallan	fnop
2560dbbd9e4bSmacallan	fnop
2561dbbd9e4bSmacallan	faligndata		%f38, %f40, %f16
2562dbbd9e4bSmacallan	faligndata		%f40, %f42, %f18
2563dbbd9e4bSmacallan	faligndata		%f42, %f44, %f20
2564dbbd9e4bSmacallan	faligndata		%f44, %f46, %f22
2565dbbd9e4bSmacallan	ba,pt			%xcc, leftstr
2566dbbd9e4bSmacallan	 faligndata		%f46, %ftmp1, %f24
/*
 * rvis8e: end handler 8.  After RVISEND1, %f22 is built from %ftmp2 and
 * %ftmp1, and %f24-%f30 are copied from %ftmp5-%ftmp8 (the last copy is in
 * the ble delay slot, executed either way).  The ble uses the RVISEND1
 * compare: %leftw2 <= 32 goes to leftstr.  Otherwise RVISEND2 runs and
 * %f16-%f22 are realigned from %f56-%f62 and %ftmp1 before going to
 * leftstr.
 */
2567dbbd9e4bSmacallanRVISEND1(8,rvis8e,rvis8r)
2568dbbd9e4bSmacallan	faligndata		%ftmp2, %ftmp1, %f22
2569dbbd9e4bSmacallan	fmovd			%ftmp5, %f24
2570dbbd9e4bSmacallan	fmovd			%ftmp6, %f26
2571dbbd9e4bSmacallan	fmovd			%ftmp7, %f28
2572dbbd9e4bSmacallan	ble,pn			%icc, leftstr
2573dbbd9e4bSmacallan	 fmovd			%ftmp8, %f30
2574dbbd9e4bSmacallanRVISEND2(8,f32)
2575dbbd9e4bSmacallan	fnop
2576dbbd9e4bSmacallan	fnop
2577dbbd9e4bSmacallan	fnop
2578dbbd9e4bSmacallan	fnop
2579dbbd9e4bSmacallan	fnop
2580dbbd9e4bSmacallan	faligndata		%f56, %f58, %f16
2581dbbd9e4bSmacallan	faligndata		%f58, %f60, %f18
2582dbbd9e4bSmacallan	faligndata		%f60, %f62, %f20
2583dbbd9e4bSmacallan	ba,pt			%xcc, leftstr
2584dbbd9e4bSmacallan	 faligndata		%f62, %ftmp1, %f22
/*
 * rvis9e: end handler 9.  After RVISEND1, %f22 is built from %ftmp2 and
 * %ftmp1, and %f24-%f30 are copied from %ftmp5-%ftmp8 (the last copy is in
 * the ble delay slot, executed either way).  The ble uses the RVISEND1
 * compare: %leftw2 <= 32 goes to leftstr.  Otherwise RVISEND2 runs and
 * %f16-%f22 are realigned from %f40-%f46 and %ftmp1 before going to
 * leftstr.
 */
2585dbbd9e4bSmacallanRVISEND1(9,rvis9e,rvis9r)
2586dbbd9e4bSmacallan	faligndata		%ftmp2, %ftmp1, %f22
2587dbbd9e4bSmacallan	fmovd			%ftmp5, %f24
2588dbbd9e4bSmacallan	fmovd			%ftmp6, %f26
2589dbbd9e4bSmacallan	fmovd			%ftmp7, %f28
2590dbbd9e4bSmacallan	ble,pn			%icc, leftstr
2591dbbd9e4bSmacallan	 fmovd			%ftmp8, %f30
2592dbbd9e4bSmacallanRVISEND2(9,f48)
2593dbbd9e4bSmacallan	fnop
2594dbbd9e4bSmacallan	fnop
2595dbbd9e4bSmacallan	fnop
2596dbbd9e4bSmacallan	fnop
2597dbbd9e4bSmacallan	fnop
2598dbbd9e4bSmacallan	faligndata		%f40, %f42, %f16
2599dbbd9e4bSmacallan	faligndata		%f42, %f44, %f18
2600dbbd9e4bSmacallan	faligndata		%f44, %f46, %f20
2601dbbd9e4bSmacallan	ba,pt			%xcc, leftstr
2602dbbd9e4bSmacallan	 faligndata		%f46, %ftmp1, %f22
/*
 * rvis10e: end handler 10.  After RVISEND1, %f20 is built from %ftmp2 and
 * %ftmp1, and %f22-%f30 are copied from %ftmp4-%ftmp8 (the last copy is in
 * the ble delay slot, executed either way).  The ble uses the RVISEND1
 * compare: %leftw2 <= 40 goes to leftstr.  Otherwise RVISEND2 runs and
 * %f16-%f20 are realigned from %f58-%f62 and %ftmp1 before going to
 * leftstr.
 */
2603dbbd9e4bSmacallanRVISEND1(10,rvis10e,rvis10r)
2604dbbd9e4bSmacallan	faligndata		%ftmp2, %ftmp1, %f20
2605dbbd9e4bSmacallan	fmovd			%ftmp4, %f22
2606dbbd9e4bSmacallan	fmovd			%ftmp5, %f24
2607dbbd9e4bSmacallan	fmovd			%ftmp6, %f26
2608dbbd9e4bSmacallan	fmovd			%ftmp7, %f28
2609dbbd9e4bSmacallan	ble,pn			%icc, leftstr
2610dbbd9e4bSmacallan	 fmovd			%ftmp8, %f30
2611dbbd9e4bSmacallanRVISEND2(10,f32)
2612dbbd9e4bSmacallan	fnop
2613dbbd9e4bSmacallan	fnop
2614dbbd9e4bSmacallan	fnop
2615dbbd9e4bSmacallan	fnop
2616dbbd9e4bSmacallan	fnop
2617dbbd9e4bSmacallan	fnop
2618dbbd9e4bSmacallan	faligndata		%f58, %f60, %f16
2619dbbd9e4bSmacallan	faligndata		%f60, %f62, %f18
2620dbbd9e4bSmacallan	ba,pt			%xcc, leftstr
2621dbbd9e4bSmacallan	 faligndata		%f62, %ftmp1, %f20
/*
 * rvis11e: end handler 11.  After RVISEND1, %f20 is built from %ftmp2 and
 * %ftmp1, and %f22-%f30 are copied from %ftmp4-%ftmp8 (the last copy is in
 * the ble delay slot, executed either way).  The ble uses the RVISEND1
 * compare: %leftw2 <= 40 goes to leftstr.  Otherwise RVISEND2 runs and
 * %f16-%f20 are realigned from %f42-%f46 and %ftmp1 before going to
 * leftstr.
 */
2622dbbd9e4bSmacallanRVISEND1(11,rvis11e,rvis11r)
2623dbbd9e4bSmacallan	faligndata		%ftmp2, %ftmp1, %f20
2624dbbd9e4bSmacallan	fmovd			%ftmp4, %f22
2625dbbd9e4bSmacallan	fmovd			%ftmp5, %f24
2626dbbd9e4bSmacallan	fmovd			%ftmp6, %f26
2627dbbd9e4bSmacallan	fmovd			%ftmp7, %f28
2628dbbd9e4bSmacallan	ble,pn			%icc, leftstr
2629dbbd9e4bSmacallan	 fmovd			%ftmp8, %f30
2630dbbd9e4bSmacallanRVISEND2(11,f48)
2631dbbd9e4bSmacallan	fnop
2632dbbd9e4bSmacallan	fnop
2633dbbd9e4bSmacallan	fnop
2634dbbd9e4bSmacallan	fnop
2635dbbd9e4bSmacallan	fnop
2636dbbd9e4bSmacallan	fnop
2637dbbd9e4bSmacallan	faligndata		%f42, %f44, %f16
2638dbbd9e4bSmacallan	faligndata		%f44, %f46, %f18
2639dbbd9e4bSmacallan	ba,pt			%xcc, leftstr
2640dbbd9e4bSmacallan	 faligndata		%f46, %ftmp1, %f20
/*
 * rvis12e: end handler 12.  After RVISEND1, %f18 is built from %ftmp2 and
 * %ftmp1, and %f20-%f30 are copied from %ftmp3-%ftmp8 (the last copy is in
 * the ble delay slot, executed either way).  The ble uses the RVISEND1
 * compare: %leftw2 <= 48 goes to leftstr.  Otherwise RVISEND2 runs and
 * %f16/%f18 are realigned from %f60, %f62 and %ftmp1 before going to
 * leftstr.
 */
2641dbbd9e4bSmacallanRVISEND1(12,rvis12e,rvis12r)
2642dbbd9e4bSmacallan	faligndata		%ftmp2, %ftmp1, %f18
2643dbbd9e4bSmacallan	fmovd			%ftmp3, %f20
2644dbbd9e4bSmacallan	fmovd			%ftmp4, %f22
2645dbbd9e4bSmacallan	fmovd			%ftmp5, %f24
2646dbbd9e4bSmacallan	fmovd			%ftmp6, %f26
2647dbbd9e4bSmacallan	fmovd			%ftmp7, %f28
2648dbbd9e4bSmacallan	ble,pn			%icc, leftstr
2649dbbd9e4bSmacallan	 fmovd			%ftmp8, %f30
2650dbbd9e4bSmacallanRVISEND2(12,f32)
2651dbbd9e4bSmacallan	fnop
2652dbbd9e4bSmacallan	fnop
2653dbbd9e4bSmacallan	fnop
2654dbbd9e4bSmacallan	fnop
2655dbbd9e4bSmacallan	fnop
2656dbbd9e4bSmacallan	fnop
2657dbbd9e4bSmacallan	fnop
2658dbbd9e4bSmacallan	faligndata		%f60, %f62, %f16
2659dbbd9e4bSmacallan	ba,pt			%xcc, leftstr
2660dbbd9e4bSmacallan	 faligndata		%f62, %ftmp1, %f18
/*
 * rvis13e: end handler 13.  After RVISEND1, %f18 is built from %ftmp2 and
 * %ftmp1, and %f20-%f30 are copied from %ftmp3-%ftmp8 (the last copy is in
 * the ble delay slot, executed either way).  The ble uses the RVISEND1
 * compare: %leftw2 <= 48 goes to leftstr.  Otherwise RVISEND2 runs and
 * %f16/%f18 are realigned from %f44, %f46 and %ftmp1 before going to
 * leftstr.
 */
2661dbbd9e4bSmacallanRVISEND1(13,rvis13e,rvis13r)
2662dbbd9e4bSmacallan	faligndata		%ftmp2, %ftmp1, %f18
2663dbbd9e4bSmacallan	fmovd			%ftmp3, %f20
2664dbbd9e4bSmacallan	fmovd			%ftmp4, %f22
2665dbbd9e4bSmacallan	fmovd			%ftmp5, %f24
2666dbbd9e4bSmacallan	fmovd			%ftmp6, %f26
2667dbbd9e4bSmacallan	fmovd			%ftmp7, %f28
2668dbbd9e4bSmacallan	ble,pn			%icc, leftstr
2669dbbd9e4bSmacallan	 fmovd			%ftmp8, %f30
2670dbbd9e4bSmacallanRVISEND2(13,f48)
2671dbbd9e4bSmacallan	fnop
2672dbbd9e4bSmacallan	fnop
2673dbbd9e4bSmacallan	fnop
2674dbbd9e4bSmacallan	fnop
2675dbbd9e4bSmacallan	fnop
2676dbbd9e4bSmacallan	fnop
2677dbbd9e4bSmacallan	fnop
2678dbbd9e4bSmacallan	faligndata		%f44, %f46, %f16
2679dbbd9e4bSmacallan	ba,pt			%xcc, leftstr
2680dbbd9e4bSmacallan	 faligndata		%f46, %ftmp1, %f18
/*
 * rvis14e: end handler 14.  After RVISEND1, %f16 is built from %ftmp2 and
 * %ftmp1 in the delay slot of the ble (executed whether or not the branch
 * is taken).  The ble uses the RVISEND1 compare: %leftw2 <= 56 goes to
 * leftstr.  Otherwise RVISEND2 runs and %f16 is rebuilt from %f62 and
 * %ftmp1 before going to leftstr.
 */
2681dbbd9e4bSmacallanRVISEND1(14,rvis14e,rvis14r)
2682dbbd9e4bSmacallan	ble,pn			%icc, leftstr
2683dbbd9e4bSmacallan	 faligndata		%ftmp2, %ftmp1, %f16
2684dbbd9e4bSmacallanRVISEND2(14,f32)
2685dbbd9e4bSmacallan	fnop
2686dbbd9e4bSmacallan	fnop
2687dbbd9e4bSmacallan	fnop
2688dbbd9e4bSmacallan	fnop
2689dbbd9e4bSmacallan	fnop
2690dbbd9e4bSmacallan	fnop
2691dbbd9e4bSmacallan	fnop
2692dbbd9e4bSmacallan	ba,pt			%xcc, leftstr
2693dbbd9e4bSmacallan	 faligndata		%f62, %ftmp1, %f16
/*
 * rvis15e: end handler 15.  After RVISEND1, %f16 is built from %ftmp2 and
 * %ftmp1 in the delay slot of the ble (executed whether or not the branch
 * is taken).  The ble uses the RVISEND1 compare: %leftw2 <= 56 goes to
 * leftstr.  Otherwise RVISEND2 runs and %f16 is rebuilt from %f46 and
 * %ftmp1 before going to leftstr.
 */
2694dbbd9e4bSmacallanRVISEND1(15,rvis15e,rvis15r)
2695dbbd9e4bSmacallan	ble,pn			%icc, leftstr
2696dbbd9e4bSmacallan	 faligndata		%ftmp2, %ftmp1, %f16
2697dbbd9e4bSmacallanRVISEND2(15,f48)
2698dbbd9e4bSmacallan	fnop
2699dbbd9e4bSmacallan	fnop
2700dbbd9e4bSmacallan	fnop
2701dbbd9e4bSmacallan	fnop
2702dbbd9e4bSmacallan	fnop
2703dbbd9e4bSmacallan	fnop
2704dbbd9e4bSmacallan	fnop
2705dbbd9e4bSmacallan	ba,pt			%xcc, leftstr
2706dbbd9e4bSmacallan	 faligndata		%f46, %ftmp1, %f16
2707dbbd9e4bSmacallan
/*
 * RREPEND(tgt, tgtr): four-instruction end handler; defines label tgt.
 *
 * %srcnotdone != 0: %src = %srcnext (delay slot, executed on both paths)
 *   and branch to tgtr - 8.
 * %srcnotdone == 0: branch to tgtr with membar SYNC in the delay slot.
 * The "tgtr - 8" target depends on the instruction layout at tgtr.
 */
2708dbbd9e4bSmacallan#define RREPEND(tgt,tgtr)					\
2709dbbd9e4bSmacallantgt:								\
2710dbbd9e4bSmacallan	brnz,pt			%srcnotdone, tgtr - 8;		\
2711dbbd9e4bSmacallan	 mov			%srcnext, %src;			\
2712dbbd9e4bSmacallan	ba,pt			%xcc, tgtr;			\
2713dbbd9e4bSmacallan	 membar			SYNC;
2714dbbd9e4bSmacallan
/*
 * Sixteen RREPEND instantiations defining rrep0e .. rrep15e, each paired
 * with the matching rrepNr label (defined outside this section).  The
 * group starts on a 16-byte boundary; each expansion is four instructions.
 */
2715dbbd9e4bSmacallan	.align			16
2716dbbd9e4bSmacallanRREPEND(rrep0e,rrep0r)
2717dbbd9e4bSmacallanRREPEND(rrep1e,rrep1r)
2718dbbd9e4bSmacallanRREPEND(rrep2e,rrep2r)
2719dbbd9e4bSmacallanRREPEND(rrep3e,rrep3r)
2720dbbd9e4bSmacallanRREPEND(rrep4e,rrep4r)
2721dbbd9e4bSmacallanRREPEND(rrep5e,rrep5r)
2722dbbd9e4bSmacallanRREPEND(rrep6e,rrep6r)
2723dbbd9e4bSmacallanRREPEND(rrep7e,rrep7r)
2724dbbd9e4bSmacallanRREPEND(rrep8e,rrep8r)
2725dbbd9e4bSmacallanRREPEND(rrep9e,rrep9r)
2726dbbd9e4bSmacallanRREPEND(rrep10e,rrep10r)
2727dbbd9e4bSmacallanRREPEND(rrep11e,rrep11r)
2728dbbd9e4bSmacallanRREPEND(rrep12e,rrep12r)
2729dbbd9e4bSmacallanRREPEND(rrep13e,rrep13r)
2730dbbd9e4bSmacallanRREPEND(rrep14e,rrep14r)
2731dbbd9e4bSmacallanRREPEND(rrep15e,rrep15r)
2732