xform.S revision 7117f1b4
1/* $Id: xform.S,v 1.1.1.1 2008/07/29 05:10:21 mrg Exp $ */
2
3	/* TODO
4	 *
5	 * 1) It would be nice if load/store double could be used
6	 *    at least for the matrix parts.  I think for the matrices
7	 *    it is safe, but for the vertices it probably is not due to
8	 *    things like glInterleavedArrays etc.
9	 *
10	 *    UPDATE: Trying this now in sparc_matrix.h -DaveM_990624
11	 *
12	 * 2) One extremely slick trick would be if we could enclose
13	 *    groups of xform calls on the same vertices such that
14	 *    we just load the matrix into f16-->f31 before the calls
15	 *    and then we would not have to do them here.  This may be
16	 *    tricky and not much of a gain though.
17	 */
18
19#include "sparc_matrix.h"
20
21#if defined(SVR4) || defined(__SVR4) || defined(__svr4__)
22	/* Solaris requires this for 64-bit. */
23        .register %g2, #scratch
24        .register %g3, #scratch
25#endif
26
27	.text
28	.align	64
29
/*
 * __set_v4f_1: common exit for routines that produce 1-component output.
 * %o0 = output vector descriptor.  Stores 1 into its V4F_SIZE field,
 * ORs VEC_SIZE_1 into its V4F_FLAGS field and returns to the caller
 * of the routine that branched here.  Clobbers %g1 and %g2.
 */
__set_v4f_1:
	mov	1, %g1				! new size value
	ld	[%o0 + V4F_FLAGS], %g2		! current flag word
	st	%g1, [%o0 + V4F_SIZE]
	or	%g2, VEC_SIZE_1, %g2
	retl
	 st	%g2, [%o0 + V4F_FLAGS]		! delay slot: write flags back
/*
 * __set_v4f_2: common exit for routines that produce 2-component output.
 * %o0 = output vector descriptor.  Stores 2 into its V4F_SIZE field,
 * ORs VEC_SIZE_2 into its V4F_FLAGS field and returns to the caller
 * of the routine that branched here.  Clobbers %g1 and %g2.
 */
__set_v4f_2:
	mov	2, %g1				! new size value
	ld	[%o0 + V4F_FLAGS], %g2		! current flag word
	st	%g1, [%o0 + V4F_SIZE]
	or	%g2, VEC_SIZE_2, %g2
	retl
	 st	%g2, [%o0 + V4F_FLAGS]		! delay slot: write flags back
/*
 * __set_v4f_3: common exit for routines that produce 3-component output.
 * %o0 = output vector descriptor.  Stores 3 into its V4F_SIZE field,
 * ORs VEC_SIZE_3 into its V4F_FLAGS field and returns to the caller
 * of the routine that branched here.  Clobbers %g1 and %g2.
 */
__set_v4f_3:
	mov	3, %g1				! new size value
	ld	[%o0 + V4F_FLAGS], %g2		! current flag word
	st	%g1, [%o0 + V4F_SIZE]
	or	%g2, VEC_SIZE_3, %g2
	retl
	 st	%g2, [%o0 + V4F_FLAGS]		! delay slot: write flags back
/*
 * __set_v4f_4: common exit for routines that produce 4-component output.
 * %o0 = output vector descriptor.  Stores 4 into its V4F_SIZE field,
 * ORs VEC_SIZE_4 into its V4F_FLAGS field and returns to the caller
 * of the routine that branched here.  Clobbers %g1 and %g2.
 */
__set_v4f_4:
	mov	4, %g1				! new size value
	ld	[%o0 + V4F_FLAGS], %g2		! current flag word
	st	%g1, [%o0 + V4F_SIZE]
	or	%g2, VEC_SIZE_4, %g2
	retl
	 st	%g2, [%o0 + V4F_FLAGS]		! delay slot: write flags back
58
59	/* First the raw versions. */
60
/*
 * _mesa_sparc_transform_points1_general
 *
 * Arguments: %o0 = output vector descriptor, %o1 = matrix base handed to
 * LDMATRIX_*, %o2 = input vector descriptor.
 *
 * Only the first float (x) of each input element is read.  Four floats
 * are written per element, at a fixed 16-byte output pitch:
 *	out[k] = x * Mk + M(12+k)	for k = 0..3
 * The input count is copied into the output descriptor.  The main loop
 * handles two elements per pass; label 2 handles a single or odd last
 * element.  Leaves through __set_v4f_4.  The order of the FP
 * instructions is the original hand-scheduled order.
 */
	.globl	_mesa_sparc_transform_points1_general
_mesa_sparc_transform_points1_general:
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = input cursor
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = output cursor
	ld	[%o2 + V4F_STRIDE], %o5		! %o5 = input stride in bytes
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = element count

	LDMATRIX_0_1_2_3_12_13_14_15(%o1)

	cmp	%g3, 1
	st	%g3, [%o0 + V4F_COUNT]		! output count = input count
	bl	3f				! count < 1: no elements
	 mov	%g0, %o1			! %o1 now counts elements done

	be	2f				! count == 1: tail only
	 andn	%g3, 1, %o2			! %o2 = count with bit 0 cleared

	! Two elements per pass.
1:	ld	[%g1], %f0			! x of element A
	add	%g1, %o5, %g1
	ld	[%g1], %f8			! x of element B
	inc	2, %o1
	add	%g1, %o5, %g1
	fmuls	%f0, M0, %f1
	fmuls	%f0, M1, %f2
	fmuls	%f0, M2, %f3
	fmuls	%f0, M3, %f4
	fmuls	%f8, M0, %f9
	fadds	%f1, M12, %f1
	st	%f1, [%g2]
	fmuls	%f8, M1, %f10
	fadds	%f2, M13, %f2
	st	%f2, [%g2 + 4]
	fmuls	%f8, M2, %f11
	fadds	%f3, M14, %f3
	st	%f3, [%g2 + 8]
	fmuls	%f8, M3, %f12
	fadds	%f4, M15, %f4
	st	%f4, [%g2 + 12]
	fadds	%f9, M12, %f9
	st	%f9, [%g2 + 16]
	fadds	%f10, M13, %f10
	st	%f10, [%g2 + 20]
	fadds	%f11, M14, %f11
	st	%f11, [%g2 + 24]
	fadds	%f12, M15, %f12
	st	%f12, [%g2 + 28]
	cmp	%o1, %o2
	bne	1b
	 inc	32, %g2				! delay slot: two output slots on

	cmp	%o1, %g3			! even count: all done
	be	3f
	 nop

	! One remaining element.
2:	ld	[%g1], %f0
	fmuls	%f0, M0, %f1
	fmuls	%f0, M1, %f2
	fmuls	%f0, M2, %f3
	fmuls	%f0, M3, %f4
	fadds	%f1, M12, %f1
	st	%f1, [%g2]
	fadds	%f2, M13, %f2
	st	%f2, [%g2 + 4]
	fadds	%f3, M14, %f3
	st	%f3, [%g2 + 8]
	fadds	%f4, M15, %f4
	st	%f4, [%g2 + 12]

3:
	ba	__set_v4f_4
	 nop
132
/*
 * _mesa_sparc_transform_points1_identity
 *
 * Arguments: %o0 = output vector descriptor, %o2 = input vector
 * descriptor (%o1 is not read).
 *
 * Returns at once, touching nothing, when %o0 and %o2 compare equal.
 * Otherwise copies the first float of each input element to the first
 * float of each 16-byte output slot, copies the input count into the
 * output descriptor, and leaves through __set_v4f_1.  The main loop
 * copies two elements per pass; label 2 copies a single or odd last one.
 */
	.globl	_mesa_sparc_transform_points1_identity
_mesa_sparc_transform_points1_identity:
	cmp	%o0, %o2
	be	4f				! in == out: nothing to copy
	 ld	[%o2 + V4F_STRIDE], %o5		! delay slot: input stride
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = output cursor
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = input cursor
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = element count

	cmp	%g3, 1
	st	%g3, [%o0 + V4F_COUNT]		! output count = input count
	bl	3f				! count < 1: no elements
	 mov	%g0, %o1			! %o1 = elements done

	be	2f				! count == 1: tail only
	 andn	%g3, 1, %o2			! %o2 = count with bit 0 cleared

1:	ld	[%g1], %f0			! element A
	add	%g1, %o5, %g1
	ld	[%g1], %f1			! element B
	inc	2, %o1
	add	%g1, %o5, %g1
	st	%f0, [%g2]
	cmp	%o1, %o2
	st	%f1, [%g2 + 16]
	bne	1b
	 inc	32, %g2				! delay slot: two output slots on

	cmp	%o1, %g3			! even count: all done
	be	3f
	 nop

2:	ld	[%g1], %f0
	addx	%g0, %g0, %g0			! result goes to %g0, so no visible effect
	st	%f0, [%g2]

3:
	ba	__set_v4f_1
	 nop

4:	retl
	 nop
175
/*
 * _mesa_sparc_transform_points1_2d
 *
 * Arguments: %o0 = output vector descriptor, %o1 = matrix base handed to
 * LDMATRIX_*, %o2 = input vector descriptor.
 *
 * Reads x (first float) of each input element and writes two floats per
 * 16-byte output slot:
 *	out[0] = x * M0 + M12
 *	out[1] = x * M1 + M13
 * The input count is copied into the output descriptor.  Two elements
 * per pass in the main loop, single or odd last element at label 2.
 * Leaves through __set_v4f_2.
 */
	.globl	_mesa_sparc_transform_points1_2d
_mesa_sparc_transform_points1_2d:
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = input cursor
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = output cursor
	ld	[%o2 + V4F_STRIDE], %o5		! %o5 = input stride in bytes
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = element count

	LDMATRIX_0_1_12_13(%o1)

	cmp	%g3, 1
	st	%g3, [%o0 + V4F_COUNT]		! output count = input count
	bl	3f				! count < 1: no elements
	 mov	%g0, %o1			! %o1 = elements done

	be	2f				! count == 1: tail only
	 andn	%g3, 1, %o2			! %o2 = count with bit 0 cleared

1:	ld	[%g1], %f0			! x of element A
	add	%g1, %o5, %g1
	ld	[%g1], %f8			! x of element B
	inc	2, %o1
	add	%g1, %o5, %g1
	fmuls	%f0, M0, %f1
	fmuls	%f0, M1, %f2
	fmuls	%f8, M0, %f9
	fmuls	%f8, M1, %f10
	fadds	%f1, M12, %f3
	st	%f3, [%g2]
	fadds	%f2, M13, %f4
	st	%f4, [%g2 + 4]
	fadds	%f9, M12, %f11
	st	%f11, [%g2 + 16]
	fadds	%f10, M13, %f12
	st	%f12, [%g2 + 20]
	cmp	%o1, %o2
	bne	1b
	 inc	32, %g2				! delay slot: two output slots on

	cmp	%o1, %g3			! even count: all done
	be	3f
	 nop

2:	ld	[%g1], %f0
	fmuls	%f0, M0, %f1
	fmuls	%f0, M1, %f2
	fadds	%f1, M12, %f3
	st	%f3, [%g2]
	fadds	%f2, M13, %f4
	st	%f4, [%g2 + 4]

3:
	ba	__set_v4f_2
	 nop
229
/*
 * _mesa_sparc_transform_points1_2d_no_rot
 *
 * Arguments: %o0 = output vector descriptor, %o1 = matrix base handed to
 * LDMATRIX_*, %o2 = input vector descriptor.
 *
 * Reads x (first float) of each input element and writes two floats per
 * 16-byte output slot:
 *	out[0] = x * M0 + M12
 *	out[1] = M13
 * The input count is copied into the output descriptor.  Two elements
 * per pass in the main loop, single or odd last element at label 2.
 * Leaves through __set_v4f_2.
 */
	.globl	_mesa_sparc_transform_points1_2d_no_rot
_mesa_sparc_transform_points1_2d_no_rot:
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = input cursor
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = output cursor
	ld	[%o2 + V4F_STRIDE], %o5		! %o5 = input stride in bytes
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = element count

	LDMATRIX_0_12_13(%o1)

	cmp	%g3, 1
	st	%g3, [%o0 + V4F_COUNT]		! output count = input count
	bl	3f				! count < 1: no elements
	 mov	%g0, %o1			! %o1 = elements done

	be	2f				! count == 1: tail only
	 andn	%g3, 1, %o2			! %o2 = count with bit 0 cleared

1:	ld	[%g1], %f0			! x of element A
	add	%g1, %o5, %g1
	ld	[%g1], %f4			! x of element B
	inc	2, %o1
	add	%g1, %o5, %g1
	fmuls	%f0, M0, %f1
	fmuls	%f4, M0, %f5
	fadds	%f1, M12, %f3
	st	%f3, [%g2]
	st	M13, [%g2 + 4]			! constant second component
	fadds	%f5, M12, %f6
	st	%f6, [%g2 + 16]
	st	M13, [%g2 + 20]
	cmp	%o1, %o2
	bne	1b
	 inc	32, %g2				! delay slot: two output slots on

	cmp	%o1, %g3			! even count: all done
	be	3f
	 nop

2:	ld	[%g1], %f0
	fmuls	%f0, M0, %f1
	fadds	%f1, M12, %f3
	st	%f3, [%g2]
	st	M13, [%g2 + 4]

3:
	ba	__set_v4f_2
	 nop
277
/*
 * _mesa_sparc_transform_points1_3d
 *
 * Arguments: %o0 = output vector descriptor, %o1 = matrix base handed to
 * LDMATRIX_*, %o2 = input vector descriptor.
 *
 * Reads x (first float) of each input element and writes three floats
 * per 16-byte output slot:
 *	out[k] = x * Mk + M(12+k)	for k = 0..2
 * The input count is copied into the output descriptor.  Two elements
 * per pass in the main loop, single or odd last element at label 2.
 * Leaves through __set_v4f_3.  FP instruction order is the original
 * hand-scheduled order.
 */
	.globl	_mesa_sparc_transform_points1_3d
_mesa_sparc_transform_points1_3d:
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = input cursor
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = output cursor
	ld	[%o2 + V4F_STRIDE], %o5		! %o5 = input stride in bytes
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = element count

	LDMATRIX_0_1_2_12_13_14(%o1)

	cmp	%g3, 1
	st	%g3, [%o0 + V4F_COUNT]		! output count = input count
	bl	3f				! count < 1: no elements
	 mov	%g0, %o1			! %o1 = elements done

	be	2f				! count == 1: tail only
	 andn	%g3, 1, %o2			! %o2 = count with bit 0 cleared

1:	ld	[%g1], %f0			! x of element A
	add	%g1, %o5, %g1
	ld	[%g1], %f4			! x of element B
	inc	2, %o1
	add	%g1, %o5, %g1
	fmuls	%f0, M0, %f1
	fmuls	%f0, M1, %f2
	fmuls	%f0, M2, %f3
	fmuls	%f4, M0, %f5
	fadds	%f1, M12, %f1
	st	%f1, [%g2]
	fmuls	%f4, M1, %f6
	fadds	%f2, M13, %f2
	st	%f2, [%g2 + 4]
	fmuls	%f4, M2, %f7
	fadds	%f3, M14, %f3
	st	%f3, [%g2 + 8]
	fadds	%f5, M12, %f5
	st	%f5, [%g2 + 16]
	fadds	%f6, M13, %f6
	st	%f6, [%g2 + 20]
	fadds	%f7, M14, %f7
	st	%f7, [%g2 + 24]
	cmp	%o1, %o2
	bne	1b
	 inc	32, %g2				! delay slot: two output slots on

	cmp	%o1, %g3			! even count: all done
	be	3f
	 nop

2:	ld	[%g1], %f0
	fmuls	%f0, M0, %f1
	fmuls	%f0, M1, %f2
	fmuls	%f0, M2, %f3
	fadds	%f1, M12, %f1
	st	%f1, [%g2]
	fadds	%f2, M13, %f2
	st	%f2, [%g2 + 4]
	fadds	%f3, M14, %f3
	st	%f3, [%g2 + 8]

3:
	ba	__set_v4f_3
	 nop
340
/*
 * _mesa_sparc_transform_points1_3d_no_rot
 *
 * Arguments: %o0 = output vector descriptor, %o1 = matrix base handed to
 * LDMATRIX_*, %o2 = input vector descriptor.
 *
 * Reads x (first float) of each input element and writes three floats
 * per 16-byte output slot:
 *	out[0] = x * M0 + M12
 *	out[1] = M13
 *	out[2] = M14
 * The input count is copied into the output descriptor.  Two elements
 * per pass in the main loop, single or odd last element at label 2.
 * Leaves through __set_v4f_3.
 */
	.globl	_mesa_sparc_transform_points1_3d_no_rot
_mesa_sparc_transform_points1_3d_no_rot:
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = input cursor
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = output cursor
	ld	[%o2 + V4F_STRIDE], %o5		! %o5 = input stride in bytes
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = element count

	LDMATRIX_0_12_13_14(%o1)

	cmp	%g3, 1
	st	%g3, [%o0 + V4F_COUNT]		! output count = input count
	bl	3f				! count < 1: no elements
	 mov	%g0, %o1			! %o1 = elements done

	be	2f				! count == 1: tail only
	 andn	%g3, 1, %o2			! %o2 = count with bit 0 cleared

1:	ld	[%g1], %f0			! x of element A
	add	%g1, %o5, %g1
	ld	[%g1], %f2			! x of element B
	inc	2, %o1
	add	%g1, %o5, %g1
	fmuls	%f0, M0, %f1
	fmuls	%f2, M0, %f3
	fadds	%f1, M12, %f1
	st	%f1, [%g2]
	fadds	%f3, M12, %f3
	st	M13, [%g2 + 4]			! constant components, element A
	st	M14, [%g2 + 8]
	st	%f3, [%g2 + 16]
	st	M13, [%g2 + 20]			! constant components, element B
	st	M14, [%g2 + 24]
	cmp	%o1, %o2
	bne	1b
	 inc	32, %g2				! delay slot: two output slots on

	cmp	%o1, %g3			! even count: all done
	be	3f
	 nop

2:	ld	[%g1], %f0
	fmuls	%f0, M0, %f1
	fadds	%f1, M12, %f1
	st	%f1, [%g2]
	st	M13, [%g2 + 4]
	st	M14, [%g2 + 8]

3:
	ba	__set_v4f_3
	 nop
391
/*
 * _mesa_sparc_transform_points1_perspective
 *
 * Arguments: %o0 = output vector descriptor, %o1 = matrix base handed to
 * LDMATRIX_*, %o2 = input vector descriptor.
 *
 * Reads x (first float) of each input element and writes four words per
 * 16-byte output slot:
 *	out[0] = x * M0
 *	out[1] = 0	(stored from %g0, i.e. an all-zero word)
 *	out[2] = M14
 *	out[3] = 0	(stored from %g0)
 * The input count is copied into the output descriptor.  Two elements
 * per pass in the main loop, single or odd last element at label 2.
 * Leaves through __set_v4f_4.
 */
	.globl	_mesa_sparc_transform_points1_perspective
_mesa_sparc_transform_points1_perspective:
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = input cursor
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = output cursor
	ld	[%o2 + V4F_STRIDE], %o5		! %o5 = input stride in bytes
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = element count

	LDMATRIX_0_14(%o1)

	cmp	%g3, 1
	st	%g3, [%o0 + V4F_COUNT]		! output count = input count
	bl	3f				! count < 1: no elements
	 mov	%g0, %o1			! %o1 = elements done

	be	2f				! count == 1: tail only
	 andn	%g3, 1, %o2			! %o2 = count with bit 0 cleared

1:	ld	[%g1], %f0			! x of element A
	add	%g1, %o5, %g1
	ld	[%g1], %f2			! x of element B
	inc	2, %o1
	add	%g1, %o5, %g1
	fmuls	%f0, M0, %f1
	st	%f1, [%g2]
	fmuls	%f2, M0, %f3
	st	%g0, [%g2 + 4]
	st	M14, [%g2 + 8]
	st	%g0, [%g2 + 12]
	st	%f3, [%g2 + 16]
	st	%g0, [%g2 + 20]
	st	M14, [%g2 + 24]
	st	%g0, [%g2 + 28]
	cmp	%o1, %o2
	bne	1b
	 inc	32, %g2				! delay slot: two output slots on

	cmp	%o1, %g3			! even count: all done
	be	3f
	 nop

2:	ld	[%g1], %f0
	fmuls	%f0, M0, %f1
	st	%f1, [%g2]
	st	%g0, [%g2 + 4]
	st	M14, [%g2 + 8]
	st	%g0, [%g2 + 12]

3:
	ba	__set_v4f_4
	 nop
442
/*
 * _mesa_sparc_transform_points2_general
 *
 * Arguments: %o0 = output vector descriptor, %o1 = matrix base handed to
 * LDMATRIX_*, %o2 = input vector descriptor.
 *
 * Reads x and y (first two floats) of each input element and writes four
 * floats per 16-byte output slot:
 *	out[k] = (x * Mk + M(12+k)) + y * M(4+k)	for k = 0..3
 * The input count is copied into the output descriptor.  One element
 * per loop pass; a zero count skips the loop.  Leaves through
 * __set_v4f_4.  FP instruction order is the original hand-scheduled
 * order.
 */
	.globl	_mesa_sparc_transform_points2_general
_mesa_sparc_transform_points2_general:
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = input cursor
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = output cursor
	ld	[%o2 + V4F_STRIDE], %o5		! %o5 = input stride in bytes
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = element count

	LDMATRIX_0_1_2_3_4_5_6_7_12_13_14_15(%o1)

	tst	%g3
	st	%g3, [%o0 + V4F_COUNT]		! output count = input count
	bz	2f				! empty input
	 mov	%g0, %o1			! %o1 = elements done

1:	ld	[%g1], %f0			! x
	ld	[%g1 + 4], %f1			! y
	inc	%o1
	add	%g1, %o5, %g1
	fmuls	%f0, M0, %f2
	fmuls	%f0, M1, %f3
	fmuls	%f0, M2, %f4
	fmuls	%f0, M3, %f5
	fadds	%f2, M12, %f2
	fmuls	%f1, M4, %f6
	fadds	%f3, M13, %f3
	fmuls	%f1, M5, %f7
	fadds	%f4, M14, %f4
	fmuls	%f1, M6, %f8
	fadds	%f5, M15, %f5
	fmuls	%f1, M7, %f9
	fadds	%f2, %f6, %f2
	st	%f2, [%g2]
	fadds	%f3, %f7, %f3
	st	%f3, [%g2 + 4]
	fadds	%f4, %f8, %f4
	st	%f4, [%g2 + 8]
	fadds	%f5, %f9, %f5
	st	%f5, [%g2 + 12]
	cmp	%o1, %g3
	bne	1b
	 inc	16, %g2				! delay slot: next output slot
2:
	ba	__set_v4f_4
	 nop
487
/*
 * _mesa_sparc_transform_points2_identity
 *
 * Arguments: %o0 = output vector descriptor, %o2 = input vector
 * descriptor (%o1 is not read).
 *
 * Returns at once, touching nothing, when %o2 and %o0 compare equal.
 * Otherwise copies the first two floats of each input element to the
 * start of each 16-byte output slot, copies the input count into the
 * output descriptor, and leaves through __set_v4f_2.
 */
	.globl	_mesa_sparc_transform_points2_identity
_mesa_sparc_transform_points2_identity:
	cmp	%o2, %o0
	be	3f				! in == out: nothing to copy
	 ld	[%o2 + V4F_STRIDE], %o5		! delay slot: input stride
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = output cursor
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = input cursor
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = element count

	tst	%g3
	st	%g3, [%o0 + V4F_COUNT]		! output count = input count
	bz	2f				! empty input
	 mov	%g0, %o1			! %o1 = elements done

1:	ld	[%g1], %f0
	inc	%o1
	ld	[%g1 + 4], %f1
	add	%g1, %o5, %g1
	cmp	%o1, %g3
	st	%f0, [%g2]
	st	%f1, [%g2 + 4]
	bne	1b
	 inc	16, %g2				! delay slot: next output slot
2:
	ba	__set_v4f_2
	 nop

3:	retl
	 nop
517
/*
 * _mesa_sparc_transform_points2_2d
 *
 * Arguments: %o0 = output vector descriptor, %o1 = matrix base handed to
 * LDMATRIX_*, %o2 = input vector descriptor.
 *
 * Reads x and y (first two floats) of each input element and writes two
 * floats per 16-byte output slot:
 *	out[0] = (x * M0 + M12) + y * M4
 *	out[1] = (x * M1 + M13) + y * M5
 * The input count is copied into the output descriptor.  Two elements
 * per pass in the main loop, single or odd last element at label 2.
 * Leaves through __set_v4f_2.  FP instruction order is the original
 * hand-scheduled order.
 */
	.globl	_mesa_sparc_transform_points2_2d
_mesa_sparc_transform_points2_2d:
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = input cursor
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = output cursor
	ld	[%o2 + V4F_STRIDE], %o5		! %o5 = input stride in bytes
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = element count

	LDMATRIX_0_1_4_5_12_13(%o1)

	cmp	%g3, 1
	st	%g3, [%o0 + V4F_COUNT]		! output count = input count
	bl	3f				! count < 1: no elements
	 mov	%g0, %o1			! %o1 = elements done

	be	2f				! count == 1: tail only
	 andn	%g3, 1, %o2			! %o2 = count with bit 0 cleared

1:	ld	[%g1], %f0			! x of element A
	ld	[%g1 + 4], %f1			! y of element A
	inc	2, %o1
	add	%g1, %o5, %g1
	fmuls	%f0, M0, %f2
	ld	[%g1], %f8			! x of element B
	fmuls	%f0, M1, %f3
	ld	[%g1 + 4], %f9			! y of element B
	fmuls	%f1, M4, %f6
	fmuls	%f1, M5, %f7
	add	%g1, %o5, %g1
	fmuls	%f8, M0, %f10
	fadds	%f2, M12, %f2
	fmuls	%f8, M1, %f11
	fadds	%f3, M13, %f3
	fmuls	%f9, M4, %f12
	fmuls	%f9, M5, %f13
	fadds	%f10, M12, %f10
	fadds	%f2, %f6, %f2
	st	%f2, [%g2]
	fadds	%f11, M13, %f11
	fadds	%f3, %f7, %f3
	st	%f3, [%g2 + 4]
	fadds	%f10, %f12, %f10
	st	%f10, [%g2 + 16]
	fadds	%f11, %f13, %f11
	st	%f11, [%g2 + 20]
	cmp	%o1, %o2
	bne	1b
	 inc	32, %g2				! delay slot: two output slots on

	cmp	%o1, %g3			! even count: all done
	be	3f
	 nop

2:	ld	[%g1], %f0
	ld	[%g1 + 4], %f1
	fmuls	%f0, M0, %f2
	fmuls	%f0, M1, %f3
	fmuls	%f1, M4, %f6
	fmuls	%f1, M5, %f7
	fadds	%f2, M12, %f2
	fadds	%f3, M13, %f3
	fadds	%f2, %f6, %f2
	st	%f2, [%g2]
	fadds	%f3, %f7, %f3
	st	%f3, [%g2 + 4]

3:
	ba	__set_v4f_2
	 nop
586
/*
 * _mesa_sparc_transform_points2_2d_no_rot
 *
 * Arguments: %o0 = output vector descriptor, %o1 = matrix base handed to
 * LDMATRIX_*, %o2 = input vector descriptor.
 *
 * Reads x and y (first two floats) of each input element and writes two
 * floats per 16-byte output slot:
 *	out[0] = x * M0 + M12
 *	out[1] = y * M5 + M13
 * The input count is copied into the output descriptor.  Two elements
 * per pass in the main loop, single or odd last element at label 2.
 * Leaves through __set_v4f_2.
 */
	.globl	_mesa_sparc_transform_points2_2d_no_rot
_mesa_sparc_transform_points2_2d_no_rot:
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = input cursor
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = output cursor
	ld	[%o2 + V4F_STRIDE], %o5		! %o5 = input stride in bytes
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = element count

	LDMATRIX_0_5_12_13(%o1)

	cmp	%g3, 1
	st	%g3, [%o0 + V4F_COUNT]		! output count = input count
	bl	3f				! count < 1: no elements
	 mov	%g0, %o1			! %o1 = elements done

	be	2f				! count == 1: tail only
	 andn	%g3, 1, %o2			! %o2 = count with bit 0 cleared

1:	ld	[%g1], %f0			! x of element A
	ld	[%g1 + 4], %f1			! y of element A
	inc	2, %o1
	add	%g1, %o5, %g1
	ld	[%g1], %f4			! x of element B
	fmuls	%f0, M0, %f2
	ld	[%g1 + 4], %f5			! y of element B
	fmuls	%f1, M5, %f3
	fmuls	%f4, M0, %f6
	add	%g1, %o5, %g1
	fmuls	%f5, M5, %f7
	fadds	%f2, M12, %f2
	st	%f2, [%g2]
	fadds	%f3, M13, %f3
	st	%f3, [%g2 + 4]
	fadds	%f6, M12, %f6
	st	%f6, [%g2 + 16]
	fadds	%f7, M13, %f7
	st	%f7, [%g2 + 20]
	cmp	%o1, %o2
	bne	1b
	 inc	32, %g2				! delay slot: two output slots on

	cmp	%o1, %g3			! even count: all done
	be	3f
	 nop

2:	ld	[%g1], %f0
	ld	[%g1 + 4], %f1
	fmuls	%f0, M0, %f2
	fmuls	%f1, M5, %f3
	fadds	%f2, M12, %f2
	st	%f2, [%g2]
	fadds	%f3, M13, %f3
	st	%f3, [%g2 + 4]

3:
	ba	__set_v4f_2
	 nop
643
/*
 * _mesa_sparc_transform_points2_3d
 *
 * Arguments: %o0 = output vector descriptor, %o1 = matrix base handed to
 * LDMATRIX_*, %o2 = input vector descriptor.
 *
 * Reads x and y (first two floats) of each input element and writes
 * three floats per 16-byte output slot:
 *	out[k] = (x * Mk + M(12+k)) + y * M(4+k)	for k = 0..2
 * The input count is copied into the output descriptor.  Two elements
 * per pass in the main loop, single or odd last element at label 2.
 * Leaves through __set_v4f_3.
 *
 * The two V4F_START fields are fetched with LDPTR, as in every other
 * routine in this file.  A plain 32-bit `ld' here would pick up only
 * part of the pointer wherever LDPTR is wider than `ld' (presumably
 * 64-bit builds; LDPTR is defined in sparc_matrix.h).
 */
	/* orig: 12 cycles */
	.globl	_mesa_sparc_transform_points2_3d
_mesa_sparc_transform_points2_3d:
	ld	[%o2 + V4F_STRIDE], %o5		! input stride in bytes
	LDPTR	[%o2 + V4F_START], %g1		! input cursor (pointer-width load)
	LDPTR	[%o0 + V4F_START], %g2		! output cursor (pointer-width load)
	ld	[%o2 + V4F_COUNT], %g3		! element count

	LDMATRIX_0_1_2_3_4_5_6_12_13_14(%o1)

	cmp	%g3, 1
	st	%g3, [%o0 + V4F_COUNT]		! output count = input count
	bl	3f				! count < 1: no elements
	 clr	%o1				! %o1 = elements done

	be	2f				! count == 1: tail only
	 andn	%g3, 1, %o2			! %o2 = count with bit 0 cleared

1:	ld	[%g1 + 0x00], %f0	! LSU	Group
	ld	[%g1 + 0x04], %f1	! LSU	Group
	add	%o1, 2, %o1		! IEU0
	add	%g1, %o5, %g1		! IEU1
	ld	[%g1 + 0x00], %f9	! LSU	Group
	fmuls	%f0, M0, %f2		! FGM
	ld	[%g1 + 0x04], %f10	! LSU	Group
	fmuls	%f0, M1, %f3		! FGM
	fmuls	%f0, M2, %f4		! FGM	Group
	add	%g1, %o5, %g1		! IEU0
	fmuls	%f1, M4, %f6		! FGM	Group
	fmuls	%f1, M5, %f7		! FGM	Group	f2 available
	fadds	%f2, M12, %f2		! FGA
	fmuls	%f1, M6, %f8		! FGM	Group	f3 available
	fadds	%f3, M13, %f3		! FGA
	fmuls	%f9, M0, %f11		! FGM	Group	f4 available
	fadds	%f4, M14, %f4		! FGA
	fmuls	%f9, M1, %f12		! FGM	Group	f6 available
	fmuls	%f9, M2, %f13		! FGM	Group	f2, f7 available
	fadds	%f2, %f6, %f2		! FGA
	st	%f2, [%g2 + 0x00]	! LSU
	fmuls	%f10, M4, %f14		! FGM	Group	f3, f8 available
	fadds	%f3, %f7, %f3		! FGA
	st	%f3, [%g2 + 0x04]	! LSU
	fmuls	%f10, M5, %f15		! FGM	Group	f4, f11 available
	fadds	%f11, M12, %f11		! FGA
	fmuls	%f10, M6, %f0		! FGM	Group	f12 available
	fadds	%f12, M13, %f12		! FGA
	fadds	%f13, M14, %f13		! FGA	Group	f13 available
	fadds	%f4, %f8, %f4		! FGA	Group	f14 available
	st	%f4, [%g2 + 0x08]	! LSU
	fadds	%f11, %f14, %f11	! FGA	Group	f15, f11 available
	st	%f11, [%g2 + 0x10]	! LSU
	fadds	%f12, %f15, %f12	! FGA	Group	f0, f12 available
	st	%f12, [%g2 + 0x14]	! LSU
	fadds	%f13, %f0, %f13		! FGA	Group	f13 available
	st	%f13, [%g2 + 0x18]	! LSU

	cmp	%o1, %o2		! IEU1
	bne	1b			! CTI
	 add	%g2, 0x20, %g2		! IEU0	Group

	cmp	%o1, %g3		! even count: all done
	be	3f
	 nop

2:	ld	[%g1 + 0x00], %f0	! LSU	Group
	ld	[%g1 + 0x04], %f1	! LSU	Group
	fmuls	%f0, M0, %f2		! FGM	Group
	fmuls	%f0, M1, %f3		! FGM	Group
	fmuls	%f0, M2, %f4		! FGM	Group
	fmuls	%f1, M4, %f6		! FGM	Group
	fmuls	%f1, M5, %f7		! FGM	Group	f2 available
	fadds	%f2, M12, %f2		! FGA
	fmuls	%f1, M6, %f8		! FGM	Group	f3 available
	fadds	%f3, M13, %f3		! FGA
	fadds	%f4, M14, %f4		! FGA	Group	f4 available
	fadds	%f2, %f6, %f2		! FGA	Group	stall, f2, f6, f7 available
	st	%f2, [%g2 + 0x00]	! LSU
	fadds	%f3, %f7, %f3		! FGA	Group	f3, f8 available
	st	%f3, [%g2 + 0x04]	! LSU
	fadds	%f4, %f8, %f4		! FGA	Group	f4 available
	st	%f4, [%g2 + 0x08]	! LSU

3:
	ba	__set_v4f_3
	 nop
729
/*
 * _mesa_sparc_transform_points2_3d_no_rot
 *
 * Arguments: %o0 = output vector descriptor, %o1 = matrix base handed to
 * LDMATRIX_* (and re-read at label 3), %o2 = input vector descriptor.
 *
 * Reads x and y (first two floats) of each input element and writes
 * three floats per 16-byte output slot:
 *	out[0] = x * M0 + M12
 *	out[1] = y * M5 + M13
 *	out[2] = M14
 * The input count is copied into the output descriptor.  Two elements
 * per pass in the main loop, single or odd last element at label 2.
 *
 * The element counter lives in %o3 rather than %o1, so %o1 still holds
 * the matrix base at label 3.  There the 32-bit word at byte offset
 * 14 * 4 from %o1 is loaded as an integer: a non-zero word exits through
 * __set_v4f_3, an all-zero word through __set_v4f_2.  The test is on the
 * raw bit pattern, not a floating-point compare.
 */
	.globl	_mesa_sparc_transform_points2_3d_no_rot
_mesa_sparc_transform_points2_3d_no_rot:
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = input cursor
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = output cursor
	ld	[%o2 + V4F_STRIDE], %o5		! %o5 = input stride in bytes
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = element count

	LDMATRIX_0_5_12_13_14(%o1)

	cmp	%g3, 1
	st	%g3, [%o0 + V4F_COUNT]		! output count = input count
	bl	3f				! count < 1: no elements
	 mov	%g0, %o3			! %o3 = elements done

	be	2f				! count == 1: tail only
	 andn	%g3, 1, %o2			! %o2 = count with bit 0 cleared

1:	ld	[%g1], %f0			! x of element A
	ld	[%g1 + 4], %f1			! y of element A
	inc	2, %o3
	add	%g1, %o5, %g1
	ld	[%g1], %f4			! x of element B
	fmuls	%f0, M0, %f2
	ld	[%g1 + 4], %f5			! y of element B
	fmuls	%f1, M5, %f3
	fmuls	%f4, M0, %f6
	add	%g1, %o5, %g1
	fmuls	%f5, M5, %f7
	fadds	%f2, M12, %f2
	st	%f2, [%g2]
	fadds	%f3, M13, %f3
	st	%f3, [%g2 + 4]
	fadds	%f6, M12, %f6
	st	M14, [%g2 + 8]
	fadds	%f7, M13, %f7
	st	%f6, [%g2 + 16]
	st	%f7, [%g2 + 20]
	st	M14, [%g2 + 24]
	cmp	%o3, %o2
	bne	1b
	 inc	32, %g2				! delay slot: two output slots on

	cmp	%o3, %g3			! even count: all done
	be	3f
	 nop

2:	ld	[%g1], %f0
	ld	[%g1 + 4], %f1
	fmuls	%f0, M0, %f2
	fmuls	%f1, M5, %f3
	fadds	%f2, M12, %f2
	st	%f2, [%g2]
	fadds	%f3, M13, %f3
	st	%f3, [%g2 + 4]
	st	M14, [%g2 + 8]

3:	ld	[%o1 + (14 * 4)], %g3		! raw word of matrix entry 14
	tst	%g3
	bnz	__set_v4f_3			! non-zero word: report size 3
	 nop
	ba	__set_v4f_2			! all-zero word: report size 2
	 nop
792
/*
 * _mesa_sparc_transform_points2_perspective
 *
 * Arguments: %o0 = output vector descriptor, %o1 = matrix base handed to
 * LDMATRIX_*, %o2 = input vector descriptor.
 *
 * Reads x and y (first two floats) of each input element and writes four
 * words per 16-byte output slot:
 *	out[0] = x * M0
 *	out[1] = y * M5
 *	out[2] = M14
 *	out[3] = 0	(stored from %g0, i.e. an all-zero word)
 * The input count is copied into the output descriptor.  One element
 * per loop pass; a zero count skips the loop.  Leaves through
 * __set_v4f_4.
 */
	.globl	_mesa_sparc_transform_points2_perspective
_mesa_sparc_transform_points2_perspective:
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = input cursor
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = output cursor
	ld	[%o2 + V4F_STRIDE], %o5		! %o5 = input stride in bytes
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = element count

	LDMATRIX_0_5_14(%o1)

	tst	%g3
	st	%g3, [%o0 + V4F_COUNT]		! output count = input count
	bz	2f				! empty input
	 mov	%g0, %o1			! %o1 = elements done

1:	ld	[%g1], %f0			! x
	ld	[%g1 + 4], %f1			! y
	inc	%o1
	add	%g1, %o5, %g1
	fmuls	%f0, M0, %f2
	st	%f2, [%g2]
	fmuls	%f1, M5, %f3
	st	%f3, [%g2 + 4]
	st	M14, [%g2 + 8]
	st	%g0, [%g2 + 12]
	cmp	%o1, %g3
	bne	1b
	 inc	16, %g2				! delay slot: next output slot
2:
	ba	__set_v4f_4
	 nop
823
/*
 * _mesa_sparc_transform_points3_general
 *
 * Arguments: %o0 = output vector descriptor, %o1 = matrix base handed to
 * LDMATRIX_*, %o2 = input vector descriptor.
 *
 * Reads x, y, z (first three floats) of each input element and writes
 * four floats per 16-byte output slot:
 *	out[k] = ((x * Mk + y * M(4+k)) + M(12+k)) + z * M(8+k)	 k = 0..3
 * The input count is copied into the output descriptor.  One element
 * per loop pass; a zero count skips the loop.  Leaves through
 * __set_v4f_4.  %f7-%f10 are reused for the z products once the y
 * products have been consumed, so the FP instruction order matters and
 * is left as originally scheduled.
 */
	.globl	_mesa_sparc_transform_points3_general
_mesa_sparc_transform_points3_general:
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = input cursor
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = output cursor
	ld	[%o2 + V4F_STRIDE], %o5		! %o5 = input stride in bytes
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = element count

	LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(%o1)

	tst	%g3
	st	%g3, [%o0 + V4F_COUNT]		! output count = input count
	bz	2f				! empty input
	 mov	%g0, %o1			! %o1 = elements done

1:	ld	[%g1], %f0			! x
	ld	[%g1 + 4], %f1			! y
	ld	[%g1 + 8], %f2			! z
	inc	%o1
	add	%g1, %o5, %g1
	fmuls	%f0, M0, %f3
	fmuls	%f1, M4, %f7
	fmuls	%f0, M1, %f4
	fmuls	%f1, M5, %f8
	fmuls	%f0, M2, %f5
	fmuls	%f1, M6, %f9
	fadds	%f3, %f7, %f3
	fmuls	%f0, M3, %f6
	fmuls	%f1, M7, %f10
	fadds	%f4, %f8, %f4
	fmuls	%f2, M8, %f7			! %f7 reused: z * M8
	fmuls	%f2, M9, %f8			! %f8 reused: z * M9
	fadds	%f5, %f9, %f5
	fmuls	%f2, M10, %f9			! %f9 reused: z * M10
	fadds	%f6, %f10, %f6
	fmuls	%f2, M11, %f10			! %f10 reused: z * M11
	fadds	%f3, M12, %f3
	fadds	%f4, M13, %f4
	fadds	%f5, M14, %f5
	fadds	%f6, M15, %f6
	fadds	%f3, %f7, %f3
	st	%f3, [%g2]
	fadds	%f4, %f8, %f4
	st	%f4, [%g2 + 4]
	fadds	%f5, %f9, %f5
	st	%f5, [%g2 + 8]
	fadds	%f6, %f10, %f6
	st	%f6, [%g2 + 12]
	cmp	%o1, %g3
	bne	1b
	 inc	16, %g2				! delay slot: next output slot
2:
	ba	__set_v4f_4
	 nop
877
/*
 * _mesa_sparc_transform_points3_identity
 *
 * Arguments: %o0 = output vector descriptor, %o2 = input vector
 * descriptor (%o1 is only used as the loop counter).
 *
 * Copies the first three floats of each input element to the start of
 * each 16-byte output slot, copies the input count into the output
 * descriptor, and leaves through __set_v4f_3.  A zero count skips the
 * loop.  Unlike the points1/points2 identity routines in this file,
 * there is no early return when %o0 equals %o2.
 */
	.globl	_mesa_sparc_transform_points3_identity
_mesa_sparc_transform_points3_identity:
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = input cursor
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = output cursor
	ld	[%o2 + V4F_STRIDE], %o5		! %o5 = input stride in bytes
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = element count

	tst	%g3
	st	%g3, [%o0 + V4F_COUNT]		! output count = input count
	bz	2f				! empty input
	 mov	%g0, %o1			! %o1 = elements done

1:	ld	[%g1], %f0
	ld	[%g1 + 4], %f1
	ld	[%g1 + 8], %f2
	inc	%o1
	add	%g1, %o5, %g1
	cmp	%o1, %g3
	st	%f0, [%g2]
	st	%f1, [%g2 + 4]
	st	%f2, [%g2 + 8]
	bne	1b
	 inc	16, %g2				! delay slot: next output slot
2:
	ba	__set_v4f_3
	 nop
904
/*
 * _mesa_sparc_transform_points3_2d
 *
 * Arguments: %o0 = output vector descriptor, %o1 = matrix base handed to
 * LDMATRIX_*, %o2 = input vector descriptor.
 *
 * Reads x, y, z (first three floats) of each input element and writes
 * three floats per 16-byte output slot:
 *	out[0] = (x * M0 + M12) + y * M4
 *	out[1] = (x * M1 + M13) + y * M5
 *	out[2] = z	(copied unchanged)
 * The input count is copied into the output descriptor.  One element
 * per loop pass; a zero count skips the loop.  Leaves through
 * __set_v4f_3.
 */
	.globl	_mesa_sparc_transform_points3_2d
_mesa_sparc_transform_points3_2d:
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = input cursor
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = output cursor
	ld	[%o2 + V4F_STRIDE], %o5		! %o5 = input stride in bytes
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = element count

	LDMATRIX_0_1_4_5_12_13(%o1)

	tst	%g3
	st	%g3, [%o0 + V4F_COUNT]		! output count = input count
	bz	2f				! empty input
	 mov	%g0, %o1			! %o1 = elements done

1:	ld	[%g1], %f0			! x
	ld	[%g1 + 4], %f1			! y
	ld	[%g1 + 8], %f2			! z
	inc	%o1
	add	%g1, %o5, %g1
	fmuls	%f0, M0, %f3
	fmuls	%f0, M1, %f4
	fmuls	%f1, M4, %f6
	fmuls	%f1, M5, %f7
	fadds	%f3, M12, %f3
	fadds	%f4, M13, %f4
	fadds	%f3, %f6, %f3
	st	%f3, [%g2]
	fadds	%f4, %f7, %f4
	st	%f4, [%g2 + 4]
	st	%f2, [%g2 + 8]			! z passes through
	cmp	%o1, %g3
	bne	1b
	 inc	16, %g2				! delay slot: next output slot
2:
	ba	__set_v4f_3
	 nop
941
/*
 * _mesa_sparc_transform_points3_2d_no_rot
 *
 * Arguments: %o0 = output vector descriptor, %o1 = matrix base handed to
 * LDMATRIX_*, %o2 = input vector descriptor.
 *
 * Reads x, y, z (first three floats) of each input element and writes
 * three floats per 16-byte output slot:
 *	out[0] = x * M0 + M12
 *	out[1] = y * M5 + M13
 *	out[2] = z	(copied unchanged, stored first)
 * The input count is copied into the output descriptor.  One element
 * per loop pass; a zero count skips the loop.  Leaves through
 * __set_v4f_3.
 */
	.globl	_mesa_sparc_transform_points3_2d_no_rot
_mesa_sparc_transform_points3_2d_no_rot:
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = input cursor
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = output cursor
	ld	[%o2 + V4F_STRIDE], %o5		! %o5 = input stride in bytes
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = element count

	LDMATRIX_0_5_12_13(%o1)

	tst	%g3
	st	%g3, [%o0 + V4F_COUNT]		! output count = input count
	bz	2f				! empty input
	 mov	%g0, %o1			! %o1 = elements done

1:	ld	[%g1], %f0			! x
	ld	[%g1 + 4], %f1			! y
	ld	[%g1 + 8], %f2			! z
	inc	%o1
	add	%g1, %o5, %g1
	fmuls	%f0, M0, %f3
	fmuls	%f1, M5, %f4
	st	%f2, [%g2 + 8]			! z passes through
	fadds	%f3, M12, %f3
	st	%f3, [%g2]
	fadds	%f4, M13, %f4
	st	%f4, [%g2 + 4]
	cmp	%o1, %g3
	bne	1b
	 inc	16, %g2				! delay slot: next output slot
2:
	ba	__set_v4f_3
	 nop
974
/*
 * _mesa_sparc_transform_points3_3d
 *
 * Arguments: %o0 = output vector descriptor, %o1 = matrix base handed to
 * LDMATRIX_*, %o2 = input vector descriptor.
 *
 * Reads x, y, z (first three floats) of each input element and writes
 * three floats per 16-byte output slot:
 *	out[k] = ((x * Mk + y * M(4+k)) + z * M(8+k)) + M(12+k)	 k = 0..2
 * The input count is copied into the output descriptor.  One element
 * per loop pass; a zero count skips the loop.  Leaves through
 * __set_v4f_3.  FP instruction order is the original hand-scheduled
 * order.
 */
	.globl	_mesa_sparc_transform_points3_3d
_mesa_sparc_transform_points3_3d:
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = input cursor
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = output cursor
	ld	[%o2 + V4F_STRIDE], %o5		! %o5 = input stride in bytes
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = element count

	LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(%o1)

	tst	%g3
	st	%g3, [%o0 + V4F_COUNT]		! output count = input count
	bz	2f				! empty input
	 mov	%g0, %o1			! %o1 = elements done

1:	ld	[%g1], %f0			! x
	ld	[%g1 + 4], %f1			! y
	ld	[%g1 + 8], %f2			! z
	inc	%o1
	add	%g1, %o5, %g1
	fmuls	%f0, M0, %f3
	fmuls	%f1, M4, %f6
	fmuls	%f0, M1, %f4
	fmuls	%f1, M5, %f7
	fmuls	%f0, M2, %f5
	fmuls	%f1, M6, %f8
	fadds	%f3, %f6, %f3
	fmuls	%f2, M8, %f9
	fmuls	%f2, M9, %f10
	fadds	%f4, %f7, %f4
	fmuls	%f2, M10, %f11
	fadds	%f5, %f8, %f5
	fadds	%f3, %f9, %f3
	fadds	%f4, %f10, %f4
	fadds	%f5, %f11, %f5
	fadds	%f3, M12, %f3
	st	%f3, [%g2]
	fadds	%f4, M13, %f4
	st	%f4, [%g2 + 4]
	fadds	%f5, M14, %f5
	st	%f5, [%g2 + 8]
	cmp	%o1, %g3
	bne	1b
	 inc	16, %g2				! delay slot: next output slot
2:
	ba	__set_v4f_3
	 nop
1021
/*
 * _mesa_sparc_transform_points3_3d_no_rot
 *
 * Arguments: %o0 = output vector descriptor, %o1 = matrix base handed to
 * LDMATRIX_*, %o2 = input vector descriptor.
 *
 * Reads x, y, z (first three floats) of each input element and writes
 * three floats per 16-byte output slot:
 *	out[0] = x * M0  + M12
 *	out[1] = y * M5  + M13
 *	out[2] = z * M10 + M14
 * The input count is copied into the output descriptor.  One element
 * per loop pass; a zero count skips the loop.  Leaves through
 * __set_v4f_3.  The loop compare is issued early: nothing between it
 * and the closing branch writes the integer condition codes.
 */
	.globl	_mesa_sparc_transform_points3_3d_no_rot
_mesa_sparc_transform_points3_3d_no_rot:
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = input cursor
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = output cursor
	ld	[%o2 + V4F_STRIDE], %o5		! %o5 = input stride in bytes
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = element count

	LDMATRIX_0_5_10_12_13_14(%o1)

	tst	%g3
	st	%g3, [%o0 + V4F_COUNT]		! output count = input count
	bz	2f				! empty input
	 mov	%g0, %o1			! %o1 = elements done

1:	ld	[%g1], %f0			! x
	ld	[%g1 + 4], %f1			! y
	ld	[%g1 + 8], %f2			! z
	inc	%o1
	add	%g1, %o5, %g1
	cmp	%o1, %g3			! consumed by the bne below
	fmuls	%f0, M0, %f3
	fmuls	%f1, M5, %f4
	fmuls	%f2, M10, %f5
	fadds	%f3, M12, %f3
	st	%f3, [%g2]
	fadds	%f4, M13, %f4
	st	%f4, [%g2 + 4]
	fadds	%f5, M14, %f5
	st	%f5, [%g2 + 8]
	bne	1b
	 inc	16, %g2				! delay slot: next output slot
2:
	ba	__set_v4f_3
	 nop
1056
/*
 * _mesa_sparc_transform_points3_perspective
 *
 * Arguments: %o0 = output vector descriptor, %o1 = matrix base handed to
 * LDMATRIX_*, %o2 = input vector descriptor.
 *
 * Reads x, y, z (first three floats) of each input element and writes
 * four floats per 16-byte output slot:
 *	out[0] = x * M0 + z * M8
 *	out[1] = y * M5 + z * M9
 *	out[2] = z * M10 + M14
 *	out[3] = -z
 * The input count is copied into the output descriptor.  One element
 * per loop pass; a zero count skips the loop.  Leaves through
 * __set_v4f_4.
 */
	.globl	_mesa_sparc_transform_points3_perspective
_mesa_sparc_transform_points3_perspective:
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = input cursor
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = output cursor
	ld	[%o2 + V4F_STRIDE], %o5		! %o5 = input stride in bytes
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = element count

	LDMATRIX_0_5_8_9_10_14(%o1)

	tst	%g3
	st	%g3, [%o0 + V4F_COUNT]		! output count = input count
	bz	2f				! empty input
	 mov	%g0, %o1			! %o1 = elements done

1:	ld	[%g1], %f0			! x
	ld	[%g1 + 4], %f1			! y
	ld	[%g1 + 8], %f2			! z
	inc	%o1
	add	%g1, %o5, %g1
	fmuls	%f0, M0, %f3
	fmuls	%f2, M8, %f6
	fmuls	%f1, M5, %f4
	fmuls	%f2, M9, %f7
	fmuls	%f2, M10, %f5
	fadds	%f3, %f6, %f3
	st	%f3, [%g2]
	fadds	%f4, %f7, %f4
	st	%f4, [%g2 + 4]
	fadds	%f5, M14, %f5
	st	%f5, [%g2 + 8]
	fnegs	%f2, %f6			! %f6 reused: -z
	st	%f6, [%g2 + 12]
	cmp	%o1, %g3
	bne	1b
	 inc	16, %g2				! delay slot: next output slot
2:
	ba	__set_v4f_4
	 nop
1095
/*
 * _mesa_sparc_transform_points4_general
 *
 * Arguments: %o0 = output vector descriptor, %o1 = matrix base handed to
 * LDMATRIX_*, %o2 = input vector descriptor.
 *
 * Reads x, y, z, w (four floats) of each input element and writes four
 * floats per 16-byte output slot:
 *	out[k] = ((x * Mk + y * M(4+k)) + z * M(8+k)) + w * M(12+k)  k = 0..3
 * The input count is copied into the output descriptor.  One element
 * per loop pass; a zero count skips the loop.  Leaves through
 * __set_v4f_4.  %f8-%f11 are reused for the w products once the y
 * products have been consumed, so the FP instruction order matters and
 * is left as originally scheduled.
 */
	.globl	_mesa_sparc_transform_points4_general
_mesa_sparc_transform_points4_general:
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = input cursor
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = output cursor
	ld	[%o2 + V4F_STRIDE], %o5		! %o5 = input stride in bytes
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = element count

	LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(%o1)

	tst	%g3
	st	%g3, [%o0 + V4F_COUNT]		! output count = input count
	bz	2f				! empty input
	 mov	%g0, %o1			! %o1 = elements done

1:	ld	[%g1], %f0			! x
	ld	[%g1 + 4], %f1			! y
	ld	[%g1 + 8], %f2			! z
	ld	[%g1 + 12], %f3			! w
	inc	%o1
	add	%g1, %o5, %g1
	fmuls	%f0, M0, %f4
	fmuls	%f1, M4, %f8
	fmuls	%f0, M1, %f5
	fmuls	%f1, M5, %f9
	fmuls	%f0, M2, %f6
	fmuls	%f1, M6, %f10
	fadds	%f4, %f8, %f4
	fmuls	%f0, M3, %f7
	fmuls	%f1, M7, %f11
	fadds	%f5, %f9, %f5
	fmuls	%f2, M8, %f12
	fmuls	%f2, M9, %f13
	fadds	%f6, %f10, %f6
	fmuls	%f2, M10, %f14
	fmuls	%f2, M11, %f15
	fadds	%f7, %f11, %f7
	fmuls	%f3, M12, %f8			! %f8 reused: w * M12
	fadds	%f4, %f12, %f4
	fmuls	%f3, M13, %f9			! %f9 reused: w * M13
	fadds	%f5, %f13, %f5
	fmuls	%f3, M14, %f10			! %f10 reused: w * M14
	fadds	%f6, %f14, %f6
	fmuls	%f3, M15, %f11			! %f11 reused: w * M15
	fadds	%f7, %f15, %f7
	fadds	%f4, %f8, %f4
	st	%f4, [%g2]
	fadds	%f5, %f9, %f5
	st	%f5, [%g2 + 4]
	fadds	%f6, %f10, %f6
	st	%f6, [%g2 + 8]
	fadds	%f7, %f11, %f7
	st	%f7, [%g2 + 12]
	cmp	%o1, %g3
	bne	1b
	 inc	16, %g2				! delay slot: next output slot
2:
	ba	__set_v4f_4
	 nop
1154
/*
 * _mesa_sparc_transform_points4_identity
 *
 * Identity case: each 4-component source point is copied to the
 * destination unchanged.  No matrix is loaded, so %o1 is never read as
 * an argument; it only serves as the loop counter.
 *
 * Inputs:
 *	%o0	destination vector (start pointer read, count written)
 *	%o2	source vector (stride, start pointer and count read)
 *
 * Working registers:
 *	%g1 source cursor, %g2 destination cursor, %g3 point count,
 *	%o5 source stride in bytes, %o1 points processed, %f0-%f3 data.
 *
 * The source is walked with its own stride, the destination always
 * advances by 16 bytes.  A count of zero skips the loop.  The routine
 * leaves through __set_v4f_4 instead of returning directly.
 */
	.globl	_mesa_sparc_transform_points4_identity
_mesa_sparc_transform_points4_identity:
	ld	[%o2 + V4F_STRIDE], %o5		! %o5 = source stride
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = source cursor
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = destination cursor
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = number of points

	tst	%g3				! anything to do at all?
	st	%g3, [%o0 + V4F_COUNT]		! destination count = source count
	bz	2f				! no points: skip the loop
	 clr	%o1				! delay slot: processed = 0

1:	ld	[%g1 + 0], %f0		! x
	ld	[%g1 + 4], %f1		! y
	ld	[%g1 + 8], %f2		! z
	inc	%o1			! processed++
	ld	[%g1 + 12], %f3		! w
	add	%g1, %o5, %g1		! next source point
	st	%f0, [%g2 + 0]		! out[0] = x
	st	%f1, [%g2 + 4]		! out[1] = y
	st	%f2, [%g2 + 8]		! out[2] = z
	cmp	%o1, %g3		! processed == count?
	st	%f3, [%g2 + 12]		! out[3] = w (store leaves the flags alone)
	bnz	1b			! no: next point
	 add	%g2, 16, %g2		! delay slot: next output slot
2:
	ba	__set_v4f_4		! finish in the shared size-4 tail
	 nop				! delay slot
1183
/*
 * _mesa_sparc_transform_points4_2d
 *
 * Transform 4-component points by a 2D matrix.  For each source point
 * (x, y, z, w) the loop stores
 *
 *	out[0] = x*M0 + y*M4 + w*M12
 *	out[1] = x*M1 + y*M5 + w*M13
 *	out[2] = z
 *	out[3] = w
 *
 * Inputs:
 *	%o0	destination vector (start pointer read, count written)
 *	%o1	argument handed to the LDMATRIX macro; afterwards reused
 *		as the number of points already processed
 *	%o2	source vector (stride, start pointer and count read)
 *
 * Working registers:
 *	%g1 source cursor, %g2 destination cursor, %g3 point count,
 *	%o5 source stride in bytes, %f0-%f3 x/y/z/w, %f4-%f13 scratch.
 *
 * The source is walked with its own stride, the destination always
 * advances by 16 bytes.  A count of zero skips the loop.  The routine
 * leaves through __set_v4f_4 instead of returning directly.
 *
 * NOTE(review): the M<n> names and LDMATRIX come from sparc_matrix.h,
 * which is not visible here; presumably the macro leaves matrix
 * element n in the register named M<n>.
 */
	.globl	_mesa_sparc_transform_points4_2d
_mesa_sparc_transform_points4_2d:
	ld	[%o2 + V4F_STRIDE], %o5		! %o5 = source stride
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = source cursor
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = destination cursor
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = number of points

	LDMATRIX_0_1_4_5_12_13(%o1)

	tst	%g3				! anything to do at all?
	st	%g3, [%o0 + V4F_COUNT]		! destination count = source count
	bz	2f				! no points: skip the loop
	 clr	%o1				! delay slot: processed = 0

1:	ld	[%g1 + 0], %f0		! x			(LSU, Group)
	ld	[%g1 + 4], %f1		! y			(LSU, Group)
	ld	[%g1 + 8], %f2		! z			(LSU, Group)
	ld	[%g1 + 12], %f3		! w			(LSU, Group)
	inc	%o1			! processed++		(IEU0)
	add	%g1, %o5, %g1		! next source point	(IEU1)
	fmuls	%f0, M0, %f4		! f4  = x*M0		(FGM)
	fmuls	%f1, M4, %f8		! f8  = y*M4		(FGM, Group)
	fmuls	%f0, M1, %f5		! f5  = x*M1		(FGM, Group)
	fmuls	%f1, M5, %f9		! f9  = y*M5		(FGM, Group; f4 available)
	fmuls	%f3, M12, %f12		! f12 = w*M12		(FGM, Group)
	fmuls	%f3, M13, %f13		! f13 = w*M13		(FGM, Group; f8 available)
	fadds	%f4, %f8, %f4		! f4  = x*M0 + y*M4	(FGA)
	fadds	%f5, %f9, %f5		! f5  = x*M1 + y*M5	(FGA, Group; stall, f5, f9 available)
	fadds	%f4, %f12, %f4		! f4 += w*M12		(FGA, Group; 2 cycle stall, f4, f12, f13 avail)
	st	%f4, [%g2 + 0]		! out[0]		(LSU)
	fadds	%f5, %f13, %f5		! f5 += w*M13		(FGA, Group; f5 available)
	st	%f5, [%g2 + 4]		! out[1]		(LSU)
	st	%f2, [%g2 + 8]		! out[2] = z		(LSU, Group)
	st	%f3, [%g2 + 12]		! out[3] = w		(LSU, Group)
	cmp	%o1, %g3		! processed == count?	(IEU1)
	bnz	1b			! no: next point	(CTI)
	 add	%g2, 16, %g2		! delay slot: next output slot	(IEU0, Group)
2:
	ba	__set_v4f_4		! finish in the shared size-4 tail
	 nop				! delay slot
1224
/*
 * _mesa_sparc_transform_points4_2d_no_rot
 *
 * Transform 4-component points by a 2D matrix without rotation terms.
 * For each source point (x, y, z, w) the loop stores
 *
 *	out[0] = x*M0 + w*M12
 *	out[1] = y*M5 + w*M13
 *	out[2] = z
 *	out[3] = w
 *
 * Inputs:
 *	%o0	destination vector (start pointer read, count written)
 *	%o1	argument handed to the LDMATRIX macro; afterwards reused
 *		as the number of points already processed
 *	%o2	source vector (stride, start pointer and count read)
 *
 * Working registers:
 *	%g1 source cursor, %g2 destination cursor, %g3 point count,
 *	%o5 source stride in bytes, %f0-%f3 x/y/z/w, %f4-%f9 scratch.
 *
 * The source is walked with its own stride, the destination always
 * advances by 16 bytes.  A count of zero skips the loop.  The routine
 * leaves through __set_v4f_4 instead of returning directly.
 *
 * NOTE(review): only M0, M5, M12 and M13 are used below although the
 * load macro names elements 0, 1, 4, 5, 12 and 13; a narrower macro
 * could be used if sparc_matrix.h (not visible here) provides one.
 */
	.globl	_mesa_sparc_transform_points4_2d_no_rot
_mesa_sparc_transform_points4_2d_no_rot:
	ld	[%o2 + V4F_STRIDE], %o5		! %o5 = source stride
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = source cursor
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = destination cursor
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = number of points

	LDMATRIX_0_1_4_5_12_13(%o1)

	tst	%g3				! anything to do at all?
	st	%g3, [%o0 + V4F_COUNT]		! destination count = source count
	bz	2f				! no points: skip the loop
	 clr	%o1				! delay slot: processed = 0

1:	ld	[%g1 + 0], %f0		! x
	ld	[%g1 + 4], %f1		! y
	ld	[%g1 + 8], %f2		! z
	ld	[%g1 + 12], %f3		! w
	inc	%o1			! processed++
	add	%g1, %o5, %g1		! next source point
	fmuls	%f0, M0, %f4		! f4 = x*M0
	fmuls	%f3, M12, %f8		! f8 = w*M12
	fmuls	%f1, M5, %f5		! f5 = y*M5
	fmuls	%f3, M13, %f9		! f9 = w*M13
	fadds	%f4, %f8, %f4		! f4 = x*M0 + w*M12
	st	%f4, [%g2 + 0]		! out[0]
	fadds	%f5, %f9, %f5		! f5 = y*M5 + w*M13
	st	%f5, [%g2 + 4]		! out[1]
	st	%f2, [%g2 + 8]		! out[2] = z
	st	%f3, [%g2 + 12]		! out[3] = w
	cmp	%o1, %g3		! processed == count?
	bnz	1b			! no: next point
	 add	%g2, 16, %g2		! delay slot: next output slot
2:
	ba	__set_v4f_4		! finish in the shared size-4 tail
	 nop				! delay slot
1261
/*
 * _mesa_sparc_transform_points4_3d
 *
 * Transform 4-component points by a 3D matrix.  For each source point
 * (x, y, z, w) the loop stores
 *
 *	out[0] = x*M0 + y*M4 + z*M8  + w*M12
 *	out[1] = x*M1 + y*M5 + z*M9  + w*M13
 *	out[2] = x*M2 + y*M6 + z*M10 + w*M14
 *	out[3] = w
 *
 * Inputs:
 *	%o0	destination vector (start pointer read, count written)
 *	%o1	argument handed to the LDMATRIX macro; afterwards reused
 *		as the number of points already processed
 *	%o2	source vector (stride, start pointer and count read)
 *
 * Working registers:
 *	%g1 source cursor, %g2 destination cursor, %g3 point count,
 *	%o5 source stride in bytes, %f0-%f3 x/y/z/w, %f4-%f15 scratch.
 *
 * The source is walked with its own stride, the destination always
 * advances by 16 bytes.  A count of zero skips the loop.  The routine
 * leaves through __set_v4f_4 instead of returning directly.
 *
 * NOTE(review): the M<n> names and LDMATRIX come from sparc_matrix.h,
 * which is not visible here; presumably the macro leaves matrix
 * element n in the register named M<n>.
 */
	.globl	_mesa_sparc_transform_points4_3d
_mesa_sparc_transform_points4_3d:
	ld	[%o2 + V4F_STRIDE], %o5		! %o5 = source stride
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = source cursor
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = destination cursor
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = number of points

	LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(%o1)

	tst	%g3				! anything to do at all?
	st	%g3, [%o0 + V4F_COUNT]		! destination count = source count
	bz	2f				! no points: skip the loop
	 clr	%o1				! delay slot: processed = 0

1:	ld	[%g1 + 0], %f0		! x			(LSU, Group)
	ld	[%g1 + 4], %f1		! y			(LSU, Group)
	ld	[%g1 + 8], %f2		! z			(LSU, Group)
	ld	[%g1 + 12], %f3		! w			(LSU, Group)
	inc	%o1			! processed++		(IEU0)
	add	%g1, %o5, %g1		! next source point	(IEU1)
	fmuls	%f0, M0, %f4		! f4  = x*M0		(FGM)
	fmuls	%f1, M4, %f7		! f7  = y*M4		(FGM, Group)
	fmuls	%f0, M1, %f5		! f5  = x*M1		(FGM, Group)
	fmuls	%f1, M5, %f8		! f8  = y*M5		(FGM, Group)
	fmuls	%f0, M2, %f6		! f6  = x*M2		(FGM, Group; f4 available)
	fmuls	%f1, M6, %f9		! f9  = y*M6		(FGM, Group; f7 available)
	fadds	%f4, %f7, %f4		! f4  = x*M0 + y*M4	(FGA)
	fmuls	%f2, M8, %f10		! f10 = z*M8		(FGM, Group; f5 available)
	fmuls	%f2, M9, %f11		! f11 = z*M9		(FGM, Group; f8 available)
	fadds	%f5, %f8, %f5		! f5  = x*M1 + y*M5	(FGA)
	fmuls	%f2, M10, %f12		! f12 = z*M10		(FGM, Group; f6 available)
	fmuls	%f3, M12, %f13		! f13 = w*M12		(FGM, Group; f9, f4 available)
	fadds	%f6, %f9, %f6		! f6  = x*M2 + y*M6	(FGA)
	fmuls	%f3, M13, %f14		! f14 = w*M13		(FGM, Group; f10 available)
	fadds	%f4, %f10, %f4		! f4 += z*M8		(FGA)
	fmuls	%f3, M14, %f15		! f15 = w*M14		(FGM, Group; f11, f5 available)
	fadds	%f5, %f11, %f5		! f5 += z*M9		(FGA)
	fadds	%f6, %f12, %f6		! f6 += z*M10		(FGA, Group; stall, f12, f13, f6 available)
	fadds	%f4, %f13, %f4		! f4 += w*M12		(FGA, Group; f14, f4 available)
	st	%f4, [%g2 + 0]		! out[0]		(LSU)
	fadds	%f5, %f14, %f5		! f5 += w*M13		(FGA, Group; f15, f5 available)
	st	%f5, [%g2 + 4]		! out[1]		(LSU)
	fadds	%f6, %f15, %f6		! f6 += w*M14		(FGA, Group; f6 available)
	st	%f6, [%g2 + 8]		! out[2]		(LSU)
	st	%f3, [%g2 + 12]		! out[3] = w		(LSU, Group)
	cmp	%o1, %g3		! processed == count?	(IEU1)
	bnz	1b			! no: next point	(CTI)
	 add	%g2, 16, %g2		! delay slot: next output slot	(IEU0, Group)
2:
	ba	__set_v4f_4		! finish in the shared size-4 tail
	 nop				! delay slot
1313
/*
 * _mesa_sparc_transform_points4_3d_no_rot
 *
 * Transform 4-component points by a 3D matrix without rotation terms.
 * For each source point (x, y, z, w) the loop stores
 *
 *	out[0] = x*M0  + w*M12
 *	out[1] = y*M5  + w*M13
 *	out[2] = z*M10 + w*M14
 *	out[3] = w
 *
 * Inputs:
 *	%o0	destination vector (start pointer read, count written)
 *	%o1	argument handed to the LDMATRIX macro; afterwards reused
 *		as the number of points already processed
 *	%o2	source vector (stride, start pointer and count read)
 *
 * Working registers:
 *	%g1 source cursor, %g2 destination cursor, %g3 point count,
 *	%o5 source stride in bytes, %f0-%f3 x/y/z/w, %f4-%f9 scratch.
 *
 * The source is walked with its own stride, the destination always
 * advances by 16 bytes.  A count of zero skips the loop.  The routine
 * leaves through __set_v4f_4 instead of returning directly.
 *
 * NOTE(review): the M<n> names and LDMATRIX come from sparc_matrix.h,
 * which is not visible here; presumably the macro leaves matrix
 * element n in the register named M<n>.
 */
	.globl	_mesa_sparc_transform_points4_3d_no_rot
_mesa_sparc_transform_points4_3d_no_rot:
	ld	[%o2 + V4F_STRIDE], %o5		! %o5 = source stride
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = source cursor
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = destination cursor
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = number of points

	LDMATRIX_0_5_10_12_13_14(%o1)

	tst	%g3				! anything to do at all?
	st	%g3, [%o0 + V4F_COUNT]		! destination count = source count
	bz	2f				! no points: skip the loop
	 clr	%o1				! delay slot: processed = 0

1:	ld	[%g1 + 0], %f0		! x			(LSU, Group)
	ld	[%g1 + 4], %f1		! y			(LSU, Group)
	ld	[%g1 + 8], %f2		! z			(LSU, Group)
	ld	[%g1 + 12], %f3		! w			(LSU, Group)
	inc	%o1			! processed++		(IEU0)
	add	%g1, %o5, %g1		! next source point	(IEU1)
	fmuls	%f0, M0, %f4		! f4 = x*M0		(FGM)
	fmuls	%f3, M12, %f7		! f7 = w*M12		(FGM, Group)
	fmuls	%f1, M5, %f5		! f5 = y*M5		(FGM, Group)
	fmuls	%f3, M13, %f8		! f8 = w*M13		(FGM, Group)
	fmuls	%f2, M10, %f6		! f6 = z*M10		(FGM, Group; f4 available)
	fmuls	%f3, M14, %f9		! f9 = w*M14		(FGM, Group; f7 available)
	fadds	%f4, %f7, %f4		! f4 = x*M0 + w*M12	(FGA)
	st	%f4, [%g2 + 0]		! out[0]		(LSU)
	fadds	%f5, %f8, %f5		! f5 = y*M5 + w*M13	(FGA, Group; stall, f5, f8 available)
	st	%f5, [%g2 + 4]		! out[1]		(LSU)
	fadds	%f6, %f9, %f6		! f6 = z*M10 + w*M14	(FGA, Group; stall, f6, f9 available)
	st	%f6, [%g2 + 8]		! out[2]		(LSU)
	st	%f3, [%g2 + 12]		! out[3] = w		(LSU, Group)
	cmp	%o1, %g3		! processed == count?	(IEU1)
	bnz	1b			! no: next point	(CTI)
	 add	%g2, 16, %g2		! delay slot: next output slot	(IEU0, Group)
2:
	ba	__set_v4f_4		! finish in the shared size-4 tail
	 nop				! delay slot
1353
/*
 * _mesa_sparc_transform_points4_perspective
 *
 * Transform 4-component points by a perspective matrix.  For each
 * source point (x, y, z, w) the loop stores
 *
 *	out[0] = x*M0  + z*M8
 *	out[1] = y*M5  + z*M9
 *	out[2] = z*M10 + w*M14
 *	out[3] = -z
 *
 * Inputs:
 *	%o0	destination vector (start pointer read, count written)
 *	%o1	argument handed to the LDMATRIX macro; afterwards reused
 *		as the number of points already processed
 *	%o2	source vector (stride, start pointer and count read)
 *
 * Working registers:
 *	%g1 source cursor, %g2 destination cursor, %g3 point count,
 *	%o5 source stride in bytes, %f0-%f3 x/y/z/w, %f4-%f9 scratch.
 *
 * The source is walked with its own stride, the destination always
 * advances by 16 bytes.  A count of zero skips the loop.  The routine
 * leaves through __set_v4f_4 instead of returning directly.
 *
 * NOTE(review): the M<n> names and LDMATRIX come from sparc_matrix.h,
 * which is not visible here; presumably the macro leaves matrix
 * element n in the register named M<n>.
 */
	.globl	_mesa_sparc_transform_points4_perspective
_mesa_sparc_transform_points4_perspective:
	ld	[%o2 + V4F_STRIDE], %o5		! %o5 = source stride
	LDPTR	[%o2 + V4F_START], %g1		! %g1 = source cursor
	LDPTR	[%o0 + V4F_START], %g2		! %g2 = destination cursor
	ld	[%o2 + V4F_COUNT], %g3		! %g3 = number of points

	LDMATRIX_0_5_8_9_10_14(%o1)

	tst	%g3				! anything to do at all?
	st	%g3, [%o0 + V4F_COUNT]		! destination count = source count
	bz	2f				! no points: skip the loop
	 clr	%o1				! delay slot: processed = 0

1:	ld	[%g1 + 0], %f0		! x			(LSU, Group)
	ld	[%g1 + 4], %f1		! y			(LSU, Group)
	ld	[%g1 + 8], %f2		! z			(LSU, Group)
	ld	[%g1 + 12], %f3		! w			(LSU, Group)
	inc	%o1			! processed++		(IEU0)
	add	%g1, %o5, %g1		! next source point	(IEU1)
	fmuls	%f0, M0, %f4		! f4 = x*M0		(FGM)
	fmuls	%f2, M8, %f7		! f7 = z*M8		(FGM, Group)
	fmuls	%f1, M5, %f5		! f5 = y*M5		(FGM, Group)
	fmuls	%f2, M9, %f8		! f8 = z*M9		(FGM, Group)
	fmuls	%f2, M10, %f6		! f6 = z*M10		(FGM, Group; f4 available)
	fmuls	%f3, M14, %f9		! f9 = w*M14		(FGM, Group; f7 available)
	fadds	%f4, %f7, %f4		! f4 = x*M0 + z*M8	(FGA)
	st	%f4, [%g2 + 0]		! out[0]		(LSU)
	fadds	%f5, %f8, %f5		! f5 = y*M5 + z*M9	(FGA, Group; stall, f5, f8 available)
	st	%f5, [%g2 + 4]		! out[1]		(LSU)
	fadds	%f6, %f9, %f6		! f6 = z*M10 + w*M14	(FGA, Group; stall, f6, f9 available)
	st	%f6, [%g2 + 8]		! out[2]		(LSU)
	fnegs	%f2, %f7		! f7 = -z		(FGA, Group)
	st	%f7, [%g2 + 12]		! out[3] = -z		(LSU)
	cmp	%o1, %g3		! processed == count?	(IEU1)
	bnz	1b			! no: next point	(CTI)
	 add	%g2, 16, %g2		! delay slot: next output slot	(IEU0, Group)
2:
	ba	__set_v4f_4		! finish in the shared size-4 tail
	 nop				! delay slot
1394