1848b8605Smrg
2848b8605Smrg#include "sparc_matrix.h"
3848b8605Smrg
4848b8605Smrg	.register %g2, #scratch
5848b8605Smrg	.register %g3, #scratch
6848b8605Smrg
7848b8605Smrg	.text
8848b8605Smrg
9848b8605Smrg#ifdef __arch64__
10848b8605Smrg#define STACK_VAR_OFF	(2047 + (8 * 16))
11848b8605Smrg#else
12848b8605Smrg#define STACK_VAR_OFF	(4 * 16)
13848b8605Smrg#endif
14848b8605Smrg
15848b8605Smrg	/* Newton-Raphson approximation turns out to be slower
16848b8605Smrg	 * (and less accurate) than direct fsqrts/fdivs.
17848b8605Smrg	 */
18848b8605Smrg#define ONE_DOT_ZERO	0x3f800000
19848b8605Smrg
	.globl	_mesa_sparc_transform_normalize_normals
_mesa_sparc_transform_normalize_normals:
	/* Transform each 3-float normal of 'in' by the 3x3 block of
	 * mat->inv, then normalize the result into 'dest'.
	 *
	 * In:   o0=mat o1=scale o2=in o3=lengths o4=dest
	 * Out:  dest->start[] filled, dest->count = in->count
	 * Regs: g1=count  g2=stride  g3=out ptr  o5=in ptr  o4=i
	 *       f12=1.0f  f15=scale  M0..M10 = FP matrix regs
	 *       (register names from sparc_matrix.h)
	 * Note: branch delay-slot instructions are indented one extra
	 *       space, per this file's convention.
	 */

	/* Materialize the FP constants 1.0f and 'scale' by storing
	 * their bit patterns to a scratch stack slot and loading them
	 * back into FP registers (no direct int->fp moves here).
	 */
	sethi	%hi(ONE_DOT_ZERO), %g2
	sub	%sp, 16, %sp
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
	st	%o1, [%sp + STACK_VAR_OFF+0x4]
	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
	ld	[%sp + STACK_VAR_OFF+0x4], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)	! load 3x3 of inverse into M0..M10

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f				! return if count < 1
	 cmp	%o3, 0				! (delay) test lengths ptr
	bne	4f				! lengths != NULL -> prescaled path
	 clr	%o4				! (delay) 'i' for STRIDE_LOOP

1:	/* LENGTHS == NULL: compute 1/sqrt(len) per normal */
	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
	 * Instruction order is hand-scheduled for UltraSPARC
	 * FGM/FGA grouping -- do not reorder.
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	fmuls	%f1, M1, %f4			! FGM	Group
	fmuls	%f0, M4, %f5			! FGM	Group
	fmuls	%f1, M5, %f6			! FGM	Group
	fmuls	%f0, M8, %f7			! FGM	Group	f3 available
	fmuls	%f1, M9, %f8			! FGM	Group	f4 available
	fadds	%f3, %f4, %f3			! FGA
	fmuls	%f2, M2, %f10			! FGM	Group	f5 available
	fmuls	%f2, M6, %f0			! FGM	Group	f6 available
	fadds	%f5, %f6, %f5			! FGA
	fmuls	%f2, M10, %f4			! FGM	Group	f7 available
	fadds	%f7, %f8, %f7			! FGA	Group	f8,f3 available
	fadds	%f3, %f10, %f3			! FGA	Group	f10 available
	fadds	%f5, %f0, %f5			! FGA	Group	stall f0,f5 available
	fadds	%f7, %f4, %f7			! FGA	Group	stall f4,f7 available

	/* f3=tx, f5=ty, f7=tz */

	/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
	fmuls	%f3, %f3, %f6			! FGM	Group	f3 available
	fmuls	%f5, %f5, %f8			! FGM	Group	f5 available
	fmuls	%f7, %f7, %f10			! FGM	Group	f7 available
	fadds	%f6, %f8, %f6			! FGA	Group	2cyc stall f6,f8 available
	fadds	%f6, %f10, %f6			! FGA	Group	4cyc stall f6,f10 available

	/* scale (f6) = 1.0 / sqrt(len); see top-of-file note on why
	 * direct fsqrts/fdivs beats Newton-Raphson here. */
	fsqrts	%f6, %f6			! FDIV  20 cycles
	fdivs	%f12, %f6, %f6			! FDIV	14 cycles

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x10, %g3			! advance out vector pointer

	ba	7f
	 nop

4:	/* LENGTHS != NULL: fold 'scale' into the matrix once, then
	 * use the caller-supplied per-vertex lengths[i] factor
	 * (presumably precomputed reciprocal lengths -- multiplied,
	 * not divided, below; confirm against the C reference).
	 */
	fmuls	M0, %f15, M0
	fmuls	M1, %f15, M1
	fmuls	M2, %f15, M2
	fmuls	M4, %f15, M4
	fmuls	M5, %f15, M5
	fmuls	M6, %f15, M6
	fmuls	M8, %f15, M8
	fmuls	M9, %f15, M9
	fmuls	M10, %f15, M10

5:
	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	fmuls	%f1, M1, %f4			! FGM	Group
	fmuls	%f0, M4, %f5			! FGM	Group
	fmuls	%f1, M5, %f6			! FGM	Group
	fmuls	%f0, M8, %f7			! FGM	Group	f3 available
	fmuls	%f1, M9, %f8			! FGM	Group	f4 available
	fadds	%f3, %f4, %f3			! FGA
	fmuls	%f2, M2, %f10			! FGM	Group	f5 available
	fmuls	%f2, M6, %f0			! FGM	Group	f6 available
	fadds	%f5, %f6, %f5			! FGA
	fmuls	%f2, M10, %f4			! FGM	Group	f7 available
	fadds	%f7, %f8, %f7			! FGA	Group	f8,f3 available
	fadds	%f3, %f10, %f3			! FGA	Group	f10 available
	ld	[%o3], %f13			! LSU	f13 = lengths[i]
	fadds	%f5, %f0, %f5			! FGA	Group	stall f0,f5 available
	add	%o3, 4, %o3			! IEU0	lengths++
	fadds	%f7, %f4, %f7			! FGA	Group	stall f4,f7 available

	/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * len
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * len
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * len

	cmp	%o4, %g1			! continue if (i < count)
	bl	5b
	 add	%g3, 0x10, %g3			! advance out vector pointer

7:	retl
	 nop
158848b8605Smrg
	.globl	_mesa_sparc_transform_normalize_normals_no_rot
_mesa_sparc_transform_normalize_normals_no_rot:
	/* Transform-and-normalize for matrices with no rotation part:
	 * only the diagonal m0, m5, m10 of mat->inv is loaded and
	 * applied (tx = ux*m0, ty = uy*m5, tz = uz*m10).
	 *
	 * In:   o0=mat o1=scale o2=in o3=lengths o4=dest
	 * Out:  dest->start[] filled, dest->count = in->count
	 * Regs: g1=count  g2=stride  g3=out ptr  o5=in ptr  o4=i
	 *       f12=1.0f  f15=scale  M0/M5/M10 = FP matrix regs
	 * Note: branch delay-slot instructions are indented one extra
	 *       space, per this file's convention.
	 */

	/* Materialize 1.0f and 'scale' in FP regs via a stack slot. */
	sethi	%hi(ONE_DOT_ZERO), %g2
	sub	%sp, 16, %sp
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
	st	%o1, [%sp + STACK_VAR_OFF+0x4]
	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
	ld	[%sp + STACK_VAR_OFF+0x4], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_5_10(%o0)			! diagonal only: M0, M5, M10

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f				! return if count < 1
	 cmp	%o3, 0				! (delay) test lengths ptr
	bne	4f				! lengths != NULL -> prescaled path
	 clr	%o4				! (delay) 'i' for STRIDE_LOOP

1:	/* LENGTHS == NULL: compute 1/sqrt(len) per normal */
	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0)
	 * ty (f5) = (uy * m5)
	 * tz (f7) = (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	fmuls	%f1, M5, %f5			! FGM	Group
	fmuls	%f2, M10, %f7			! FGM	Group

	/* f3=tx, f5=ty, f7=tz */

	/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
	fmuls	%f3, %f3, %f6			! FGM	Group	stall, f3 available
	fmuls	%f5, %f5, %f8			! FGM	Group	f5 available
	fmuls	%f7, %f7, %f10			! FGM	Group	f7 available
	fadds	%f6, %f8, %f6			! FGA	Group	2cyc stall f6,f8 available
	fadds	%f6, %f10, %f6			! FGA	Group	4cyc stall f6,f10 available

	/* scale (f6) = 1.0 / sqrt(len) */
	fsqrts	%f6, %f6			! FDIV  20 cycles
	fdivs	%f12, %f6, %f6			! FDIV	14 cycles

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x10, %g3			! advance out vector pointer

	ba	7f
	 nop

4:	/* LENGTHS != NULL: fold 'scale' into the diagonal once, then
	 * apply the caller-supplied per-vertex lengths[i] factor. */
	fmuls	M0, %f15, M0
	fmuls	M5, %f15, M5
	fmuls	M10, %f15, M10

5:
	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0)
	 * ty (f5) = (uy * m5)
	 * tz (f7) = (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	ld	[%o3], %f13			! LSU	f13 = lengths[i]
	fmuls	%f1, M5, %f5			! FGM	Group
	add	%o3, 4, %o3			! IEU0	lengths++
	fmuls	%f2, M10, %f7			! FGM	Group

	/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * len
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * len
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * len

	cmp	%o4, %g1			! continue if (i < count)
	bl	5b
	 add	%g3, 0x10, %g3			! advance out vector pointer

7:	retl
	 nop
267848b8605Smrg
	.globl	_mesa_sparc_transform_rescale_normals_no_rot
_mesa_sparc_transform_rescale_normals_no_rot:
	/* Transform each normal by the diagonal (m0, m5, m10) of
	 * mat->inv, pre-multiplied by the uniform 'scale' factor.
	 * No per-normal normalization; 'lengths' (o3) is unused.
	 *
	 * In:   o0=mat o1=scale o2=in o3=lengths(unused) o4=dest
	 * Out:  dest->start[] filled, dest->count = in->count
	 * Regs: g1=count  g2=stride  g3=out ptr  o5=in ptr  o4=i
	 *       f15=scale  M0/M5/M10 = FP matrix regs
	 */

	/* Move 'scale' from integer arg reg o1 to %f15 via the stack. */
	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_5_10(%o0)			! diagonal only: M0, M5, M10

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f				! return if count < 1
	 clr	%o4				! (delay) 'i' for STRIDE_LOOP

	/* Fold 'scale' into the diagonal once, outside the loop. */
	fmuls	M0, %f15, M0
	fmuls	M5, %f15, M5
	fmuls	M10, %f15, M10

1:	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0)
	 * ty (f5) = (uy * m5)
	 * tz (f7) = (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	st	%f3, [%g3 + 0x00]		! LSU	out[i][0] = tx
	fmuls	%f1, M5, %f5			! FGM	Group
	st	%f5, [%g3 + 0x04]		! LSU	out[i][1] = ty
	fmuls	%f2, M10, %f7			! FGM	Group
	st	%f7, [%g3 + 0x08]		! LSU	out[i][2] = tz

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x10, %g3			! advance out vector pointer

7:	retl
	 nop
318848b8605Smrg
	.globl	_mesa_sparc_transform_rescale_normals
_mesa_sparc_transform_rescale_normals:
	/* Transform each normal by the full 3x3 block of mat->inv,
	 * pre-multiplied by the uniform 'scale' factor. No per-normal
	 * normalization; 'lengths' (o3) is unused.
	 *
	 * In:   o0=mat o1=scale o2=in o3=lengths(unused) o4=dest
	 * Out:  dest->start[] filled, dest->count = in->count
	 * Regs: g1=count  g2=stride  g3=out ptr  o5=in ptr  o4=i
	 *       f15=scale  M0..M10 = FP matrix regs
	 */

	/* Move 'scale' from integer arg reg o1 to %f15 via the stack. */
	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)	! load 3x3 of inverse into M0..M10

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f				! return if count < 1
	 clr	%o4				! (delay) 'i' for STRIDE_LOOP

	/* Fold 'scale' into the matrix once, outside the loop. */
	fmuls	M0, %f15, M0
	fmuls	M1, %f15, M1
	fmuls	M2, %f15, M2
	fmuls	M4, %f15, M4
	fmuls	M5, %f15, M5
	fmuls	M6, %f15, M6
	fmuls	M8, %f15, M8
	fmuls	M9, %f15, M9
	fmuls	M10, %f15, M10

1:	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
	 * Hand-scheduled for UltraSPARC FGM/FGA grouping.
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	fmuls	%f1, M1, %f4			! FGM	Group
	fmuls	%f0, M4, %f5			! FGM	Group
	fmuls	%f1, M5, %f6			! FGM	Group
	fmuls	%f0, M8, %f7			! FGM	Group	f3 available
	fmuls	%f1, M9, %f8			! FGM	Group	f4 available
	fadds	%f3, %f4, %f3			! FGA
	fmuls	%f2, M2, %f10			! FGM	Group	f5 available
	fmuls	%f2, M6, %f0			! FGM	Group	f6 available
	fadds	%f5, %f6, %f5			! FGA
	fmuls	%f2, M10, %f4			! FGM	Group	f7 available
	fadds	%f7, %f8, %f7			! FGA	Group	f8,f3 available
	fadds	%f3, %f10, %f3			! FGA	Group	f10 available
	st	%f3, [%g3 + 0x00]		! LSU	out[i][0] = tx
	fadds	%f5, %f0, %f5			! FGA	Group	stall f0,f5 available
	st	%f5, [%g3 + 0x04]		! LSU	out[i][1] = ty
	fadds	%f7, %f4, %f7			! FGA	Group	stall f4,f7 available
	st	%f7, [%g3 + 0x08]		! LSU	out[i][2] = tz

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x10, %g3			! advance out vector pointer

7:	retl
	 nop
383848b8605Smrg
	.globl	_mesa_sparc_transform_normals_no_rot
_mesa_sparc_transform_normals_no_rot:
	/* Transform each normal by the diagonal (m0, m5, m10) of
	 * mat->inv. No scaling, no normalization; 'scale' (o1) and
	 * 'lengths' (o3) are unused in this variant.
	 *
	 * In:   o0=mat o1=scale(unused) o2=in o3=lengths(unused) o4=dest
	 * Out:  dest->start[] filled, dest->count = in->count
	 * Regs: g1=count  g2=stride  g3=out ptr  o5=in ptr  o4=i
	 *       M0/M5/M10 = FP matrix regs
	 */
	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_5_10(%o0)			! diagonal only: M0, M5, M10

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f				! return if count < 1
	 clr	%o4				! (delay) 'i' for STRIDE_LOOP

1:	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0)
	 * ty (f5) = (uy * m5)
	 * tz (f7) = (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	st	%f3, [%g3 + 0x00]		! LSU	out[i][0] = tx
	fmuls	%f1, M5, %f5			! FGM	Group
	st	%f5, [%g3 + 0x04]		! LSU	out[i][1] = ty
	fmuls	%f2, M10, %f7			! FGM	Group
	st	%f7, [%g3 + 0x08]		! LSU	out[i][2] = tz

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x10, %g3			! advance out vector pointer

7:	retl
	 nop
425848b8605Smrg
	.globl	_mesa_sparc_transform_normals
_mesa_sparc_transform_normals:
	/* Transform each normal by the full 3x3 block of mat->inv.
	 * No scaling, no normalization; 'scale' (o1) and 'lengths'
	 * (o3) are unused in this variant.
	 *
	 * In:   o0=mat o1=scale(unused) o2=in o3=lengths(unused) o4=dest
	 * Out:  dest->start[] filled, dest->count = in->count
	 * Regs: g1=count  g2=stride  g3=out ptr  o5=in ptr  o4=i
	 *       M0..M10 = FP matrix regs
	 */
	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)	! load 3x3 of inverse into M0..M10

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f				! return if count < 1
	 clr	%o4				! (delay) 'i' for STRIDE_LOOP

1:	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
	 * Hand-scheduled for UltraSPARC FGM/FGA grouping.
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	fmuls	%f1, M1, %f4			! FGM	Group
	fmuls	%f0, M4, %f5			! FGM	Group
	fmuls	%f1, M5, %f6			! FGM	Group
	fmuls	%f0, M8, %f7			! FGM	Group	f3 available
	fmuls	%f1, M9, %f8			! FGM	Group	f4 available
	fadds	%f3, %f4, %f3			! FGA
	fmuls	%f2, M2, %f10			! FGM	Group	f5 available
	fmuls	%f2, M6, %f0			! FGM	Group	f6 available
	fadds	%f5, %f6, %f5			! FGA
	fmuls	%f2, M10, %f4			! FGM	Group	f7 available
	fadds	%f7, %f8, %f7			! FGA	Group	f8,f3 available
	fadds	%f3, %f10, %f3			! FGA	Group	f10 available
	st	%f3, [%g3 + 0x00]		! LSU	out[i][0] = tx
	fadds	%f5, %f0, %f5			! FGA	Group	stall f0,f5 available
	st	%f5, [%g3 + 0x04]		! LSU	out[i][1] = ty
	fadds	%f7, %f4, %f7			! FGA	Group	stall f4,f7 available
	st	%f7, [%g3 + 0x08]		! LSU	out[i][2] = tz

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x10, %g3			! advance out vector pointer

7:	retl
	 nop
475848b8605Smrg
	.globl	_mesa_sparc_normalize_normals
_mesa_sparc_normalize_normals:
	/* Normalize each 3-float normal of 'in' into 'dest' with no
	 * matrix transform. 'mat' (o0) and 'scale' (o1) are unused.
	 * If 'lengths' is non-NULL, each normal is instead multiplied
	 * by lengths[i] (presumably a precomputed reciprocal length --
	 * confirm against the C reference).
	 *
	 * In:   o0=mat(unused) o1=scale(unused) o2=in o3=lengths o4=dest
	 * Out:  dest->start[] filled, dest->count = in->count
	 * Regs: g1=count  g2=stride  g3=out ptr  o5=in ptr  o4=i
	 *       f12=1.0f
	 */

	/* Materialize 1.0f in %f12 via a stack slot. */
	sethi	%hi(ONE_DOT_ZERO), %g2
	sub	%sp, 16, %sp
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
	add	%sp, 16, %sp

	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f				! return if count < 1
	 cmp	%o3, 0				! (delay) test lengths ptr
	bne	4f				! lengths != NULL -> lengths path
	 clr	%o4				! (delay) 'i' for STRIDE_LOOP

1:	/* LENGTHS == NULL: compute 1/sqrt(len) per normal */
	ld	[%o5 + 0x00], %f3		! ux = from[0]
	ld	[%o5 + 0x04], %f5		! uy = from[1]
	ld	[%o5 + 0x08], %f7		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* f3=tx, f5=ty, f7=tz (untransformed input here) */

	/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
	fmuls	%f3, %f3, %f6			! FGM	Group	f3 available
	fmuls	%f5, %f5, %f8			! FGM	Group	f5 available
	fmuls	%f7, %f7, %f10			! FGM	Group	f7 available
	fadds	%f6, %f8, %f6			! FGA	Group	2cyc stall f6,f8 available
	fadds	%f6, %f10, %f6			! FGA	Group	4cyc stall f6,f10 available

	/* scale (f6) = 1.0 / sqrt(len) */
	fsqrts	%f6, %f6			! FDIV  20 cycles
	fdivs	%f12, %f6, %f6			! FDIV	14 cycles

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x10, %g3			! advance out vector pointer

	ba	7f
	 nop

4:	/* LENGTHS != NULL: multiply by caller-supplied lengths[i] */

5:
	ld	[%o5 + 0x00], %f3		! ux = from[0]
	ld	[%o5 + 0x04], %f5		! uy = from[1]
	ld	[%o5 + 0x08], %f7		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	ld	[%o3], %f13			! LSU	f13 = lengths[i]
	add	%o3, 4, %o3			! IEU0	lengths++

	/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * len
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * len
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * len

	cmp	%o4, %g1			! continue if (i < count)
	bl	5b
	 add	%g3, 0x10, %g3			! advance out vector pointer

7:	retl
	 nop
561848b8605Smrg
562848b8605Smrg	.globl	_mesa_sparc_rescale_normals
563848b8605Smrg_mesa_sparc_rescale_normals:
564848b8605Smrg	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
565848b8605Smrg
566848b8605Smrg	sethi	%hi(ONE_DOT_ZERO), %g2
567848b8605Smrg	sub	%sp, 16, %sp
568848b8605Smrg	st	%o1, [%sp + STACK_VAR_OFF+0x0]
569848b8605Smrg	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
570848b8605Smrg	add	%sp, 16, %sp
571848b8605Smrg
572848b8605Smrg	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
573848b8605Smrg	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
574848b8605Smrg	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
575848b8605Smrg	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start
576848b8605Smrg
577848b8605Smrg	/* dest->count = in->count */
578848b8605Smrg	st	%g1, [%o4 + V4F_COUNT]
579848b8605Smrg
580848b8605Smrg	cmp	%g1, 1
581848b8605Smrg	bl	7f
582848b8605Smrg	 clr	%o4				! 'i' for STRIDE_LOOP
583848b8605Smrg
584848b8605Smrg1:
585848b8605Smrg	ld	[%o5 + 0x00], %f3		! ux = from[0]
586848b8605Smrg	ld	[%o5 + 0x04], %f5		! uy = from[1]
587848b8605Smrg	ld	[%o5 + 0x08], %f7		! uz = from[2]
588848b8605Smrg	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
589848b8605Smrg	add	%o4, 1, %o4			! i++
590848b8605Smrg
591848b8605Smrg	/* f3=tx, f5=ty, f7=tz */
592848b8605Smrg
593848b8605Smrg	fmuls	%f3, %f15, %f3
594848b8605Smrg	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
595848b8605Smrg	fmuls	%f5, %f15, %f5
596848b8605Smrg	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
597848b8605Smrg	fmuls	%f7, %f15, %f7
598848b8605Smrg	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale
599848b8605Smrg
600848b8605Smrg	cmp	%o4, %g1			! continue if (i < count)
601848b8605Smrg	bl	1b
602848b8605Smrg	 add	%g3, 0x10, %g3			! advance out vector pointer
603848b8605Smrg
604848b8605Smrg7:	retl
605848b8605Smrg	 nop
606