/* norm.S revision 7117f1b4 */
/* $Id: norm.S,v 1.1.1.1 2008/07/29 05:10:21 mrg Exp $ */

#include "sparc_matrix.h"

#if defined(SVR4) || defined(__SVR4) || defined(__svr4__)
	/* Solaris requires this for 64-bit. */
	.register %g2, #scratch
	.register %g3, #scratch
#endif

	.text

/* Offset from %sp of the scratch words the routines in this file use to
 * move a 32-bit value from an integer register into a floating-point
 * register (st to the slot, then ld from it).
 * 64-bit: 2047 plus 16 slots of 8 bytes (2047 is presumably the SPARC V9
 * stack bias -- confirm against the ABI).  32-bit: 16 slots of 4 bytes.
 */
#ifdef __arch64__
#define STACK_VAR_OFF	(2047 + (8 * 16))
#else
#define STACK_VAR_OFF	(4 * 16)
#endif

	/* Newton-Raphson approximation turns out to be slower
	 * (and less accurate) than direct fsqrts/fdivs.
	 */
/* Bit pattern that the routines materialise with sethi and reload into an
 * FP register as the single-precision constant 1.0f.  The low 10 bits are
 * zero, so a lone sethi %hi() produces the full value.
 */
#define ONE_DOT_ZERO	0x3f800000

/*
 * _mesa_sparc_transform_normalize_normals
 *
 * Transforms every input normal by elements 0,1,2,4,5,6,8,9,10 of
 * mat->inv and then
 *   - lengths == NULL: normalizes the result (1.0 / sqrt(tx^2+ty^2+tz^2));
 *   - lengths != NULL: pre-multiplies the matrix by 'scale' once and
 *     multiplies each transformed normal by lengths[i].
 * Also stores in->count into dest->count.
 *
 * In:    %o0 = mat, %o1 = scale (float bit pattern in an integer register),
 *        %o2 = in, %o3 = lengths, %o4 = dest
 * Uses:  %g1 = count, %g2 = in->stride, %g3 = output pointer,
 *        %o5 = input pointer, %o4 = loop index i (after dest is consumed),
 *        %f0-%f8, %f10, %f12 (1.0f), %f13 (lengths[i]), %f15 (scale),
 *        plus the M* matrix registers.
 * Leaf routine: returns with retl, no register window is allocated.
 *
 * LDPTR, LDMATRIX_*, M0..M10, MAT_INV and V4F_* come from sparc_matrix.h,
 * which is not visible here; their exact expansion is assumed, not checked.
 *
 * NOTE(review): the lengths == NULL path has no guard for a zero-length
 * vector; fdivs then divides 1.0 by 0.
 */
	.globl	_mesa_sparc_transform_normalize_normals
_mesa_sparc_transform_normalize_normals:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */

	/* Integer -> FP register move via memory: lower %sp by 16 for the
	 * duration, store the two words, reload them as floats.
	 */
	sethi	%hi(ONE_DOT_ZERO), %g2
	sub	%sp, 16, %sp
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
	st	%o1, [%sp + STACK_VAR_OFF+0x4]
	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
	ld	[%sp + STACK_VAR_OFF+0x4], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f				! count < 1: nothing to do
	 cmp	%o3, 0				! (delay slot) lengths == NULL ?
#ifdef __arch64__
	/* lengths is a 64-bit pointer here: a plain bne reads %icc, which
	 * only reflects the low 32 bits of the compare.
	 */
	bne	%xcc, 4f
#else
	bne	4f
#endif
	 clr	%o4				! 'i' for STRIDE_LOOP

1:	/* LENGTHS == NULL */
	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	fmuls	%f1, M1, %f4			! FGM	Group
	fmuls	%f0, M4, %f5			! FGM	Group
	fmuls	%f1, M5, %f6			! FGM	Group
	fmuls	%f0, M8, %f7			! FGM	Group	f3 available
	fmuls	%f1, M9, %f8			! FGM	Group	f4 available
	fadds	%f3, %f4, %f3			! FGA
	fmuls	%f2, M2, %f10			! FGM	Group	f5 available
	fmuls	%f2, M6, %f0			! FGM	Group	f6 available
	fadds	%f5, %f6, %f5			! FGA
	fmuls	%f2, M10, %f4			! FGM	Group	f7 available
	fadds	%f7, %f8, %f7			! FGA	Group	f8,f3 available
	fadds	%f3, %f10, %f3			! FGA	Group	f10 available
	fadds	%f5, %f0, %f5			! FGA	Group	stall f0,f5 available
	fadds	%f7, %f4, %f7			! FGA	Group	stall f4,f7 available

	/* f3=tx, f5=ty, f7=tz */

	/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
	fmuls	%f3, %f3, %f6			! FGM	Group	f3 available
	fmuls	%f5, %f5, %f8			! FGM	Group	f5 available
	fmuls	%f7, %f7, %f10			! FGM	Group	f7 available
	fadds	%f6, %f8, %f6			! FGA	Group	2cyc stall f6,f8 available
	fadds	%f6, %f10, %f6			! FGA	Group	4cyc stall f6,f10 available

	/* scale (f6) = 1.0 / sqrt(len) */
	fsqrts	%f6, %f6			! FDIV  20 cycles
	fdivs	%f12, %f6, %f6			! FDIV	14 cycles

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x0c, %g3			! advance out vector pointer

	ba	7f
	 nop

4:	/* LENGTHS != NULL */
	/* Fold 'scale' into the matrix once, outside the loop. */
	fmuls	M0, %f15, M0
	fmuls	M1, %f15, M1
	fmuls	M2, %f15, M2
	fmuls	M4, %f15, M4
	fmuls	M5, %f15, M5
	fmuls	M6, %f15, M6
	fmuls	M8, %f15, M8
	fmuls	M9, %f15, M9
	fmuls	M10, %f15, M10

5:
	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	fmuls	%f1, M1, %f4			! FGM	Group
	fmuls	%f0, M4, %f5			! FGM	Group
	fmuls	%f1, M5, %f6			! FGM	Group
	fmuls	%f0, M8, %f7			! FGM	Group	f3 available
	fmuls	%f1, M9, %f8			! FGM	Group	f4 available
	fadds	%f3, %f4, %f3			! FGA
	fmuls	%f2, M2, %f10			! FGM	Group	f5 available
	fmuls	%f2, M6, %f0			! FGM	Group	f6 available
	fadds	%f5, %f6, %f5			! FGA
	fmuls	%f2, M10, %f4			! FGM	Group	f7 available
	fadds	%f7, %f8, %f7			! FGA	Group	f8,f3 available
	fadds	%f3, %f10, %f3			! FGA	Group	f10 available
	ld	[%o3], %f13			! LSU
	fadds	%f5, %f0, %f5			! FGA	Group	stall f0,f5 available
	add	%o3, 4, %o3			! IEU0
	fadds	%f7, %f4, %f7			! FGA	Group	stall f4,f7 available

	/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * len
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * len
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * len

	cmp	%o4, %g1			! continue if (i < count)
	bl	5b
	 add	%g3, 0x0c, %g3			! advance out vector pointer

7:	retl
	 nop

/*
 * _mesa_sparc_transform_normalize_normals_no_rot
 *
 * Same contract as the full transform+normalize routine, but only the
 * diagonal elements 0, 5 and 10 of mat->inv are used:
 *   tx = ux * m0, ty = uy * m5, tz = uz * m10
 *   - lengths == NULL: the result is normalized (1.0 / sqrt(len));
 *   - lengths != NULL: the three matrix elements are pre-multiplied by
 *     'scale' and each result is multiplied by lengths[i].
 * Also stores in->count into dest->count.
 *
 * In:    %o0 = mat, %o1 = scale (float bit pattern in an integer register),
 *        %o2 = in, %o3 = lengths, %o4 = dest
 * Uses:  %g1 = count, %g2 = in->stride, %g3 = output pointer,
 *        %o5 = input pointer, %o4 = loop index i,
 *        %f0-%f3, %f5-%f8, %f10, %f12 (1.0f), %f13, %f15, M0/M5/M10.
 * Leaf routine (retl).
 *
 * LDPTR, LDMATRIX_0_5_10, M*, MAT_INV and V4F_* come from sparc_matrix.h,
 * which is not visible here.
 *
 * NOTE(review): the lengths == NULL path has no guard for a zero-length
 * vector; fdivs then divides 1.0 by 0.
 */
	.globl	_mesa_sparc_transform_normalize_normals_no_rot
_mesa_sparc_transform_normalize_normals_no_rot:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */

	/* Integer -> FP register move through a temporary stack slot. */
	sethi	%hi(ONE_DOT_ZERO), %g2
	sub	%sp, 16, %sp
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
	st	%o1, [%sp + STACK_VAR_OFF+0x4]
	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
	ld	[%sp + STACK_VAR_OFF+0x4], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_5_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f				! count < 1: nothing to do
	 cmp	%o3, 0				! (delay slot) lengths == NULL ?
#ifdef __arch64__
	/* lengths is a 64-bit pointer here: a plain bne reads %icc, which
	 * only reflects the low 32 bits of the compare.
	 */
	bne	%xcc, 4f
#else
	bne	4f
#endif
	 clr	%o4				! 'i' for STRIDE_LOOP

1:	/* LENGTHS == NULL */
	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0)
	 * ty (f5) = (uy * m5)
	 * tz (f7) = (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	fmuls	%f1, M5, %f5			! FGM	Group
	fmuls	%f2, M10, %f7			! FGM	Group

	/* f3=tx, f5=ty, f7=tz */

	/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
	fmuls	%f3, %f3, %f6			! FGM	Group	stall, f3 available
	fmuls	%f5, %f5, %f8			! FGM	Group	f5 available
	fmuls	%f7, %f7, %f10			! FGM	Group	f7 available
	fadds	%f6, %f8, %f6			! FGA	Group	2cyc stall f6,f8 available
	fadds	%f6, %f10, %f6			! FGA	Group	4cyc stall f6,f10 available

	/* scale (f6) = 1.0 / sqrt(len) */
	fsqrts	%f6, %f6			! FDIV  20 cycles
	fdivs	%f12, %f6, %f6			! FDIV	14 cycles

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x0c, %g3			! advance out vector pointer

	ba	7f
	 nop

4:	/* LENGTHS != NULL */
	/* Fold 'scale' into the diagonal once, outside the loop. */
	fmuls	M0, %f15, M0
	fmuls	M5, %f15, M5
	fmuls	M10, %f15, M10

5:
	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0)
	 * ty (f5) = (uy * m5)
	 * tz (f7) = (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	ld	[%o3], %f13			! LSU
	fmuls	%f1, M5, %f5			! FGM	Group
	add	%o3, 4, %o3			! IEU0
	fmuls	%f2, M10, %f7			! FGM	Group

	/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * len
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * len
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * len

	cmp	%o4, %g1			! continue if (i < count)
	bl	5b
	 add	%g3, 0x0c, %g3			! advance out vector pointer

7:	retl
	 nop

/*
 * _mesa_sparc_transform_rescale_normals_no_rot
 *
 * Multiplies each input normal component-wise by the diagonal of
 * mat->inv (elements 0, 5, 10) after that diagonal has been multiplied
 * by 'scale':
 *   out = (ux * m0 * scale, uy * m5 * scale, uz * m10 * scale)
 * Also stores in->count into dest->count.
 *
 * In:    %o0 = mat, %o1 = scale (float bit pattern in an integer register),
 *        %o2 = in, %o3 = lengths (never read here), %o4 = dest
 * Uses:  %g1 = count, %g2 = in->stride, %g3 = output pointer,
 *        %o5 = input pointer, %o4 = loop index i,
 *        %f0-%f3, %f5, %f7, %f15 (scale), M0/M5/M10.
 * Leaf routine (retl).
 *
 * LDPTR, LDMATRIX_0_5_10, M*, MAT_INV and V4F_* come from sparc_matrix.h,
 * which is not visible here.
 */
	.globl	_mesa_sparc_transform_rescale_normals_no_rot
_mesa_sparc_transform_rescale_normals_no_rot:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
	/* Integer -> FP register move through a temporary stack slot. */
	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_5_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f				! count < 1: nothing to do
	 clr	%o4				! 'i' for STRIDE_LOOP

	/* Fold 'scale' into the diagonal once, outside the loop. */
	fmuls	M0, %f15, M0
	fmuls	M5, %f15, M5
	fmuls	M10, %f15, M10

1:	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0)
	 * ty (f5) = (uy * m5)
	 * tz (f7) = (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	st	%f3, [%g3 + 0x00]		! LSU
	fmuls	%f1, M5, %f5			! FGM	Group
	st	%f5, [%g3 + 0x04]		! LSU
	fmuls	%f2, M10, %f7			! FGM	Group
	st	%f7, [%g3 + 0x08]		! LSU

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x0c, %g3			! advance out vector pointer

7:	retl
	 nop

/*
 * _mesa_sparc_transform_rescale_normals
 *
 * Transforms each input normal by elements 0,1,2,4,5,6,8,9,10 of
 * mat->inv after all nine elements have been multiplied by 'scale':
 *   tx = ux*m0 + uy*m1 + uz*m2
 *   ty = ux*m4 + uy*m5 + uz*m6
 *   tz = ux*m8 + uy*m9 + uz*m10
 * Also stores in->count into dest->count.
 *
 * In:    %o0 = mat, %o1 = scale (float bit pattern in an integer register),
 *        %o2 = in, %o3 = lengths (never read here), %o4 = dest
 * Uses:  %g1 = count, %g2 = in->stride, %g3 = output pointer,
 *        %o5 = input pointer, %o4 = loop index i,
 *        %f0-%f8, %f10, %f15 (scale), plus the M* matrix registers.
 * Leaf routine (retl).
 *
 * LDPTR, LDMATRIX_*, M*, MAT_INV and V4F_* come from sparc_matrix.h,
 * which is not visible here.
 */
	.globl	_mesa_sparc_transform_rescale_normals
_mesa_sparc_transform_rescale_normals:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
	/* Integer -> FP register move through a temporary stack slot. */
	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f				! count < 1: nothing to do
	 clr	%o4				! 'i' for STRIDE_LOOP

	/* Fold 'scale' into the matrix once, outside the loop. */
	fmuls	M0, %f15, M0
	fmuls	M1, %f15, M1
	fmuls	M2, %f15, M2
	fmuls	M4, %f15, M4
	fmuls	M5, %f15, M5
	fmuls	M6, %f15, M6
	fmuls	M8, %f15, M8
	fmuls	M9, %f15, M9
	fmuls	M10, %f15, M10

1:	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* Each result is stored as soon as its final add has been issued. */
	fmuls	%f0, M0, %f3			! FGM	Group
	fmuls	%f1, M1, %f4			! FGM	Group
	fmuls	%f0, M4, %f5			! FGM	Group
	fmuls	%f1, M5, %f6			! FGM	Group
	fmuls	%f0, M8, %f7			! FGM	Group	f3 available
	fmuls	%f1, M9, %f8			! FGM	Group	f4 available
	fadds	%f3, %f4, %f3			! FGA
	fmuls	%f2, M2, %f10			! FGM	Group	f5 available
	fmuls	%f2, M6, %f0			! FGM	Group	f6 available
	fadds	%f5, %f6, %f5			! FGA
	fmuls	%f2, M10, %f4			! FGM	Group	f7 available
	fadds	%f7, %f8, %f7			! FGA	Group	f8,f3 available
	fadds	%f3, %f10, %f3			! FGA	Group	f10 available
	st	%f3, [%g3 + 0x00]		! LSU
	fadds	%f5, %f0, %f5			! FGA	Group	stall f0,f5 available
	st	%f5, [%g3 + 0x04]		! LSU
	fadds	%f7, %f4, %f7			! FGA	Group	stall f4,f7 available
	st	%f7, [%g3 + 0x08]		! LSU

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x0c, %g3			! advance out vector pointer

7:	retl
	 nop

/*
 * _mesa_sparc_transform_normals_no_rot
 *
 * Multiplies each input normal component-wise by the diagonal of
 * mat->inv (elements 0, 5, 10):
 *   out = (ux * m0, uy * m5, uz * m10)
 * Also stores in->count into dest->count.
 *
 * In:    %o0 = mat, %o1 = scale (never read here), %o2 = in,
 *        %o3 = lengths (never read here), %o4 = dest
 * Uses:  %g1 = count, %g2 = in->stride, %g3 = output pointer,
 *        %o5 = input pointer, %o4 = loop index i,
 *        %f0-%f3, %f5, %f7, M0/M5/M10.
 * Leaf routine (retl); does not touch the stack.
 *
 * LDPTR, LDMATRIX_0_5_10, M*, MAT_INV and V4F_* come from sparc_matrix.h,
 * which is not visible here.
 */
	.globl	_mesa_sparc_transform_normals_no_rot
_mesa_sparc_transform_normals_no_rot:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_5_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f				! count < 1: nothing to do
	 clr	%o4				! 'i' for STRIDE_LOOP

1:	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0)
	 * ty (f5) = (uy * m5)
	 * tz (f7) = (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	st	%f3, [%g3 + 0x00]		! LSU
	fmuls	%f1, M5, %f5			! FGM	Group
	st	%f5, [%g3 + 0x04]		! LSU
	fmuls	%f2, M10, %f7			! FGM	Group
	st	%f7, [%g3 + 0x08]		! LSU

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x0c, %g3			! advance out vector pointer

7:	retl
	 nop

/*
 * _mesa_sparc_transform_normals
 *
 * Transforms each input normal by elements 0,1,2,4,5,6,8,9,10 of
 * mat->inv, with no scaling or normalization:
 *   tx = ux*m0 + uy*m1 + uz*m2
 *   ty = ux*m4 + uy*m5 + uz*m6
 *   tz = ux*m8 + uy*m9 + uz*m10
 * Also stores in->count into dest->count.
 *
 * In:    %o0 = mat, %o1 = scale (never read here), %o2 = in,
 *        %o3 = lengths (never read here), %o4 = dest
 * Uses:  %g1 = count, %g2 = in->stride, %g3 = output pointer,
 *        %o5 = input pointer, %o4 = loop index i,
 *        %f0-%f8, %f10, plus the M* matrix registers.
 * Leaf routine (retl); does not touch the stack.
 *
 * LDPTR, LDMATRIX_*, M*, MAT_INV and V4F_* come from sparc_matrix.h,
 * which is not visible here.
 */
	.globl	_mesa_sparc_transform_normals
_mesa_sparc_transform_normals:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f				! count < 1: nothing to do
	 clr	%o4				! 'i' for STRIDE_LOOP

1:	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* Each result is stored as soon as its final add has been issued. */
	fmuls	%f0, M0, %f3			! FGM	Group
	fmuls	%f1, M1, %f4			! FGM	Group
	fmuls	%f0, M4, %f5			! FGM	Group
	fmuls	%f1, M5, %f6			! FGM	Group
	fmuls	%f0, M8, %f7			! FGM	Group	f3 available
	fmuls	%f1, M9, %f8			! FGM	Group	f4 available
	fadds	%f3, %f4, %f3			! FGA
	fmuls	%f2, M2, %f10			! FGM	Group	f5 available
	fmuls	%f2, M6, %f0			! FGM	Group	f6 available
	fadds	%f5, %f6, %f5			! FGA
	fmuls	%f2, M10, %f4			! FGM	Group	f7 available
	fadds	%f7, %f8, %f7			! FGA	Group	f8,f3 available
	fadds	%f3, %f10, %f3			! FGA	Group	f10 available
	st	%f3, [%g3 + 0x00]		! LSU
	fadds	%f5, %f0, %f5			! FGA	Group	stall f0,f5 available
	st	%f5, [%g3 + 0x04]		! LSU
	fadds	%f7, %f4, %f7			! FGA	Group	stall f4,f7 available
	st	%f7, [%g3 + 0x08]		! LSU

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x0c, %g3			! advance out vector pointer

7:	retl
	 nop

/*
 * _mesa_sparc_normalize_normals
 *
 * Copies each input normal to the output, scaled so that
 *   - lengths == NULL: out = in * (1.0 / sqrt(x^2 + y^2 + z^2));
 *   - lengths != NULL: out = in * lengths[i].
 * No matrix is applied.  Also stores in->count into dest->count.
 *
 * In:    %o0 = mat (never read here), %o1 = scale (never read here),
 *        %o2 = in, %o3 = lengths, %o4 = dest
 * Uses:  %g1 = count, %g2 = in->stride (also the sethi temporary),
 *        %g3 = output pointer, %o5 = input pointer, %o4 = loop index i,
 *        %f3, %f5-%f8, %f10, %f12 (1.0f), %f13 (lengths[i]).
 * Leaf routine (retl).
 *
 * LDPTR and V4F_* come from sparc_matrix.h, which is not visible here.
 *
 * NOTE(review): the lengths == NULL path has no guard for a zero-length
 * vector; fdivs then divides 1.0 by 0.
 */
	.globl	_mesa_sparc_normalize_normals
_mesa_sparc_normalize_normals:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */

	/* Build 1.0f in %f12 by bouncing its bit pattern through a
	 * temporary stack slot.
	 */
	sethi	%hi(ONE_DOT_ZERO), %g2
	sub	%sp, 16, %sp
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
	add	%sp, 16, %sp

	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f				! count < 1: nothing to do
	 cmp	%o3, 0				! (delay slot) lengths == NULL ?
#ifdef __arch64__
	/* lengths is a 64-bit pointer here: a plain bne reads %icc, which
	 * only reflects the low 32 bits of the compare.
	 */
	bne	%xcc, 4f
#else
	bne	4f
#endif
	 clr	%o4				! 'i' for STRIDE_LOOP

1:	/* LENGTHS == NULL */
	ld	[%o5 + 0x00], %f3		! ux = from[0]
	ld	[%o5 + 0x04], %f5		! uy = from[1]
	ld	[%o5 + 0x08], %f7		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* f3=tx, f5=ty, f7=tz */

	/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
	fmuls	%f3, %f3, %f6			! FGM	Group	f3 available
	fmuls	%f5, %f5, %f8			! FGM	Group	f5 available
	fmuls	%f7, %f7, %f10			! FGM	Group	f7 available
	fadds	%f6, %f8, %f6			! FGA	Group	2cyc stall f6,f8 available
	fadds	%f6, %f10, %f6			! FGA	Group	4cyc stall f6,f10 available

	/* scale (f6) = 1.0 / sqrt(len) */
	fsqrts	%f6, %f6			! FDIV  20 cycles
	fdivs	%f12, %f6, %f6			! FDIV	14 cycles

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x0c, %g3			! advance out vector pointer

	ba	7f
	 nop

4:	/* LENGTHS != NULL */

5:
	ld	[%o5 + 0x00], %f3		! ux = from[0]
	ld	[%o5 + 0x04], %f5		! uy = from[1]
	ld	[%o5 + 0x08], %f7		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	ld	[%o3], %f13			! LSU
	add	%o3, 4, %o3			! IEU0

	/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * len
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * len
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * len

	cmp	%o4, %g1			! continue if (i < count)
	bl	5b
	 add	%g3, 0x0c, %g3			! advance out vector pointer

7:	retl
	 nop

/*
 * _mesa_sparc_rescale_normals
 *
 * Multiplies every component of every input normal by 'scale' and writes
 * the result to the output.  No matrix is applied.  Also stores in->count
 * into dest->count.
 *
 * In:    %o0 = mat (never read here), %o1 = scale (float bit pattern in
 *        an integer register), %o2 = in, %o3 = lengths (never read here),
 *        %o4 = dest
 * Uses:  %g1 = count, %g2 = in->stride, %g3 = output pointer,
 *        %o5 = input pointer, %o4 = loop index i,
 *        %f3, %f5, %f7, %f15 (scale).
 * Leaf routine (retl).
 *
 * LDPTR and V4F_* come from sparc_matrix.h, which is not visible here.
 */
	.globl	_mesa_sparc_rescale_normals
_mesa_sparc_rescale_normals:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */

	/* Integer -> FP register move through a temporary stack slot.
	 * (No 1.0f constant is needed here, so none is materialised.)
	 */
	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f				! count < 1: nothing to do
	 clr	%o4				! 'i' for STRIDE_LOOP

1:
	ld	[%o5 + 0x00], %f3		! ux = from[0]
	ld	[%o5 + 0x04], %f5		! uy = from[1]
	ld	[%o5 + 0x08], %f7		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* f3=tx, f5=ty, f7=tz */

	fmuls	%f3, %f15, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
	fmuls	%f5, %f15, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
	fmuls	%f7, %f15, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x0c, %g3			! advance out vector pointer

7:	retl
	 nop

610