
/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
 * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces
 * in there will break the build on some platforms.
 */

#include "assyntax.h"
#include "matypes.h"
#include "xform_args.h"

	SEG_TEXT

/*
 * Integer images of the IEEE-754 single-precision values 1.0 (0x3f800000)
 * and 0.0.
 */
#define FP_ONE		1065353216
#define FP_ZERO		0

/*
 * Operand shorthands used by every routine below.  Element n is the 32-bit
 * word at byte offset 4*n from the base register:
 *
 *	SRCn  - relative to ESI
 *	DSTn  - relative to EDI
 *	MATn  - relative to EDX
 */

/* Source elements (ESI-relative). */
#define SRC0		REGOFF(0, ESI)
#define SRC1		REGOFF(4, ESI)
#define SRC2		REGOFF(8, ESI)
#define SRC3		REGOFF(12, ESI)

/* Destination elements (EDI-relative). */
#define DST0		REGOFF(0, EDI)
#define DST1		REGOFF(4, EDI)
#define DST2		REGOFF(8, EDI)
#define DST3		REGOFF(12, EDI)

/* Matrix elements 0..15 (EDX-relative). */
#define MAT0		REGOFF(0, EDX)
#define MAT1		REGOFF(4, EDX)
#define MAT2		REGOFF(8, EDX)
#define MAT3		REGOFF(12, EDX)
#define MAT4		REGOFF(16, EDX)
#define MAT5		REGOFF(20, EDX)
#define MAT6		REGOFF(24, EDX)
#define MAT7		REGOFF(28, EDX)
#define MAT8		REGOFF(32, EDX)
#define MAT9		REGOFF(36, EDX)
#define MAT10		REGOFF(40, EDX)
#define MAT11		REGOFF(44, EDX)
#define MAT12		REGOFF(48, EDX)
#define MAT13		REGOFF(52, EDX)
#define MAT14		REGOFF(56, EDX)
#define MAT15		REGOFF(60, EDX)


/*
 * _mesa_x86_transform_points3_general
 *
 * For every source vertex (x, y, z) compute all four output components
 * from the full matrix m[0..15]:
 *
 *	dst[0] = x*m[0] + y*m[4] + z*m[8]  + m[12]
 *	dst[1] = x*m[1] + y*m[5] + z*m[9]  + m[13]
 *	dst[2] = x*m[2] + y*m[6] + z*m[10] + m[14]
 *	dst[3] = x*m[3] + y*m[7] + z*m[11] + m[15]
 *
 * The destination header receives the source count, size 4 and the
 * VEC_SIZE_4 flag bits.  A zero source count returns before anything is
 * written.
 *
 * Loop registers:
 *	ESI  current source vertex, stepped by the source stride held in EAX
 *	EDI  current destination vertex, stepped by 16 bytes
 *	EDX  matrix base
 *	ECX  end address: destination start + 16 * count
 *
 * ESI and EDI are saved and restored; EAX, ECX and EDX are clobbered.
 * x87 stack comments list the registers top-of-stack first; ox/oy/oz/ow
 * are the running sums for dst[0..3].
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_general )
HIDDEN(_mesa_x86_transform_points3_general)
GLNAME( _mesa_x86_transform_points3_general ):

/*
 * Bytes pushed by the prologue.  Presumably what the ARG_* macros use to
 * locate the stack arguments -- confirm in xform_args.h.
 */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_SOURCE, ESI )

	/* Nothing to do for an empty source vector. */
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p3_gr_done) )

	/* Fetch the stride and fill in the destination header. */
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	/* From here on ESI/EDI address vertex data, not the headers. */
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = destination start + count * 16 */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p3_gr_loop):

	/* x column */
	FLD_S( SRC0 )
	FMUL_S( MAT0 )			/* ox */
	FLD_S( SRC0 )
	FMUL_S( MAT1 )			/* oy ox */
	FLD_S( SRC0 )
	FMUL_S( MAT2 )			/* oz oy ox */
	FLD_S( SRC0 )
	FMUL_S( MAT3 )			/* ow oz oy ox */

	/* y column: a..d = y*m[4..7] */
	FLD_S( SRC1 )
	FMUL_S( MAT4 )			/* a ow oz oy ox */
	FLD_S( SRC1 )
	FMUL_S( MAT5 )			/* b a ow oz oy ox */
	FLD_S( SRC1 )
	FMUL_S( MAT6 )			/* c b a ow oz oy ox */
	FLD_S( SRC1 )
	FMUL_S( MAT7 )			/* d c b a ow oz oy ox */

	FXCH( ST(3) )			/* a c b d ow oz oy ox */
	FADDP( ST0, ST(7) )		/* c b d ow oz oy ox	ox += a */
	FXCH( ST(1) )			/* b c d ow oz oy ox */
	FADDP( ST0, ST(5) )		/* c d ow oz oy ox	oy += b */
	FADDP( ST0, ST(3) )		/* d ow oz oy ox	oz += c */
	FADDP( ST0, ST(1) )		/* ow oz oy ox		ow += d */

	/* z column: a..d = z*m[8..11] */
	FLD_S( SRC2 )
	FMUL_S( MAT8 )			/* a ow oz oy ox */
	FLD_S( SRC2 )
	FMUL_S( MAT9 )			/* b a ow oz oy ox */
	FLD_S( SRC2 )
	FMUL_S( MAT10 )			/* c b a ow oz oy ox */
	FLD_S( SRC2 )
	FMUL_S( MAT11 )			/* d c b a ow oz oy ox */

	FXCH( ST(3) )			/* a c b d ow oz oy ox */
	FADDP( ST0, ST(7) )		/* c b d ow oz oy ox	ox += a */
	FXCH( ST(1) )			/* b c d ow oz oy ox */
	FADDP( ST0, ST(5) )		/* c d ow oz oy ox	oy += b */
	FADDP( ST0, ST(3) )		/* d ow oz oy ox	oz += c */
	FADDP( ST0, ST(1) )		/* ow oz oy ox		ow += d */

	/* Add the fourth matrix column m[12..15]. */
	FXCH( ST(3) )			/* ox oz oy ow */
	FADD_S( MAT12 )
	FXCH( ST(2) )			/* oy oz ox ow */
	FADD_S( MAT13 )
	FXCH( ST(1) )			/* oz oy ox ow */
	FADD_S( MAT14 )
	FXCH( ST(3) )			/* ow oy ox oz */
	FADD_S( MAT15 )

	/* Store the four results and empty the x87 stack. */
	FXCH( ST(2) )			/* ox oy ow oz */
	FSTP_S( DST0 )			/* oy ow oz */
	FSTP_S( DST1 )			/* ow oz */
	FXCH( ST(1) )			/* oz ow */
	FSTP_S( DST2 )			/* ow */
	FSTP_S( DST3 )			/* (empty) */

	/* Next vertex; stop when EDI reaches the end address. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p3_gr_loop) )

LLBL(x86_p3_gr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points3_perspective
 *
 * For every source vertex (x, y, z) compute, using only matrix elements
 * 0, 5, 8, 9, 10 and 14:
 *
 *	dst[0] = x*m[0] + z*m[8]
 *	dst[1] = y*m[5] + z*m[9]
 *	dst[2] = z*m[10] + m[14]
 *	dst[3] = z with its sign bit inverted (i.e. -z)
 *
 * The destination header receives the source count, size 4 and the
 * VEC_SIZE_4 flag bits.  A zero source count returns before anything is
 * written.
 *
 * Loop registers:
 *	ESI  current source vertex, stepped by the source stride held in EAX
 *	EDI  current destination vertex, stepped by 16 bytes
 *	EDX  matrix base
 *	ECX  end address: destination start + 16 * count
 *	EBX  scratch for the integer-side negation of z
 *
 * ESI, EDI and EBX are saved and restored; EAX, ECX and EDX are clobbered.
 * x87 stack comments list the registers top-of-stack first.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_perspective )
HIDDEN(_mesa_x86_transform_points3_perspective)
GLNAME( _mesa_x86_transform_points3_perspective ):

/*
 * Bytes pushed by the prologue.  Presumably what the ARG_* macros use to
 * locate the stack arguments -- confirm in xform_args.h.
 */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_SOURCE, ESI )

	/* Nothing to do for an empty source vector. */
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p3_pr_done) )

	/* Fetch the stride and fill in the destination header. */
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	/* From here on ESI/EDI address vertex data, not the headers. */
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = destination start + count * 16 */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p3_pr_loop):

	FLD_S( SRC0 )
	FMUL_S( MAT0 )			/* ox */

	FLD_S( SRC1 )
	FMUL_S( MAT5 )			/* oy ox */

	/* a, b, c = z*m[8], z*m[9], z*m[10] */
	FLD_S( SRC2 )
	FMUL_S( MAT8 )			/* a oy ox */
	FLD_S( SRC2 )
	FMUL_S( MAT9 )			/* b a oy ox */
	FLD_S( SRC2 )
	FMUL_S( MAT10 )			/* c b a oy ox */

	FXCH( ST(2) )			/* a b c oy ox */
	FADDP( ST0, ST(4) )		/* b c oy ox	ox += a */
	FADDP( ST0, ST(2) )		/* c oy ox	oy += b */
	FLD_S( MAT14 )			/* m14 c oy ox */
	FXCH( ST(1) )			/* c m14 oy ox */
	FADDP( ST0, ST(1) )		/* oz oy ox	oz = m14 + c */

	/* dst[3] = -z, produced by flipping the float's sign bit in EBX. */
	MOV_L( SRC2, EBX )
	XOR_L( CONST(-2147483648), EBX )

	FXCH( ST(2) )			/* ox oy oz */
	FSTP_S( DST0 )			/* oy oz */
	FSTP_S( DST1 )			/* oz */
	FSTP_S( DST2 )			/* (empty) */
	MOV_L( EBX, DST3 )

	/* Next vertex; stop when EDI reaches the end address. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p3_pr_loop) )

LLBL(x86_p3_pr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points3_3d
 *
 * For every source vertex (x, y, z) compute three output components; the
 * matrix elements 3, 7, 11 and 15 are not read:
 *
 *	dst[0] = x*m[0] + y*m[4] + z*m[8]  + m[12]
 *	dst[1] = x*m[1] + y*m[5] + z*m[9]  + m[13]
 *	dst[2] = x*m[2] + y*m[6] + z*m[10] + m[14]
 *
 * dst[3] is not written.  The destination header receives the source
 * count, size 3 and the VEC_SIZE_3 flag bits.  A zero source count returns
 * before anything is written.
 *
 * Loop registers:
 *	ESI  current source vertex, stepped by the source stride held in EAX
 *	EDI  current destination vertex, stepped by 16 bytes
 *	EDX  matrix base
 *	ECX  end address: destination start + 16 * count
 *
 * ESI and EDI are saved and restored; EAX, ECX and EDX are clobbered.
 * x87 stack comments list the registers top-of-stack first; ox/oy/oz are
 * the running sums for dst[0..2].
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_3d )
HIDDEN(_mesa_x86_transform_points3_3d)
GLNAME( _mesa_x86_transform_points3_3d ):

/*
 * Bytes pushed by the prologue.  Presumably what the ARG_* macros use to
 * locate the stack arguments -- confirm in xform_args.h.
 */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_SOURCE, ESI )

	/* Nothing to do for an empty source vector. */
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p3_3dr_done) )

	/* Fetch the stride and fill in the destination header. */
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

	/* From here on ESI/EDI address vertex data, not the headers. */
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = destination start + count * 16 */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p3_3dr_loop):

	/* x column */
	FLD_S( SRC0 )
	FMUL_S( MAT0 )			/* ox */
	FLD_S( SRC0 )
	FMUL_S( MAT1 )			/* oy ox */
	FLD_S( SRC0 )
	FMUL_S( MAT2 )			/* oz oy ox */

	/* y column: a, b, c = y*m[4..6] */
	FLD_S( SRC1 )
	FMUL_S( MAT4 )			/* a oz oy ox */
	FLD_S( SRC1 )
	FMUL_S( MAT5 )			/* b a oz oy ox */
	FLD_S( SRC1 )
	FMUL_S( MAT6 )			/* c b a oz oy ox */

	FXCH( ST(2) )			/* a b c oz oy ox */
	FADDP( ST0, ST(5) )		/* b c oz oy ox	ox += a */
	FADDP( ST0, ST(3) )		/* c oz oy ox	oy += b */
	FADDP( ST0, ST(1) )		/* oz oy ox	oz += c */

	/* z column: a, b, c = z*m[8..10] */
	FLD_S( SRC2 )
	FMUL_S( MAT8 )			/* a oz oy ox */
	FLD_S( SRC2 )
	FMUL_S( MAT9 )			/* b a oz oy ox */
	FLD_S( SRC2 )
	FMUL_S( MAT10 )			/* c b a oz oy ox */

	FXCH( ST(2) )			/* a b c oz oy ox */
	FADDP( ST0, ST(5) )		/* b c oz oy ox	ox += a */
	FADDP( ST0, ST(3) )		/* c oz oy ox	oy += b */
	FADDP( ST0, ST(1) )		/* oz oy ox	oz += c */

	/* Add m[12..14]. */
	FXCH( ST(2) )			/* ox oy oz */
	FADD_S( MAT12 )
	FXCH( ST(1) )			/* oy ox oz */
	FADD_S( MAT13 )
	FXCH( ST(2) )			/* oz ox oy */
	FADD_S( MAT14 )

	/* Store the three results and empty the x87 stack. */
	FXCH( ST(1) )			/* ox oz oy */
	FSTP_S( DST0 )			/* oz oy */
	FXCH( ST(1) )			/* oy oz */
	FSTP_S( DST1 )			/* oz */
	FSTP_S( DST2 )			/* (empty) */

	/* Next vertex; stop when EDI reaches the end address. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p3_3dr_loop) )

LLBL(x86_p3_3dr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points3_3d_no_rot
 *
 * For every source vertex (x, y, z) compute a per-axis scale plus offset,
 * reading only matrix elements 0, 5, 10, 12, 13 and 14:
 *
 *	dst[0] = x*m[0]  + m[12]
 *	dst[1] = y*m[5]  + m[13]
 *	dst[2] = z*m[10] + m[14]
 *
 * dst[3] is not written.  The destination header receives the source
 * count, size 3 and the VEC_SIZE_3 flag bits.  A zero source count returns
 * before anything is written.
 *
 * Loop registers:
 *	ESI  current source vertex, stepped by the source stride held in EAX
 *	EDI  current destination vertex, stepped by 16 bytes
 *	EDX  matrix base
 *	ECX  end address: destination start + 16 * count
 *
 * ESI and EDI are saved and restored; EAX, ECX and EDX are clobbered.
 * x87 stack comments list the registers top-of-stack first; sx/sy/sz are
 * the scaled inputs, ox/oy/oz the finished outputs.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_3d_no_rot )
HIDDEN(_mesa_x86_transform_points3_3d_no_rot)
GLNAME( _mesa_x86_transform_points3_3d_no_rot ):

/*
 * Bytes pushed by the prologue.  Presumably what the ARG_* macros use to
 * locate the stack arguments -- confirm in xform_args.h.
 */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_SOURCE, ESI )

	/* Nothing to do for an empty source vector. */
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p3_3dnrr_done) )

	/* Fetch the stride and fill in the destination header. */
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

	/* From here on ESI/EDI address vertex data, not the headers. */
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = destination start + count * 16 */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p3_3dnrr_loop):

	FLD_S( SRC0 )
	FMUL_S( MAT0 )			/* sx */

	FLD_S( SRC1 )
	FMUL_S( MAT5 )			/* sy sx */

	FLD_S( SRC2 )
	FMUL_S( MAT10 )			/* sz sy sx */

	FXCH( ST(2) )			/* sx sy sz */
	FADD_S( MAT12 )			/* ox sy sz */
	FLD_S( MAT13 )			/* m13 ox sy sz */
	FXCH( ST(2) )			/* sy ox m13 sz */
	FADDP( ST0, ST(2) )		/* ox oy sz	oy = m13 + sy */
	FLD_S( MAT14 )			/* m14 ox oy sz */
	FXCH( ST(3) )			/* sz ox oy m14 */
	FADDP( ST0, ST(3) )		/* ox oy oz	oz = m14 + sz */

	FSTP_S( DST0 )			/* oy oz */
	FSTP_S( DST1 )			/* oz */
	FSTP_S( DST2 )			/* (empty) */

	/* Next vertex; stop when EDI reaches the end address. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p3_3dnrr_loop) )

LLBL(x86_p3_3dnrr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points3_2d
 *
 * For every source vertex (x, y, z) transform x and y with matrix elements
 * 0, 1, 4, 5, 12 and 13 and pass z through unchanged:
 *
 *	dst[0] = x*m[0] + y*m[4] + m[12]
 *	dst[1] = x*m[1] + y*m[5] + m[13]
 *	dst[2] = z	(copied as a raw 32-bit word through EBX)
 *
 * dst[3] is not written.  The destination header receives the source
 * count, size 3 and the VEC_SIZE_3 flag bits.  A zero source count returns
 * before anything is written.
 *
 * Loop registers:
 *	ESI  current source vertex, stepped by the source stride held in EAX
 *	EDI  current destination vertex, stepped by 16 bytes
 *	EDX  matrix base
 *	ECX  end address: destination start + 16 * count
 *	EBX  carries z from source to destination
 *
 * ESI, EDI and EBX are saved and restored; EAX, ECX and EDX are clobbered.
 * x87 stack comments list the registers top-of-stack first.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_2d )
HIDDEN(_mesa_x86_transform_points3_2d)
GLNAME( _mesa_x86_transform_points3_2d ):

/*
 * Bytes pushed by the prologue.  Presumably what the ARG_* macros use to
 * locate the stack arguments -- confirm in xform_args.h.
 */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_SOURCE, ESI )

	/* Nothing to do for an empty source vector. */
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p3_2dr_done) )

	/* Fetch the stride and fill in the destination header. */
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

	/* From here on ESI/EDI address vertex data, not the headers. */
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = destination start + count * 16 */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p3_2dr_loop):

	/* x column */
	FLD_S( SRC0 )
	FMUL_S( MAT0 )			/* ox */
	FLD_S( SRC0 )
	FMUL_S( MAT1 )			/* oy ox */

	/* y column: a, b = y*m[4], y*m[5] */
	FLD_S( SRC1 )
	FMUL_S( MAT4 )			/* a oy ox */
	FLD_S( SRC1 )
	FMUL_S( MAT5 )			/* b a oy ox */

	FXCH( ST(1) )			/* a b oy ox */
	FADDP( ST0, ST(3) )		/* b oy ox	ox += a */
	FADDP( ST0, ST(1) )		/* oy ox	oy += b */

	/* Add m[12] and m[13]. */
	FXCH( ST(1) )			/* ox oy */
	FADD_S( MAT12 )
	FXCH( ST(1) )			/* oy ox */
	FADD_S( MAT13 )

	/* z is read before any destination store of this iteration. */
	MOV_L( SRC2, EBX )

	FXCH( ST(1) )			/* ox oy */
	FSTP_S( DST0 )			/* oy */
	FSTP_S( DST1 )			/* (empty) */
	MOV_L( EBX, DST2 )

	/* Next vertex; stop when EDI reaches the end address. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p3_2dr_loop) )

LLBL(x86_p3_2dr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points3_2d_no_rot
 *
 * For every source vertex (x, y, z) scale and offset x and y using matrix
 * elements 0, 5, 12 and 13, and pass z through unchanged:
 *
 *	dst[0] = x*m[0] + m[12]
 *	dst[1] = y*m[5] + m[13]
 *	dst[2] = z	(copied as a raw 32-bit word through EBX)
 *
 * dst[3] is not written.  The destination header receives the source
 * count, size 3 and the VEC_SIZE_3 flag bits.  A zero source count returns
 * before anything is written.
 *
 * Loop registers:
 *	ESI  current source vertex, stepped by the source stride held in EAX
 *	EDI  current destination vertex, stepped by 16 bytes
 *	EDX  matrix base
 *	ECX  end address: destination start + 16 * count
 *	EBX  carries z from source to destination
 *
 * ESI, EDI and EBX are saved and restored; EAX, ECX and EDX are clobbered.
 * x87 stack comments list the registers top-of-stack first; sx/sy are the
 * scaled inputs, ox/oy the finished outputs.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_2d_no_rot )
HIDDEN(_mesa_x86_transform_points3_2d_no_rot)
GLNAME( _mesa_x86_transform_points3_2d_no_rot ):

/*
 * Bytes pushed by the prologue.  Presumably what the ARG_* macros use to
 * locate the stack arguments -- confirm in xform_args.h.
 */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_SOURCE, ESI )

	/* Nothing to do for an empty source vector. */
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p3_2dnrr_done) )

	/* Fetch the stride and fill in the destination header. */
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

	/* From here on ESI/EDI address vertex data, not the headers. */
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = destination start + count * 16 */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p3_2dnrr_loop):

	FLD_S( SRC0 )
	FMUL_S( MAT0 )			/* sx */

	FLD_S( SRC1 )
	FMUL_S( MAT5 )			/* sy sx */

	FXCH( ST(1) )			/* sx sy */
	FADD_S( MAT12 )			/* ox sy */
	FLD_S( MAT13 )			/* m13 ox sy */

	FXCH( ST(2) )			/* sy ox m13 */
	FADDP( ST0, ST(2) )		/* ox oy	oy = m13 + sy */

	/* z is read before any destination store of this iteration. */
	MOV_L( SRC2, EBX )

	FSTP_S( DST0 )			/* oy */
	FSTP_S( DST1 )			/* (empty) */
	MOV_L( EBX, DST2 )

	/* Next vertex; stop when EDI reaches the end address. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p3_2dnrr_loop) )

LLBL(x86_p3_2dnrr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points3_identity
 *
 * Copy the first three 32-bit elements of every source vertex to the
 * destination:
 *
 *	dst[0] = src[0],  dst[1] = src[1],  dst[2] = src[2]
 *
 * The elements are moved as raw words through integer registers, so no
 * x87 load/store is involved.  dst[3] is not written and the matrix
 * argument is never read.
 *
 * The destination header receives the source count, size 3 and the
 * VEC_SIZE_3 flag bits.  A zero source count returns before anything is
 * written.  When the source and destination data pointers are equal the
 * header is still updated but the copy loop is skipped.
 *
 * Loop registers:
 *	ESI  current source vertex, stepped by the source stride held in EAX
 *	EDI  current destination vertex, stepped by 16 bytes
 *	ECX  end address: destination start + 16 * count
 *	EBX, EBP, EDX  scratch for the three elements being copied
 *
 * ESI, EDI, EBX and EBP are saved and restored; EAX, ECX and EDX are
 * clobbered.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_identity )
HIDDEN(_mesa_x86_transform_points3_identity)
GLNAME(_mesa_x86_transform_points3_identity ):

/*
 * Bytes pushed by the prologue.  Presumably what the ARG_* macros use to
 * locate the stack arguments -- confirm in xform_args.h.
 */
#define FRAME_OFFSET 16
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )
	PUSH_L( EBP )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	/* Nothing to do for an empty source vector. */
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p3_ir_done) )

	/* Fetch the stride and fill in the destination header. */
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

	/* ECX = destination start + count * 16 */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

	/* Same data pointer on both sides: the copy would be a no-op. */
	CMP_L( ESI, EDI )
	JE( LLBL(x86_p3_ir_done) )

ALIGNTEXT16
LLBL(x86_p3_ir_loop):

	/* All three loads happen before the first store. */
	MOV_L( SRC0, EBX )
	MOV_L( SRC1, EBP )
	MOV_L( SRC2, EDX )

	MOV_L( EBX, DST0 )
	MOV_L( EBP, DST1 )
	MOV_L( EDX, DST2 )

	/* Next vertex; stop when EDI reaches the end address. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p3_ir_loop) )

LLBL(x86_p3_ir_done):

	POP_L( EBP )
	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET

/*
 * On Linux/ELF, emit an empty .note.GNU-stack section; GNU toolchains read
 * this as "this object does not need an executable stack".
 */
#if defined(__ELF__) && defined(__linux__)
	.section .note.GNU-stack,"",%progbits
#endif