1848b8605Smrg
/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
25848b8605Smrg
/*
 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
 * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces
 * in there will break the build on some platforms.
 */
31848b8605Smrg
#include "assyntax.h"
#include "matypes.h"
#include "xform_args.h"

	SEG_TEXT

/*
 * 32-bit integer images of the single-precision floats 1.0 (0x3F800000)
 * and 0.0.  Neither constant is referenced by any routine in this file.
 */
#define FP_ONE		1065353216
#define FP_ZERO		0

/*
 * Operand shorthands shared by every routine in this file.
 *
 *   SRCn  - 32-bit component n of the current source element,
 *           at byte offset 4*n from ESI.
 *   DSTn  - 32-bit component n of the current destination element,
 *           at byte offset 4*n from EDI.
 *   MATn  - 32-bit matrix entry n, at byte offset 4*n from EDX
 *           (sixteen consecutive entries, MAT0..MAT15).
 *
 * The routines are responsible for loading ESI, EDI and EDX before any of
 * these operands is used.
 */
#define SRC0		REGOFF(0, ESI)
#define SRC1		REGOFF(4, ESI)
#define SRC2		REGOFF(8, ESI)
#define SRC3		REGOFF(12, ESI)
#define DST0		REGOFF(0, EDI)
#define DST1		REGOFF(4, EDI)
#define DST2		REGOFF(8, EDI)
#define DST3		REGOFF(12, EDI)
#define MAT0		REGOFF(0, EDX)
#define MAT1		REGOFF(4, EDX)
#define MAT2		REGOFF(8, EDX)
#define MAT3		REGOFF(12, EDX)
#define MAT4		REGOFF(16, EDX)
#define MAT5		REGOFF(20, EDX)
#define MAT6		REGOFF(24, EDX)
#define MAT7		REGOFF(28, EDX)
#define MAT8		REGOFF(32, EDX)
#define MAT9		REGOFF(36, EDX)
#define MAT10		REGOFF(40, EDX)
#define MAT11		REGOFF(44, EDX)
#define MAT12		REGOFF(48, EDX)
#define MAT13		REGOFF(52, EDX)
#define MAT14		REGOFF(56, EDX)
#define MAT15		REGOFF(60, EDX)
65848b8605Smrg
66848b8605Smrg
/*
 * _mesa_x86_transform_points4_general
 *
 * Multiplies every 4-component float element of the source vector array by
 * a full 4x4 matrix using x87 arithmetic.  With s[i] = SRCi and m[n] = MATn:
 *
 *	dst[j] = s[0]*m[j] + s[1]*m[4+j] + s[2]*m[8+j] + s[3]*m[12+j],  j = 0..3
 *
 * Arguments are read through ARG_SOURCE, ARG_DEST and ARG_MATRIX, which
 * come from xform_args.h (presumably ESP-relative and adjusted by
 * FRAME_OFFSET -- confirm there).  FRAME_OFFSET below equals the number of
 * bytes pushed by the prologue.
 *
 * Destination header fields written: V4F_FLAGS |= VEC_SIZE_4,
 * V4F_COUNT = source count, V4F_SIZE = 4.  When the source count is zero
 * the routine returns before writing any of them.
 *
 * Register roles inside the loop:
 *	ESI  current source element (advances by the source stride in EAX)
 *	EDI  current destination element (advances by 16 bytes)
 *	EDX  matrix base
 *	EAX  source stride in bytes
 *	ECX  destination end pointer = destination start + 16*count
 *
 * ESI and EDI are saved and restored; EAX, ECX and EDX are clobbered.
 * The loop peaks at eight live x87 registers, so the x87 stack must be
 * empty on entry.  All loads of an element precede the first store to the
 * destination element.
 *
 * In the stack comments F4..F7 are the accumulators for dst[0]..dst[3] and
 * F0..F3 are the per-column products being folded into them; the leftmost
 * name is ST(0).
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_general )
HIDDEN(_mesa_x86_transform_points4_general)
GLNAME( _mesa_x86_transform_points4_general ):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )		/* nothing to transform? */
	JZ( LLBL(x86_p4_gr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output element */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = one past the last output element */

ALIGNTEXT16
LLBL(x86_p4_gr_loop):

	/* s[0] * matrix row 0 seeds the four accumulators */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )
	FLD_S( SRC0 )			/* F6 F5 F4 */
	FMUL_S( MAT2 )
	FLD_S( SRC0 )			/* F7 F6 F5 F4 */
	FMUL_S( MAT3 )

	/* add s[1] * matrix row 1 */
	FLD_S( SRC1 )			/* F0 F7 F6 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT5 )
	FLD_S( SRC1 )			/* F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT6 )
	FLD_S( SRC1 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT7 )

	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */

	/* add s[2] * matrix row 2 */
	FLD_S( SRC2 )			/* F0 F7 F6 F5 F4 */
	FMUL_S( MAT8 )
	FLD_S( SRC2 )			/* F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT9 )
	FLD_S( SRC2 )			/* F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT10 )
	FLD_S( SRC2 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT11 )

	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */

	/* add s[3] * matrix row 3 */
	FLD_S( SRC3 )			/* F0 F7 F6 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT13 )
	FLD_S( SRC3 )			/* F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT14 )
	FLD_S( SRC3 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT15 )

	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */

	/* store the accumulators in component order, emptying the x87 stack */
	FXCH( ST(3) )			/* F4 F6 F5 F7 */
	FSTP_S( DST0 )			/* F6 F5 F7 */
	FXCH( ST(1) )			/* F5 F6 F7 */
	FSTP_S( DST1 )			/* F6 F7 */
	FSTP_S( DST2 )			/* F7 */
	FSTP_S( DST3 )			/* */

	ADD_L( CONST(16), EDI )		/* next output element */
	ADD_L( EAX, ESI )		/* next input element */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_gr_loop) )

LLBL(x86_p4_gr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
177848b8605Smrg
178848b8605Smrg
179848b8605Smrg
180848b8605Smrg
/*
 * _mesa_x86_transform_points4_perspective
 *
 * Transforms every 4-component float element of the source vector array
 * using only the matrix entries listed below.  With s[i] = SRCi and
 * m[n] = MATn:
 *
 *	dst[0] = s[0]*m[0]  + s[2]*m[8]
 *	dst[1] = s[1]*m[5]  + s[2]*m[9]
 *	dst[2] = s[2]*m[10] + s[3]*m[14]
 *	dst[3] = -s[2]	(sign bit flipped with an integer XOR, no FPU use)
 *
 * Arguments are read through ARG_SOURCE, ARG_DEST and ARG_MATRIX, which
 * come from xform_args.h (presumably ESP-relative and adjusted by
 * FRAME_OFFSET -- confirm there).  FRAME_OFFSET below equals the number of
 * bytes pushed by the prologue.
 *
 * Destination header fields written: V4F_FLAGS |= VEC_SIZE_4,
 * V4F_COUNT = source count, V4F_SIZE = 4.  When the source count is zero
 * the routine returns before writing any of them.
 *
 * Register roles inside the loop:
 *	ESI  current source element (advances by the source stride in EAX)
 *	EDI  current destination element (advances by 16 bytes)
 *	EDX  matrix base
 *	EAX  source stride in bytes
 *	ECX  destination end pointer = destination start + 16*count
 *	EBX  raw bits of s[2] with the sign bit inverted
 *
 * ESI, EDI and EBX are saved and restored; EAX, ECX and EDX are clobbered.
 * The loop peaks at five live x87 registers.  All loads of an element
 * precede the first store to the destination element.
 *
 * In the stack comments F4..F6 are the accumulators for dst[0]..dst[2];
 * the leftmost name is ST(0).
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_perspective )
HIDDEN(_mesa_x86_transform_points4_perspective)
GLNAME( _mesa_x86_transform_points4_perspective ):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )		/* nothing to transform? */
	JZ( LLBL(x86_p4_pr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output element */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = one past the last output element */

ALIGNTEXT16
LLBL(x86_p4_pr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F5 F4 */
	FMUL_S( MAT5 )

	FLD_S( SRC2 )			/* F0 F5 F4 */
	FMUL_S( MAT8 )
	FLD_S( SRC2 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT9 )
	FLD_S( SRC2 )			/* F6 F1 F0 F5 F4 */
	FMUL_S( MAT10 )

	FXCH( ST(2) )			/* F0 F1 F6 F5 F4 */
	FADDP( ST0, ST(4) )		/* F1 F6 F5 F4 */
	FADDP( ST0, ST(2) )		/* F6 F5 F4 */

	FLD_S( SRC3 )			/* F2 F6 F5 F4 */
	FMUL_S( MAT14 )

	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* dst[3] = -s[2]: fetch the raw bits and toggle bit 31 (0x80000000) */
	MOV_L( SRC2, EBX )
	XOR_L( CONST(-2147483648), EBX )/* change sign */

	FXCH( ST(2) )			/* F4 F5 F6 */
	FSTP_S( DST0 )			/* F5 F6 */
	FSTP_S( DST1 )			/* F6 */
	FSTP_S( DST2 )			/* */
	MOV_L( EBX, DST3 )

	ADD_L( CONST(16), EDI )		/* next output element */
	ADD_L( EAX, ESI )		/* next input element */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_pr_loop) )

LLBL(x86_p4_pr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
260848b8605Smrg
261848b8605Smrg
262848b8605Smrg
263848b8605Smrg
/*
 * _mesa_x86_transform_points4_3d
 *
 * Transforms every 4-component float element of the source vector array
 * by the first three columns of the matrix and passes the fourth
 * component through.  With s[i] = SRCi and m[n] = MATn:
 *
 *	dst[j] = s[0]*m[j] + s[1]*m[4+j] + s[2]*m[8+j] + s[3]*m[12+j],  j = 0..2
 *	dst[3] = s[3]	(copied as a raw 32-bit value)
 *
 * Arguments are read through ARG_SOURCE, ARG_DEST and ARG_MATRIX, which
 * come from xform_args.h (presumably ESP-relative and adjusted by
 * FRAME_OFFSET -- confirm there).  FRAME_OFFSET below equals the number of
 * bytes pushed by the prologue.
 *
 * Destination header fields written: V4F_FLAGS |= VEC_SIZE_4,
 * V4F_COUNT = source count, V4F_SIZE = 4.  When the source count is zero
 * the routine returns before writing any of them.
 *
 * Register roles inside the loop:
 *	ESI  current source element (advances by the source stride in EAX)
 *	EDI  current destination element (advances by 16 bytes)
 *	EDX  matrix base
 *	EAX  source stride in bytes
 *	ECX  destination end pointer = destination start + 16*count
 *	EBX  raw bits of s[3]
 *
 * ESI, EDI and EBX are saved and restored; EAX, ECX and EDX are clobbered.
 * The loop peaks at six live x87 registers.  All loads of an element
 * precede the first store to the destination element.
 *
 * In the stack comments F4..F6 are the accumulators for dst[0]..dst[2] and
 * F0..F2 are the per-column products being folded into them; the leftmost
 * name is ST(0).
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_3d )
HIDDEN(_mesa_x86_transform_points4_3d)
GLNAME( _mesa_x86_transform_points4_3d ):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )		/* nothing to transform? */
	JZ( LLBL(x86_p4_3dr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output element */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = one past the last output element */

ALIGNTEXT16
LLBL(x86_p4_3dr_loop):

	/* s[0] * (m[0], m[1], m[2]) seeds the three accumulators */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )
	FLD_S( SRC0 )			/* F6 F5 F4 */
	FMUL_S( MAT2 )

	/* add s[1] * (m[4], m[5], m[6]) */
	FLD_S( SRC1 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT5 )
	FLD_S( SRC1 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT6 )

	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* add s[2] * (m[8], m[9], m[10]) */
	FLD_S( SRC2 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT8 )
	FLD_S( SRC2 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT9 )
	FLD_S( SRC2 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT10 )

	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* add s[3] * (m[12], m[13], m[14]) */
	FLD_S( SRC3 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT13 )
	FLD_S( SRC3 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT14 )

	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	MOV_L( SRC3, EBX )		/* s[3] is passed through unchanged */

	FXCH( ST(2) )			/* F4 F5 F6 */
	FSTP_S( DST0 )			/* F5 F6 */
	FSTP_S( DST1 )			/* F6 */
	FSTP_S( DST2 )			/* */
	MOV_L( EBX, DST3 )

	ADD_L( CONST(16), EDI )		/* next output element */
	ADD_L( EAX, ESI )		/* next input element */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_3dr_loop) )

LLBL(x86_p4_3dr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
363848b8605Smrg
364848b8605Smrg
365848b8605Smrg
366848b8605Smrg
/*
 * _mesa_x86_transform_points4_3d_no_rot
 *
 * Transforms every 4-component float element of the source vector array
 * using only the three diagonal entries and the last matrix row shown
 * below.  With s[i] = SRCi and m[n] = MATn:
 *
 *	dst[0] = s[0]*m[0]  + s[3]*m[12]
 *	dst[1] = s[1]*m[5]  + s[3]*m[13]
 *	dst[2] = s[2]*m[10] + s[3]*m[14]
 *	dst[3] = s[3]	(copied as a raw 32-bit value)
 *
 * Arguments are read through ARG_SOURCE, ARG_DEST and ARG_MATRIX, which
 * come from xform_args.h (presumably ESP-relative and adjusted by
 * FRAME_OFFSET -- confirm there).  FRAME_OFFSET below equals the number of
 * bytes pushed by the prologue.
 *
 * Destination header fields written: V4F_FLAGS |= VEC_SIZE_4,
 * V4F_COUNT = source count, V4F_SIZE = 4.  When the source count is zero
 * the routine returns before writing any of them.
 *
 * Register roles inside the loop:
 *	ESI  current source element (advances by the source stride in EAX)
 *	EDI  current destination element (advances by 16 bytes)
 *	EDX  matrix base
 *	EAX  source stride in bytes
 *	ECX  destination end pointer = destination start + 16*count
 *	EBX  raw bits of s[3]
 *
 * ESI, EDI and EBX are saved and restored; EAX, ECX and EDX are clobbered.
 * The loop peaks at six live x87 registers.  All loads of an element
 * precede the first store to the destination element.
 *
 * In the stack comments F4..F6 are the accumulators for dst[0]..dst[2] and
 * F0..F2 are the s[3] products being folded into them; the leftmost name
 * is ST(0).
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_3d_no_rot)
HIDDEN(_mesa_x86_transform_points4_3d_no_rot)
GLNAME(_mesa_x86_transform_points4_3d_no_rot):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )		/* nothing to transform? */
	JZ( LLBL(x86_p4_3dnrr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output element */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = one past the last output element */

ALIGNTEXT16
LLBL(x86_p4_3dnrr_loop):

	/* per-axis scale terms */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F5 F4 */
	FMUL_S( MAT5 )

	FLD_S( SRC2 )			/* F6 F5 F4 */
	FMUL_S( MAT10 )

	/* add s[3] * (m[12], m[13], m[14]) */
	FLD_S( SRC3 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT13 )
	FLD_S( SRC3 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT14 )

	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	MOV_L( SRC3, EBX )		/* s[3] is passed through unchanged */

	FXCH( ST(2) )			/* F4 F5 F6 */
	FSTP_S( DST0 )			/* F5 F6 */
	FSTP_S( DST1 )			/* F6 */
	FSTP_S( DST2 )			/* */
	MOV_L( EBX, DST3 )

	ADD_L( CONST(16), EDI )		/* next output element */
	ADD_L( EAX, ESI )		/* next input element */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_3dnrr_loop) )

LLBL(x86_p4_3dnrr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
444848b8605Smrg
445848b8605Smrg
446848b8605Smrg
447848b8605Smrg
/*
 * _mesa_x86_transform_points4_2d
 *
 * Transforms the first two components of every 4-component float element
 * of the source vector array and passes the last two through.  With
 * s[i] = SRCi and m[n] = MATn:
 *
 *	dst[0] = s[0]*m[0] + s[1]*m[4] + s[3]*m[12]
 *	dst[1] = s[0]*m[1] + s[1]*m[5] + s[3]*m[13]
 *	dst[2] = s[2]	(copied as a raw 32-bit value)
 *	dst[3] = s[3]	(copied as a raw 32-bit value)
 *
 * Arguments are read through ARG_SOURCE, ARG_DEST and ARG_MATRIX, which
 * come from xform_args.h (presumably ESP-relative and adjusted by
 * FRAME_OFFSET -- confirm there).  FRAME_OFFSET below equals the number of
 * bytes pushed by the prologue.
 *
 * Destination header fields written: V4F_FLAGS |= VEC_SIZE_4,
 * V4F_COUNT = source count, V4F_SIZE = 4.  When the source count is zero
 * the routine returns before writing any of them.
 *
 * Register roles inside the loop:
 *	ESI  current source element (advances by the source stride in EAX)
 *	EDI  current destination element (advances by 16 bytes)
 *	EDX  matrix base
 *	EAX  source stride in bytes
 *	ECX  destination end pointer = destination start + 16*count
 *	EBX  raw bits of s[2]
 *	EBP  raw bits of s[3] (used as a scratch register, not a frame pointer)
 *
 * ESI, EDI, EBX and EBP are saved and restored; EAX, ECX and EDX are
 * clobbered.  The loop peaks at four live x87 registers.  All loads of an
 * element precede the first store to the destination element.
 *
 * In the stack comments F4 and F5 are the accumulators for dst[0] and
 * dst[1]; F0 and F1 are the products being folded into them; the leftmost
 * name is ST(0).
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_2d )
HIDDEN(_mesa_x86_transform_points4_2d)
GLNAME( _mesa_x86_transform_points4_2d ):

#define FRAME_OFFSET 16
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )
	PUSH_L( EBP )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )		/* nothing to transform? */
	JZ( LLBL(x86_p4_2dr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output element */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = one past the last output element */

ALIGNTEXT16
LLBL(x86_p4_2dr_loop):

	/* s[0] * (m[0], m[1]) seeds the two accumulators */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )

	/* add s[1] * (m[4], m[5]) */
	FLD_S( SRC1 )			/* F0 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT5 )

	FXCH( ST(1) )			/* F0 F1 F5 F4 */
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
	FADDP( ST0, ST(1) )		/* F5 F4 */

	/* add s[3] * (m[12], m[13]) */
	FLD_S( SRC3 )			/* F0 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT13 )

	FXCH( ST(1) )			/* F0 F1 F5 F4 */
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
	FADDP( ST0, ST(1) )		/* F5 F4 */

	/* s[2] and s[3] are passed through unchanged */
	MOV_L( SRC2, EBX )
	MOV_L( SRC3, EBP )

	FXCH( ST(1) )			/* F4 F5 */
	FSTP_S( DST0 )			/* F5 */
	FSTP_S( DST1 )			/* */
	MOV_L( EBX, DST2 )
	MOV_L( EBP, DST3 )

	ADD_L( CONST(16), EDI )		/* next output element */
	ADD_L( EAX, ESI )		/* next input element */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_2dr_loop) )

LLBL(x86_p4_2dr_done):

	POP_L( EBP )
	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
530848b8605Smrg
531848b8605Smrg
532848b8605Smrg
533848b8605Smrg
/*
 * _mesa_x86_transform_points4_2d_no_rot
 *
 * Scales and offsets the first two components of every 4-component float
 * element of the source vector array and passes the last two through.
 * With s[i] = SRCi and m[n] = MATn:
 *
 *	dst[0] = s[0]*m[0] + s[3]*m[12]
 *	dst[1] = s[1]*m[5] + s[3]*m[13]
 *	dst[2] = s[2]	(copied as a raw 32-bit value)
 *	dst[3] = s[3]	(copied as a raw 32-bit value)
 *
 * Arguments are read through ARG_SOURCE, ARG_DEST and ARG_MATRIX, which
 * come from xform_args.h (presumably ESP-relative and adjusted by
 * FRAME_OFFSET -- confirm there).  FRAME_OFFSET below equals the number of
 * bytes pushed by the prologue.
 *
 * Destination header fields written: V4F_FLAGS |= VEC_SIZE_4,
 * V4F_COUNT = source count, V4F_SIZE = 4.  When the source count is zero
 * the routine returns before writing any of them.
 *
 * Register roles inside the loop:
 *	ESI  current source element (advances by the source stride in EAX)
 *	EDI  current destination element (advances by 16 bytes)
 *	EDX  matrix base
 *	EAX  source stride in bytes
 *	ECX  destination end pointer = destination start + 16*count
 *	EBX  raw bits of s[2]
 *	EBP  raw bits of s[3] (used as a scratch register, not a frame pointer)
 *
 * ESI, EDI, EBX and EBP are saved and restored; EAX, ECX and EDX are
 * clobbered.  The loop peaks at four live x87 registers.  All loads of an
 * element precede the first store to the destination element.
 *
 * In the stack comments F4 and F5 are the accumulators for dst[0] and
 * dst[1]; F0 and F1 are the s[3] products being folded into them; the
 * leftmost name is ST(0).
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_2d_no_rot )
HIDDEN(_mesa_x86_transform_points4_2d_no_rot)
GLNAME( _mesa_x86_transform_points4_2d_no_rot ):

#define FRAME_OFFSET 16
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )
	PUSH_L( EBP )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )		/* nothing to transform? */
	JZ( LLBL(x86_p4_2dnrr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output element */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = one past the last output element */

ALIGNTEXT16
LLBL(x86_p4_2dnrr_loop):

	/* per-axis scale terms */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F5 F4 */
	FMUL_S( MAT5 )

	/* add s[3] * (m[12], m[13]) */
	FLD_S( SRC3 )			/* F0 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT13 )

	FXCH( ST(1) )			/* F0 F1 F5 F4 */
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
	FADDP( ST0, ST(1) )		/* F5 F4 */

	/* s[2] and s[3] are passed through unchanged */
	MOV_L( SRC2, EBX )
	MOV_L( SRC3, EBP )

	FXCH( ST(1) )			/* F4 F5 */
	FSTP_S( DST0 )			/* F5 */
	FSTP_S( DST1 )			/* */
	MOV_L( EBX, DST2 )
	MOV_L( EBP, DST3 )

	ADD_L( CONST(16), EDI )		/* next output element */
	ADD_L( EAX, ESI )		/* next input element */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_2dnrr_loop) )

LLBL(x86_p4_2dnrr_done):

	POP_L( EBP )
	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
608848b8605Smrg
609848b8605Smrg
610848b8605Smrg
611848b8605Smrg
/*
 * _mesa_x86_transform_points4_identity
 *
 * Copies every 4-component element of the source vector array to the
 * destination as four raw 32-bit values; the matrix is never read and the
 * FPU is not used.
 *
 * Arguments are read through ARG_SOURCE and ARG_DEST, which come from
 * xform_args.h (presumably ESP-relative and adjusted by FRAME_OFFSET --
 * confirm there).  FRAME_OFFSET below equals the number of bytes pushed by
 * the prologue.
 *
 * Destination header fields written: V4F_FLAGS |= VEC_SIZE_4,
 * V4F_COUNT = source count, V4F_SIZE = 4.  When the source count is zero
 * the routine returns before writing any of them.  When the source and
 * destination start pointers are equal the header is still updated but the
 * copy loop is skipped.
 *
 * Register roles inside the loop:
 *	ESI  current source element (advances by the source stride in EAX)
 *	EDI  current destination element (advances by 16 bytes)
 *	EAX  source stride in bytes
 *	ECX  destination end pointer = destination start + 16*count
 *	EBX, EDX  scratch for the components being copied
 *
 * ESI, EDI and EBX are saved and restored; EAX, ECX and EDX are clobbered.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_identity )
HIDDEN(_mesa_x86_transform_points4_identity)
GLNAME( _mesa_x86_transform_points4_identity ):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )		/* nothing to copy? */
	JZ( LLBL(x86_p4_ir_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output element */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = one past the last output element */

	CMP_L( ESI, EDI )		/* same start pointer: nothing to move */
	JE( LLBL(x86_p4_ir_done) )

ALIGNTEXT16
LLBL(x86_p4_ir_loop):

	MOV_L( SRC0, EBX )
	MOV_L( SRC1, EDX )

	MOV_L( EBX, DST0 )
	MOV_L( EDX, DST1 )

	MOV_L( SRC2, EBX )
	MOV_L( SRC3, EDX )

	MOV_L( EBX, DST2 )
	MOV_L( EDX, DST3 )

	ADD_L( CONST(16), EDI )		/* next output element */
	ADD_L( EAX, ESI )		/* next input element */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_ir_loop) )

LLBL(x86_p4_ir_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
674848b8605Smrg
/*
 * On ELF/Linux builds emit an empty .note.GNU-stack section, which tells
 * the GNU toolchain that this object does not need an executable stack.
 */
#if defined (__ELF__) && defined (__linux__)
	.section .note.GNU-stack,"",%progbits
#endif
678