
/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
 * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces
 * in there will break the build on some platforms.
 */

#include "assyntax.h"
#include "matypes.h"
#include "xform_args.h"

	SEG_TEXT

/*
 * IEEE-754 single-precision bit patterns, written as integers so they can
 * be stored with an integer move.
 */
#define FP_ONE		1065353216	/* 0x3f800000, i.e. 1.0f */
#define FP_ZERO		0		/* 0x00000000, i.e. 0.0f */

/*
 * Operand shorthands used by every loop in this file.  The index is a
 * 32-bit word index, so the byte displacement is 4 * index.
 *
 *   SRCn  word n of the current source element       (base ESI)
 *   DSTn  word n of the current destination element  (base EDI)
 *   MATn  word n of the 16-word matrix               (base EDX)
 */
#define SRC0		REGOFF(0, ESI)
#define SRC1		REGOFF(4, ESI)
#define SRC2		REGOFF(8, ESI)
#define SRC3		REGOFF(12, ESI)
#define DST0		REGOFF(0, EDI)
#define DST1		REGOFF(4, EDI)
#define DST2		REGOFF(8, EDI)
#define DST3		REGOFF(12, EDI)
#define MAT0		REGOFF(0, EDX)
#define MAT1		REGOFF(4, EDX)
#define MAT2		REGOFF(8, EDX)
#define MAT3		REGOFF(12, EDX)
#define MAT4		REGOFF(16, EDX)
#define MAT5		REGOFF(20, EDX)
#define MAT6		REGOFF(24, EDX)
#define MAT7		REGOFF(28, EDX)
#define MAT8		REGOFF(32, EDX)
#define MAT9		REGOFF(36, EDX)
#define MAT10		REGOFF(40, EDX)
#define MAT11		REGOFF(44, EDX)
#define MAT12		REGOFF(48, EDX)
#define MAT13		REGOFF(52, EDX)
#define MAT14		REGOFF(56, EDX)
#define MAT15		REGOFF(60, EDX)


/*
 * _mesa_x86_transform_points2_general
 *
 * Transforms 2-component source elements (x, y) = (SRC0, SRC1) by a
 * general matrix and stores four floats per element:
 *
 *     DSTi = x * MAT(i) + y * MAT(4+i) + MAT(12+i)      i = 0..3
 *
 * Matrix words 8..11 are never read.
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX, which
 * are not defined in this file.  FRAME_OFFSET is set to the number of bytes
 * pushed here (2 registers); presumably those macros use it to reach the
 * caller's arguments -- confirm against xform_args.h.
 *
 * Register roles once setup is complete:
 *   EAX  source stride in bytes (V4F_STRIDE of the source)
 *   ECX  end-of-output pointer: destination start + 16 * count
 *   EDX  matrix base
 *   ESI  current source element, advanced by EAX per iteration
 *   EDI  current destination element, advanced by 16 bytes per iteration
 *
 * Writes to the destination vector header: V4F_FLAGS |= VEC_SIZE_4,
 * V4F_COUNT = source count, V4F_SIZE = 4.
 *
 * ESI and EDI are saved and restored.  The loop holds eight values on the
 * x87 stack at its peak, so the x87 stack must be empty on entry.
 * The trailing comments on x87 instructions list the stack top-first.
 *
 * NOTE(review): when the source count is 0 the routine returns before the
 * destination count/size/flags are written -- confirm callers expect that.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_general )
HIDDEN(_mesa_x86_transform_points2_general)
GLNAME( _mesa_x86_transform_points2_general ):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )			/* nothing to do for count == 0 */
	JZ( LLBL(x86_p2_gr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )			/* count * 16 bytes of output */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )			/* ECX = one past the last output */

ALIGNTEXT16
LLBL(x86_p2_gr_loop):

	/* x * row 0 of the matrix (words 0..3) */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )
	FLD_S( SRC0 )			/* F6 F5 F4 */
	FMUL_S( MAT2 )
	FLD_S( SRC0 )			/* F7 F6 F5 F4 */
	FMUL_S( MAT3 )

	/* y * row 1 of the matrix (words 4..7) */
	FLD_S( SRC1 )			/* F0 F7 F6 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT5 )
	FLD_S( SRC1 )			/* F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT6 )
	FLD_S( SRC1 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT7 )

	/* fold the y products into the matching x products */
	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */

	/* add matrix words 12..15 */
	FXCH( ST(3) )			/* F4 F6 F5 F7 */
	FADD_S( MAT12 )
	FXCH( ST(2) )			/* F5 F6 F4 F7 */
	FADD_S( MAT13 )
	FXCH( ST(1) )			/* F6 F5 F4 F7 */
	FADD_S( MAT14 )
	FXCH( ST(3) )			/* F7 F5 F4 F6 */
	FADD_S( MAT15 )

	/* store the four results and empty the x87 stack */
	FXCH( ST(2) )			/* F4 F5 F7 F6 */
	FSTP_S( DST0 )			/* F5 F7 F6 */
	FSTP_S( DST1 )			/* F7 F6 */
	FXCH( ST(1) )			/* F6 F7 */
	FSTP_S( DST2 )			/* F7 */
	FSTP_S( DST3 )			/* */

LLBL(x86_p2_gr_skip):

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_gr_loop) )

LLBL(x86_p2_gr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points2_perspective
 *
 * Transforms 2-component source elements (x, y) = (SRC0, SRC1) using only
 * matrix words 0, 5 and 14, and stores four words per element:
 *
 *     DST0 = x * MAT0
 *     DST1 = y * MAT5
 *     DST2 = MAT14        (copied as a raw 32-bit word through EBX)
 *     DST3 = FP_ZERO
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX, which
 * are not defined in this file.  FRAME_OFFSET is set to the number of bytes
 * pushed here (3 registers); presumably those macros use it to reach the
 * caller's arguments -- confirm against xform_args.h.
 *
 * Register roles once setup is complete:
 *   EAX  source stride in bytes (V4F_STRIDE of the source)
 *   EBX  bit pattern of MAT14, loaded once before the loop
 *   ECX  end-of-output pointer: destination start + 16 * count
 *   EDX  matrix base
 *   ESI  current source element, advanced by EAX per iteration
 *   EDI  current destination element, advanced by 16 bytes per iteration
 *
 * Writes to the destination vector header: V4F_FLAGS |= VEC_SIZE_4,
 * V4F_COUNT = source count, V4F_SIZE = 4.
 *
 * ESI, EDI and EBX are saved and restored.  The loop needs two free x87
 * stack slots.  Trailing comments on x87 instructions list the stack
 * top-first.
 *
 * NOTE(review): when the source count is 0 the routine returns before the
 * destination count/size/flags are written -- confirm callers expect that.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_perspective )
HIDDEN(_mesa_x86_transform_points2_perspective)
GLNAME( _mesa_x86_transform_points2_perspective ):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )			/* nothing to do for count == 0 */
	JZ( LLBL(x86_p2_pr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )			/* count * 16 bytes of output */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )			/* ECX = one past the last output */

	MOV_L( MAT14, EBX )			/* loop-invariant third component */

ALIGNTEXT16
LLBL(x86_p2_pr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F1 F4 */
	FMUL_S( MAT5 )

	FXCH( ST(1) )			/* F4 F1 */
	FSTP_S( DST0   )		/* F1 */
	FSTP_S( DST1   )		/* */
	MOV_L( EBX, DST2 )
	MOV_L( CONST(FP_ZERO), DST3 )

LLBL(x86_p2_pr_skip):

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_pr_loop) )

LLBL(x86_p2_pr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points2_3d
 *
 * Transforms 2-component source elements (x, y) = (SRC0, SRC1) and stores
 * three floats per element:
 *
 *     DSTi = x * MAT(i) + y * MAT(4+i) + MAT(12+i)      i = 0..2
 *
 * DST3 is not written.  Matrix words 3, 7, 8..11 and 15 are never read.
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX, which
 * are not defined in this file.  FRAME_OFFSET is set to the number of bytes
 * pushed here (2 registers); presumably those macros use it to reach the
 * caller's arguments -- confirm against xform_args.h.
 *
 * Register roles once setup is complete:
 *   EAX  source stride in bytes (V4F_STRIDE of the source)
 *   ECX  end-of-output pointer: destination start + 16 * count
 *   EDX  matrix base
 *   ESI  current source element, advanced by EAX per iteration
 *   EDI  current destination element, advanced by 16 bytes per iteration
 *
 * Writes to the destination vector header: V4F_FLAGS |= VEC_SIZE_3,
 * V4F_COUNT = source count, V4F_SIZE = 3.
 *
 * ESI and EDI are saved and restored.  The loop needs six free x87 stack
 * slots.  Trailing comments on x87 instructions list the stack top-first.
 *
 * NOTE(review): when the source count is 0 the routine returns before the
 * destination count/size/flags are written -- confirm callers expect that.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_3d )
HIDDEN(_mesa_x86_transform_points2_3d)
GLNAME( _mesa_x86_transform_points2_3d ):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )			/* nothing to do for count == 0 */
	JZ( LLBL(x86_p2_3dr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )			/* count * 16 bytes of output */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )			/* ECX = one past the last output */

ALIGNTEXT16
LLBL(x86_p2_3dr_loop):

	/* x * matrix words 0..2 */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )
	FLD_S( SRC0 )			/* F6 F5 F4 */
	FMUL_S( MAT2 )

	/* y * matrix words 4..6 */
	FLD_S( SRC1 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT5 )
	FLD_S( SRC1 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT6 )

	/* fold the y products into the matching x products */
	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* add matrix words 12..14 */
	FXCH( ST(2) )			/* F4 F5 F6 */
	FADD_S( MAT12 )
	FXCH( ST(1) )			/* F5 F4 F6 */
	FADD_S( MAT13 )
	FXCH( ST(2) )			/* F6 F4 F5 */
	FADD_S( MAT14 )

	/* store the three results and empty the x87 stack */
	FXCH( ST(1) )			/* F4 F6 F5 */
	FSTP_S( DST0 )			/* F6 F5 */
	FXCH( ST(1) )			/* F5 F6 */
	FSTP_S( DST1 )			/* F6 */
	FSTP_S( DST2 )			/* */

LLBL(x86_p2_3dr_skip):

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_3dr_loop) )

LLBL(x86_p2_3dr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points2_3d_no_rot
 *
 * Transforms 2-component source elements (x, y) = (SRC0, SRC1) using only
 * matrix words 0, 5, 12, 13 and 14, and stores three words per element:
 *
 *     DST0 = x * MAT0 + MAT12
 *     DST1 = y * MAT5 + MAT13
 *     DST2 = MAT14        (copied as a raw 32-bit word through EBX)
 *
 * DST3 is not written.
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX, which
 * are not defined in this file.  FRAME_OFFSET is set to the number of bytes
 * pushed here (3 registers); presumably those macros use it to reach the
 * caller's arguments -- confirm against xform_args.h.
 *
 * Register roles once setup is complete:
 *   EAX  source stride in bytes (V4F_STRIDE of the source)
 *   EBX  bit pattern of MAT14, loaded once before the loop
 *   ECX  end-of-output pointer: destination start + 16 * count
 *   EDX  matrix base
 *   ESI  current source element, advanced by EAX per iteration
 *   EDI  current destination element, advanced by 16 bytes per iteration
 *
 * Writes to the destination vector header: V4F_FLAGS |= VEC_SIZE_3,
 * V4F_COUNT = source count, V4F_SIZE = 3.
 *
 * ESI, EDI and EBX are saved and restored.  The loop needs three free x87
 * stack slots.  Trailing comments on x87 instructions list the stack
 * top-first.
 *
 * NOTE(review): when the source count is 0 the routine returns before the
 * destination count/size/flags are written -- confirm callers expect that.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_3d_no_rot )
HIDDEN(_mesa_x86_transform_points2_3d_no_rot)
GLNAME( _mesa_x86_transform_points2_3d_no_rot ):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )			/* nothing to do for count == 0 */
	JZ( LLBL(x86_p2_3dnrr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )			/* count * 16 bytes of output */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )			/* ECX = one past the last output */

	MOV_L( MAT14, EBX )			/* loop-invariant third component */

ALIGNTEXT16
LLBL(x86_p2_3dnrr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F1 F4 */
	FMUL_S( MAT5 )

	FXCH( ST(1) )			/* F4 F1 */
	FADD_S( MAT12 )
	FLD_S( MAT13 )		/* F5 F4 F1 */
	FXCH( ST(2) )			/* F1 F4 F5 */
	FADDP( ST0, ST(2) )		/* F4 F5 */

	FSTP_S( DST0 )		/* F5 */
	FSTP_S( DST1 )		/* */
	MOV_L( EBX, DST2 )

LLBL(x86_p2_3dnrr_skip):

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_3dnrr_loop) )

LLBL(x86_p2_3dnrr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points2_2d
 *
 * Transforms 2-component source elements (x, y) = (SRC0, SRC1) and stores
 * two floats per element:
 *
 *     DST0 = x * MAT0 + y * MAT4 + MAT12
 *     DST1 = x * MAT1 + y * MAT5 + MAT13
 *
 * DST2 and DST3 are not written.
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX, which
 * are not defined in this file.  FRAME_OFFSET is set to the number of bytes
 * pushed here (2 registers); presumably those macros use it to reach the
 * caller's arguments -- confirm against xform_args.h.
 *
 * Register roles once setup is complete:
 *   EAX  source stride in bytes (V4F_STRIDE of the source)
 *   ECX  end-of-output pointer: destination start + 16 * count
 *   EDX  matrix base
 *   ESI  current source element, advanced by EAX per iteration
 *   EDI  current destination element, advanced by 16 bytes per iteration
 *
 * Writes to the destination vector header: V4F_FLAGS |= VEC_SIZE_2,
 * V4F_COUNT = source count, V4F_SIZE = 2.
 *
 * ESI and EDI are saved and restored.  The loop needs four free x87 stack
 * slots.  Trailing comments on x87 instructions list the stack top-first.
 *
 * NOTE(review): when the source count is 0 the routine returns before the
 * destination count/size/flags are written -- confirm callers expect that.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_2d )
HIDDEN(_mesa_x86_transform_points2_2d)
GLNAME( _mesa_x86_transform_points2_2d ):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )			/* nothing to do for count == 0 */
	JZ( LLBL(x86_p2_2dr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )			/* count * 16 bytes of output */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )			/* ECX = one past the last output */

ALIGNTEXT16
LLBL(x86_p2_2dr_loop):

	/* x * matrix words 0..1 */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )

	/* y * matrix words 4..5 */
	FLD_S( SRC1 )			/* F0 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT5 )

	/* fold the y products into the matching x products */
	FXCH( ST(1) )			/* F0 F1 F5 F4 */
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
	FADDP( ST0, ST(1) )		/* F5 F4 */

	/* add matrix words 12..13 */
	FXCH( ST(1) )			/* F4 F5 */
	FADD_S( MAT12 )
	FXCH( ST(1) )			/* F5 F4 */
	FADD_S( MAT13 )

	/* store the two results and empty the x87 stack */
	FXCH( ST(1) )			/* F4 F5 */
	FSTP_S( DST0 )		/* F5 */
	FSTP_S( DST1 )		/* */

LLBL(x86_p2_2dr_skip):

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_2dr_loop) )

LLBL(x86_p2_2dr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points2_2d_no_rot
 *
 * Transforms 2-component source elements (x, y) = (SRC0, SRC1) using only
 * matrix words 0, 5, 12 and 13, and stores two floats per element:
 *
 *     DST0 = x * MAT0 + MAT12
 *     DST1 = y * MAT5 + MAT13
 *
 * DST2 and DST3 are not written.
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX, which
 * are not defined in this file.  FRAME_OFFSET is set to the number of bytes
 * pushed here (2 registers); presumably those macros use it to reach the
 * caller's arguments -- confirm against xform_args.h.
 *
 * Register roles once setup is complete:
 *   EAX  source stride in bytes (V4F_STRIDE of the source)
 *   ECX  end-of-output pointer: destination start + 16 * count
 *   EDX  matrix base
 *   ESI  current source element, advanced by EAX per iteration
 *   EDI  current destination element, advanced by 16 bytes per iteration
 *
 * Writes to the destination vector header: V4F_FLAGS |= VEC_SIZE_2,
 * V4F_COUNT = source count, V4F_SIZE = 2.
 *
 * ESI and EDI are saved and restored.  The loop needs three free x87 stack
 * slots.  Trailing comments on x87 instructions list the stack top-first.
 *
 * The entry point uses ALIGNTEXT16, the same alignment as the other
 * transform_points2 entry points in this file.
 *
 * NOTE(review): when the source count is 0 the routine returns before the
 * destination count/size/flags are written -- confirm callers expect that.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_2d_no_rot )
HIDDEN(_mesa_x86_transform_points2_2d_no_rot)
GLNAME( _mesa_x86_transform_points2_2d_no_rot ):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )			/* nothing to do for count == 0 */
	JZ( LLBL(x86_p2_2dnrr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )			/* count * 16 bytes of output */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )			/* ECX = one past the last output */

ALIGNTEXT16
LLBL(x86_p2_2dnrr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F1 F4 */
	FMUL_S( MAT5 )

	FXCH( ST(1) )			/* F4 F1 */
	FADD_S( MAT12 )
	FLD_S( MAT13 )		/* F5 F4 F1 */
	FXCH( ST(2) )			/* F1 F4 F5 */
	FADDP( ST0, ST(2) )		/* F4 F5 */

	FSTP_S( DST0   )		/* F5 */
	FSTP_S( DST1   )		/* */

LLBL(x86_p2_2dnrr_skip):

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_2dnrr_loop) )

LLBL(x86_p2_2dnrr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points2_identity
 *
 * Copies the first two 32-bit words of every source element to the
 * destination with integer moves (no x87 use):
 *
 *     DST0 = SRC0
 *     DST1 = SRC1
 *
 * DST2 and DST3 are not written.  The matrix argument is not read.
 * If the source and destination start pointers are equal the copy loop is
 * skipped; the destination header has already been updated at that point.
 *
 * Arguments are fetched through ARG_SOURCE and ARG_DEST, which are not
 * defined in this file.  FRAME_OFFSET is set to the number of bytes pushed
 * here (3 registers); presumably those macros use it to reach the caller's
 * arguments -- confirm against xform_args.h.
 *
 * Register roles once setup is complete:
 *   EAX  source stride in bytes (V4F_STRIDE of the source)
 *   EBX  scratch: word 0 of the element being copied
 *   ECX  end-of-output pointer: destination start + 16 * count
 *   EDX  scratch: word 1 of the element being copied
 *   ESI  current source element, advanced by EAX per iteration
 *   EDI  current destination element, advanced by 16 bytes per iteration
 *
 * Writes to the destination vector header: V4F_FLAGS |= VEC_SIZE_2,
 * V4F_COUNT = source count, V4F_SIZE = 2.
 *
 * ESI, EDI and EBX are saved and restored.
 *
 * NOTE(review): when the source count is 0 the routine returns before the
 * destination count/size/flags are written -- confirm callers expect that.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_identity )
HIDDEN(_mesa_x86_transform_points2_identity)
GLNAME( _mesa_x86_transform_points2_identity ):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )			/* nothing to do for count == 0 */
	JZ( LLBL(x86_p2_ir_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )			/* count * 16 bytes of output */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )			/* ECX = one past the last output */

	CMP_L( ESI, EDI )			/* same start pointer: no copy needed */
	JE( LLBL(x86_p2_ir_done) )

ALIGNTEXT16
LLBL(x86_p2_ir_loop):

	MOV_L( SRC0, EBX )
	MOV_L( SRC1, EDX )

	MOV_L( EBX, DST0 )
	MOV_L( EDX, DST1 )

LLBL(x86_p2_ir_skip):

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_ir_loop) )

LLBL(x86_p2_ir_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET

/* On Linux/ELF, mark this object as not requiring an executable stack. */
#if defined (__ELF__) && defined (__linux__)
	.section .note.GNU-stack,"",%progbits
#endif