
/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifdef USE_3DNOW_ASM
#include "assyntax.h"
#include "matypes.h"
#include "xform_args.h"

    SEG_TEXT

/*
 * Every routine in this file pushes exactly one register (ESI) before it
 * reads ARG_DEST / ARG_MATRIX / ARG_SOURCE; EDI is pushed only afterwards.
 * NOTE(review): presumably xform_args.h adds FRAME_OFFSET to the
 * ESP-relative argument offsets to account for that single push - confirm
 * against xform_args.h before changing the push order in any routine.
 */
#define FRAME_OFFSET	4

35848b8605Smrg
/*
 * _mesa_3dnow_transform_points1_general
 *
 * Transforms 1-component points (only x is read from each source element)
 * by the full matrix and writes 4-component results:
 *     r0 = x*m00 + m30      r1 = x*m01 + m31
 *     r2 = x*m02 + m32      r3 = x*m03 + m33
 *
 * In:   ARG_DEST   - destination vector (V4F_* field layout)
 *       ARG_MATRIX - matrix; floats at byte offsets 0..15 and 48..63 are read
 *       ARG_SOURCE - source vector (V4F_* field layout)
 * Out:  dest size = 4, dest flags |= VEC_SIZE_4, dest count = source count;
 *       one 16-byte result per source element, starting at dest start.
 * Regs: EAX = source pointer, EDX = destination pointer, ECX = matrix,
 *       ESI = elements remaining, EDI = source stride in bytes.
 *       ESI and EDI are saved/restored; EAX, ECX, EDX and MM0-MM5 are
 *       overwritten.  FEMMS is executed on every exit path.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_general )
HIDDEN(_mesa_3dnow_transform_points1_general)
GLNAME( _mesa_3dnow_transform_points1_general ):

    PUSH_L    ( ESI )

    /* ARG_* must be read here, while ESI is the only register pushed. */
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count         */

    PUSH_L    ( EDI )

    /* Named field offset instead of the literal 4 (assumes V4F_START == 4). */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest write pointer          */
    MOV_L     ( ESI, ECX )			/* ECX = matrix                      */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = loop counter                */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source read pointer         */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPGR_3 ) )		/* nothing to transform              */

    /* Loop-invariant matrix rows are kept in MM0-MM3. */
    MOVQ      ( REGIND(ECX), MM0 )	/* m01             | m00             */
    MOVQ      ( REGOFF(8, ECX), MM1 )	/* m03             | m02             */

    MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */
    MOVQ      ( REGOFF(56, ECX), MM3 )	/* m33             | m32             */

ALIGNTEXT16
LLBL( G3TPGR_2 ):

    MOVD      ( REGIND(EAX), MM4 )	/*                 | x0              */
    PUNPCKLDQ ( MM4, MM4 )		/* x0              | x0              */

    MOVQ      ( MM4, MM5 )		/* x0              | x0              */
    PFMUL     ( MM0, MM4 )		/* x0*m01          | x0*m00          */

    PFMUL     ( MM1, MM5 )		/* x0*m03          | x0*m02          */
    PFADD     ( MM2, MM4 )		/* x0*m01+m31      | x0*m00+m30      */

    PFADD     ( MM3, MM5 )		/* x0*m03+m33      | x0*m02+m32      */
    MOVQ      ( MM4, REGIND(EDX) )	/* write r1, r0                      */

    MOVQ      ( MM5, REGOFF(8, EDX) )	/* write r3, r2                      */
    ADD_L     ( EDI, EAX )		/* next vertex                       */

    ADD_L     ( CONST(16), EDX )	/* next r                            */
    DEC_L     ( ESI )			/* decrement vertex counter          */

    JNZ       ( LLBL( G3TPGR_2 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TPGR_3 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
97848b8605Smrg
98848b8605Smrg
99848b8605Smrg
100848b8605Smrg
/*
 * _mesa_3dnow_transform_points1_identity
 *
 * Copies the x component of every source element to the destination
 * unchanged (r0 = x); no other result component is written.
 *
 * In:   ARG_DEST   - destination vector (V4F_* field layout)
 *       ARG_MATRIX - fetched into ECX but never dereferenced by this routine
 *       ARG_SOURCE - source vector (V4F_* field layout)
 * Out:  dest size = 1, dest flags |= VEC_SIZE_1, dest count = source count;
 *       4 bytes written per element, destination advances 16 bytes each.
 * Regs: EAX = source pointer, EDX = destination pointer,
 *       ESI = elements remaining, EDI = source stride in bytes.
 *       ESI and EDI are saved/restored; EAX, ECX, EDX and MM0 are
 *       overwritten.  FEMMS is executed on every exit path.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_identity )
HIDDEN(_mesa_3dnow_transform_points1_identity)
GLNAME( _mesa_3dnow_transform_points1_identity ):

    PUSH_L    ( ESI )

    /* ARG_* must be read here, while ESI is the only register pushed. */
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(1), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_1), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count         */

    PUSH_L    ( EDI )

    /* Named field offset instead of the literal 4 (assumes V4F_START == 4). */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest write pointer          */
    MOV_L     ( ESI, ECX )			/* ECX = matrix (unused below)       */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = loop counter                */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source read pointer         */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPIR_4) )		/* nothing to copy                   */

ALIGNTEXT16
LLBL( G3TPIR_3 ):

    MOVD      ( REGIND(EAX), MM0 )	/*                 | x0              */
    ADD_L     ( EDI, EAX )		/* next vertex                       */

    MOVD      ( MM0, REGIND(EDX) )	/*                 | r0              */
    ADD_L     ( CONST(16), EDX )	/* next r                            */

    DEC_L     ( ESI )			/* decrement vertex counter          */
    JNZ       ( LLBL( G3TPIR_3 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TPIR_4 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
145848b8605Smrg
146848b8605Smrg
147848b8605Smrg
148848b8605Smrg
/*
 * _mesa_3dnow_transform_points1_3d_no_rot
 *
 * Transforms 1-component points using only m00 and the m30/m31/m32 terms,
 * writing 3-component results:
 *     r0 = x*m00 + m30      r1 = m31      r2 = m32
 *
 * In:   ARG_DEST   - destination vector (V4F_* field layout)
 *       ARG_MATRIX - matrix; floats at byte offsets 0, 48, 52 and 56 are read
 *       ARG_SOURCE - source vector (V4F_* field layout)
 * Out:  dest size = 3, dest flags |= VEC_SIZE_3, dest count = source count;
 *       12 bytes written per element, destination advances 16 bytes each.
 * Regs: EAX = source pointer, EDX = destination pointer, ECX = matrix,
 *       ESI = elements remaining, EDI = source stride in bytes.
 *       ESI and EDI are saved/restored; EAX, ECX, EDX and MM0, MM2-MM4 are
 *       overwritten.  FEMMS is executed on every exit path.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_3d_no_rot )
HIDDEN(_mesa_3dnow_transform_points1_3d_no_rot)
GLNAME( _mesa_3dnow_transform_points1_3d_no_rot ):

    PUSH_L    ( ESI )

    /* ARG_* must be read here, while ESI is the only register pushed. */
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count         */

    PUSH_L    ( EDI )

    /* Named field offset instead of the literal 4 (assumes V4F_START == 4). */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest write pointer          */
    MOV_L     ( ESI, ECX )			/* ECX = matrix                      */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = loop counter                */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source read pointer         */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP3NRR_3 ) )		/* nothing to transform              */

    /* MOVD loads clear the upper lane, so the upper product below is 0. */
    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
    MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */

    MOVD      ( REGOFF(56, ECX), MM3 )	/*                 | m32             */

ALIGNTEXT16
LLBL( G3TP3NRR_2 ):

    MOVD      ( REGIND(EAX), MM4 )	/*                 | x0              */
    PFMUL     ( MM0, MM4 )		/*                 | x0*m00          */

    PFADD     ( MM2, MM4 )		/* m31             | x0*m00+m30      */
    MOVQ      ( MM4, REGIND(EDX) )	/* write r1, r0                      */

    MOVD      ( MM3, REGOFF(8, EDX) )	/* write r2                          */
    ADD_L     ( EDI, EAX )		/* next vertex                       */

    ADD_L     ( CONST(16), EDX )	/* next r                            */
    DEC_L     ( ESI )			/* decrement vertex counter          */

    JNZ       ( LLBL( G3TP3NRR_2 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TP3NRR_3 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
203848b8605Smrg
204848b8605Smrg
205848b8605Smrg
206848b8605Smrg
/*
 * _mesa_3dnow_transform_points1_perspective
 *
 * Transforms 1-component points using only m00 and m32, writing
 * 4-component results:
 *     r0 = x*m00      r1 = 0      r2 = m32      r3 = 0
 *
 * In:   ARG_DEST   - destination vector (V4F_* field layout)
 *       ARG_MATRIX - matrix; floats at byte offsets 0 and 56 are read
 *       ARG_SOURCE - source vector (V4F_* field layout)
 * Out:  dest size = 4, dest flags |= VEC_SIZE_4, dest count = source count;
 *       one 16-byte result per source element, starting at dest start.
 * Regs: EAX = source pointer, EDX = destination pointer, ECX = matrix,
 *       ESI = elements remaining, EDI = source stride in bytes.
 *       ESI and EDI are saved/restored; EAX, ECX, EDX and MM0, MM3, MM4 are
 *       overwritten.  FEMMS is executed on every exit path.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_perspective )
HIDDEN(_mesa_3dnow_transform_points1_perspective)
GLNAME( _mesa_3dnow_transform_points1_perspective ):

    PUSH_L    ( ESI )

    /* ARG_* must be read here, while ESI is the only register pushed. */
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count         */

    PUSH_L    ( EDI )

    /* Named field offset instead of the literal 4 (assumes V4F_START == 4). */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest write pointer          */
    MOV_L     ( ESI, ECX )			/* ECX = matrix                      */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = loop counter                */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source read pointer         */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPPR_3 ) )		/* nothing to transform              */

    /* MOVD loads clear the upper lane; that zero supplies r1 and r3. */
    MOVD      ( REGIND(ECX), MM0 )	/* 0               | m00             */
    MOVD      ( REGOFF(56, ECX), MM3 )	/* 0               | m32             */

ALIGNTEXT16
LLBL( G3TPPR_2 ):

    MOVD      ( REGIND(EAX), MM4 )	/* 0               | x0              */
    PFMUL     ( MM0, MM4 )		/* 0               | x0*m00          */

    MOVQ      ( MM4, REGIND(EDX) )	/* write r1, r0                      */
    MOVQ      ( MM3, REGOFF(8, EDX) )	/* write r2  (=m32), r3 (=0)         */

    ADD_L     ( EDI, EAX )		/* next vertex                       */
    ADD_L     ( CONST(16), EDX )	/* next r                            */

    DEC_L     ( ESI )			/* decrement vertex counter          */
    JNZ       ( LLBL( G3TPPR_2 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TPPR_3 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
257848b8605Smrg
258848b8605Smrg
259848b8605Smrg
260848b8605Smrg
/*
 * _mesa_3dnow_transform_points1_2d
 *
 * Transforms 1-component points by the 2D part of the matrix, writing
 * 2-component results:
 *     r0 = x*m00 + m30      r1 = x*m01 + m31
 *
 * In:   ARG_DEST   - destination vector (V4F_* field layout)
 *       ARG_MATRIX - matrix; floats at byte offsets 0..7 and 48..55 are read
 *       ARG_SOURCE - source vector (V4F_* field layout)
 * Out:  dest size = 2, dest flags |= VEC_SIZE_2, dest count = source count;
 *       8 bytes written per element, destination advances 16 bytes each.
 * Regs: EAX = source pointer, EDX = destination pointer, ECX = matrix,
 *       ESI = elements remaining, EDI = source stride in bytes.
 *       ESI and EDI are saved/restored; EAX, ECX, EDX and MM0, MM2, MM4 are
 *       overwritten.  FEMMS is executed on every exit path.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_2d )
HIDDEN(_mesa_3dnow_transform_points1_2d)
GLNAME( _mesa_3dnow_transform_points1_2d ):

    PUSH_L    ( ESI )

    /* ARG_* must be read here, while ESI is the only register pushed. */
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count         */

    PUSH_L    ( EDI )

    /* Named field offset instead of the literal 4 (assumes V4F_START == 4). */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest write pointer          */
    MOV_L     ( ESI, ECX )			/* ECX = matrix                      */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = loop counter                */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source read pointer         */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP2R_3 ) )		/* nothing to transform              */

    MOVQ      ( REGIND(ECX), MM0 )	/* m01             | m00             */
    MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */

ALIGNTEXT16
LLBL( G3TP2R_2 ):

    MOVD      ( REGIND(EAX), MM4 )	/*                 | x0              */
    PUNPCKLDQ ( MM4, MM4 )		/* x0              | x0              */

    PFMUL     ( MM0, MM4 )		/* x0*m01          | x0*m00          */
    PFADD     ( MM2, MM4 )		/* x0*m01+m31      | x0*m00+m30      */

    MOVQ      ( MM4, REGIND(EDX) )	/* write r1, r0                      */
    ADD_L     ( EDI, EAX )		/* next vertex                       */

    ADD_L     ( CONST(16), EDX )	/* next r                            */
    DEC_L     ( ESI )			/* decrement vertex counter          */

    JNZ       ( LLBL( G3TP2R_2 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TP2R_3 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
313848b8605Smrg
314848b8605Smrg
315848b8605Smrg
316848b8605Smrg
/*
 * _mesa_3dnow_transform_points1_2d_no_rot
 *
 * Transforms 1-component points using only m00 and the m30/m31 terms,
 * writing 2-component results:
 *     r0 = x*m00 + m30      r1 = m31
 *
 * In:   ARG_DEST   - destination vector (V4F_* field layout)
 *       ARG_MATRIX - matrix; floats at byte offsets 0, 48 and 52 are read
 *       ARG_SOURCE - source vector (V4F_* field layout)
 * Out:  dest size = 2, dest flags |= VEC_SIZE_2, dest count = source count;
 *       8 bytes written per element, destination advances 16 bytes each.
 * Regs: EAX = source pointer, EDX = destination pointer, ECX = matrix,
 *       ESI = elements remaining, EDI = source stride in bytes.
 *       ESI and EDI are saved/restored; EAX, ECX, EDX and MM0, MM2, MM4 are
 *       overwritten.  FEMMS is executed on every exit path.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_2d_no_rot )
HIDDEN(_mesa_3dnow_transform_points1_2d_no_rot)
GLNAME( _mesa_3dnow_transform_points1_2d_no_rot ):

    PUSH_L    ( ESI )

    /* ARG_* must be read here, while ESI is the only register pushed. */
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count         */

    PUSH_L    ( EDI )

    /* Named field offset instead of the literal 4 (assumes V4F_START == 4). */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest write pointer          */
    MOV_L     ( ESI, ECX )			/* ECX = matrix                      */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = loop counter                */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source read pointer         */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP2NRR_3 ) )		/* nothing to transform              */

    /* MOVD loads clear the upper lane, so the upper product below is 0. */
    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
    MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */

ALIGNTEXT16
LLBL( G3TP2NRR_2 ):

    MOVD      ( REGIND(EAX), MM4 )	/*                 | x0              */
    ADD_L     ( EDI, EAX )		/* next vertex                       */

    PFMUL     ( MM0, MM4 )		/*                 | x0*m00          */
    PFADD     ( MM2, MM4 )		/* m31             | x0*m00+m30      */

    MOVQ      ( MM4, REGIND(EDX) )	/* write r1, r0                      */
    ADD_L     ( CONST(16), EDX )	/* next r                            */

    DEC_L     ( ESI )			/* decrement vertex counter          */
    JNZ       ( LLBL( G3TP2NRR_2 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TP2NRR_3 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
367848b8605Smrg
368848b8605Smrg
369848b8605Smrg
370848b8605Smrg
/*
 * _mesa_3dnow_transform_points1_3d
 *
 * Transforms 1-component points by the 3D part of the matrix, writing
 * 3-component results:
 *     r0 = x*m00 + m30      r1 = x*m01 + m31      r2 = x*m02 + m32
 *
 * In:   ARG_DEST   - destination vector (V4F_* field layout)
 *       ARG_MATRIX - matrix; floats at byte offsets 0..11 and 48..59 are read
 *       ARG_SOURCE - source vector (V4F_* field layout)
 * Out:  dest size = 3, dest flags |= VEC_SIZE_3, dest count = source count;
 *       12 bytes written per element, destination advances 16 bytes each.
 * Regs: EAX = source pointer, EDX = destination pointer, ECX = matrix,
 *       ESI = elements remaining, EDI = source stride in bytes.
 *       ESI and EDI are saved/restored; EAX, ECX, EDX and MM0-MM5 are
 *       overwritten.  FEMMS is executed on every exit path.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_3d )
HIDDEN(_mesa_3dnow_transform_points1_3d)
GLNAME( _mesa_3dnow_transform_points1_3d ):

    PUSH_L    ( ESI )

    /* ARG_* must be read here, while ESI is the only register pushed. */
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count         */

    PUSH_L    ( EDI )

    /* Named field offset instead of the literal 4 (assumes V4F_START == 4). */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest write pointer          */
    MOV_L     ( ESI, ECX )			/* ECX = matrix                      */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = loop counter                */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source read pointer         */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP3R_3 ) )		/* nothing to transform              */

    MOVQ      ( REGIND(ECX), MM0 )	/* m01             | m00             */
    MOVD      ( REGOFF(8, ECX), MM1 )	/* 0               | m02             */

    MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */
    MOVD      ( REGOFF(56, ECX), MM3 )	/* 0               | m32             */

ALIGNTEXT16
LLBL( G3TP3R_2 ):

    MOVD      ( REGIND(EAX), MM4 )	/*                 | x0              */
    PUNPCKLDQ ( MM4, MM4 )		/* x0              | x0              */

    MOVQ      ( MM4, MM5 )		/* x0              | x0              */
    PFMUL     ( MM0, MM4 )		/* x0*m01          | x0*m00          */

    /* Only the lower lane of MM5 is stored; the upper lane is discarded. */
    PFMUL     ( MM1, MM5 )		/* x0*0            | x0*m02          */
    PFADD     ( MM2, MM4 )		/* x0*m01+m31      | x0*m00+m30      */

    PFADD     ( MM3, MM5 )		/* (unused)        | x0*m02+m32      */
    MOVQ      ( MM4, REGIND(EDX) )	/* write r1, r0                      */

    MOVD      ( MM5, REGOFF(8, EDX) )	/* write r2                          */
    ADD_L     ( EDI, EAX )		/* next vertex                       */

    ADD_L     ( CONST(16), EDX )	/* next r                            */
    DEC_L     ( ESI )			/* decrement vertex counter          */

    JNZ       ( LLBL( G3TP3R_2 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TP3R_3 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
432848b8605Smrg
#endif

/* Emit an empty .note.GNU-stack section on ELF/Linux builds. */
#if defined (__ELF__) && defined (__linux__)
	.section .note.GNU-stack,"",%progbits
#endif