1848b8605Smrg
2848b8605Smrg/*
3848b8605Smrg * Mesa 3-D graphics library
4848b8605Smrg *
5848b8605Smrg * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
6848b8605Smrg *
7848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a
8848b8605Smrg * copy of this software and associated documentation files (the "Software"),
9848b8605Smrg * to deal in the Software without restriction, including without limitation
10848b8605Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11848b8605Smrg * and/or sell copies of the Software, and to permit persons to whom the
12848b8605Smrg * Software is furnished to do so, subject to the following conditions:
13848b8605Smrg *
14848b8605Smrg * The above copyright notice and this permission notice shall be included
15848b8605Smrg * in all copies or substantial portions of the Software.
16848b8605Smrg *
17848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18848b8605Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19848b8605Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20848b8605Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21848b8605Smrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22848b8605Smrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23848b8605Smrg * OTHER DEALINGS IN THE SOFTWARE.
24848b8605Smrg */
25848b8605Smrg
26848b8605Smrg#ifdef USE_3DNOW_ASM
27848b8605Smrg#include "assyntax.h"
28848b8605Smrg#include "matypes.h"
29848b8605Smrg#include "xform_args.h"
30848b8605Smrg
31848b8605Smrg    SEG_TEXT
32848b8605Smrg
/*
 * NOTE(review): every routine below pushes exactly one register (ESI)
 * before reading ARG_DEST / ARG_MATRIX / ARG_SOURCE.  FRAME_OFFSET is
 * presumably the 4 bytes of stack that push consumes, used by xform_args.h
 * when computing the argument offsets -- confirm against that header.
 */
33848b8605Smrg#define FRAME_OFFSET	4
34848b8605Smrg
35848b8605Smrg
/*
 * _mesa_3dnow_transform_points2_general
 *
 * Transforms 2-component source points (x0, x1) by a general matrix and
 * writes 4-component results:
 *     r0 = x0*m00 + x1*m10 + m30
 *     r1 = x0*m01 + x1*m11 + m31
 *     r2 = x0*m02 + x1*m12 + m32
 *     r3 = x0*m03 + x1*m13 + m33
 *
 * Arguments are read through ARG_DEST, ARG_MATRIX and ARG_SOURCE.  The
 * destination's size field is set to 4, VEC_SIZE_4 is OR-ed into its flags
 * and its count is copied from the source.
 *
 * Loop registers:
 *     EAX = current source vertex (advanced by EDI, the source stride)
 *     EDX = current destination vertex (advanced by 16 bytes)
 *     ESI = remaining vertex count
 *     MM0-MM5 = matrix elements loaded once before the loop
 *     MM6/MM7 = per-vertex scratch
 * ESI and EDI are pushed on entry and popped before RET.
 *
 * NOTE(review): all ARG_* reads sit between the first and second PUSH_L;
 * presumably they must stay there so the offsets derived from FRAME_OFFSET
 * remain valid -- confirm against xform_args.h.
 */
36848b8605SmrgALIGNTEXT16
37848b8605SmrgGLOBL GLNAME( _mesa_3dnow_transform_points2_general )
38848b8605SmrgHIDDEN(_mesa_3dnow_transform_points2_general)
39848b8605SmrgGLNAME( _mesa_3dnow_transform_points2_general ):
40848b8605Smrg
41848b8605Smrg    PUSH_L    ( ESI )			/* save ESI (restored before RET)    */
42848b8605Smrg
43848b8605Smrg    MOV_L     ( ARG_DEST, ECX )		/* ECX = destination vector          */
44848b8605Smrg    MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below) */
45848b8605Smrg    MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector               */
46848b8605Smrg    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size field = 4 */
47848b8605Smrg    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= VEC_SIZE_4 */
48848b8605Smrg    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )	/* EDX = source count    */
49848b8605Smrg    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count */
50848b8605Smrg
51848b8605Smrg    PUSH_L    ( EDI )			/* save EDI (restored before RET)    */
52848b8605Smrg
53848b8605Smrg    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data start     */
54848b8605Smrg    MOV_L     ( ESI, ECX )			/* ECX = matrix              */
55848b8605Smrg    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex counter      */
56848b8605Smrg    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
57848b8605Smrg    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data start   */
58848b8605Smrg
59848b8605Smrg    TEST_L    ( ESI, ESI )
60848b8605Smrg    JZ        ( LLBL( G3TPGR_3 ) )	/* count == 0 -> skip the loop       */
61848b8605Smrg
62848b8605Smrg    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
63848b8605Smrg    PUNPCKLDQ ( REGOFF(16, ECX), MM0 )	/* m10             | m00             */
64848b8605Smrg
65848b8605Smrg    MOVD      ( REGOFF(4, ECX), MM1 )	/*                 | m01             */
66848b8605Smrg    PUNPCKLDQ ( REGOFF(20, ECX), MM1 )	/* m11             | m01             */
67848b8605Smrg
68848b8605Smrg    MOVD      ( REGOFF(8, ECX), MM2 )	/*                 | m02             */
69848b8605Smrg    PUNPCKLDQ ( REGOFF(24, ECX), MM2 )	/* m12             | m02             */
70848b8605Smrg
71848b8605Smrg    MOVD      ( REGOFF(12, ECX), MM3 )	/*                 | m03             */
72848b8605Smrg    PUNPCKLDQ ( REGOFF(28, ECX), MM3 )	/* m13             | m03             */
73848b8605Smrg
74848b8605Smrg    MOVQ      ( REGOFF(48, ECX), MM4 )	/* m31             | m30             */
75848b8605Smrg    MOVQ      ( REGOFF(56, ECX), MM5 )	/* m33             | m32             */
76848b8605Smrg
77848b8605SmrgALIGNTEXT16
78848b8605SmrgLLBL( G3TPGR_2 ):
79848b8605Smrg
80848b8605Smrg    MOVQ      ( REGIND(EAX), MM6 )	/* x1              | x0              */
81848b8605Smrg    MOVQ      ( MM6, MM7 )		/* x1              | x0              */
82848b8605Smrg
83848b8605Smrg    PFMUL     ( MM0, MM6 )		/* x1*m10          | x0*m00          */
84848b8605Smrg    PFMUL     ( MM1, MM7 )		/* x1*m11          | x0*m01          */
85848b8605Smrg
86848b8605Smrg    PFACC     ( MM7, MM6 )		/* x0*m01+x1*m11   | x0*m00+x1*m10   */
87848b8605Smrg    PFADD     ( MM4, MM6 )		/* x0*...*m11+m31  | x0*...*m10+m30  */
88848b8605Smrg
89848b8605Smrg    MOVQ      ( MM6, REGIND(EDX) )	/* write r1, r0                      */
90848b8605Smrg    MOVQ      ( REGIND(EAX), MM6 )	/* x1              | x0  (reloaded)  */
91848b8605Smrg
92848b8605Smrg    MOVQ      ( MM6, MM7 )		/* x1              | x0              */
93848b8605Smrg    PFMUL     ( MM2, MM6 )		/* x1*m12          | x0*m02          */
94848b8605Smrg
95848b8605Smrg    PFMUL     ( MM3, MM7 )		/* x1*m13          | x0*m03          */
96848b8605Smrg    ADD_L     ( EDI, EAX )		/* next vertex                       */
97848b8605Smrg
98848b8605Smrg    PFACC     ( MM7, MM6 )		/* x0*m03+x1*m13   | x0*m02+x1*m12   */
99848b8605Smrg    PFADD     ( MM5, MM6 )		/* x0*...*m13+m33  | x0*...*m12+m32  */
100848b8605Smrg
101848b8605Smrg    MOVQ      ( MM6, REGOFF(8, EDX) )	/* write r3, r2                      */
102848b8605Smrg    ADD_L     ( CONST(16), EDX )	/* next r                            */
103848b8605Smrg
104848b8605Smrg    DEC_L     ( ESI )			/* decrement vertex counter          */
105848b8605Smrg    JNZ       ( LLBL( G3TPGR_2 ) )	/* cnt > 0 ? -> process next vertex  */
106848b8605Smrg
107848b8605SmrgLLBL( G3TPGR_3 ):
108848b8605Smrg
109848b8605Smrg    FEMMS				/* leave MMX/3DNow! state            */
110848b8605Smrg    POP_L     ( EDI )
111848b8605Smrg    POP_L     ( ESI )
112848b8605Smrg    RET
113848b8605Smrg
114848b8605Smrg
115848b8605Smrg
116848b8605Smrg
/*
 * _mesa_3dnow_transform_points2_perspective
 *
 * Transforms 2-component source points (x0, x1) using only the matrix
 * elements m00, m11 and m32, and writes 4-component results:
 *     r0 = x0*m00
 *     r1 = x1*m11
 *     r2 = m32
 *     r3 = 0
 *
 * Arguments are read through ARG_DEST, ARG_MATRIX and ARG_SOURCE.  The
 * destination's size field is set to 4, VEC_SIZE_4 is OR-ed into its flags
 * and its count is copied from the source.
 *
 * Loop registers:
 *     EAX = current source vertex (advanced by EDI, the source stride)
 *     EDX = current destination vertex (advanced by 16 bytes)
 *     ESI = remaining vertex count
 *     MM0 = m11 | m00,  MM3 = 0 | m32  (loaded once before the loop)
 * ESI and EDI are pushed on entry and popped before RET.
 *
 * NOTE(review): all ARG_* reads sit between the first and second PUSH_L;
 * presumably they must stay there so the offsets derived from FRAME_OFFSET
 * remain valid -- confirm against xform_args.h.
 */
117848b8605SmrgALIGNTEXT16
118848b8605SmrgGLOBL GLNAME( _mesa_3dnow_transform_points2_perspective )
119848b8605SmrgHIDDEN(_mesa_3dnow_transform_points2_perspective)
120848b8605SmrgGLNAME( _mesa_3dnow_transform_points2_perspective ):
121848b8605Smrg
122848b8605Smrg    PUSH_L    ( ESI )			/* save ESI (restored before RET)    */
123848b8605Smrg
124848b8605Smrg    MOV_L     ( ARG_DEST, ECX )		/* ECX = destination vector          */
125848b8605Smrg    MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below) */
126848b8605Smrg    MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector               */
127848b8605Smrg    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size field = 4 */
128848b8605Smrg    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= VEC_SIZE_4 */
129848b8605Smrg    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )	/* EDX = source count    */
130848b8605Smrg    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count */
131848b8605Smrg
132848b8605Smrg    PUSH_L    ( EDI )			/* save EDI (restored before RET)    */
133848b8605Smrg
134848b8605Smrg    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data start     */
135848b8605Smrg    MOV_L     ( ESI, ECX )			/* ECX = matrix              */
136848b8605Smrg    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex counter      */
137848b8605Smrg    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
138848b8605Smrg    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data start   */
139848b8605Smrg
140848b8605Smrg    TEST_L    ( ESI, ESI )
141848b8605Smrg    JZ        ( LLBL( G3TPPR_3 ) )	/* count == 0 -> skip the loop       */
142848b8605Smrg
143848b8605Smrg    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
144848b8605Smrg    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */
145848b8605Smrg
146848b8605Smrg    MOVD      ( REGOFF(56, ECX), MM3 )	/* 0               | m32             */
147848b8605Smrg
148848b8605SmrgALIGNTEXT16
149848b8605SmrgLLBL( G3TPPR_2 ):
150848b8605Smrg
151848b8605Smrg    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
152848b8605Smrg    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
153848b8605Smrg
154848b8605Smrg    MOVQ      ( MM4, REGIND(EDX) )	/* write r1, r0                      */
155848b8605Smrg    MOVQ      ( MM3, REGOFF(8, EDX) )	/* write r2  (=m32), r3 (=0)         */
156848b8605Smrg
157848b8605Smrg    ADD_L     ( EDI, EAX )		/* next vertex                       */
158848b8605Smrg    ADD_L     ( CONST(16), EDX )	/* next r                            */
159848b8605Smrg
160848b8605Smrg    DEC_L     ( ESI )			/* decrement vertex counter          */
161848b8605Smrg    JNZ       ( LLBL( G3TPPR_2 ) )	/* cnt > 0 ? -> process next vertex  */
162848b8605Smrg
163848b8605SmrgLLBL( G3TPPR_3 ):
164848b8605Smrg
165848b8605Smrg    FEMMS				/* leave MMX/3DNow! state            */
166848b8605Smrg    POP_L     ( EDI )
167848b8605Smrg    POP_L     ( ESI )
168848b8605Smrg    RET
169848b8605Smrg
170848b8605Smrg
171848b8605Smrg
172848b8605Smrg
/*
 * _mesa_3dnow_transform_points2_3d
 *
 * Transforms 2-component source points (x0, x1) and writes 3-component
 * results (only 12 of the 16 bytes of each destination slot are written):
 *     r0 = x0*m00 + x1*m10 + m30
 *     r1 = x0*m01 + x1*m11 + m31
 *     r2 = x0*m02 + x1*m12 + m32
 *
 * Arguments are read through ARG_DEST, ARG_MATRIX and ARG_SOURCE.  The
 * destination's size field is set to 3, VEC_SIZE_3 is OR-ed into its flags
 * and its count is copied from the source.
 *
 * Loop registers:
 *     EAX = current source vertex (advanced by EDI, the source stride)
 *     EDX = current destination vertex (advanced by 16 bytes)
 *     ESI = remaining vertex count
 *     MM0-MM2, MM4, MM5 = matrix elements loaded once before the loop
 *     MM6/MM7 = per-vertex scratch
 * ESI and EDI are pushed on entry and popped before RET.
 *
 * NOTE(review): all ARG_* reads sit between the first and second PUSH_L;
 * presumably they must stay there so the offsets derived from FRAME_OFFSET
 * remain valid -- confirm against xform_args.h.
 */
173848b8605SmrgALIGNTEXT16
174848b8605SmrgGLOBL GLNAME( _mesa_3dnow_transform_points2_3d )
175848b8605SmrgHIDDEN(_mesa_3dnow_transform_points2_3d)
176848b8605SmrgGLNAME( _mesa_3dnow_transform_points2_3d ):
177848b8605Smrg
178848b8605Smrg    PUSH_L    ( ESI )			/* save ESI (restored before RET)    */
179848b8605Smrg
180848b8605Smrg    MOV_L     ( ARG_DEST, ECX )		/* ECX = destination vector          */
181848b8605Smrg    MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below) */
182848b8605Smrg    MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector               */
183848b8605Smrg    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )	/* dest size field = 3 */
184848b8605Smrg    OR_B      ( CONST(VEC_SIZE_3 ), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= VEC_SIZE_3 */
185848b8605Smrg    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )	/* EDX = source count    */
186848b8605Smrg    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count */
187848b8605Smrg
188848b8605Smrg    PUSH_L    ( EDI )			/* save EDI (restored before RET)    */
189848b8605Smrg
190848b8605Smrg    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data start     */
191848b8605Smrg    MOV_L     ( ESI, ECX )			/* ECX = matrix              */
192848b8605Smrg    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex counter      */
193848b8605Smrg    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
194848b8605Smrg    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data start   */
195848b8605Smrg
196848b8605Smrg    TEST_L    ( ESI, ESI )
197848b8605Smrg    JZ        ( LLBL( G3TP3R_3 ) )	/* count == 0 -> skip the loop       */
198848b8605Smrg
199848b8605Smrg    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
200848b8605Smrg    PUNPCKLDQ ( REGOFF(16, ECX), MM0 )	/* m10             | m00             */
201848b8605Smrg
202848b8605Smrg    MOVD      ( REGOFF(4, ECX), MM1 )	/*                 | m01             */
203848b8605Smrg    PUNPCKLDQ ( REGOFF(20, ECX), MM1 )	/* m11             | m01             */
204848b8605Smrg
205848b8605Smrg    MOVD      ( REGOFF(8, ECX), MM2 )	/*                 | m02             */
206848b8605Smrg    PUNPCKLDQ ( REGOFF(24, ECX), MM2 )	/* m12             | m02             */
207848b8605Smrg
208848b8605Smrg    MOVQ      ( REGOFF(48, ECX), MM4 )	/* m31             | m30             */
209848b8605Smrg    MOVD      ( REGOFF(56, ECX), MM5 )	/*                 | m32             */
210848b8605Smrg
211848b8605SmrgALIGNTEXT16
212848b8605SmrgLLBL( G3TP3R_2 ):
213848b8605Smrg
214848b8605Smrg    MOVQ      ( REGIND(EAX), MM6 )	/* x1              | x0              */
215848b8605Smrg    MOVQ      ( MM6, MM7 )		/* x1              | x0              */
216848b8605Smrg
217848b8605Smrg    PFMUL     ( MM0, MM6 )		/* x1*m10          | x0*m00          */
218848b8605Smrg    PFMUL     ( MM1, MM7 )		/* x1*m11          | x0*m01          */
219848b8605Smrg
220848b8605Smrg    PFACC     ( MM7, MM6 )		/* x0*m01+x1*m11   | x0*m00+x1*m10   */
221848b8605Smrg    PFADD     ( MM4, MM6 )		/* x0*...*m11+m31  | x0*...*m10+m30  */
222848b8605Smrg
223848b8605Smrg    MOVQ      ( MM6, REGIND(EDX) )	/* write r1, r0                      */
224848b8605Smrg    MOVQ      ( REGIND(EAX), MM6 )	/* x1              | x0  (reloaded)  */
225848b8605Smrg
226848b8605Smrg    MOVQ      ( MM6, MM7 )		/* x1 | x0 (feeds only the trash half) */
227848b8605Smrg    PFMUL     ( MM2, MM6 )		/* x1*m12          | x0*m02          */
228848b8605Smrg
229848b8605Smrg    PFACC     ( MM7, MM6 )		/* ***trash***     | x0*m02+x1*m12   */
230848b8605Smrg    PFADD     ( MM5, MM6 )		/* ***trash***     | x0*...*m12+m32  */
231848b8605Smrg
232848b8605Smrg    MOVD      ( MM6, REGOFF(8, EDX) )	/* write r2 (low half only)          */
233848b8605Smrg    ADD_L     ( EDI, EAX )		/* next vertex                       */
234848b8605Smrg
235848b8605Smrg    ADD_L     ( CONST(16), EDX )	/* next r                            */
236848b8605Smrg    DEC_L     ( ESI )			/* decrement vertex counter          */
237848b8605Smrg
238848b8605Smrg    JNZ       ( LLBL( G3TP3R_2 ) )	/* cnt > 0 ? -> process next vertex  */
239848b8605Smrg
240848b8605SmrgLLBL( G3TP3R_3 ):
241848b8605Smrg
242848b8605Smrg    FEMMS				/* leave MMX/3DNow! state            */
243848b8605Smrg    POP_L     ( EDI )
244848b8605Smrg    POP_L     ( ESI )
245848b8605Smrg    RET
246848b8605Smrg
247848b8605Smrg
248848b8605Smrg
249848b8605Smrg
/*
 * _mesa_3dnow_transform_points2_3d_no_rot
 *
 * Transforms 2-component source points (x0, x1) using only the matrix
 * elements m00, m11, m30, m31 and m32, and writes 3-component results
 * (only 12 of the 16 bytes of each destination slot are written):
 *     r0 = x0*m00 + m30
 *     r1 = x1*m11 + m31
 *     r2 = m32
 *
 * Arguments are read through ARG_DEST, ARG_MATRIX and ARG_SOURCE.  The
 * destination's size field is set to 3, VEC_SIZE_3 is OR-ed into its flags
 * and its count is copied from the source.
 *
 * Loop registers:
 *     EAX = current source vertex (advanced by EDI, the source stride)
 *     EDX = current destination vertex (advanced by 16 bytes)
 *     ESI = remaining vertex count
 *     MM0 = m11 | m00,  MM2 = m31 | m30,  MM3 = m32 (low half)
 * ESI and EDI are pushed on entry and popped before RET.
 *
 * NOTE(review): all ARG_* reads sit between the first and second PUSH_L;
 * presumably they must stay there so the offsets derived from FRAME_OFFSET
 * remain valid -- confirm against xform_args.h.
 */
250848b8605SmrgALIGNTEXT16
251848b8605SmrgGLOBL GLNAME( _mesa_3dnow_transform_points2_3d_no_rot )
252848b8605SmrgHIDDEN(_mesa_3dnow_transform_points2_3d_no_rot)
253848b8605SmrgGLNAME( _mesa_3dnow_transform_points2_3d_no_rot ):
254848b8605Smrg
255848b8605Smrg    PUSH_L    ( ESI )			/* save ESI (restored before RET)    */
256848b8605Smrg
257848b8605Smrg    MOV_L     ( ARG_DEST, ECX )		/* ECX = destination vector          */
258848b8605Smrg    MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below) */
259848b8605Smrg    MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector               */
260848b8605Smrg    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )	/* dest size field = 3 */
261848b8605Smrg    OR_B      ( CONST(VEC_SIZE_3 ), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= VEC_SIZE_3 */
262848b8605Smrg    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )	/* EDX = source count    */
263848b8605Smrg    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count */
264848b8605Smrg
265848b8605Smrg    PUSH_L    ( EDI )			/* save EDI (restored before RET)    */
266848b8605Smrg
267848b8605Smrg    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data start     */
268848b8605Smrg    MOV_L     ( ESI, ECX )			/* ECX = matrix              */
269848b8605Smrg    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex counter      */
270848b8605Smrg    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
271848b8605Smrg    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data start   */
272848b8605Smrg
273848b8605Smrg    TEST_L    ( ESI, ESI )
274848b8605Smrg    JZ        ( LLBL( G3TP3NRR_3 ) )	/* count == 0 -> skip the loop       */
275848b8605Smrg
276848b8605Smrg    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
277848b8605Smrg    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */
278848b8605Smrg
279848b8605Smrg    MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */
280848b8605Smrg    MOVD      ( REGOFF(56, ECX), MM3 )	/*                 | m32             */
281848b8605Smrg
282848b8605SmrgALIGNTEXT16
283848b8605SmrgLLBL( G3TP3NRR_2 ):
284848b8605Smrg
285848b8605Smrg    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
286848b8605Smrg    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
287848b8605Smrg
288848b8605Smrg    PFADD     ( MM2, MM4 )		/* x1*m11+m31      | x0*m00+m30      */
289848b8605Smrg    MOVQ      ( MM4, REGIND(EDX) )	/* write r1, r0                      */
290848b8605Smrg
291848b8605Smrg    MOVD      ( MM3, REGOFF(8, EDX) )	/* write r2 (= m32)                  */
292848b8605Smrg    ADD_L     ( EDI, EAX )		/* next vertex                       */
293848b8605Smrg
294848b8605Smrg    ADD_L     ( CONST(16), EDX )	/* next r                            */
295848b8605Smrg    DEC_L     ( ESI )			/* decrement vertex counter          */
296848b8605Smrg
297848b8605Smrg    JNZ       ( LLBL( G3TP3NRR_2 ) )	/* cnt > 0 ? -> process next vertex  */
298848b8605Smrg
299848b8605SmrgLLBL( G3TP3NRR_3 ):
300848b8605Smrg
301848b8605Smrg    FEMMS				/* leave MMX/3DNow! state            */
302848b8605Smrg    POP_L     ( EDI )
303848b8605Smrg    POP_L     ( ESI )
304848b8605Smrg    RET
305848b8605Smrg
306848b8605Smrg
307848b8605Smrg
308848b8605Smrg
/*
 * _mesa_3dnow_transform_points2_2d
 *
 * Transforms 2-component source points (x0, x1) and writes 2-component
 * results (only 8 of the 16 bytes of each destination slot are written):
 *     r0 = x0*m00 + x1*m10 + m30
 *     r1 = x0*m01 + x1*m11 + m31
 *
 * Arguments are read through ARG_DEST, ARG_MATRIX and ARG_SOURCE.  The
 * destination's size field is set to 2, VEC_SIZE_2 is OR-ed into its flags
 * and its count is copied from the source.
 *
 * Loop registers:
 *     EAX = current source vertex (advanced by EDI, the source stride)
 *     EDX = current destination vertex (advanced by 16 bytes)
 *     ESI = remaining vertex count
 *     MM0 = m01 | m00,  MM1 = m11 | m10,  MM2 = m31 | m30
 *     MM4/MM5 = per-vertex scratch (x0 and x1 broadcast to both halves)
 * ESI and EDI are pushed on entry and popped before RET.
 *
 * NOTE(review): all ARG_* reads sit between the first and second PUSH_L;
 * presumably they must stay there so the offsets derived from FRAME_OFFSET
 * remain valid -- confirm against xform_args.h.
 */
309848b8605SmrgALIGNTEXT16
310848b8605SmrgGLOBL GLNAME( _mesa_3dnow_transform_points2_2d )
311848b8605SmrgHIDDEN(_mesa_3dnow_transform_points2_2d)
312848b8605SmrgGLNAME( _mesa_3dnow_transform_points2_2d ):
313848b8605Smrg
314848b8605Smrg    PUSH_L    ( ESI )			/* save ESI (restored before RET)    */
315848b8605Smrg
316848b8605Smrg    MOV_L     ( ARG_DEST, ECX )		/* ECX = destination vector          */
317848b8605Smrg    MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below) */
318848b8605Smrg    MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector               */
319848b8605Smrg    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) )	/* dest size field = 2 */
320848b8605Smrg    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= VEC_SIZE_2 */
321848b8605Smrg    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )	/* EDX = source count    */
322848b8605Smrg    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count */
323848b8605Smrg
324848b8605Smrg    PUSH_L    ( EDI )			/* save EDI (restored before RET)    */
325848b8605Smrg
326848b8605Smrg    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data start     */
327848b8605Smrg    MOV_L     ( ESI, ECX )			/* ECX = matrix              */
328848b8605Smrg    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex counter      */
329848b8605Smrg    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
330848b8605Smrg    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data start   */
331848b8605Smrg
332848b8605Smrg    TEST_L    ( ESI, ESI )
333848b8605Smrg    JZ        ( LLBL( G3TP2R_3 ) )	/* count == 0 -> skip the loop       */
334848b8605Smrg
335848b8605Smrg    MOVQ      ( REGIND(ECX), MM0 )	/* m01             | m00             */
336848b8605Smrg    MOVQ      ( REGOFF(16, ECX), MM1 )	/* m11             | m10             */
337848b8605Smrg
338848b8605Smrg    MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */
339848b8605Smrg
340848b8605SmrgALIGNTEXT16
341848b8605SmrgLLBL( G3TP2R_2 ):
342848b8605Smrg
343848b8605Smrg    MOVD      ( REGIND(EAX), MM4 )	/*                 | x0              */
344848b8605Smrg    MOVD      ( REGOFF(4, EAX), MM5 )	/*                 | x1              */
345848b8605Smrg
346848b8605Smrg    PUNPCKLDQ ( MM4, MM4 )		/* x0              | x0              */
347848b8605Smrg    ADD_L     ( EDI, EAX )		/* next vertex                       */
348848b8605Smrg
349848b8605Smrg    PFMUL     ( MM0, MM4 )		/* x0*m01          | x0*m00          */
350848b8605Smrg    PUNPCKLDQ ( MM5, MM5 )		/* x1              | x1              */
351848b8605Smrg
352848b8605Smrg    PFMUL     ( MM1, MM5 )		/* x1*m11          | x1*m10          */
353848b8605Smrg    PFADD     ( MM2, MM4 )		/* x0*m01+m31      | x0*m00+m30      */
354848b8605Smrg
355848b8605Smrg    PFADD     ( MM5, MM4 )		/* x0*m01+x1*m11+m31 | x0*m00+x1*m10+m30 */
356848b8605Smrg    MOVQ      ( MM4, REGIND(EDX) )	/* write r1, r0                      */
357848b8605Smrg
358848b8605Smrg    ADD_L     ( CONST(16), EDX )	/* next r                            */
359848b8605Smrg    DEC_L     ( ESI )			/* decrement vertex counter          */
360848b8605Smrg
361848b8605Smrg    JNZ       ( LLBL( G3TP2R_2 ) )	/* cnt > 0 ? -> process next vertex  */
362848b8605Smrg
363848b8605SmrgLLBL( G3TP2R_3 ):
364848b8605Smrg
365848b8605Smrg    FEMMS				/* leave MMX/3DNow! state            */
366848b8605Smrg    POP_L     ( EDI )
367848b8605Smrg    POP_L     ( ESI )
368848b8605Smrg    RET
369848b8605Smrg
370848b8605Smrg
371848b8605Smrg
372848b8605Smrg
/*
 * _mesa_3dnow_transform_points2_2d_no_rot
 *
 * Transforms 2-component source points (x0, x1) using only the matrix
 * elements m00, m11, m30 and m31, and writes 2-component results (only 8
 * of the 16 bytes of each destination slot are written):
 *     r0 = x0*m00 + m30
 *     r1 = x1*m11 + m31
 *
 * Arguments are read through ARG_DEST, ARG_MATRIX and ARG_SOURCE.  The
 * destination's size field is set to 2, VEC_SIZE_2 is OR-ed into its flags
 * and its count is copied from the source.
 *
 * Loop registers:
 *     EAX = current source vertex (advanced by EDI, the source stride)
 *     EDX = current destination vertex (advanced by 16 bytes)
 *     ESI = remaining vertex count
 *     MM0 = m11 | m00,  MM2 = m31 | m30
 * ESI and EDI are pushed on entry and popped before RET.
 *
 * NOTE(review): all ARG_* reads sit between the first and second PUSH_L;
 * presumably they must stay there so the offsets derived from FRAME_OFFSET
 * remain valid -- confirm against xform_args.h.
 */
373848b8605SmrgALIGNTEXT16
374848b8605SmrgGLOBL GLNAME( _mesa_3dnow_transform_points2_2d_no_rot )
375848b8605SmrgHIDDEN(_mesa_3dnow_transform_points2_2d_no_rot)
376848b8605SmrgGLNAME( _mesa_3dnow_transform_points2_2d_no_rot ):
377848b8605Smrg
378848b8605Smrg    PUSH_L    ( ESI )			/* save ESI (restored before RET)    */
379848b8605Smrg
380848b8605Smrg    MOV_L     ( ARG_DEST, ECX )		/* ECX = destination vector          */
381848b8605Smrg    MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below) */
382848b8605Smrg    MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector               */
383848b8605Smrg    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) )	/* dest size field = 2 */
384848b8605Smrg    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= VEC_SIZE_2 */
385848b8605Smrg    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )	/* EDX = source count    */
386848b8605Smrg    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count */
387848b8605Smrg
388848b8605Smrg    PUSH_L    ( EDI )			/* save EDI (restored before RET)    */
389848b8605Smrg
390848b8605Smrg    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data start     */
391848b8605Smrg    MOV_L     ( ESI, ECX )			/* ECX = matrix              */
392848b8605Smrg    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex counter      */
393848b8605Smrg    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
394848b8605Smrg    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data start   */
395848b8605Smrg
396848b8605Smrg    TEST_L    ( ESI, ESI )
397848b8605Smrg    JZ        ( LLBL( G3TP2NRR_3 ) )	/* count == 0 -> skip the loop       */
398848b8605Smrg
399848b8605Smrg    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
400848b8605Smrg    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */
401848b8605Smrg
402848b8605Smrg    MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */
403848b8605Smrg
404848b8605SmrgALIGNTEXT16
405848b8605SmrgLLBL( G3TP2NRR_2 ):
406848b8605Smrg
407848b8605Smrg    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
408848b8605Smrg    ADD_L     ( EDI, EAX )		/* next vertex                       */
409848b8605Smrg
410848b8605Smrg    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
411848b8605Smrg    PFADD     ( MM2, MM4 )		/* x1*m11+m31      | x0*m00+m30      */
412848b8605Smrg
413848b8605Smrg    MOVQ      ( MM4, REGIND(EDX) )	/* write r1, r0                      */
414848b8605Smrg    ADD_L     ( CONST(16), EDX )	/* next r                            */
415848b8605Smrg
416848b8605Smrg    DEC_L     ( ESI )			/* decrement vertex counter          */
417848b8605Smrg    JNZ       ( LLBL( G3TP2NRR_2 ) )	/* cnt > 0 ? -> process next vertex  */
418848b8605Smrg
419848b8605SmrgLLBL( G3TP2NRR_3 ):
420848b8605Smrg
421848b8605Smrg    FEMMS				/* leave MMX/3DNow! state            */
422848b8605Smrg    POP_L     ( EDI )
423848b8605Smrg    POP_L     ( ESI )
424848b8605Smrg    RET
425848b8605Smrg
426848b8605Smrg
427848b8605Smrg
428848b8605Smrg
/*
 * _mesa_3dnow_transform_points2_identity
 *
 * Copies 2-component source points (x0, x1) to the destination unchanged
 * (only 8 of the 16 bytes of each destination slot are written):
 *     r0 = x0
 *     r1 = x1
 *
 * Arguments are read through ARG_DEST, ARG_MATRIX and ARG_SOURCE.  The
 * destination's size field is set to 2, VEC_SIZE_2 is OR-ed into its flags
 * and its count is copied from the source.  The matrix pointer is loaded
 * into ECX like in the sibling routines but is never dereferenced here.
 *
 * Loop registers:
 *     EAX = current source vertex (advanced by EDI, the source stride)
 *     EDX = current destination vertex (advanced by 16 bytes)
 *     ESI = remaining vertex count
 * ESI and EDI are pushed on entry and popped before RET.
 *
 * Labels follow the convention of the other routines in this file:
 * G3TPIR_2 is the loop head and G3TPIR_3 is the exit.  The zero-count JZ
 * must target the exit: entering the loop with ESI == 0 would make DEC_L
 * wrap the counter and copy ~2^32 vertices.
 *
 * NOTE(review): all ARG_* reads sit between the first and second PUSH_L;
 * presumably they must stay there so the offsets derived from FRAME_OFFSET
 * remain valid -- confirm against xform_args.h.
 */
429848b8605SmrgALIGNTEXT16
430848b8605SmrgGLOBL GLNAME( _mesa_3dnow_transform_points2_identity )
431848b8605SmrgHIDDEN(_mesa_3dnow_transform_points2_identity)
432848b8605SmrgGLNAME( _mesa_3dnow_transform_points2_identity ):
433848b8605Smrg
434848b8605Smrg    PUSH_L    ( ESI )			/* save ESI (restored before RET)    */
435848b8605Smrg
436848b8605Smrg    MOV_L     ( ARG_DEST, ECX )		/* ECX = destination vector          */
437848b8605Smrg    MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (unused by the loop) */
438848b8605Smrg    MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector               */
439848b8605Smrg    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) )	/* dest size field = 2 */
440848b8605Smrg    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= VEC_SIZE_2 */
441848b8605Smrg    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )	/* EDX = source count    */
442848b8605Smrg    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count */
443848b8605Smrg
444848b8605Smrg    PUSH_L    ( EDI )			/* save EDI (restored before RET)    */
445848b8605Smrg
446848b8605Smrg    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data start     */
447848b8605Smrg    MOV_L     ( ESI, ECX )			/* ECX = matrix              */
448848b8605Smrg    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex counter      */
449848b8605Smrg    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
450848b8605Smrg    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data start   */
451848b8605Smrg
452848b8605Smrg    TEST_L    ( ESI, ESI )
453848b8605Smrg    JZ        ( LLBL( G3TPIR_3 ) )	/* count == 0 -> skip the loop       */
454848b8605Smrg
455848b8605SmrgALIGNTEXT16
456848b8605SmrgLLBL( G3TPIR_2 ):
457848b8605Smrg
458848b8605Smrg    MOVQ      ( REGIND(EAX), MM0 )	/* x1              | x0              */
459848b8605Smrg    ADD_L     ( EDI, EAX )		/* next vertex                       */
460848b8605Smrg
461848b8605Smrg    MOVQ      ( MM0, REGIND(EDX) )	/* r1              | r0              */
462848b8605Smrg    ADD_L     ( CONST(16), EDX )	/* next r                            */
463848b8605Smrg
464848b8605Smrg    DEC_L     ( ESI )			/* decrement vertex counter          */
465848b8605Smrg    JNZ       ( LLBL( G3TPIR_2 ) )	/* cnt > 0 ? -> process next vertex  */
466848b8605Smrg
467848b8605SmrgLLBL( G3TPIR_3 ):
468848b8605Smrg
469848b8605Smrg    FEMMS				/* leave MMX/3DNow! state            */
470848b8605Smrg    POP_L     ( EDI )
471848b8605Smrg    POP_L     ( ESI )
472848b8605Smrg    RET
473848b8605Smrg#endif
474848b8605Smrg
/*
 * On ELF/Linux builds, emit an empty .note.GNU-stack section so the GNU
 * toolchain treats this object as not requiring an executable stack.
 */
475848b8605Smrg#if defined (__ELF__) && defined (__linux__)
476848b8605Smrg	.section .note.GNU-stack,"",%progbits
477848b8605Smrg#endif
478