/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifdef USE_3DNOW_ASM
#include "assyntax.h"
#include "matypes.h"
#include "xform_args.h"

    SEG_TEXT

/*
 * Every routine in this file pushes ESI (4 bytes) and only then reads its
 * arguments through ARG_DEST / ARG_MATRIX / ARG_SOURCE; the second push
 * (EDI) happens after the last ARG_* access.
 * NOTE(review): the ARG_* macros come from xform_args.h and presumably add
 * FRAME_OFFSET to their stack displacement -- confirm there before changing
 * the number or order of pushes in any prologue.
 */
#define FRAME_OFFSET	4


/*
 * _mesa_3dnow_transform_points3_general
 *
 * Transform 3-component points by a full 4x4 matrix, writing 4-component
 * results.  For every source vertex (x0, x1, x2):
 *
 *     r0 = x0*m0 + x1*m4 + x2*m8  + m12
 *     r1 = x0*m1 + x1*m5 + x2*m9  + m13
 *     r2 = x0*m2 + x1*m6 + x2*m10 + m14
 *     r3 = x0*m3 + x1*m7 + x2*m11 + m15
 *
 * Inputs are taken through ARG_DEST, ARG_MATRIX and ARG_SOURCE.  The
 * destination descriptor receives size 4, the VEC_SIZE_4 flag bits and the
 * source's count.  Nothing is transformed when the count is zero.
 *
 * Loop registers:
 *     EAX  current source vertex, advanced by the source stride (EDI)
 *     EDX  current destination vertex, advanced by 16 bytes
 *     ECX  matrix base
 *     ESI  vertices still to process
 * ESI/EDI are saved and restored.  EAX, ECX, EDX and MM0-MM5 are
 * overwritten; FEMMS is issued before returning.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points3_general )
HIDDEN(_mesa_3dnow_transform_points3_general)
GLNAME( _mesa_3dnow_transform_points3_general ):

    PUSH_L    ( ESI )

    /* All ARG_* reads must stay between the two pushes (see FRAME_OFFSET). */
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )		/* dest size = 4        */
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= SIZE_4 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )		/* dest count = source count */

    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data              */
    MOV_L     ( ESI, ECX )			/* ECX = matrix                 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count           */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride          */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data            */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPGR_2 ) )		/* empty input: skip the loop   */

    PREFETCHW ( REGIND(EDX) )

ALIGNTEXT16
LLBL( G3TPGR_1 ):

    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 output vertices ahead  */

    MOVQ      ( REGIND(EAX), MM0 )	/* x1              | x0              */
    MOVD      ( REGOFF(8, EAX), MM2 )	/*                 | x2              */

    ADD_L     ( EDI, EAX )		/* next source vertex                */
    PREFETCH  ( REGIND(EAX) )

    MOVQ      ( MM0, MM1 )		/* x1              | x0              */
    PUNPCKLDQ ( MM2, MM2 )		/* x2              | x2              */

    PUNPCKLDQ ( MM0, MM0 )		/* x0              | x0              */
    MOVQ      ( MM2, MM5 )		/* x2              | x2              */

    PUNPCKHDQ ( MM1, MM1 )		/* x1              | x1              */
    PFMUL     ( REGOFF(32, ECX), MM2 )	/* x2*m9           | x2*m8           */

    MOVQ      ( MM0, MM3 )		/* x0              | x0              */
    PFMUL     ( REGOFF(40, ECX), MM5 )	/* x2*m11          | x2*m10          */

    MOVQ      ( MM1, MM4 )		/* x1              | x1              */
    PFMUL     ( REGIND(ECX), MM0 )	/* x0*m1           | x0*m0           */

    PFADD     ( REGOFF(48, ECX), MM2 )	/* x2*m9+m13       | x2*m8+m12       */
    PFMUL     ( REGOFF(16, ECX), MM1 )	/* x1*m5           | x1*m4           */

    PFADD     ( REGOFF(56, ECX), MM5 )	/* x2*m11+m15      | x2*m10+m14      */
    PFADD     ( MM0, MM1 )		/* x0*m1+x1*m5     | x0*m0+x1*m4     */

    PFMUL     ( REGOFF(8, ECX), MM3 )	/* x0*m3           | x0*m2           */
    PFADD     ( MM1, MM2 )		/* r1              | r0              */

    PFMUL     ( REGOFF(24, ECX), MM4 )	/* x1*m7           | x1*m6           */
    ADD_L     ( CONST(16), EDX )	/* next output vertex                */

    PFADD     ( MM3, MM4 )		/* x0*m3+x1*m7     | x0*m2+x1*m6     */
    MOVQ      ( MM2, REGOFF(-16, EDX) )	/* write r0, r1                      */

    PFADD     ( MM4, MM5 )		/* r3              | r2              */
    MOVQ      ( MM5, REGOFF(-8, EDX) )	/* write r2, r3                      */

    DEC_L     ( ESI )			/* decrement vertex counter          */
    JNZ       ( LLBL( G3TPGR_1 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TPGR_2 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET




/*
 * _mesa_3dnow_transform_points3_perspective
 *
 * Transform 3-component points by a perspective-form matrix, writing
 * 4-component results.  Only the matrix entries a perspective matrix can
 * populate are read.  For every source vertex (x0, x1, x2):
 *
 *     r0 = x0*m0 + x2*m8
 *     r1 = x1*m5 + x2*m9
 *     r2 = x2*m10 + m14
 *     r3 = -x2
 *
 * Inputs are taken through ARG_DEST, ARG_MATRIX and ARG_SOURCE.  The
 * destination descriptor receives size 4, the VEC_SIZE_4 flag bits and the
 * source's count.  Nothing is transformed when the count is zero.
 *
 * Loop registers:
 *     EAX  current source vertex, advanced by the source stride (EDI)
 *     EDX  current destination vertex, advanced by 16 bytes
 *     ESI  vertices still to process
 *     MM0-MM3  matrix entries hoisted out of the loop
 * ESI/EDI are saved and restored.  EAX, ECX, EDX and MM0-MM7 are
 * overwritten; FEMMS is issued before returning.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points3_perspective )
HIDDEN(_mesa_3dnow_transform_points3_perspective)
GLNAME( _mesa_3dnow_transform_points3_perspective ):

    PUSH_L    ( ESI )

    /* All ARG_* reads must stay between the two pushes (see FRAME_OFFSET). */
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )		/* dest size = 4        */
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= SIZE_4 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )		/* dest count = source count */

    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data              */
    MOV_L     ( ESI, ECX )			/* ECX = matrix                 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count           */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride          */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data            */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPPR_2 ) )		/* empty input: skip the loop   */

    PREFETCH  ( REGIND(EAX) )
    PREFETCHW ( REGIND(EDX) )

    /* Loop-invariant matrix entries. */
    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */

    MOVQ      ( REGOFF(32, ECX), MM1 )	/* m21             | m20             */
    MOVD      ( REGOFF(40, ECX), MM2 )	/*                 | m22             */

    MOVD      ( REGOFF(56, ECX), MM3 )	/*                 | m32             */

ALIGNTEXT16
LLBL( G3TPPR_1 ):

    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 output vertices ahead  */

    MOVD      ( REGOFF(8, EAX), MM5 )	/*                 | x2              */
    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */

    ADD_L     ( EDI, EAX )		/* next source vertex                */
    PREFETCH  ( REGIND(EAX) )

    PXOR      ( MM7, MM7 )		/* 0               | 0               */
    MOVQ      ( MM5, MM6 )		/*                 | x2              */

    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
    PFSUB     ( MM5, MM7 )		/*                 | -x2             */

    PFMUL     ( MM2, MM6 )		/*                 | x2*m22          */
    PUNPCKLDQ ( MM5, MM5 )		/* x2              | x2              */

    ADD_L     ( CONST(16), EDX )	/* next output vertex                */
    PFMUL     ( MM1, MM5 )		/* x2*m21          | x2*m20          */

    PFADD     ( MM3, MM6 )		/*                 | x2*m22+m32      */
    PFADD     ( MM4, MM5 )		/* x1*m11+x2*m21   | x0*m00+x2*m20   */

    MOVQ      ( MM5, REGOFF(-16, EDX) )	/* write r0, r1                      */
    MOVD      ( MM6, REGOFF(-8, EDX) )	/* write r2                          */

    MOVD      ( MM7, REGOFF(-4, EDX) )	/* write r3 (= -x2)                  */

    DEC_L     ( ESI )			/* decrement vertex counter          */
    JNZ       ( LLBL( G3TPPR_1 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TPPR_2 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET




/*
 * _mesa_3dnow_transform_points3_3d
 *
 * Transform 3-component points by a 3D (affine) matrix, writing
 * 3-component results.  For every source vertex (x0, x1, x2):
 *
 *     r0 = x0*m0 + x1*m4 + x2*m8  + m12
 *     r1 = x0*m1 + x1*m5 + x2*m9  + m13
 *     r2 = x0*m2 + x1*m6 + x2*m10 + m14
 *
 * The fourth float of each 16-byte output slot is not written.
 *
 * Inputs are taken through ARG_DEST, ARG_MATRIX and ARG_SOURCE.  The
 * destination descriptor receives size 3, the VEC_SIZE_3 flag bits and the
 * source's count.  Nothing is transformed when the count is zero.
 *
 * Loop registers:
 *     EAX  current source vertex, advanced by the source stride (EDI)
 *     EDX  current destination vertex, advanced by 16 bytes
 *     ECX  matrix base
 *     ESI  vertices still to process
 *     MM7  m6 | m2, hoisted out of the loop
 * ESI/EDI are saved and restored.  EAX, ECX, EDX, MM0-MM4 and MM7 are
 * overwritten; FEMMS is issued before returning.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points3_3d )
HIDDEN(_mesa_3dnow_transform_points3_3d)
GLNAME( _mesa_3dnow_transform_points3_3d ):

    PUSH_L    ( ESI )

    /* All ARG_* reads must stay between the two pushes (see FRAME_OFFSET). */
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )		/* dest size = 3        */
    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= SIZE_3 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )		/* dest count = source count */

    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data              */
    MOV_L     ( ESI, ECX )			/* ECX = matrix                 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count           */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride          */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data            */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP3R_2 ) )		/* empty input: skip the loop   */

    PREFETCH  ( REGIND(EAX) )
    PREFETCHW ( REGIND(EDX) )		/* EDX is written: use the write hint,
					 * as every sibling routine does     */

    MOVD      ( REGOFF(8, ECX), MM7 )	/*                 | m2              */
    PUNPCKLDQ ( REGOFF(24, ECX), MM7 )	/* m6              | m2              */


ALIGNTEXT16
LLBL( G3TP3R_1 ):

    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 output vertices ahead  */

    MOVQ      ( REGIND(EAX), MM0 )	/* x1              | x0              */
    MOVD      ( REGOFF(8, EAX), MM1 )	/*                 | x2              */

    ADD_L     ( EDI, EAX )		/* next source vertex                */
    PREFETCH  ( REGIND(EAX) )

    MOVQ      ( MM0, MM2 )		/* x1              | x0              */
    ADD_L     ( CONST(16), EDX )	/* next output vertex                */

    PUNPCKLDQ ( MM2, MM2 )		/* x0              | x0              */
    MOVQ      ( MM0, MM3 )		/* x1              | x0              */

    PFMUL     ( REGIND(ECX), MM2 )	/* x0*m1           | x0*m0           */
    PUNPCKHDQ ( MM3, MM3 )		/* x1              | x1              */

    MOVQ      ( MM1, MM4 )		/*                 | x2              */
    PFMUL     ( REGOFF(16, ECX), MM3 )	/* x1*m5           | x1*m4           */

    PUNPCKLDQ ( MM4, MM4 )		/* x2              | x2              */
    PFADD     ( MM2, MM3 )		/* x0*m1+x1*m5     | x0*m0+x1*m4     */

    PFMUL     ( REGOFF(32, ECX), MM4 )	/* x2*m9           | x2*m8           */
    PFADD     ( REGOFF(48, ECX), MM3 )	/* x0*m1+x1*m5+m13 | x0*m0+x1*m4+m12 */

    PFMUL     ( MM7, MM0 )		/* x1*m6           | x0*m2           */
    PFADD     ( MM4, MM3 )		/* r1              | r0              */

    PFMUL     ( REGOFF(40, ECX), MM1 )	/*                 | x2*m10          */
    PUNPCKLDQ ( REGOFF(56, ECX), MM1 )	/* m14             | x2*m10          */

    PFACC     ( MM0, MM1 )		/* x0*m2+x1*m6     | x2*m10+m14      */

    MOVQ      ( MM3, REGOFF(-16, EDX) )	/* write r0, r1                      */
    PFACC     ( MM1, MM1 )		/*                 | r2              */

    MOVD      ( MM1, REGOFF(-8, EDX) )	/* write r2                          */

    DEC_L     ( ESI )			/* decrement vertex counter          */
    JNZ       ( LLBL( G3TP3R_1 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TP3R_2 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET




/*
 * _mesa_3dnow_transform_points3_3d_no_rot
 *
 * Transform 3-component points by a 3D matrix that has no rotation part
 * (scale and translate only), writing 3-component results.  For every
 * source vertex (x0, x1, x2):
 *
 *     r0 = x0*m0  + m12
 *     r1 = x1*m5  + m13
 *     r2 = x2*m10 + m14
 *
 * The fourth float of each 16-byte output slot is not written.
 *
 * Inputs are taken through ARG_DEST, ARG_MATRIX and ARG_SOURCE.  The
 * destination descriptor receives size 3, the VEC_SIZE_3 flag bits and the
 * source's count.  Nothing is transformed when the count is zero.
 *
 * Loop registers:
 *     EAX  current source vertex, advanced by the source stride (EDI)
 *     EDX  current destination vertex, advanced by 16 bytes
 *     ESI  vertices still to process
 *     MM0-MM3  matrix entries hoisted out of the loop
 * ESI/EDI are saved and restored.  EAX, ECX, EDX and MM0-MM5 are
 * overwritten; FEMMS is issued before returning.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points3_3d_no_rot )
HIDDEN(_mesa_3dnow_transform_points3_3d_no_rot)
GLNAME( _mesa_3dnow_transform_points3_3d_no_rot ):

    PUSH_L    ( ESI )

    /* All ARG_* reads must stay between the two pushes (see FRAME_OFFSET). */
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )		/* dest size = 3        */
    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= SIZE_3 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )		/* dest count = source count */

    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data              */
    MOV_L     ( ESI, ECX )			/* ECX = matrix                 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count           */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride          */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data            */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP3NRR_2 ) )		/* empty input: skip the loop   */

    PREFETCH  ( REGIND(EAX) )
    PREFETCHW ( REGIND(EDX) )

    /* Loop-invariant matrix entries. */
    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */

    MOVD      ( REGOFF(40, ECX), MM2 )	/*                 | m22             */
    PUNPCKLDQ ( MM2, MM2 )		/* m22             | m22             */

    MOVQ      ( REGOFF(48, ECX), MM1 )	/* m31             | m30             */
    MOVD      ( REGOFF(56, ECX), MM3 )	/*                 | m32             */

    PUNPCKLDQ ( MM3, MM3 )		/* m32             | m32             */


ALIGNTEXT16
LLBL( G3TP3NRR_1 ):

    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 output vertices ahead  */

    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
    MOVD      ( REGOFF(8, EAX), MM5 )	/*                 | x2              */

    ADD_L     ( EDI, EAX )		/* next source vertex                */
    PREFETCH  ( REGIND(EAX) )		/* source is only read: plain
					 * prefetch, as in sibling routines  */

    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */

    PFADD     ( MM1, MM4 )		/* x1*m11+m31      | x0*m00+m30      */
    PFMUL     ( MM2, MM5 )		/*                 | x2*m22          */

    PFADD     ( MM3, MM5 )		/*                 | x2*m22+m32      */
    MOVQ      ( MM4, REGIND(EDX) )	/* write r0, r1                      */

    ADD_L     ( CONST(16), EDX )	/* next output vertex                */
    DEC_L     ( ESI )			/* decrement vertex counter          */

    /* MOVD leaves EFLAGS alone, so JNZ still tests the DEC_L result. */
    MOVD      ( MM5, REGOFF(-8, EDX) )	/* write r2                          */
    JNZ       ( LLBL( G3TP3NRR_1 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TP3NRR_2 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET




/*
 * _mesa_3dnow_transform_points3_2d
 *
 * Transform 3-component points by a 2D matrix, writing 3-component
 * results.  The third component passes through unchanged.  For every
 * source vertex (x0, x1, x2):
 *
 *     r0 = x0*m0 + x1*m4 + m12
 *     r1 = x0*m1 + x1*m5 + m13
 *     r2 = x2
 *
 * The fourth float of each 16-byte output slot is not written.
 *
 * Inputs are taken through ARG_DEST, ARG_MATRIX and ARG_SOURCE.  The
 * destination descriptor receives size 3, the VEC_SIZE_3 flag bits and the
 * source's count.  Nothing is transformed when the count is zero.
 *
 * Loop registers:
 *     EAX  current source vertex, advanced by the source stride (EDI)
 *     EDX  current destination vertex, advanced by 16 bytes
 *     ESI  vertices still to process
 *     MM0-MM2  matrix entries hoisted out of the loop
 * ESI/EDI are saved and restored.  EAX, ECX, EDX and MM0-MM5 are
 * overwritten; FEMMS is issued before returning.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points3_2d )
HIDDEN(_mesa_3dnow_transform_points3_2d)
GLNAME( _mesa_3dnow_transform_points3_2d ):

    PUSH_L    ( ESI )

    /* All ARG_* reads must stay between the two pushes (see FRAME_OFFSET). */
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )		/* dest size = 3        */
    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= SIZE_3 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )		/* dest count = source count */

    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data              */
    MOV_L     ( ESI, ECX )			/* ECX = matrix                 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count           */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride          */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data            */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP2R_3) )		/* empty input: skip the loop   */

    PREFETCH  ( REGIND(EAX) )
    PREFETCHW ( REGIND(EDX) )

    /* Loop-invariant matrix entries. */
    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
    PUNPCKLDQ ( REGOFF(16, ECX), MM0 )	/* m10             | m00             */

    MOVD      ( REGOFF(4, ECX), MM1 )	/*                 | m01             */
    PUNPCKLDQ ( REGOFF(20, ECX), MM1 )	/* m11             | m01             */

    MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */

ALIGNTEXT16
LLBL( G3TP2R_2 ):

    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 output vertices ahead  */

    MOVQ      ( REGIND(EAX), MM3 )	/* x1              | x0              */
    MOVD      ( REGOFF(8, EAX), MM5 )	/*                 | x2              */

    ADD_L     ( EDI, EAX )		/* next source vertex                */
    PREFETCH  ( REGIND(EAX) )

    MOVQ      ( MM3, MM4 )		/* x1              | x0              */
    PFMUL     ( MM0, MM3 )		/* x1*m10          | x0*m00          */

    ADD_L     ( CONST(16), EDX )	/* next output vertex                */
    PFMUL     ( MM1, MM4 )		/* x1*m11          | x0*m01          */

    /* PFACC: low = sum of MM3's halves, high = sum of MM4's halves. */
    PFACC     ( MM4, MM3 )		/* x0*m01+x1*m11   | x0*m00+x1*m10   */
    MOVD      ( MM5, REGOFF(-8, EDX) )	/* write r2 (=x2)                    */

    PFADD     ( MM2, MM3 )		/* x0*...*m11+m31  | x0*...*m10+m30  */
    MOVQ      ( MM3, REGOFF(-16, EDX) )	/* write r0, r1                      */

    DEC_L     ( ESI )			/* decrement vertex counter          */
    JNZ       ( LLBL( G3TP2R_2 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TP2R_3 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET




/*
 * _mesa_3dnow_transform_points3_2d_no_rot
 *
 * Transform 3-component points by a 2D matrix that has no rotation part
 * (scale and translate only), writing 3-component results.  The third
 * component passes through unchanged.  For every source vertex
 * (x0, x1, x2):
 *
 *     r0 = x0*m0 + m12
 *     r1 = x1*m5 + m13
 *     r2 = x2
 *
 * The fourth float of each 16-byte output slot is not written.
 *
 * Inputs are taken through ARG_DEST, ARG_MATRIX and ARG_SOURCE.  The
 * destination descriptor receives size 3, the VEC_SIZE_3 flag bits and the
 * source's count.  Nothing is transformed when the count is zero.
 *
 * Loop registers:
 *     EAX  current source vertex, advanced by the source stride (EDI)
 *     EDX  current destination vertex, advanced by 16 bytes
 *     ESI  vertices still to process
 *     MM0, MM1  matrix entries hoisted out of the loop
 * ESI/EDI are saved and restored.  EAX, ECX, EDX, MM0, MM1, MM4 and MM5
 * are overwritten; FEMMS is issued before returning.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points3_2d_no_rot )
HIDDEN(_mesa_3dnow_transform_points3_2d_no_rot)
GLNAME( _mesa_3dnow_transform_points3_2d_no_rot ):

    PUSH_L    ( ESI )

    /* All ARG_* reads must stay between the two pushes (see FRAME_OFFSET). */
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )		/* dest size = 3        */
    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= SIZE_3 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )		/* dest count = source count */

    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data              */
    MOV_L     ( ESI, ECX )			/* ECX = matrix                 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count           */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride          */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data            */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP2NRR_2 ) )		/* empty input: skip the loop   */

    PREFETCH  ( REGIND(EAX) )
    PREFETCHW ( REGIND(EDX) )

    /* Loop-invariant matrix entries. */
    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */

    MOVQ      ( REGOFF(48, ECX), MM1 )	/* m31             | m30             */


ALIGNTEXT16
LLBL( G3TP2NRR_1 ):

    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 output vertices ahead  */

    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
    MOVD      ( REGOFF(8, EAX), MM5 )	/*                 | x2              */

    ADD_L     ( EDI, EAX )		/* next source vertex                */
    PREFETCH  ( REGIND(EAX) )

    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
    ADD_L     ( CONST(16), EDX )	/* next output vertex                */

    PFADD     ( MM1, MM4 )		/* x1*m11+m31      | x0*m00+m30      */

    MOVQ      ( MM4, REGOFF(-16, EDX) )	/* write r0, r1                      */
    MOVD      ( MM5, REGOFF(-8, EDX) )	/* write r2 (=x2)                    */

    DEC_L     ( ESI )			/* decrement vertex counter          */
    JNZ       ( LLBL( G3TP2NRR_1 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TP2NRR_2 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET




/*
 * _mesa_3dnow_transform_points3_identity
 *
 * Identity transform: copy the three components of every source vertex to
 * the destination unchanged (r0 = x0, r1 = x1, r2 = x2).  The matrix
 * argument is not read.  The fourth float of each 16-byte output slot is
 * not written.
 *
 * Inputs are taken through ARG_DEST and ARG_SOURCE.  The destination
 * descriptor receives size 3, the VEC_SIZE_3 flag bits and the source's
 * count.  Nothing is copied when the count is zero.
 *
 * Loop registers:
 *     EAX  current source vertex, advanced by the source stride (EDI)
 *     EDX  current destination vertex, advanced by 16 bytes
 *     ESI  vertices still to process
 * ESI/EDI are saved and restored.  EAX, ECX, EDX, MM0 and MM1 are
 * overwritten; FEMMS is issued before returning.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points3_identity )
HIDDEN(_mesa_3dnow_transform_points3_identity)
GLNAME( _mesa_3dnow_transform_points3_identity ):

    PUSH_L    ( ESI )

    /* All ARG_* reads must stay between the two pushes (see FRAME_OFFSET). */
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )		/* dest size = 3        */
    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= SIZE_3 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )		/* dest count = source count */

    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data              */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count           */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride          */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data            */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPIR_2 ) )		/* empty input: skip the loop   */

    PREFETCHW ( REGIND(EDX) )

ALIGNTEXT16
LLBL( G3TPIR_1 ):

    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 output vertices ahead  */

    MOVQ      ( REGIND(EAX), MM0 )	/* x1              | x0              */
    MOVD      ( REGOFF(8, EAX), MM1 )	/*                 | x2              */

    ADD_L     ( EDI, EAX )		/* next source vertex                */
    ADD_L     ( CONST(16), EDX )	/* next output vertex                */

    /* MOVQ/MOVD leave EFLAGS alone, so JNZ still tests the DEC_L result. */
    DEC_L     ( ESI )			/* decrement vertex counter          */
    MOVQ      ( MM0, REGOFF(-16, EDX) )	/* r1              | r0              */

    MOVD      ( MM1, REGOFF(-8, EDX) )	/*                 | r2              */
    JNZ       ( LLBL( G3TPIR_1 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TPIR_2 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
#endif /* USE_3DNOW_ASM */

/*
 * Emit an empty .note.GNU-stack section so the linker does not mark the
 * stack executable on account of this object.  Kept outside the
 * USE_3DNOW_ASM guard so it is emitted even when the routines are not.
 */
#if defined (__ELF__) && defined (__linux__)
	.section .note.GNU-stack,"",%progbits
#endif