/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * 3DNow! routines that transform 3-component points.  Everything up to the
 * matching #endif is assembled only when USE_3DNOW_ASM is defined.
 */
#if defined(USE_3DNOW_ASM)
#include "assyntax.h"
/* NOTE(review): presumably the pointer size (bytes) m_vector_asm.h uses to
 * derive the V4F_* field offsets -- confirm in that header. */
#define MATH_ASM_PTR_SIZE 4
#include "math/m_vector_asm.h"
#include "xform_args.h"

    SEG_TEXT

/* Every routine below reads its ARG_* operands with exactly one register
 * (ESI, 4 bytes) pushed.
 * NOTE(review): presumably xform_args.h adds FRAME_OFFSET to the
 * ESP-relative argument offsets to compensate -- confirm. */
#define FRAME_OFFSET 4


/*
 * _mesa_3dnow_transform_points3_general
 *
 * Multiplies each 3-component source point by a full 4x4 matrix and writes
 * a 4-component result (mN = float at byte offset 4*N from the matrix):
 *
 *     r0 = x0*m0 + x1*m4 + x2*m8  + m12
 *     r1 = x0*m1 + x1*m5 + x2*m9  + m13
 *     r2 = x0*m2 + x1*m6 + x2*m10 + m14
 *     r3 = x0*m3 + x1*m7 + x2*m11 + m15
 *
 * Destination header: V4F_SIZE is set to 4, VEC_SIZE_4 is OR-ed into
 * V4F_FLAGS and V4F_COUNT is copied from the source.  Results are stored
 * 16 bytes apart starting at the destination's V4F_START; source points are
 * read V4F_STRIDE bytes apart starting at the source's V4F_START.
 *
 * The three arguments are read through ARG_DEST, ARG_MATRIX and ARG_SOURCE
 * while ESI is the only register pushed; EDI is pushed afterwards.
 * NOTE(review): the ARG_* macros presumably address the stack relative to
 * ESP using FRAME_OFFSET -- confirm in xform_args.h before reordering the
 * pushes or the argument loads.
 *
 * Loop registers:
 *     EAX = source cursor     EDI = source stride in bytes
 *     EDX = dest cursor       ESI = points still to process
 *     ECX = matrix
 * Lane comments are written "high dword | low dword".
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points3_general )
HIDDEN(_mesa_3dnow_transform_points3_general)
GLNAME( _mesa_3dnow_transform_points3_general ):
    _CET_ENDBR
    PUSH_L    ( ESI )

    /* Collect the arguments while only ESI sits on the stack. */
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )

    /* Fill in the destination header. */
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L    ( EDI )

    /* Move to the loop register assignment. */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* EDX = first dest point    */
    MOV_L     ( ESI, ECX )                      /* ECX = matrix              */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* ESI = point count         */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* EAX = first source point  */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPGR_DONE ) )           /* nothing to transform      */

    PREFETCHW ( REGIND(EDX) )

ALIGNTEXT16
LLBL( G3TPGR_LOOP ):

    PREFETCHW ( REGOFF(32, EDX) )       /* dest, two points ahead            */

    /* Load the point and step the source cursor. */
    MOVQ      ( REGIND(EAX), MM0 )      /* x1              | x0              */
    MOVD      ( REGOFF(8, EAX), MM2 )   /*                 | x2              */
    ADD_L     ( EDI, EAX )
    PREFETCH  ( REGIND(EAX) )           /* next source point                 */

    /* Broadcast each coordinate into both lanes, twice over. */
    MOVQ      ( MM0, MM1 )              /* x1              | x0              */
    PUNPCKLDQ ( MM0, MM0 )              /* x0              | x0              */
    PUNPCKHDQ ( MM1, MM1 )              /* x1              | x1              */
    PUNPCKLDQ ( MM2, MM2 )              /* x2              | x2              */
    MOVQ      ( MM0, MM3 )              /* x0              | x0              */
    MOVQ      ( MM1, MM4 )              /* x1              | x1              */
    MOVQ      ( MM2, MM5 )              /* x2              | x2              */

    /* r1 | r0 accumulates in MM2. */
    PFMUL     ( REGIND(ECX), MM0 )      /* x0*m1           | x0*m0           */
    PFMUL     ( REGOFF(16, ECX), MM1 )  /* x1*m5           | x1*m4           */
    PFMUL     ( REGOFF(32, ECX), MM2 )  /* x2*m9           | x2*m8           */
    PFADD     ( REGOFF(48, ECX), MM2 )  /* x2*m9+m13       | x2*m8+m12       */
    PFADD     ( MM0, MM1 )              /* x0*m1+x1*m5     | x0*m0+x1*m4     */
    PFADD     ( MM1, MM2 )              /* r1              | r0              */

    /* r3 | r2 accumulates in MM5. */
    PFMUL     ( REGOFF(8, ECX), MM3 )   /* x0*m3           | x0*m2           */
    PFMUL     ( REGOFF(24, ECX), MM4 )  /* x1*m7           | x1*m6           */
    PFMUL     ( REGOFF(40, ECX), MM5 )  /* x2*m11          | x2*m10          */
    PFADD     ( REGOFF(56, ECX), MM5 )  /* x2*m11+m15      | x2*m10+m14      */
    PFADD     ( MM3, MM4 )              /* x0*m3+x1*m7     | x0*m2+x1*m6     */
    PFADD     ( MM4, MM5 )              /* r3              | r2              */

    /* Store the result and step the dest cursor. */
    MOVQ      ( MM2, REGIND(EDX) )      /* r0, r1                            */
    MOVQ      ( MM5, REGOFF(8, EDX) )   /* r2, r3                            */
    ADD_L     ( CONST(16), EDX )

    DEC_L     ( ESI )
    JNZ       ( LLBL( G3TPGR_LOOP ) )   /* more points left                  */

LLBL( G3TPGR_DONE ):

    FEMMS                               /* leave MMX/3DNow! state            */
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET


/*
 * _mesa_3dnow_transform_points3_perspective
 *
 * Transforms 3-component points using only the matrix entries this routine
 * reads (mN = float at byte offset 4*N from the matrix) and writes a
 * 4-component result:
 *
 *     r0 = x0*m0  + x2*m8
 *     r1 = x1*m5  + x2*m9
 *     r2 = x2*m10 + m14
 *     r3 = -x2
 *
 * Destination header: V4F_SIZE is set to 4, VEC_SIZE_4 is OR-ed into
 * V4F_FLAGS and V4F_COUNT is copied from the source.  Results are stored
 * 16 bytes apart; source points are read V4F_STRIDE bytes apart.
 *
 * The three arguments are read through ARG_DEST, ARG_MATRIX and ARG_SOURCE
 * while ESI is the only register pushed; EDI is pushed afterwards.
 * NOTE(review): the ARG_* macros presumably address the stack relative to
 * ESP using FRAME_OFFSET -- confirm in xform_args.h before reordering the
 * pushes or the argument loads.
 *
 * Loop registers:
 *     EAX = source cursor     EDI = source stride in bytes
 *     EDX = dest cursor       ESI = points still to process
 *     MM0 = m5 | m0           MM1 = m9 | m8
 *     MM2 =  0 | m10          MM3 =  0 | m14
 * Lane comments are written "high dword | low dword".
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points3_perspective )
HIDDEN(_mesa_3dnow_transform_points3_perspective)
GLNAME( _mesa_3dnow_transform_points3_perspective ):
    _CET_ENDBR
    PUSH_L    ( ESI )

    /* Collect the arguments while only ESI sits on the stack. */
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )

    /* Fill in the destination header. */
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L    ( EDI )

    /* Move to the loop register assignment. */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* EDX = first dest point    */
    MOV_L     ( ESI, ECX )                      /* ECX = matrix              */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* ESI = point count         */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* EAX = first source point  */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPPR_DONE ) )           /* nothing to transform      */

    PREFETCH  ( REGIND(EAX) )
    PREFETCHW ( REGIND(EDX) )

    /* Matrix entries kept in registers for the whole loop. */
    MOVQ      ( REGOFF(32, ECX), MM1 )  /* m9              | m8              */
    MOVD      ( REGIND(ECX), MM0 )      /*                 | m0              */
    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )  /* m5              | m0              */
    MOVD      ( REGOFF(40, ECX), MM2 )  /*                 | m10             */
    MOVD      ( REGOFF(56, ECX), MM3 )  /*                 | m14             */

ALIGNTEXT16
LLBL( G3TPPR_LOOP ):

    PREFETCHW ( REGOFF(32, EDX) )       /* dest, two points ahead            */

    /* Load the point and step the source cursor. */
    MOVQ      ( REGIND(EAX), MM4 )      /* x1              | x0              */
    MOVD      ( REGOFF(8, EAX), MM5 )   /*                 | x2              */
    ADD_L     ( EDI, EAX )
    PREFETCH  ( REGIND(EAX) )           /* next source point                 */

    /* r3 = 0 - x2 and a scalar copy of x2, taken before x2 is broadcast. */
    MOVQ      ( MM5, MM6 )              /*                 | x2              */
    PXOR      ( MM7, MM7 )              /* 0               | 0               */
    PFSUB     ( MM5, MM7 )              /*                 | -x2             */

    /* r1 | r0 */
    PUNPCKLDQ ( MM5, MM5 )              /* x2              | x2              */
    PFMUL     ( MM0, MM4 )              /* x1*m5           | x0*m0           */
    PFMUL     ( MM1, MM5 )              /* x2*m9           | x2*m8           */
    PFADD     ( MM4, MM5 )              /* r1              | r0              */

    /* r2 */
    PFMUL     ( MM2, MM6 )              /*                 | x2*m10          */
    PFADD     ( MM3, MM6 )              /*                 | x2*m10+m14      */

    /* Store the result and step the dest cursor. */
    MOVQ      ( MM5, REGIND(EDX) )      /* r0, r1                            */
    MOVD      ( MM6, REGOFF(8, EDX) )   /* r2                                */
    MOVD      ( MM7, REGOFF(12, EDX) )  /* r3                                */
    ADD_L     ( CONST(16), EDX )

    DEC_L     ( ESI )
    JNZ       ( LLBL( G3TPPR_LOOP ) )   /* more points left                  */

LLBL( G3TPPR_DONE ):

    FEMMS                               /* leave MMX/3DNow! state            */
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET


/*
 * _mesa_3dnow_transform_points3_3d
 *
 * Transforms 3-component points by the upper three rows of results of a
 * matrix (mN = float at byte offset 4*N from the matrix) and writes a
 * 3-component result; the fourth float of each output slot is not written:
 *
 *     r0 = x0*m0 + x1*m4 + x2*m8  + m12
 *     r1 = x0*m1 + x1*m5 + x2*m9  + m13
 *     r2 = x0*m2 + x1*m6 + x2*m10 + m14
 *
 * Destination header: V4F_SIZE is set to 3, VEC_SIZE_3 is OR-ed into
 * V4F_FLAGS and V4F_COUNT is copied from the source.  Results are stored
 * 16 bytes apart; source points are read V4F_STRIDE bytes apart.
 *
 * The three arguments are read through ARG_DEST, ARG_MATRIX and ARG_SOURCE
 * while ESI is the only register pushed; EDI is pushed afterwards.
 * NOTE(review): the ARG_* macros presumably address the stack relative to
 * ESP using FRAME_OFFSET -- confirm in xform_args.h before reordering the
 * pushes or the argument loads.
 *
 * Loop registers:
 *     EAX = source cursor     EDI = source stride in bytes
 *     EDX = dest cursor       ESI = points still to process
 *     ECX = matrix            MM7 = m6 | m2
 * Lane comments are written "high dword | low dword".
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points3_3d )
HIDDEN(_mesa_3dnow_transform_points3_3d)
GLNAME( _mesa_3dnow_transform_points3_3d ):
    _CET_ENDBR
    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* EDX = first dest point    */
    MOV_L     ( ESI, ECX )                      /* ECX = matrix              */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* ESI = point count         */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* EAX = first source point  */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP3R_DONE ) )           /* nothing to transform      */

    PREFETCH  ( REGIND(EAX) )           /* source is only read               */
    PREFETCHW ( REGIND(EDX) )           /* dest is only written              */

    MOVD      ( REGOFF(8, ECX), MM7 )   /*                 | m2              */
    PUNPCKLDQ ( REGOFF(24, ECX), MM7 )  /* m6              | m2              */


ALIGNTEXT16
LLBL( G3TP3R_LOOP ):

    PREFETCHW ( REGOFF(32, EDX) )       /* dest, two points ahead            */

    MOVQ      ( REGIND(EAX), MM0 )      /* x1              | x0              */
    MOVD      ( REGOFF(8, EAX), MM1 )   /*                 | x2              */

    ADD_L     ( EDI, EAX )              /* step source cursor                */
    PREFETCH  ( REGIND(EAX) )           /* next source point                 */

    MOVQ      ( MM0, MM2 )              /* x1              | x0              */
    ADD_L     ( CONST(16), EDX )        /* step dest cursor                  */

    PUNPCKLDQ ( MM2, MM2 )              /* x0              | x0              */
    MOVQ      ( MM0, MM3 )              /* x1              | x0              */

    PFMUL     ( REGIND(ECX), MM2 )      /* x0*m1           | x0*m0           */
    PUNPCKHDQ ( MM3, MM3 )              /* x1              | x1              */

    MOVQ      ( MM1, MM4 )              /*                 | x2              */
    PFMUL     ( REGOFF(16, ECX), MM3 )  /* x1*m5           | x1*m4           */

    PUNPCKLDQ ( MM4, MM4 )              /* x2              | x2              */
    PFADD     ( MM2, MM3 )              /* x0*m1+x1*m5     | x0*m0+x1*m4     */

    PFMUL     ( REGOFF(32, ECX), MM4 )  /* x2*m9           | x2*m8           */
    PFADD     ( REGOFF(48, ECX), MM3 )  /* x0*m1+x1*m5+m13 | x0*m0+x1*m4+m12 */

    PFMUL     ( MM7, MM0 )              /* x1*m6           | x0*m2           */
    PFADD     ( MM4, MM3 )              /* r1              | r0              */

    PFMUL     ( REGOFF(40, ECX), MM1 )  /*                 | x2*m10          */
    PUNPCKLDQ ( REGOFF(56, ECX), MM1 )  /* m14             | x2*m10          */

    PFACC     ( MM0, MM1 )              /* x0*m2+x1*m6     | x2*m10+m14      */

    MOVQ      ( MM3, REGOFF(-16, EDX) ) /* r0, r1                            */
    PFACC     ( MM1, MM1 )              /* r2              | r2              */

    MOVD      ( MM1, REGOFF(-8, EDX) )  /* r2                                */

    DEC_L     ( ESI )
    JNZ       ( LLBL( G3TP3R_LOOP ) )   /* more points left                  */

LLBL( G3TP3R_DONE ):

    FEMMS                               /* leave MMX/3DNow! state            */
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET


/*
 * _mesa_3dnow_transform_points3_3d_no_rot
 *
 * Scales and translates 3-component points using only the matrix entries
 * this routine reads (mN = float at byte offset 4*N from the matrix) and
 * writes a 3-component result; the fourth float of each output slot is not
 * written:
 *
 *     r0 = x0*m0  + m12
 *     r1 = x1*m5  + m13
 *     r2 = x2*m10 + m14
 *
 * Destination header: V4F_SIZE is set to 3, VEC_SIZE_3 is OR-ed into
 * V4F_FLAGS and V4F_COUNT is copied from the source.  Results are stored
 * 16 bytes apart; source points are read V4F_STRIDE bytes apart.
 *
 * The three arguments are read through ARG_DEST, ARG_MATRIX and ARG_SOURCE
 * while ESI is the only register pushed; EDI is pushed afterwards.
 * NOTE(review): the ARG_* macros presumably address the stack relative to
 * ESP using FRAME_OFFSET -- confirm in xform_args.h before reordering the
 * pushes or the argument loads.
 *
 * Loop registers:
 *     EAX = source cursor     EDI = source stride in bytes
 *     EDX = dest cursor       ESI = points still to process
 *     MM0 = m5  | m0          MM1 = m13 | m12
 *     MM2 = m10 | m10         MM3 = m14 | m14
 * Lane comments are written "high dword | low dword".
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points3_3d_no_rot )
HIDDEN(_mesa_3dnow_transform_points3_3d_no_rot)
GLNAME( _mesa_3dnow_transform_points3_3d_no_rot ):
    _CET_ENDBR
    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* EDX = first dest point    */
    MOV_L     ( ESI, ECX )                      /* ECX = matrix              */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* ESI = point count         */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* EAX = first source point  */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP3NRR_DONE ) )         /* nothing to transform      */

    PREFETCH  ( REGIND(EAX) )           /* source is only read               */
    PREFETCHW ( REGIND(EDX) )           /* dest is only written              */

    MOVD      ( REGIND(ECX), MM0 )      /*                 | m0              */
    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )  /* m5              | m0              */

    MOVD      ( REGOFF(40, ECX), MM2 )  /*                 | m10             */
    PUNPCKLDQ ( MM2, MM2 )              /* m10             | m10             */

    MOVQ      ( REGOFF(48, ECX), MM1 )  /* m13             | m12             */
    MOVD      ( REGOFF(56, ECX), MM3 )  /*                 | m14             */

    PUNPCKLDQ ( MM3, MM3 )              /* m14             | m14             */


ALIGNTEXT16
LLBL( G3TP3NRR_LOOP ):

    PREFETCHW ( REGOFF(32, EDX) )       /* dest, two points ahead            */

    MOVQ      ( REGIND(EAX), MM4 )      /* x1              | x0              */
    MOVD      ( REGOFF(8, EAX), MM5 )   /*                 | x2              */

    ADD_L     ( EDI, EAX )              /* step source cursor                */
    PREFETCH  ( REGIND(EAX) )           /* next source point (read-only)     */

    PFMUL     ( MM0, MM4 )              /* x1*m5           | x0*m0           */

    PFADD     ( MM1, MM4 )              /* r1              | r0              */
    PFMUL     ( MM2, MM5 )              /*                 | x2*m10          */

    PFADD     ( MM3, MM5 )              /*                 | r2              */
    MOVQ      ( MM4, REGIND(EDX) )      /* r0, r1                            */

    ADD_L     ( CONST(16), EDX )        /* step dest cursor                  */
    DEC_L     ( ESI )                   /* sets ZF for the JNZ below         */

    MOVD      ( MM5, REGOFF(-8, EDX) )  /* r2 (MOVD leaves EFLAGS alone)     */
    JNZ       ( LLBL( G3TP3NRR_LOOP ) ) /* more points left                  */

LLBL( G3TP3NRR_DONE ):

    FEMMS                               /* leave MMX/3DNow! state            */
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET


/*
 * _mesa_3dnow_transform_points3_2d
 *
 * Applies a 2D rotation/scale plus translation to the first two components
 * of each 3-component point and passes the third through unchanged
 * (mN = float at byte offset 4*N from the matrix).  The fourth float of
 * each output slot is not written:
 *
 *     r0 = x0*m0 + x1*m4 + m12
 *     r1 = x0*m1 + x1*m5 + m13
 *     r2 = x2
 *
 * Destination header: V4F_SIZE is set to 3, VEC_SIZE_3 is OR-ed into
 * V4F_FLAGS and V4F_COUNT is copied from the source.  Results are stored
 * 16 bytes apart; source points are read V4F_STRIDE bytes apart.
 *
 * The three arguments are read through ARG_DEST, ARG_MATRIX and ARG_SOURCE
 * while ESI is the only register pushed; EDI is pushed afterwards.
 * NOTE(review): the ARG_* macros presumably address the stack relative to
 * ESP using FRAME_OFFSET -- confirm in xform_args.h before reordering the
 * pushes or the argument loads.
 *
 * Loop registers:
 *     EAX = source cursor     EDI = source stride in bytes
 *     EDX = dest cursor       ESI = points still to process
 *     MM0 = m4 | m0           MM1 = m5 | m1        MM2 = m13 | m12
 * Lane comments are written "high dword | low dword".
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points3_2d )
HIDDEN(_mesa_3dnow_transform_points3_2d)
GLNAME( _mesa_3dnow_transform_points3_2d ):
    _CET_ENDBR
    PUSH_L    ( ESI )

    /* Collect the arguments while only ESI sits on the stack. */
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )

    /* Fill in the destination header. */
    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L    ( EDI )

    /* Move to the loop register assignment. */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* EDX = first dest point    */
    MOV_L     ( ESI, ECX )                      /* ECX = matrix              */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* ESI = point count         */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* EAX = first source point  */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP2R_DONE ) )           /* nothing to transform      */

    PREFETCH  ( REGIND(EAX) )
    PREFETCHW ( REGIND(EDX) )

    /* Matrix entries kept in registers for the whole loop. */
    MOVQ      ( REGOFF(48, ECX), MM2 )  /* m13             | m12             */
    MOVD      ( REGIND(ECX), MM0 )      /*                 | m0              */
    PUNPCKLDQ ( REGOFF(16, ECX), MM0 )  /* m4              | m0              */
    MOVD      ( REGOFF(4, ECX), MM1 )   /*                 | m1              */
    PUNPCKLDQ ( REGOFF(20, ECX), MM1 )  /* m5              | m1              */

ALIGNTEXT16
LLBL( G3TP2R_LOOP ):

    PREFETCHW ( REGOFF(32, EDX) )       /* dest, two points ahead            */

    /* Load the point and step the source cursor. */
    MOVD      ( REGOFF(8, EAX), MM5 )   /*                 | x2              */
    MOVQ      ( REGIND(EAX), MM3 )      /* x1              | x0              */
    ADD_L     ( EDI, EAX )
    PREFETCH  ( REGIND(EAX) )           /* next source point                 */

    /* r1 | r0: two per-lane products, folded horizontally by PFACC. */
    MOVQ      ( MM3, MM4 )              /* x1              | x0              */
    PFMUL     ( MM0, MM3 )              /* x1*m4           | x0*m0           */
    PFMUL     ( MM1, MM4 )              /* x1*m5           | x0*m1           */
    PFACC     ( MM4, MM3 )              /* x0*m1+x1*m5     | x0*m0+x1*m4     */
    PFADD     ( MM2, MM3 )              /* r1              | r0              */

    /* Store the result and step the dest cursor. */
    MOVQ      ( MM3, REGIND(EDX) )      /* r0, r1                            */
    MOVD      ( MM5, REGOFF(8, EDX) )   /* r2 = x2                           */
    ADD_L     ( CONST(16), EDX )

    DEC_L     ( ESI )
    JNZ       ( LLBL( G3TP2R_LOOP ) )   /* more points left                  */

LLBL( G3TP2R_DONE ):

    FEMMS                               /* leave MMX/3DNow! state            */
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET


/*
 * _mesa_3dnow_transform_points3_2d_no_rot
 *
 * Scales and translates the first two components of each 3-component point
 * and passes the third through unchanged (mN = float at byte offset 4*N
 * from the matrix).  The fourth float of each output slot is not written:
 *
 *     r0 = x0*m0 + m12
 *     r1 = x1*m5 + m13
 *     r2 = x2
 *
 * Destination header: V4F_SIZE is set to 3, VEC_SIZE_3 is OR-ed into
 * V4F_FLAGS and V4F_COUNT is copied from the source.  Results are stored
 * 16 bytes apart; source points are read V4F_STRIDE bytes apart.
 *
 * The three arguments are read through ARG_DEST, ARG_MATRIX and ARG_SOURCE
 * while ESI is the only register pushed; EDI is pushed afterwards.
 * NOTE(review): the ARG_* macros presumably address the stack relative to
 * ESP using FRAME_OFFSET -- confirm in xform_args.h before reordering the
 * pushes or the argument loads.
 *
 * Loop registers:
 *     EAX = source cursor     EDI = source stride in bytes
 *     EDX = dest cursor       ESI = points still to process
 *     MM0 = m5 | m0           MM1 = m13 | m12
 * Lane comments are written "high dword | low dword".
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points3_2d_no_rot )
HIDDEN(_mesa_3dnow_transform_points3_2d_no_rot)
GLNAME( _mesa_3dnow_transform_points3_2d_no_rot ):
    _CET_ENDBR
    PUSH_L    ( ESI )

    /* Collect the arguments while only ESI sits on the stack. */
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )

    /* Fill in the destination header. */
    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L    ( EDI )

    /* Move to the loop register assignment. */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* EDX = first dest point    */
    MOV_L     ( ESI, ECX )                      /* ECX = matrix              */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* ESI = point count         */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* EAX = first source point  */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP2NRR_DONE ) )         /* nothing to transform      */

    PREFETCH  ( REGIND(EAX) )
    PREFETCHW ( REGIND(EDX) )

    /* Matrix entries kept in registers for the whole loop. */
    MOVQ      ( REGOFF(48, ECX), MM1 )  /* m13             | m12             */
    MOVD      ( REGIND(ECX), MM0 )      /*                 | m0              */
    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )  /* m5              | m0              */


ALIGNTEXT16
LLBL( G3TP2NRR_LOOP ):

    PREFETCHW ( REGOFF(32, EDX) )       /* dest, two points ahead            */

    /* Load the point and step the source cursor. */
    MOVD      ( REGOFF(8, EAX), MM5 )   /*                 | x2              */
    MOVQ      ( REGIND(EAX), MM4 )      /* x1              | x0              */
    ADD_L     ( EDI, EAX )
    PREFETCH  ( REGIND(EAX) )           /* next source point                 */

    /* r1 | r0 */
    PFMUL     ( MM0, MM4 )              /* x1*m5           | x0*m0           */
    PFADD     ( MM1, MM4 )              /* r1              | r0              */

    /* Store the result and step the dest cursor. */
    MOVQ      ( MM4, REGIND(EDX) )      /* r0, r1                            */
    MOVD      ( MM5, REGOFF(8, EDX) )   /* r2 = x2                           */
    ADD_L     ( CONST(16), EDX )

    DEC_L     ( ESI )
    JNZ       ( LLBL( G3TP2NRR_LOOP ) ) /* more points left                  */

LLBL( G3TP2NRR_DONE ):

    FEMMS                               /* leave MMX/3DNow! state            */
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET


/*
 * _mesa_3dnow_transform_points3_identity
 *
 * Copies the three components of every source point to the destination
 * unchanged (r0 = x0, r1 = x1, r2 = x2).  The fourth float of each output
 * slot is not written.  No matrix entry takes part in the copy, so the
 * matrix argument is never fetched.
 *
 * Destination header: V4F_SIZE is set to 3, VEC_SIZE_3 is OR-ed into
 * V4F_FLAGS and V4F_COUNT is copied from the source.  Results are stored
 * 16 bytes apart; source points are read V4F_STRIDE bytes apart.
 *
 * The arguments are read through ARG_DEST and ARG_SOURCE while ESI is the
 * only register pushed; EDI is pushed afterwards.
 * NOTE(review): the ARG_* macros presumably address the stack relative to
 * ESP using FRAME_OFFSET -- confirm in xform_args.h before reordering the
 * pushes or the argument loads.
 *
 * Loop registers:
 *     EAX = source cursor     EDI = source stride in bytes
 *     EDX = dest cursor       ESI = points still to process
 * Lane comments are written "high dword | low dword".
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points3_identity )
HIDDEN(_mesa_3dnow_transform_points3_identity)
GLNAME( _mesa_3dnow_transform_points3_identity ):
    _CET_ENDBR
    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* EDX = first dest point    */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* ESI = point count         */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* EAX = first source point  */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPIR_DONE ) )           /* nothing to copy           */

    PREFETCHW ( REGIND(EDX) )

ALIGNTEXT16
LLBL( G3TPIR_LOOP ):

    PREFETCHW ( REGOFF(32, EDX) )       /* dest, two points ahead            */

    MOVQ      ( REGIND(EAX), MM0 )      /* x1              | x0              */
    MOVD      ( REGOFF(8, EAX), MM1 )   /*                 | x2              */

    ADD_L     ( EDI, EAX )              /* step source cursor                */
    ADD_L     ( CONST(16), EDX )        /* step dest cursor                  */

    DEC_L     ( ESI )                   /* sets ZF for the JNZ below         */
    MOVQ      ( MM0, REGOFF(-16, EDX) ) /* r0, r1 (MOVQ leaves EFLAGS alone) */

    MOVD      ( MM1, REGOFF(-8, EDX) )  /* r2                                */
    JNZ       ( LLBL( G3TPIR_LOOP ) )   /* more points left                  */

LLBL( G3TPIR_DONE ):

    FEMMS                               /* leave MMX/3DNow! state            */
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
#endif /* USE_3DNOW_ASM */

/*
 * On Linux/ELF, emit an empty .note.GNU-stack section so that this object
 * does not cause the linked image to be marked as needing an executable
 * stack.  This sits outside the USE_3DNOW_ASM guard, so it is emitted even
 * when the routines above are compiled out.
 */
#if defined(__linux__) && defined(__ELF__)
	.section .note.GNU-stack,"",%progbits
#endif