
/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * NOTE: Avoid spaces between '(' / ')' and the arguments, especially
 * with macros like CONST and LLBL that expand to CONCAT(...).  Spaces
 * there will break the build on some platforms.
 */

#include "assyntax.h"
/*
 * MATH_ASM_PTR_SIZE is defined ahead of the m_vector_asm.h include;
 * presumably that header uses it to size pointer fields when deriving the
 * V4F_* offsets used below -- confirm against the header.
 */
#define MATH_ASM_PTR_SIZE 4
#include "math/m_vector_asm.h"
#include "xform_args.h"

	SEG_TEXT

/*
 * Raw 32-bit patterns of single-precision constants, for use with integer
 * moves: 1065353216 == 0x3F800000 (1.0f), 0 == +0.0f.
 */
#define FP_ONE		1065353216
#define FP_ZERO		0

/*
 * Shorthand operands.  All offsets are in bytes and step by one 32-bit
 * word:
 *	SRCn	n-th word of the point addressed by ESI
 *	DSTn	n-th word of the point addressed by EDI
 *	MATn	n-th word of the 16-entry matrix addressed by EDX
 */
#define SRC0		REGOFF(0, ESI)
#define SRC1		REGOFF(4, ESI)
#define SRC2		REGOFF(8, ESI)
#define SRC3		REGOFF(12, ESI)
#define DST0		REGOFF(0, EDI)
#define DST1		REGOFF(4, EDI)
#define DST2		REGOFF(8, EDI)
#define DST3		REGOFF(12, EDI)
#define MAT0		REGOFF(0, EDX)
#define MAT1		REGOFF(4, EDX)
#define MAT2		REGOFF(8, EDX)
#define MAT3		REGOFF(12, EDX)
#define MAT4		REGOFF(16, EDX)
#define MAT5		REGOFF(20, EDX)
#define MAT6		REGOFF(24, EDX)
#define MAT7		REGOFF(28, EDX)
#define MAT8		REGOFF(32, EDX)
#define MAT9		REGOFF(36, EDX)
#define MAT10		REGOFF(40, EDX)
#define MAT11		REGOFF(44, EDX)
#define MAT12		REGOFF(48, EDX)
#define MAT13		REGOFF(52, EDX)
#define MAT14		REGOFF(56, EDX)
#define MAT15		REGOFF(60, EDX)


/*
 * _mesa_x86_transform_points2_general
 *
 * For every point of the source vector, reads its first two components
 * (x, y) and writes four components to the destination vector:
 *
 *	dst[0] = x * m[0] + y * m[4] + m[12]
 *	dst[1] = x * m[1] + y * m[5] + m[13]
 *	dst[2] = x * m[2] + y * m[6] + m[14]
 *	dst[3] = x * m[3] + y * m[7] + m[15]
 *
 * The destination descriptor gets VEC_SIZE_4 OR-ed into its flags, its
 * count set to the source count and its size set to 4.  When the source
 * count is zero the routine returns without touching the destination.
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX from
 * xform_args.h (not visible here; presumably stack slots addressed via
 * FRAME_OFFSET -- confirm).
 *
 * Register use:
 *	ESI	source descriptor, then pointer to the current source point
 *	EDI	destination descriptor, then pointer to the current output
 *	EDX	matrix base
 *	EAX	source stride in bytes
 *	ECX	point count, then end-of-output pointer (start + 16 * count)
 * ESI and EDI are saved on entry and restored on exit.
 *
 * The trailing comments show the x87 register stack, top of stack first.
 * F4..F7 are the x products that accumulate into dst[0..3]; F0..F3 are the
 * matching y products.  Peak depth is eight values, i.e. the whole x87
 * register stack.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_general )
HIDDEN(_mesa_x86_transform_points2_general)
GLNAME( _mesa_x86_transform_points2_general ):

/* 8 == bytes pushed by the two PUSH_L below. */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Nothing to do for an empty source vector. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_gr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16, the total number of output bytes. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	/* ECX becomes the address one past the last output point. */
	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p2_gr_loop):

	/* x times the first matrix column (m[0..3]). */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )
	FLD_S( SRC0 )			/* F6 F5 F4 */
	FMUL_S( MAT2 )
	FLD_S( SRC0 )			/* F7 F6 F5 F4 */
	FMUL_S( MAT3 )

	/* y times the second matrix column (m[4..7]). */
	FLD_S( SRC1 )			/* F0 F7 F6 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT5 )
	FLD_S( SRC1 )			/* F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT6 )
	FLD_S( SRC1 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT7 )

	/* Fold each y product into its x product: F4+=F0 ... F7+=F3. */
	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */

	/* Add the fourth matrix column (m[12..15]). */
	FXCH( ST(3) )			/* F4 F6 F5 F7 */
	FADD_S( MAT12 )
	FXCH( ST(2) )			/* F5 F6 F4 F7 */
	FADD_S( MAT13 )
	FXCH( ST(1) )			/* F6 F5 F4 F7 */
	FADD_S( MAT14 )
	FXCH( ST(3) )			/* F7 F5 F4 F6 */
	FADD_S( MAT15 )

	/* Store the four results, leaving the x87 stack as it was. */
	FXCH( ST(2) )			/* F4 F5 F7 F6 */
	FSTP_S( DST0 )			/* F5 F7 F6 */
	FSTP_S( DST1 )			/* F7 F6 */
	FXCH( ST(1) )			/* F6 F7 */
	FSTP_S( DST2 )			/* F7 */
	FSTP_S( DST3 )			/* */

/* No branch in this file targets this label. */
LLBL(x86_p2_gr_skip):

	/* Output advances 16 bytes per point, input by the source stride. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_gr_loop) )

LLBL(x86_p2_gr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points2_perspective
 *
 * For every point of the source vector, reads its first two components
 * (x, y) and writes four components to the destination vector:
 *
 *	dst[0] = x * m[0]
 *	dst[1] = y * m[5]
 *	dst[2] = m[14]		(copied as a raw 32-bit word)
 *	dst[3] = 0.0		(FP_ZERO bit pattern)
 *
 * Only m[0], m[5] and m[14] are read.  The destination descriptor gets
 * VEC_SIZE_4 OR-ed into its flags, its count set to the source count and
 * its size set to 4.  When the source count is zero the routine returns
 * without touching the destination.
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX from
 * xform_args.h (not visible here; presumably stack slots addressed via
 * FRAME_OFFSET -- confirm).
 *
 * Register use:
 *	ESI	source descriptor, then pointer to the current source point
 *	EDI	destination descriptor, then pointer to the current output
 *	EDX	matrix base
 *	EAX	source stride in bytes
 *	EBX	bit pattern of m[14], loaded once before the loop
 *	ECX	point count, then end-of-output pointer (start + 16 * count)
 * ESI, EDI and EBX are saved on entry and restored on exit.
 *
 * The trailing comments show the x87 register stack, top of stack first.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_perspective )
HIDDEN(_mesa_x86_transform_points2_perspective)
GLNAME( _mesa_x86_transform_points2_perspective ):

/* 12 == bytes pushed by the three PUSH_L below. */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Nothing to do for an empty source vector. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_pr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16, the total number of output bytes. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	/* ECX becomes the address one past the last output point. */
	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

	/* m[14] is the same for every point; keep its bits in EBX. */
	MOV_L( MAT14, EBX )

ALIGNTEXT16
LLBL(x86_p2_pr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F1 F4 */
	FMUL_S( MAT5 )

	FXCH( ST(1) )			/* F4 F1 */
	FSTP_S( DST0   )		/* F1 */
	FSTP_S( DST1   )		/* */
	/* Third and fourth components are constants, stored as integers. */
	MOV_L( EBX, DST2 )
	MOV_L( CONST(FP_ZERO), DST3 )

/* No branch in this file targets this label. */
LLBL(x86_p2_pr_skip):

	/* Output advances 16 bytes per point, input by the source stride. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_pr_loop) )

LLBL(x86_p2_pr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points2_3d
 *
 * For every point of the source vector, reads its first two components
 * (x, y) and writes three components to the destination vector:
 *
 *	dst[0] = x * m[0] + y * m[4] + m[12]
 *	dst[1] = x * m[1] + y * m[5] + m[13]
 *	dst[2] = x * m[2] + y * m[6] + m[14]
 *
 * The fourth word of each output point is not written.  The destination
 * descriptor gets VEC_SIZE_3 OR-ed into its flags, its count set to the
 * source count and its size set to 3.  When the source count is zero the
 * routine returns without touching the destination.
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX from
 * xform_args.h (not visible here; presumably stack slots addressed via
 * FRAME_OFFSET -- confirm).
 *
 * Register use:
 *	ESI	source descriptor, then pointer to the current source point
 *	EDI	destination descriptor, then pointer to the current output
 *	EDX	matrix base
 *	EAX	source stride in bytes
 *	ECX	point count, then end-of-output pointer (start + 16 * count)
 * ESI and EDI are saved on entry and restored on exit.
 *
 * The trailing comments show the x87 register stack, top of stack first.
 * F4..F6 are the x products that accumulate into dst[0..2]; F0..F2 are the
 * matching y products.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_3d )
HIDDEN(_mesa_x86_transform_points2_3d)
GLNAME( _mesa_x86_transform_points2_3d ):

/* 8 == bytes pushed by the two PUSH_L below. */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Nothing to do for an empty source vector. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_3dr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16, the total number of output bytes. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	/* ECX becomes the address one past the last output point. */
	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p2_3dr_loop):

	/* x times m[0..2]. */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )
	FLD_S( SRC0 )			/* F6 F5 F4 */
	FMUL_S( MAT2 )

	/* y times m[4..6]. */
	FLD_S( SRC1 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT5 )
	FLD_S( SRC1 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT6 )

	/* Fold each y product into its x product: F4+=F0, F5+=F1, F6+=F2. */
	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* Add m[12..14]. */
	FXCH( ST(2) )			/* F4 F5 F6 */
	FADD_S( MAT12 )
	FXCH( ST(1) )			/* F5 F4 F6 */
	FADD_S( MAT13 )
	FXCH( ST(2) )			/* F6 F4 F5 */
	FADD_S( MAT14 )

	/* Store the three results, leaving the x87 stack as it was. */
	FXCH( ST(1) )			/* F4 F6 F5 */
	FSTP_S( DST0 )			/* F6 F5 */
	FXCH( ST(1) )			/* F5 F6 */
	FSTP_S( DST1 )			/* F6 */
	FSTP_S( DST2 )			/* */

/* No branch in this file targets this label. */
LLBL(x86_p2_3dr_skip):

	/* Output advances 16 bytes per point, input by the source stride. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_3dr_loop) )

LLBL(x86_p2_3dr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points2_3d_no_rot
 *
 * For every point of the source vector, reads its first two components
 * (x, y) and writes three components to the destination vector:
 *
 *	dst[0] = x * m[0] + m[12]
 *	dst[1] = y * m[5] + m[13]
 *	dst[2] = m[14]		(copied as a raw 32-bit word)
 *
 * Only m[0], m[5], m[12], m[13] and m[14] are read; the fourth word of
 * each output point is not written.  The destination descriptor gets
 * VEC_SIZE_3 OR-ed into its flags, its count set to the source count and
 * its size set to 3.  When the source count is zero the routine returns
 * without touching the destination.
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX from
 * xform_args.h (not visible here; presumably stack slots addressed via
 * FRAME_OFFSET -- confirm).
 *
 * Register use:
 *	ESI	source descriptor, then pointer to the current source point
 *	EDI	destination descriptor, then pointer to the current output
 *	EDX	matrix base
 *	EAX	source stride in bytes
 *	EBX	bit pattern of m[14], loaded once before the loop
 *	ECX	point count, then end-of-output pointer (start + 16 * count)
 * ESI, EDI and EBX are saved on entry and restored on exit.
 *
 * The trailing comments show the x87 register stack, top of stack first.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_3d_no_rot )
HIDDEN(_mesa_x86_transform_points2_3d_no_rot)
GLNAME( _mesa_x86_transform_points2_3d_no_rot ):

/* 12 == bytes pushed by the three PUSH_L below. */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Nothing to do for an empty source vector. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_3dnrr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16, the total number of output bytes. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	/* ECX becomes the address one past the last output point. */
	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

	/* m[14] is the same for every point; keep its bits in EBX. */
	MOV_L( MAT14, EBX )

ALIGNTEXT16
LLBL(x86_p2_3dnrr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F1 F4 */
	FMUL_S( MAT5 )

	/* F4 += m[12]; F5 starts as m[13] and receives the y product F1. */
	FXCH( ST(1) )			/* F4 F1 */
	FADD_S( MAT12 )
	FLD_S( MAT13 )		/* F5 F4 F1 */
	FXCH( ST(2) )			/* F1 F4 F5 */
	FADDP( ST0, ST(2) )		/* F4 F5 */

	FSTP_S( DST0 )		/* F5 */
	FSTP_S( DST1 )		/* */
	/* Third component is a constant, stored as an integer. */
	MOV_L( EBX, DST2 )

/* No branch in this file targets this label. */
LLBL(x86_p2_3dnrr_skip):

	/* Output advances 16 bytes per point, input by the source stride. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_3dnrr_loop) )

LLBL(x86_p2_3dnrr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points2_2d
 *
 * For every point of the source vector, reads its first two components
 * (x, y) and writes two components to the destination vector:
 *
 *	dst[0] = x * m[0] + y * m[4] + m[12]
 *	dst[1] = x * m[1] + y * m[5] + m[13]
 *
 * The third and fourth words of each output point are not written.  The
 * destination descriptor gets VEC_SIZE_2 OR-ed into its flags, its count
 * set to the source count and its size set to 2.  When the source count
 * is zero the routine returns without touching the destination.
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX from
 * xform_args.h (not visible here; presumably stack slots addressed via
 * FRAME_OFFSET -- confirm).
 *
 * Register use:
 *	ESI	source descriptor, then pointer to the current source point
 *	EDI	destination descriptor, then pointer to the current output
 *	EDX	matrix base
 *	EAX	source stride in bytes
 *	ECX	point count, then end-of-output pointer (start + 16 * count)
 * ESI and EDI are saved on entry and restored on exit.
 *
 * The trailing comments show the x87 register stack, top of stack first.
 * F4/F5 are the x products that accumulate into dst[0]/dst[1]; F0/F1 are
 * the matching y products.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_2d )
HIDDEN(_mesa_x86_transform_points2_2d)
GLNAME( _mesa_x86_transform_points2_2d ):

/* 8 == bytes pushed by the two PUSH_L below. */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Nothing to do for an empty source vector. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_2dr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16, the total number of output bytes. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	/* ECX becomes the address one past the last output point. */
	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p2_2dr_loop):

	/* x times m[0..1]. */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )

	/* y times m[4..5]. */
	FLD_S( SRC1 )			/* F0 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT5 )

	/* Fold each y product into its x product: F4+=F0, F5+=F1. */
	FXCH( ST(1) )			/* F0 F1 F5 F4 */
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
	FADDP( ST0, ST(1) )		/* F5 F4 */

	/* Add m[12..13]. */
	FXCH( ST(1) )			/* F4 F5 */
	FADD_S( MAT12 )
	FXCH( ST(1) )			/* F5 F4 */
	FADD_S( MAT13 )

	/* Store both results, leaving the x87 stack as it was. */
	FXCH( ST(1) )			/* F4 F5 */
	FSTP_S( DST0 )		/* F5 */
	FSTP_S( DST1 )		/* */

/* No branch in this file targets this label. */
LLBL(x86_p2_2dr_skip):

	/* Output advances 16 bytes per point, input by the source stride. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_2dr_loop) )

LLBL(x86_p2_2dr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points2_2d_no_rot
 *
 * For every point of the source vector, reads its first two components
 * (x, y) and writes two components to the destination vector:
 *
 *	dst[0] = x * m[0] + m[12]
 *	dst[1] = y * m[5] + m[13]
 *
 * Only m[0], m[5], m[12] and m[13] are read; the third and fourth words
 * of each output point are not written.  The destination descriptor gets
 * VEC_SIZE_2 OR-ed into its flags, its count set to the source count and
 * its size set to 2.  When the source count is zero the routine returns
 * without touching the destination.
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX from
 * xform_args.h (not visible here; presumably stack slots addressed via
 * FRAME_OFFSET -- confirm).
 *
 * Register use:
 *	ESI	source descriptor, then pointer to the current source point
 *	EDI	destination descriptor, then pointer to the current output
 *	EDX	matrix base
 *	EAX	source stride in bytes
 *	ECX	point count, then end-of-output pointer (start + 16 * count)
 * ESI and EDI are saved on entry and restored on exit.
 *
 * The trailing comments show the x87 register stack, top of stack first.
 *
 * The entry point uses ALIGNTEXT16, matching every other entry point in
 * this file.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_2d_no_rot )
HIDDEN(_mesa_x86_transform_points2_2d_no_rot)
GLNAME( _mesa_x86_transform_points2_2d_no_rot ):

/* 8 == bytes pushed by the two PUSH_L below. */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Nothing to do for an empty source vector. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_2dnrr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16, the total number of output bytes. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	/* ECX becomes the address one past the last output point. */
	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p2_2dnrr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F1 F4 */
	FMUL_S( MAT5 )

	/* F4 += m[12]; F5 starts as m[13] and receives the y product F1. */
	FXCH( ST(1) )			/* F4 F1 */
	FADD_S( MAT12 )
	FLD_S( MAT13 )		/* F5 F4 F1 */
	FXCH( ST(2) )			/* F1 F4 F5 */
	FADDP( ST0, ST(2) )		/* F4 F5 */

	FSTP_S( DST0   )		/* F5 */
	FSTP_S( DST1   )		/* */

/* No branch in this file targets this label. */
LLBL(x86_p2_2dnrr_skip):

	/* Output advances 16 bytes per point, input by the source stride. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_2dnrr_loop) )

LLBL(x86_p2_2dnrr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points2_identity
 *
 * Copies the first two 32-bit words (x, y) of every source point to the
 * destination vector unchanged, using integer moves.  The third and
 * fourth words of each output point are not written.
 *
 * The destination descriptor gets VEC_SIZE_2 OR-ed into its flags, its
 * count set to the source count and its size set to 2.  When the source
 * count is zero the routine returns without touching the destination.
 * When source and destination share the same start pointer, the copy loop
 * is skipped after the descriptor has been updated.
 *
 * Arguments are fetched through ARG_SOURCE and ARG_DEST from xform_args.h
 * (not visible here; presumably stack slots addressed via FRAME_OFFSET --
 * confirm).  The matrix argument is never loaded: nothing here reads the
 * matrix, and EDX serves as a scratch register in the copy loop.
 *
 * Register use:
 *	ESI	source descriptor, then pointer to the current source point
 *	EDI	destination descriptor, then pointer to the current output
 *	EAX	source stride in bytes
 *	EBX	scratch for x
 *	EDX	scratch for y
 *	ECX	point count, then end-of-output pointer (start + 16 * count)
 * ESI, EDI and EBX are saved on entry and restored on exit.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_identity )
HIDDEN(_mesa_x86_transform_points2_identity)
GLNAME( _mesa_x86_transform_points2_identity ):

/* 12 == bytes pushed by the three PUSH_L below. */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Nothing to do for an empty source vector. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_ir_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16, the total number of output bytes. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	/* ECX becomes the address one past the last output point. */
	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

	/* Same start pointer on both sides: skip the copy. */
	CMP_L( ESI, EDI )
	JE( LLBL(x86_p2_ir_done) )

ALIGNTEXT16
LLBL(x86_p2_ir_loop):

	MOV_L( SRC0, EBX )
	MOV_L( SRC1, EDX )

	MOV_L( EBX, DST0 )
	MOV_L( EDX, DST1 )

/* No branch in this file targets this label. */
LLBL(x86_p2_ir_skip):

	/* Output advances 16 bytes per point, input by the source stride. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_ir_loop) )

LLBL(x86_p2_ir_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET

/*
 * On ELF/Linux builds, emit an empty .note.GNU-stack section; by GNU
 * toolchain convention this marks the object as not needing an
 * executable stack.
 */
#if defined (__ELF__) && defined (__linux__)
	.section .note.GNU-stack,"",%progbits
#endif