/*
 * SPARC assembly matrix code: floating-point register aliases (M0-M15)
 * and LDMATRIX_* macros that load selected matrix entries into them.
 */
47117f1b4Smrg
/* Include guard: skip the rest of this header if it was already processed. */
#ifndef _SPARC_MATRIX_H
#define _SPARC_MATRIX_H
77117f1b4Smrg
/*
 * Pointer-width configuration.
 *
 * When __arch64__ is defined, LDPTR expands to ldx and MATH_ASM_PTR_SIZE
 * to 8; otherwise LDPTR expands to ld and MATH_ASM_PTR_SIZE to 4.
 * Both are defined before math/m_vector_asm.h is included.
 */
#ifdef __arch64__
#define LDPTR			ldx
#define MATH_ASM_PTR_SIZE	8
#else
#define LDPTR			ld
#define MATH_ASM_PTR_SIZE	4
#endif
#include "math/m_vector_asm.h"
177117f1b4Smrg
/*
 * Matrix register aliases: Mn names floating-point register %f(16 + n).
 * The LDMATRIX_* macros in this header load the 4-byte matrix entry at
 * byte offset n * 4 into Mn.
 */
#define M0	%f16
#define M1	%f17
#define M2	%f18
#define M3	%f19
#define M4	%f20
#define M5	%f21
#define M6	%f22
#define M7	%f23
#define M8	%f24
#define M9	%f25
#define M10	%f26
#define M11	%f27
#define M12	%f28
#define M13	%f29
#define M14	%f30
#define M15	%f31
347117f1b4Smrg
/*
 * Load matrix entries 0-3 and 12-15 from BASE into M0-M3 and M12-M15.
 * Each ldd fetches two adjacent 4-byte entries into an even/odd register
 * pair (presumably BASE must be 8-byte aligned for ldd -- confirm at callers).
 */
#define LDMATRIX_0_1_2_3_12_13_14_15(BASE)	\
	ldd	[BASE + ( 0 * 0x4)], M0;	\
	ldd	[BASE + ( 2 * 0x4)], M2;	\
	ldd	[BASE + (12 * 0x4)], M12;	\
	ldd	[BASE + (14 * 0x4)], M14
407117f1b4Smrg
/*
 * Load matrix entries 0, 1, 12 and 13 from BASE into M0, M1, M12, M13,
 * using one doubleword load per adjacent pair.
 */
#define LDMATRIX_0_1_12_13(BASE)	\
	ldd	[BASE + ( 0 * 0x4)], M0;	\
	ldd	[BASE + (12 * 0x4)], M12
447117f1b4Smrg
/*
 * Load matrix entries 0, 12 and 13 from BASE into M0, M12, M13.
 * Entry 0 is a single-word load; entries 12/13 share one doubleword load.
 */
#define LDMATRIX_0_12_13(BASE)	\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ldd	[BASE + (12 * 0x4)], M12
487117f1b4Smrg
/*
 * Load matrix entries 0-2 and 12-14 from BASE into M0-M2 and M12-M14.
 * Pairs 0/1 and 12/13 use ldd; entries 2 and 14 use single-word ld so
 * that M3 and M15 are left untouched.
 */
#define LDMATRIX_0_1_2_12_13_14(BASE)	\
	ldd	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 2 * 0x4)], M2;	\
	ldd	[BASE + (12 * 0x4)], M12;	\
	ld	[BASE + (14 * 0x4)], M14
547117f1b4Smrg
/*
 * Load matrix entries 0, 12, 13 and 14 from BASE into M0, M12, M13, M14.
 * Only the 12/13 pair is fetched with a doubleword load.
 */
#define LDMATRIX_0_12_13_14(BASE)	\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ldd	[BASE + (12 * 0x4)], M12;	\
	ld	[BASE + (14 * 0x4)], M14
597117f1b4Smrg
/* Load matrix entries 0 and 14 from BASE into M0 and M14 (two single-word loads). */
#define LDMATRIX_0_14(BASE)	\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + (14 * 0x4)], M14
637117f1b4Smrg
/*
 * Load matrix entries 0-7 and 12-15 from BASE into M0-M7 and M12-M15.
 * Entries 8-11 are not loaded.  Every load is a doubleword (pair) load.
 */
#define LDMATRIX_0_1_2_3_4_5_6_7_12_13_14_15(BASE)	\
	ldd	[BASE + ( 0 * 0x4)], M0;	\
	ldd	[BASE + ( 2 * 0x4)], M2;	\
	ldd	[BASE + ( 4 * 0x4)], M4;	\
	ldd	[BASE + ( 6 * 0x4)], M6;	\
	ldd	[BASE + (12 * 0x4)], M12;	\
	ldd	[BASE + (14 * 0x4)], M14
717117f1b4Smrg
/*
 * Load matrix entries 0, 5, 12 and 13 from BASE into M0, M5, M12, M13.
 * Entries 0 and 5 are single-word loads; 12/13 share one doubleword load.
 */
#define LDMATRIX_0_5_12_13(BASE)	\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 5 * 0x4)], M5;	\
	ldd	[BASE + (12 * 0x4)], M12
767117f1b4Smrg
/*
 * Load matrix entries 0-6 and 12-14 from BASE into M0-M6 and M12-M14.
 * Entries 6 and 14 use single-word ld so that M7 and M15 are left
 * untouched; all other entries are fetched as doubleword pairs.
 */
#define LDMATRIX_0_1_2_3_4_5_6_12_13_14(BASE)	\
	ldd	[BASE + ( 0 * 0x4)], M0;	\
	ldd	[BASE + ( 2 * 0x4)], M2;	\
	ldd	[BASE + ( 4 * 0x4)], M4;	\
	ld	[BASE + ( 6 * 0x4)], M6;	\
	ldd	[BASE + (12 * 0x4)], M12;	\
	ld	[BASE + (14 * 0x4)], M14
847117f1b4Smrg
/*
 * Load matrix entries 0, 5, 12, 13 and 14 from BASE into
 * M0, M5, M12, M13, M14.  Only the 12/13 pair uses a doubleword load.
 */
#define LDMATRIX_0_5_12_13_14(BASE)	\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 5 * 0x4)], M5;	\
	ldd	[BASE + (12 * 0x4)], M12;	\
	ld	[BASE + (14 * 0x4)], M14
907117f1b4Smrg
/* Load matrix entries 0, 5 and 14 from BASE into M0, M5, M14 (single-word loads). */
#define LDMATRIX_0_5_14(BASE)	\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 5 * 0x4)], M5;	\
	ld	[BASE + (14 * 0x4)], M14
957117f1b4Smrg
/*
 * Load all sixteen matrix entries from BASE into M0-M15, as eight
 * doubleword loads of two adjacent 4-byte entries each.
 */
#define LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(BASE)	\
	ldd	[BASE + ( 0 * 0x4)], M0;	\
	ldd	[BASE + ( 2 * 0x4)], M2;	\
	ldd	[BASE + ( 4 * 0x4)], M4;	\
	ldd	[BASE + ( 6 * 0x4)], M6;	\
	ldd	[BASE + ( 8 * 0x4)], M8;	\
	ldd	[BASE + (10 * 0x4)], M10;	\
	ldd	[BASE + (12 * 0x4)], M12;	\
	ldd	[BASE + (14 * 0x4)], M14
1057117f1b4Smrg
/*
 * Load matrix entries 0, 1, 4, 5, 12 and 13 from BASE into
 * M0, M1, M4, M5, M12, M13 with three doubleword (pair) loads.
 */
#define LDMATRIX_0_1_4_5_12_13(BASE)	\
	ldd	[BASE + ( 0 * 0x4)], M0;	\
	ldd	[BASE + ( 4 * 0x4)], M4;	\
	ldd	[BASE + (12 * 0x4)], M12
1107117f1b4Smrg
/*
 * LDMATRIX_0_5_12_13 is also defined earlier in this header with the same
 * body.  This copy is guarded so the macro has one effective definition
 * and the two copies can never turn into a conflicting redefinition.
 *
 * Loads matrix entries 0, 5, 12 and 13 from BASE into M0, M5, M12, M13.
 */
#ifndef LDMATRIX_0_5_12_13
#define LDMATRIX_0_5_12_13(BASE)	\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 5 * 0x4)], M5;	\
	ldd	[BASE + (12 * 0x4)], M12
#endif
1157117f1b4Smrg
/*
 * Load matrix entries 0-2, 4-6 and 8-10 from BASE into M0-M2, M4-M6 and
 * M8-M10.  Entries 2, 6 and 10 use single-word ld so that M3, M7 and M11
 * are left untouched.
 */
#define LDMATRIX_0_1_2_4_5_6_8_9_10(BASE)	\
	ldd	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 2 * 0x4)], M2;	\
	ldd	[BASE + ( 4 * 0x4)], M4;	\
	ld	[BASE + ( 6 * 0x4)], M6;	\
	ldd	[BASE + ( 8 * 0x4)], M8;	\
	ld	[BASE + (10 * 0x4)], M10
1237117f1b4Smrg
/*
 * Load matrix entries 0-2, 4-6, 8-10 and 12-14 from BASE into the
 * matching Mn registers.  Entries 2, 6, 10 and 14 use single-word ld so
 * that M3, M7, M11 and M15 are left untouched.
 */
#define LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(BASE)	\
	ldd	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 2 * 0x4)], M2;	\
	ldd	[BASE + ( 4 * 0x4)], M4;	\
	ld	[BASE + ( 6 * 0x4)], M6;	\
	ldd	[BASE + ( 8 * 0x4)], M8;	\
	ld	[BASE + (10 * 0x4)], M10;	\
	ldd	[BASE + (12 * 0x4)], M12;	\
	ld	[BASE + (14 * 0x4)], M14
1337117f1b4Smrg
/*
 * Load matrix entries 0, 5 and 10 from BASE into M0, M5, M10 using
 * single-word loads.  The last load carries no trailing ';', matching the
 * other LDMATRIX_* macros, so the expansion adds no empty statement.
 */
#define LDMATRIX_0_5_10(BASE)	\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 5 * 0x4)], M5;	\
	ld	[BASE + (10 * 0x4)], M10
1387117f1b4Smrg
/*
 * Load matrix entries 0, 5, 10, 12, 13 and 14 from BASE into
 * M0, M5, M10, M12, M13, M14.  Only the 12/13 pair uses a doubleword load.
 */
#define LDMATRIX_0_5_10_12_13_14(BASE)	\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 5 * 0x4)], M5;	\
	ld	[BASE + (10 * 0x4)], M10;	\
	ldd	[BASE + (12 * 0x4)], M12;	\
	ld	[BASE + (14 * 0x4)], M14
1457117f1b4Smrg
/*
 * Load matrix entries 0, 5, 8, 9, 10 and 14 from BASE into
 * M0, M5, M8, M9, M10, M14.  Only the 8/9 pair uses a doubleword load.
 */
#define LDMATRIX_0_5_8_9_10_14(BASE)	\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 5 * 0x4)], M5;	\
	ldd	[BASE + ( 8 * 0x4)], M8;	\
	ld	[BASE + (10 * 0x4)], M10;	\
	ld	[BASE + (14 * 0x4)], M14
1527117f1b4Smrg
#endif /* _SPARC_MATRIX_H */
154