1/* 2 * SPARC assembly matrix code. 3 */ 4 5#ifndef _SPARC_MATRIX_H 6#define _SPARC_MATRIX_H 7 8#ifdef __arch64__ 9#define LDPTR ldx 10#define MATH_ASM_PTR_SIZE 8 11#include "math/m_vector_asm.h" 12#else 13#define LDPTR ld 14#define MATH_ASM_PTR_SIZE 4 15#include "math/m_vector_asm.h" 16#endif 17 18#define M0 %f16 19#define M1 %f17 20#define M2 %f18 21#define M3 %f19 22#define M4 %f20 23#define M5 %f21 24#define M6 %f22 25#define M7 %f23 26#define M8 %f24 27#define M9 %f25 28#define M10 %f26 29#define M11 %f27 30#define M12 %f28 31#define M13 %f29 32#define M14 %f30 33#define M15 %f31 34 35#define LDMATRIX_0_1_2_3_12_13_14_15(BASE) \ 36 ldd [BASE + ( 0 * 0x4)], M0; \ 37 ldd [BASE + ( 2 * 0x4)], M2; \ 38 ldd [BASE + (12 * 0x4)], M12; \ 39 ldd [BASE + (14 * 0x4)], M14 40 41#define LDMATRIX_0_1_12_13(BASE) \ 42 ldd [BASE + ( 0 * 0x4)], M0; \ 43 ldd [BASE + (12 * 0x4)], M12 44 45#define LDMATRIX_0_12_13(BASE) \ 46 ld [BASE + ( 0 * 0x4)], M0; \ 47 ldd [BASE + (12 * 0x4)], M12 48 49#define LDMATRIX_0_1_2_12_13_14(BASE) \ 50 ldd [BASE + ( 0 * 0x4)], M0; \ 51 ld [BASE + ( 2 * 0x4)], M2; \ 52 ldd [BASE + (12 * 0x4)], M12; \ 53 ld [BASE + (14 * 0x4)], M14 54 55#define LDMATRIX_0_12_13_14(BASE) \ 56 ld [BASE + ( 0 * 0x4)], M0; \ 57 ldd [BASE + (12 * 0x4)], M12; \ 58 ld [BASE + (14 * 0x4)], M14 59 60#define LDMATRIX_0_14(BASE) \ 61 ld [BASE + ( 0 * 0x4)], M0; \ 62 ld [BASE + (14 * 0x4)], M14 63 64#define LDMATRIX_0_1_2_3_4_5_6_7_12_13_14_15(BASE) \ 65 ldd [BASE + ( 0 * 0x4)], M0; \ 66 ldd [BASE + ( 2 * 0x4)], M2; \ 67 ldd [BASE + ( 4 * 0x4)], M4; \ 68 ldd [BASE + ( 6 * 0x4)], M6; \ 69 ldd [BASE + (12 * 0x4)], M12; \ 70 ldd [BASE + (14 * 0x4)], M14 71 72#define LDMATRIX_0_5_12_13(BASE) \ 73 ld [BASE + ( 0 * 0x4)], M0; \ 74 ld [BASE + ( 5 * 0x4)], M5; \ 75 ldd [BASE + (12 * 0x4)], M12 76 77#define LDMATRIX_0_1_2_3_4_5_6_12_13_14(BASE) \ 78 ldd [BASE + ( 0 * 0x4)], M0; \ 79 ldd [BASE + ( 2 * 0x4)], M2; \ 80 ldd [BASE + ( 4 * 0x4)], M4; \ 81 ld [BASE + ( 6 * 0x4)], M6; \ 82 ldd [BASE + (12 * 0x4)], M12; \ 83 ld [BASE + (14 * 0x4)], M14 84 85#define LDMATRIX_0_5_12_13_14(BASE) \ 86 ld [BASE + ( 0 * 0x4)], M0; \ 87 ld [BASE + ( 5 * 0x4)], M5; \ 88 ldd [BASE + (12 * 0x4)], M12; \ 89 ld [BASE + (14 * 0x4)], M14 90 91#define LDMATRIX_0_5_14(BASE) \ 92 ld [BASE + ( 0 * 0x4)], M0; \ 93 ld [BASE + ( 5 * 0x4)], M5; \ 94 ld [BASE + (14 * 0x4)], M14 95 96#define LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(BASE) \ 97 ldd [BASE + ( 0 * 0x4)], M0; \ 98 ldd [BASE + ( 2 * 0x4)], M2; \ 99 ldd [BASE + ( 4 * 0x4)], M4; \ 100 ldd [BASE + ( 6 * 0x4)], M6; \ 101 ldd [BASE + ( 8 * 0x4)], M8; \ 102 ldd [BASE + (10 * 0x4)], M10; \ 103 ldd [BASE + (12 * 0x4)], M12; \ 104 ldd [BASE + (14 * 0x4)], M14 105 106#define LDMATRIX_0_1_4_5_12_13(BASE) \ 107 ldd [BASE + ( 0 * 0x4)], M0; \ 108 ldd [BASE + ( 4 * 0x4)], M4; \ 109 ldd [BASE + (12 * 0x4)], M12 110 111#define LDMATRIX_0_5_12_13(BASE) \ 112 ld [BASE + ( 0 * 0x4)], M0; \ 113 ld [BASE + ( 5 * 0x4)], M5; \ 114 ldd [BASE + (12 * 0x4)], M12 115 116#define LDMATRIX_0_1_2_4_5_6_8_9_10(BASE) \ 117 ldd [BASE + ( 0 * 0x4)], M0; \ 118 ld [BASE + ( 2 * 0x4)], M2; \ 119 ldd [BASE + ( 4 * 0x4)], M4; \ 120 ld [BASE + ( 6 * 0x4)], M6; \ 121 ldd [BASE + ( 8 * 0x4)], M8; \ 122 ld [BASE + (10 * 0x4)], M10 123 124#define LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(BASE) \ 125 ldd [BASE + ( 0 * 0x4)], M0; \ 126 ld [BASE + ( 2 * 0x4)], M2; \ 127 ldd [BASE + ( 4 * 0x4)], M4; \ 128 ld [BASE + ( 6 * 0x4)], M6; \ 129 ldd [BASE + ( 8 * 0x4)], M8; \ 130 ld [BASE + (10 * 0x4)], M10; \ 131 ldd [BASE + (12 * 0x4)], M12; \ 132 ld [BASE + (14 * 0x4)], M14 133 134#define LDMATRIX_0_5_10(BASE) \ 135 ld [BASE + ( 0 * 0x4)], M0; \ 136 ld [BASE + ( 5 * 0x4)], M5; \ 137 ld [BASE + (10 * 0x4)], M10; 138 139#define LDMATRIX_0_5_10_12_13_14(BASE) \ 140 ld [BASE + ( 0 * 0x4)], M0; \ 141 ld [BASE + ( 5 * 0x4)], M5; \ 142 ld [BASE + (10 * 0x4)], M10; \ 143 ldd [BASE + (12 * 0x4)], M12; \ 144 ld [BASE + (14 * 0x4)], M14 145 146#define LDMATRIX_0_5_8_9_10_14(BASE) \ 147 ld [BASE + ( 0 * 0x4)], M0; \ 148 ld [BASE + ( 5 * 0x4)], M5; \ 149 ldd [BASE + ( 8 * 0x4)], M8; \ 150 ld [BASE + (10 * 0x4)], M10; \ 151 ld [BASE + (14 * 0x4)], M14 152 153#endif /* !(_SPARC_MATRIX_H) */ 154