/* Software floating-point emulation.
   Basic two-word fraction declaration and manipulation.
   Copyright (C) 1997-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   In addition to the permissions in the GNU Lesser General Public
   License, the Free Software Foundation gives you unlimited
   permission to link the compiled version of this file into
   combinations with other programs, and to distribute those
   combinations without any restriction coming from the use of this
   file. (The Lesser General Public License restrictions do apply in
   other respects; for example, they cover modification of the file,
   and distribution when not linked into a combine executable.)

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>. */

#ifndef SOFT_FP_OP_2_H
#define SOFT_FP_OP_2_H 1

/* A two-word fraction named X lives in two _FP_W_TYPE variables:
   X_f0 (low word) and X_f1 (high word).  */

/* Declare both words of X; each declarator is followed by
   _FP_ZERO_INIT.  */
#define _FP_FRAC_DECL_2(X) \
  _FP_W_TYPE X##_f0 _FP_ZERO_INIT, X##_f1 _FP_ZERO_INIT
/* Copy both words of S into D.  */
#define _FP_FRAC_COPY_2(D, S)	(D##_f0 = S##_f0, D##_f1 = S##_f1)
/* Set X from I.  I is handed on to the three-parameter
   __FP_FRAC_SET_2, so it has to expand to a "high, low" pair of
   words (for instance _FP_ZEROFRAC_2).  */
#define _FP_FRAC_SET_2(X, I)	__FP_FRAC_SET_2 (X, I)
/* High and low word of X as lvalues.  */
#define _FP_FRAC_HIGH_2(X)	(X##_f1)
#define _FP_FRAC_LOW_2(X)	(X##_f0)
/* Word number w of X; w must be the literal token 0 or 1 because it
   is pasted into the variable name.  */
#define _FP_FRAC_WORD_2(X, w)	(X##_f##w)

/* Shift the two-word fraction X left by N bits in place.  Bits pushed
   past the top of the high word are lost.  N is evaluated more than
   once.  The shift counts written below are only well defined for
   0 < N < 2 * _FP_W_TYPE_SIZE: N == 0 would shift X##_f0 right by a
   full word in the general branch.  */
#define _FP_FRAC_SLL_2(X, N) \
  (void) (((N) < _FP_W_TYPE_SIZE) \
	  ? ({ \
	      if (__builtin_constant_p (N) && (N) == 1) \
		{ \
		  /* Doubling; the sign test carries in the low word's top bit.  */ \
		  X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE) (X##_f0)) < 0); \
		  X##_f0 += X##_f0; \
		} \
	      else \
		{ \
		  X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
		  X##_f0 <<= (N); \
		} \
	      0; \
	    }) \
	  : ({ \
	      /* At least one whole word: only low-word bits survive.  */ \
	      X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \
	      X##_f0 = 0; \
	  }))

/* Shift the two-word fraction X right by N bits in place (logical
   shift, no sticky bit).  N is evaluated more than once and, as the
   shift counts below show, must satisfy 0 < N < 2 * _FP_W_TYPE_SIZE.  */
#define _FP_FRAC_SRL_2(X, N) \
  (void) (((N) < _FP_W_TYPE_SIZE) \
	  ? ({ \
	      X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
	      X##_f1 >>= (N); \
	    }) \
	  : ({ \
	      /* At least one whole word: only high-word bits survive.  */ \
	      X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \
	      X##_f1 = 0; \
	    }))

/* Right shift with sticky-lsb.
   Shift X right by N bits in place and store in S whether any
   nonzero bit was shifted out (nonzero) or not (0).  The sticky
   information goes to S only; it is not merged into X.  SZ is
   accepted but not used.  N is evaluated more than once and must
   satisfy 0 < N < 2 * _FP_W_TYPE_SIZE.  */
#define _FP_FRAC_SRST_2(X, S, N, sz) \
  (void) (((N) < _FP_W_TYPE_SIZE) \
	  ? ({ \
	      S = (__builtin_constant_p (N) && (N) == 1 \
		   ? X##_f0 & 1 \
		   : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0); \
	      X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
	      X##_f1 >>= (N); \
	    }) \
	  : ({ \
	      /* The whole low word is lost, plus N - _FP_W_TYPE_SIZE \
		 bits of the high word (none when N equals the word \
		 size, which is special-cased to avoid a full-width \
		 shift).  */ \
	      S = ((((N) == _FP_W_TYPE_SIZE \
		     ? 0 \
		     : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
		    | X##_f0) != 0); \
	      X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)); \
	      X##_f1 = 0; \
	    }))

/* Shift X right by N bits in place, OR-ing a sticky bit into the
   least significant bit of the result: bit 0 of X##_f0 is set when
   any nonzero bit was shifted out.  SZ is accepted but not used.
   N is evaluated more than once and must satisfy
   0 < N < 2 * _FP_W_TYPE_SIZE.  */
#define _FP_FRAC_SRS_2(X, N, sz) \
  (void) (((N) < _FP_W_TYPE_SIZE) \
	  ? ({ \
	      X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) \
			| (__builtin_constant_p (N) && (N) == 1 \
			   ? X##_f0 & 1 \
			   : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
	      X##_f1 >>= (N); \
	    }) \
	  : ({ \
	      /* The lost bits are the whole low word plus the bottom \
		 N - _FP_W_TYPE_SIZE bits of the high word.  */ \
	      X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) \
			| ((((N) == _FP_W_TYPE_SIZE \
			     ? 0 \
			     : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
			    | X##_f0) != 0)); \
	      X##_f1 = 0; \
	    }))

/* Two-word arithmetic on named fractions.  Each macro just spells out
   the high/low words and forwards to the matching __FP_FRAC_*_2
   primitive (operands are passed high word first).  */

/* X += I, where I is a single word.  */
#define _FP_FRAC_ADDI_2(X, I) \
  __FP_FRAC_ADDI_2 (X##_f1, X##_f0, I)

/* R = X + Y.  */
#define _FP_FRAC_ADD_2(R, X, Y) \
  __FP_FRAC_ADD_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)

/* R = X - Y.  */
#define _FP_FRAC_SUB_2(R, X, Y) \
  __FP_FRAC_SUB_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)

/* X -= Y.  */
#define _FP_FRAC_DEC_2(X, Y) \
  __FP_FRAC_DEC_2 (X##_f1, X##_f0, Y##_f1, Y##_f0)

/* Store in R the number of leading zero bits of the two-word fraction
   X.  When the high word is zero the count comes from the low word
   plus _FP_W_TYPE_SIZE.  __FP_CLZ is applied to X##_f0 even when that
   word is zero too, so the result for an all-zero X is whatever
   __FP_CLZ yields for 0.  */
#define _FP_FRAC_CLZ_2(R, X) \
  do \
    { \
      if (X##_f1) \
	__FP_CLZ ((R), X##_f1); \
      else \
	{ \
	  __FP_CLZ ((R), X##_f0); \
	  (R) += _FP_W_TYPE_SIZE; \
	} \
    } \
  while (0)

/* Predicates.  */

/* True if the high word of X is negative when viewed as
   _FP_WS_TYPE.  */
#define _FP_FRAC_NEGP_2(X)	((_FP_WS_TYPE) X##_f1 < 0)
/* True if both words of X are zero.  */
#define _FP_FRAC_ZEROP_2(X)	((X##_f1 | X##_f0) == 0)
/* Nonzero if the _FP_OVERFLOW_<fs> bit(s) are set in the high word
   of X, as selected by _FP_FRAC_HIGH_<fs>.  */
#define _FP_FRAC_OVERP_2(fs, X)	(_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
/* Clear the _FP_OVERFLOW_<fs> bit(s) in the high word of X.  */
#define _FP_FRAC_CLEAR_OVERP_2(fs, X)	(_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
/* Nonzero if the _FP_HIGHBIT_DW_<fs> bit(s) are set in the word
   selected by _FP_FRAC_HIGH_DW_<fs>.  */
#define _FP_FRAC_HIGHBIT_DW_2(fs, X) \
  (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
/* Two-word comparisons of X and Y: equal, greater, greater-or-equal.
   The high words decide unless they are equal.  */
#define _FP_FRAC_EQ_2(X, Y)	(X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
#define _FP_FRAC_GT_2(X, Y) \
  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
#define _FP_FRAC_GE_2(X, Y) \
  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))

/* Constant fractions, written as a "high word, low word" pair so they
   can be passed as the I argument of _FP_FRAC_SET_2.  */
#define _FP_ZEROFRAC_2	0, 0
#define _FP_MINFRAC_2	0, 1
#define _FP_MAXFRAC_2	(~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)

/* Internals.  */

/* Store high word I1 and low word I0 into X.  */
#define __FP_FRAC_SET_2(X, I1, I0)	(X##_f0 = I0, X##_f1 = I1)

/* Store in R the leading-zero count of the double word whose high
   word is XH and low word is XL.  When XH is zero the count is that
   of XL plus _FP_W_TYPE_SIZE.  */
#define __FP_CLZ_2(R, xh, xl) \
  do \
    { \
      if (xh) \
	__FP_CLZ ((R), xh); \
      else \
	{ \
	  __FP_CLZ ((R), xl); \
	  (R) += _FP_W_TYPE_SIZE; \
	} \
    } \
  while (0)

/* Double-word add/subtract primitives (operands are given high word
   first).  The plain C versions in the "#if 0" branch are compiled
   out; the active branch expresses everything through add_ssaaaa and
   sub_ddmmss, which this file does not define.  */
#if 0

# ifndef __FP_FRAC_ADDI_2
#  define __FP_FRAC_ADDI_2(xh, xl, i) \
  (xh += ((xl += i) < i))
# endif
# ifndef __FP_FRAC_ADD_2
#  define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
  (rh = xh + yh + ((rl = xl + yl) < xl))
# endif
# ifndef __FP_FRAC_SUB_2
#  define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
  (rh = xh - yh - ((rl = xl - yl) > xl))
# endif
# ifndef __FP_FRAC_DEC_2
#  define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
  do \
    { \
      UWtype __FP_FRAC_DEC_2_t = xl; \
      xh -= yh + ((xl -= yl) > __FP_FRAC_DEC_2_t); \
    } \
  while (0)
# endif

#else

/* (xh, xl) += i, with i a single word.  */
# undef __FP_FRAC_ADDI_2
# define __FP_FRAC_ADDI_2(xh, xl, i)	add_ssaaaa (xh, xl, xh, xl, 0, i)
/* (rh, rl) = (xh, xl) + (yh, yl).  */
# undef __FP_FRAC_ADD_2
# define __FP_FRAC_ADD_2	add_ssaaaa
/* (rh, rl) = (xh, xl) - (yh, yl).  */
# undef __FP_FRAC_SUB_2
# define __FP_FRAC_SUB_2	sub_ddmmss
/* (xh, xl) -= (yh, yl).  */
# undef __FP_FRAC_DEC_2
# define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
  sub_ddmmss (xh, xl, xh, xl, yh, yl)

#endif

/* Unpack the raw bits of a native fp value.  Do not classify or
   normalize the data.

   VAL is stored into the flt member of a local union _FP_UNION_<fs>
   and the bit-fields bits.frac0, bits.frac1, bits.exp and bits.sign
   are copied to X##_f0, X##_f1, X##_e and X##_s.  */

#define _FP_UNPACK_RAW_2(fs, X, val) \
  do \
    { \
      union _FP_UNION_##fs _FP_UNPACK_RAW_2_flo; \
      _FP_UNPACK_RAW_2_flo.flt = (val); \
      \
      X##_f0 = _FP_UNPACK_RAW_2_flo.bits.frac0; \
      X##_f1 = _FP_UNPACK_RAW_2_flo.bits.frac1; \
      X##_e = _FP_UNPACK_RAW_2_flo.bits.exp; \
      X##_s = _FP_UNPACK_RAW_2_flo.bits.sign; \
    } \
  while (0)

/* Like _FP_UNPACK_RAW_2, but VAL is a pointer: it is cast to
   union _FP_UNION_<fs> * and the bit-fields are read through it
   without copying the value first.  */
#define _FP_UNPACK_RAW_2_P(fs, X, val) \
  do \
    { \
      union _FP_UNION_##fs *_FP_UNPACK_RAW_2_P_flo \
	= (union _FP_UNION_##fs *) (val); \
      \
      X##_f0 = _FP_UNPACK_RAW_2_P_flo->bits.frac0; \
      X##_f1 = _FP_UNPACK_RAW_2_P_flo->bits.frac1; \
      X##_e = _FP_UNPACK_RAW_2_P_flo->bits.exp; \
      X##_s = _FP_UNPACK_RAW_2_P_flo->bits.sign; \
    } \
  while (0)


/* Repack the raw bits of a native fp value.

   X##_f0, X##_f1, X##_e and X##_s are stored into the bit-fields
   bits.frac0, bits.frac1, bits.exp and bits.sign of a local
   union _FP_UNION_<fs>, whose flt member is then assigned to VAL.  */

#define _FP_PACK_RAW_2(fs, val, X) \
  do \
    { \
      union _FP_UNION_##fs _FP_PACK_RAW_2_flo; \
      \
      _FP_PACK_RAW_2_flo.bits.frac0 = X##_f0; \
      _FP_PACK_RAW_2_flo.bits.frac1 = X##_f1; \
      _FP_PACK_RAW_2_flo.bits.exp = X##_e; \
      _FP_PACK_RAW_2_flo.bits.sign = X##_s; \
      \
      (val) = _FP_PACK_RAW_2_flo.flt; \
    } \
  while (0)

/* Like _FP_PACK_RAW_2, but VAL is a pointer: it is cast to
   union _FP_UNION_<fs> * and the bit-fields are written through
   it.  */
#define _FP_PACK_RAW_2_P(fs, val, X) \
  do \
    { \
      union _FP_UNION_##fs *_FP_PACK_RAW_2_P_flo \
	= (union _FP_UNION_##fs *) (val); \
      \
      _FP_PACK_RAW_2_P_flo->bits.frac0 = X##_f0; \
      _FP_PACK_RAW_2_P_flo->bits.frac1 = X##_f1; \
      _FP_PACK_RAW_2_P_flo->bits.exp = X##_e; \
      _FP_PACK_RAW_2_P_flo->bits.sign = X##_s; \
    } \
  while (0)


/* Multiplication algorithms: */

/* Given a 1W * 1W => 2W primitive, do the extended multiplication.

   R receives the full product of the two-word fractions X and Y and
   is accessed as a four-word fraction through _FP_FRAC_WORD_4.
   DOIT (hi, lo, a, b) is the word multiply primitive.  All four
   partial products are formed; the two cross products are then added
   in at word 1 with carries propagated into words 2 and 3.
   WFRACBITS is not used here.  */

#define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_b); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_c); \
      \
      doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0), \
	    X##_f0, Y##_f0); \
      doit (_FP_MUL_MEAT_DW_2_wide_b_f1, _FP_MUL_MEAT_DW_2_wide_b_f0, \
	    X##_f0, Y##_f1); \
      doit (_FP_MUL_MEAT_DW_2_wide_c_f1, _FP_MUL_MEAT_DW_2_wide_c_f0, \
	    X##_f1, Y##_f0); \
      doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
	    X##_f1, Y##_f1); \
      \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
		       _FP_FRAC_WORD_4 (R, 1), 0, \
		       _FP_MUL_MEAT_DW_2_wide_b_f1, \
		       _FP_MUL_MEAT_DW_2_wide_b_f0, \
		       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
		       _FP_FRAC_WORD_4 (R, 1)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
		       _FP_FRAC_WORD_4 (R, 1), 0, \
		       _FP_MUL_MEAT_DW_2_wide_c_f1, \
		       _FP_MUL_MEAT_DW_2_wide_c_f0, \
		       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
		       _FP_FRAC_WORD_4 (R, 1)); \
    } \
  while (0)

/* R = X * Y for two-word fractions with WFRACBITS working bits:
   compute the four-word product with _FP_MUL_MEAT_DW_2_wide, shift it
   right (with sticky bit) by WFRACBITS - 1 and keep the two low
   words.  DOIT is the 1W * 1W => 2W primitive.  */
#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_z); \
      \
      _FP_MUL_MEAT_DW_2_wide ((wfracbits), _FP_MUL_MEAT_2_wide_z, \
			      X, Y, doit); \
      \
      /* Normalize since we know where the msb of the multiplicands \
	 were (bit B), we know that the msb of the product is \
	 at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_z, (wfracbits)-1, \
		      2*(wfracbits)); \
      R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 0); \
      R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 1); \
    } \
  while (0)

/* Given a 1W * 1W => 2W primitive, do the extended multiplication.
   Do only 3 multiplications instead of four.  This one is for machines
   where multiplication is much more expensive than subtraction.

   The three products are X0*Y0, X1*Y1 and (X0+X1)*(Y0+Y1); the middle
   term is recovered by subtracting the first two from the third.  The
   two word sums can each overflow by one bit; those carries are kept
   in _c1 and _c2 and compensated for explicitly.  R is accessed as a
   four-word fraction through _FP_FRAC_WORD_4.  WFRACBITS is not used
   here.  */

#define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_b); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_c); \
      _FP_W_TYPE _FP_MUL_MEAT_DW_2_wide_3mul_d; \
      int _FP_MUL_MEAT_DW_2_wide_3mul_c1; \
      int _FP_MUL_MEAT_DW_2_wide_3mul_c2; \
      \
      /* b_f0 = X0 + X1 (carry in c1), b_f1 = Y0 + Y1 (carry in c2).  */ \
      _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 = X##_f0 + X##_f1; \
      _FP_MUL_MEAT_DW_2_wide_3mul_c1 \
	= _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 < X##_f0; \
      _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 = Y##_f0 + Y##_f1; \
      _FP_MUL_MEAT_DW_2_wide_3mul_c2 \
	= _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 < Y##_f0; \
      doit (_FP_MUL_MEAT_DW_2_wide_3mul_d, _FP_FRAC_WORD_4 (R, 0), \
	    X##_f0, Y##_f0); \
      doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1), \
	    _FP_MUL_MEAT_DW_2_wide_3mul_b_f0, \
	    _FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
      doit (_FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
	    _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, X##_f1, Y##_f1); \
      \
      /* Keep each truncated sum only if the other sum carried.  */ \
      _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 \
	&= -_FP_MUL_MEAT_DW_2_wide_3mul_c2; \
      _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 \
	&= -_FP_MUL_MEAT_DW_2_wide_3mul_c1; \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
		       _FP_FRAC_WORD_4 (R, 1), \
		       (_FP_MUL_MEAT_DW_2_wide_3mul_c1 \
			& _FP_MUL_MEAT_DW_2_wide_3mul_c2), 0, \
		       _FP_MUL_MEAT_DW_2_wide_3mul_d, \
		       0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \
      __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
			_FP_MUL_MEAT_DW_2_wide_3mul_b_f0); \
      __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
			_FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
      /* Subtract X0*Y0 and X1*Y1 to leave the cross term.  */ \
      __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
		       _FP_FRAC_WORD_4 (R, 1), \
		       0, _FP_MUL_MEAT_DW_2_wide_3mul_d, \
		       _FP_FRAC_WORD_4 (R, 0)); \
      __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
		       _FP_FRAC_WORD_4 (R, 1), 0, \
		       _FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
		       _FP_MUL_MEAT_DW_2_wide_3mul_c_f0); \
      /* Finally add X1*Y1 in at word 2.  */ \
      __FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
		       _FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
		       _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, \
		       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2)); \
    } \
  while (0)

/* R = X * Y for two-word fractions with WFRACBITS working bits, using
   the three-multiplication product _FP_MUL_MEAT_DW_2_wide_3mul.  The
   four-word product is shifted right (with sticky bit) by
   WFRACBITS - 1 and its two low words are stored in R.  */
#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_3mul_z); \
      \
      _FP_MUL_MEAT_DW_2_wide_3mul ((wfracbits), \
				   _FP_MUL_MEAT_2_wide_3mul_z, \
				   X, Y, doit); \
      \
      /* Normalize since we know where the msb of the multiplicands \
	 were (bit B), we know that the msb of the product is \
	 at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_3mul_z, \
		      (wfracbits)-1, 2*(wfracbits)); \
      R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 0); \
      R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 1); \
    } \
  while (0)

/* Full product of the two-word fractions X and Y via mpn_mul_n.
   X and Y are copied into two-element arrays (low word first) and
   the result is written to the array R##_f.  WFRACBITS is not used
   here.  */
#define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y) \
  do \
    { \
      _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_x[2]; \
      _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_y[2]; \
      _FP_MUL_MEAT_DW_2_gmp_x[0] = X##_f0; \
      _FP_MUL_MEAT_DW_2_gmp_x[1] = X##_f1; \
      _FP_MUL_MEAT_DW_2_gmp_y[0] = Y##_f0; \
      _FP_MUL_MEAT_DW_2_gmp_y[1] = Y##_f1; \
      \
      mpn_mul_n (R##_f, _FP_MUL_MEAT_DW_2_gmp_x, \
		 _FP_MUL_MEAT_DW_2_gmp_y, 2); \
    } \
  while (0)

/* R = X * Y for two-word fractions with WFRACBITS working bits, using
   _FP_MUL_MEAT_DW_2_gmp for the four-word product.  The product is
   shifted right (with sticky bit) by WFRACBITS - 1 and its two low
   words are stored in R.  */
#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \
  do \
    { \
      _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_gmp_z); \
      \
      _FP_MUL_MEAT_DW_2_gmp ((wfracbits), _FP_MUL_MEAT_2_gmp_z, X, Y); \
      \
      /* Normalize since we know where the msb of the multiplicands \
	 were (bit B), we know that the msb of the product is \
	 at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_gmp_z, (wfracbits)-1, \
		      2*(wfracbits)); \
      R##_f0 = _FP_MUL_MEAT_2_gmp_z_f[0]; \
      R##_f1 = _FP_MUL_MEAT_2_gmp_z_f[1]; \
    } \
  while (0)

/* Do at most 120x120=240 bits multiplication using double floating
   point multiplication.  This is useful if floating point
   multiplication has much bigger throughput than integer multiply.
   It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
   between 106 and 120 only.
   Caller guarantees that X and Y have (1LL << (wfracbits - 1)) set.
   SETFETZ is a macro which will disable all FPU exceptions and set rounding
   towards zero, RESETFE should optionally reset it back.

   X and Y are each cut into five pieces of up to 24 bits (_a240 ..
   _e240 and _f240 .. _j240), scaled by powers of 2^-24, and the nine
   column sums of the schoolbook product are accumulated in doubles.
   The low columns only contribute a sticky bit (_y240); the high
   columns are read back through their integer representation.  */

#define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
  do \
    { \
      static const double _const[] = \
	{ \
	  /* 2^-24 */ 5.9604644775390625e-08, \
	  /* 2^-48 */ 3.5527136788005009e-15, \
	  /* 2^-72 */ 2.1175823681357508e-22, \
	  /* 2^-96 */ 1.2621774483536189e-29, \
	  /* 2^28 */ 2.68435456e+08, \
	  /* 2^4 */ 1.600000e+01, \
	  /* 2^-20 */ 9.5367431640625e-07, \
	  /* 2^-44 */ 5.6843418860808015e-14, \
	  /* 2^-68 */ 3.3881317890172014e-21, \
	  /* 2^-92 */ 2.0194839173657902e-28, \
	  /* 2^-116 */ 1.2037062152420224e-35 \
	}; \
      double _a240, _b240, _c240, _d240, _e240, _f240, \
	_g240, _h240, _i240, _j240, _k240; \
      union { double d; UDItype i; } _l240, _m240, _n240, _o240, \
				       _p240, _q240, _r240, _s240; \
      UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \
      \
      _FP_STATIC_ASSERT ((wfracbits) >= 106 && (wfracbits) <= 120, \
			 "wfracbits out of range"); \
      \
      setfetz; \
      \
      /* Split both operands into 24-bit pieces, least significant \
	 piece in _e240 / _j240.  */ \
      _e240 = (double) (long) (X##_f0 & 0xffffff); \
      _j240 = (double) (long) (Y##_f0 & 0xffffff); \
      _d240 = (double) (long) ((X##_f0 >> 24) & 0xffffff); \
      _i240 = (double) (long) ((Y##_f0 >> 24) & 0xffffff); \
      _c240 = (double) (long) (((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
      _h240 = (double) (long) (((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
      _b240 = (double) (long) ((X##_f1 >> 8) & 0xffffff); \
      _g240 = (double) (long) ((Y##_f1 >> 8) & 0xffffff); \
      _a240 = (double) (long) (X##_f1 >> 32); \
      _f240 = (double) (long) (Y##_f1 >> 32); \
      _e240 *= _const[3]; \
      _j240 *= _const[3]; \
      _d240 *= _const[2]; \
      _i240 *= _const[2]; \
      _c240 *= _const[1]; \
      _h240 *= _const[1]; \
      _b240 *= _const[0]; \
      _g240 *= _const[0]; \
      /* Column sums of the partial products, lowest column first.  */ \
      _s240.d = _e240*_j240; \
      _r240.d = _d240*_j240 + _e240*_i240; \
      _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240; \
      _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240; \
      _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240; \
      _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \
      _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \
      _l240.d = _a240*_g240 + _b240*_f240; \
      _k240 = _a240*_f240; \
      _r240.d += _s240.d; \
      _q240.d += _r240.d; \
      _p240.d += _q240.d; \
      _o240.d += _p240.d; \
      _n240.d += _o240.d; \
      _m240.d += _n240.d; \
      _l240.d += _m240.d; \
      _k240 += _l240.d; \
      _s240.d -= ((_const[10]+_s240.d)-_const[10]); \
      _r240.d -= ((_const[9]+_r240.d)-_const[9]); \
      _q240.d -= ((_const[8]+_q240.d)-_const[8]); \
      _p240.d -= ((_const[7]+_p240.d)-_const[7]); \
      _o240.d += _const[7]; \
      _n240.d += _const[6]; \
      _m240.d += _const[5]; \
      _l240.d += _const[4]; \
      /* Anything left in the four low columns becomes the sticky bit.  */ \
      if (_s240.d != 0.0) \
	_y240 = 1; \
      if (_r240.d != 0.0) \
	_y240 = 1; \
      if (_q240.d != 0.0) \
	_y240 = 1; \
      if (_p240.d != 0.0) \
	_y240 = 1; \
      _t240 = (DItype) _k240; \
      _u240 = _l240.i; \
      _v240 = _m240.i; \
      _w240 = _n240.i; \
      _x240 = _o240.i; \
      R##_f1 = ((_t240 << (128 - ((wfracbits) - 1))) \
		| ((_u240 & 0xffffff) >> (((wfracbits) - 1) - 104))); \
      R##_f0 = (((_u240 & 0xffffff) << (168 - ((wfracbits) - 1))) \
		| ((_v240 & 0xffffff) << (144 - ((wfracbits) - 1))) \
		| ((_w240 & 0xffffff) << (120 - ((wfracbits) - 1))) \
		| ((_x240 & 0xffffff) >> (((wfracbits) - 1) - 96)) \
		| _y240); \
      resetfe; \
    } \
  while (0)

/* Division algorithms: */

/* R = X / Y for two-word fractions, built on udiv_qrnnd and
   umul_ppmm.  The numerator is widened to three words (pre-shifted
   right by one bit when X >= Y, otherwise R##_e is decremented), Y is
   shifted left in place by _FP_WFRACXBITS_<fs>, and the quotient
   words R##_f1 and R##_f0 are produced one after the other, each
   followed by up to two downward corrections.  A nonzero final
   remainder sets _FP_WORK_STICKY in R##_f0.  Note that Y and R##_e
   are modified.  */

#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
  do \
    { \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f2; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f1; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f0; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f1; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f0; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f1; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f0; \
      if (_FP_FRAC_GE_2 (X, Y)) \
	{ \
	  _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1 >> 1; \
	  _FP_DIV_MEAT_2_udiv_n_f1 \
	    = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
	  _FP_DIV_MEAT_2_udiv_n_f0 \
	    = X##_f0 << (_FP_W_TYPE_SIZE - 1); \
	} \
      else \
	{ \
	  R##_e--; \
	  _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1; \
	  _FP_DIV_MEAT_2_udiv_n_f1 = X##_f0; \
	  _FP_DIV_MEAT_2_udiv_n_f0 = 0; \
	} \
      \
      /* Normalize, i.e. make the most significant bit of the \
	 denominator set.  */ \
      _FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs); \
      \
      udiv_qrnnd (R##_f1, _FP_DIV_MEAT_2_udiv_r_f1, \
		  _FP_DIV_MEAT_2_udiv_n_f2, _FP_DIV_MEAT_2_udiv_n_f1, \
		  Y##_f1); \
      umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, _FP_DIV_MEAT_2_udiv_m_f0, \
		 R##_f1, Y##_f0); \
      _FP_DIV_MEAT_2_udiv_r_f0 = _FP_DIV_MEAT_2_udiv_n_f0; \
      if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, _FP_DIV_MEAT_2_udiv_r)) \
	{ \
	  R##_f1--; \
	  _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
			  _FP_DIV_MEAT_2_udiv_r); \
	  if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
	      && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
				_FP_DIV_MEAT_2_udiv_r)) \
	    { \
	      R##_f1--; \
	      _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
			      _FP_DIV_MEAT_2_udiv_r); \
	    } \
	} \
      _FP_FRAC_DEC_2 (_FP_DIV_MEAT_2_udiv_r, _FP_DIV_MEAT_2_udiv_m); \
      \
      if (_FP_DIV_MEAT_2_udiv_r_f1 == Y##_f1) \
	{ \
	  /* This is a special case, not an optimization \
	     (_FP_DIV_MEAT_2_udiv_r/Y##_f1 would not fit into UWtype). \
	     As _FP_DIV_MEAT_2_udiv_r is guaranteed to be < Y, \
	     R##_f0 can be either (UWtype)-1 or (UWtype)-2.  But as we \
	     know what kind of bits it is (sticky, guard, round), \
	     we don't care.  We also don't care what the remainder is, \
	     because the guard bit will be set anyway.  -jj */ \
	  R##_f0 = -1; \
	} \
      else \
	{ \
	  udiv_qrnnd (R##_f0, _FP_DIV_MEAT_2_udiv_r_f1, \
		      _FP_DIV_MEAT_2_udiv_r_f1, \
		      _FP_DIV_MEAT_2_udiv_r_f0, Y##_f1); \
	  umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, \
		     _FP_DIV_MEAT_2_udiv_m_f0, R##_f0, Y##_f0); \
	  _FP_DIV_MEAT_2_udiv_r_f0 = 0; \
	  if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
			     _FP_DIV_MEAT_2_udiv_r)) \
	    { \
	      R##_f0--; \
	      _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
			      _FP_DIV_MEAT_2_udiv_r); \
	      if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
		  && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
				    _FP_DIV_MEAT_2_udiv_r)) \
		{ \
		  R##_f0--; \
		  _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
				  _FP_DIV_MEAT_2_udiv_r); \
		} \
	    } \
	  if (!_FP_FRAC_EQ_2 (_FP_DIV_MEAT_2_udiv_r, \
			      _FP_DIV_MEAT_2_udiv_m)) \
	    R##_f0 |= _FP_WORK_STICKY; \
	} \
    } \
  while (0)


/* Square root algorithms:
   We have just one right now, maybe Newton approximation
   should be added for those machines where division is fast.

   Bit-by-bit square root of the two-word fraction X into R.  Q is the
   current trial bit and is updated by the macro; S and T are two-word
   scratch fractions (S accumulates, T is the trial value).  The first
   loop works on the high word until Q has been shifted down to zero;
   the second restarts Q at the top bit and works on the low word
   until Q reaches _FP_WORK_ROUND.  If a remainder is left,
   _FP_WORK_STICKY is set in R##_f0, and _FP_WORK_ROUND as well when S
   is less than the remainder.  X is destroyed.  */

#define _FP_SQRT_MEAT_2(R, S, T, X, q) \
  do \
    { \
      while (q) \
	{ \
	  T##_f1 = S##_f1 + (q); \
	  if (T##_f1 <= X##_f1) \
	    { \
	      S##_f1 = T##_f1 + (q); \
	      X##_f1 -= T##_f1; \
	      R##_f1 += (q); \
	    } \
	  _FP_FRAC_SLL_2 (X, 1); \
	  (q) >>= 1; \
	} \
      (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
      while ((q) != _FP_WORK_ROUND) \
	{ \
	  T##_f0 = S##_f0 + (q); \
	  T##_f1 = S##_f1; \
	  if (T##_f1 < X##_f1 \
	      || (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
	    { \
	      S##_f0 = T##_f0 + (q); \
	      /* Carry out of the low word of S.  */ \
	      S##_f1 += (T##_f0 > S##_f0); \
	      _FP_FRAC_DEC_2 (X, T); \
	      R##_f0 += (q); \
	    } \
	  _FP_FRAC_SLL_2 (X, 1); \
	  (q) >>= 1; \
	} \
      if (X##_f0 | X##_f1) \
	{ \
	  if (S##_f1 < X##_f1 \
	      || (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \
	    R##_f0 |= _FP_WORK_ROUND; \
	  R##_f0 |= _FP_WORK_STICKY; \
	} \
    } \
  while (0)


/* Assembly/disassembly for converting to/from integral types.
   No shifting or overflow handled here.  */

/* Store the fraction X into the integer lvalue R of RSIZE bits.
   When RSIZE fits in one word only the low word is used; otherwise
   R = (X##_f1 << _FP_W_TYPE_SIZE) + X##_f0.  R is evaluated more
   than once.  */
#define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \
  (void) (((rsize) <= _FP_W_TYPE_SIZE) \
	  ? ({ (r) = X##_f0; }) \
	  : ({ \
	      (r) = X##_f1; \
	      (r) <<= _FP_W_TYPE_SIZE; \
	      (r) += X##_f0; \
	    }))

/* Split the integer R of RSIZE bits into the fraction X: the low
   word is R converted to the word type, the high word is
   R >> _FP_W_TYPE_SIZE, or 0 when RSIZE fits in one word.  */
#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \
  do \
    { \
      X##_f0 = (r); \
      X##_f1 = ((rsize) <= _FP_W_TYPE_SIZE \
		? 0 \
		: (r) >> _FP_W_TYPE_SIZE); \
    } \
  while (0)

/* Convert FP values between word sizes.  */

/* One-word D from two-word S: keep the low word only.  */
#define _FP_FRAC_COPY_1_2(D, S)		(D##_f = S##_f0)

/* Two-word D from one-word S: the high word becomes zero.  */
#define _FP_FRAC_COPY_2_1(D, S)		((D##_f0 = S##_f), (D##_f1 = 0))

/* Two-word D from two-word S.  */
#define _FP_FRAC_COPY_2_2(D, S)		_FP_FRAC_COPY_2 (D, S)

#endif /* !SOFT_FP_OP_2_H */