op-2.h revision 1.1.1.3 1 1.1 mrg /* Software floating-point emulation.
2 1.1 mrg Basic two-word fraction declaration and manipulation.
3 1.1.1.3 mrg Copyright (C) 1997-2015 Free Software Foundation, Inc.
4 1.1 mrg This file is part of the GNU C Library.
5 1.1 mrg Contributed by Richard Henderson (rth (at) cygnus.com),
6 1.1 mrg Jakub Jelinek (jj (at) ultra.linux.cz),
7 1.1 mrg David S. Miller (davem (at) redhat.com) and
8 1.1 mrg Peter Maydell (pmaydell (at) chiark.greenend.org.uk).
9 1.1 mrg
10 1.1 mrg The GNU C Library is free software; you can redistribute it and/or
11 1.1 mrg modify it under the terms of the GNU Lesser General Public
12 1.1 mrg License as published by the Free Software Foundation; either
13 1.1 mrg version 2.1 of the License, or (at your option) any later version.
14 1.1 mrg
15 1.1 mrg In addition to the permissions in the GNU Lesser General Public
16 1.1 mrg License, the Free Software Foundation gives you unlimited
17 1.1 mrg permission to link the compiled version of this file into
18 1.1 mrg combinations with other programs, and to distribute those
19 1.1 mrg combinations without any restriction coming from the use of this
20 1.1 mrg file. (The Lesser General Public License restrictions do apply in
21 1.1 mrg other respects; for example, they cover modification of the file,
22 1.1 mrg and distribution when not linked into a combine executable.)
23 1.1 mrg
24 1.1 mrg The GNU C Library is distributed in the hope that it will be useful,
25 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of
26 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 1.1 mrg Lesser General Public License for more details.
28 1.1 mrg
29 1.1 mrg You should have received a copy of the GNU Lesser General Public
30 1.1 mrg License along with the GNU C Library; if not, see
31 1.1 mrg <http://www.gnu.org/licenses/>. */
32 1.1 mrg
33 1.1.1.3 mrg #ifndef SOFT_FP_OP_2_H
34 1.1.1.3 mrg #define SOFT_FP_OP_2_H 1
35 1.1.1.3 mrg
/* A two-word fraction X lives in a pair of _FP_W_TYPE variables:
   X##_f0 is the low word and X##_f1 is the high word.  */

/* Declare both words of X, each followed by whatever _FP_ZERO_INIT
   expands to.  No trailing semicolon is supplied.  */
#define _FP_FRAC_DECL_2(X)                      \
  _FP_W_TYPE X##_f0 _FP_ZERO_INIT,              \
    X##_f1 _FP_ZERO_INIT

/* Copy S into D, low word first; the value of the expression is the
   high word just stored.  */
#define _FP_FRAC_COPY_2(D, S)                   \
  (D##_f0 = S##_f0,                             \
   D##_f1 = S##_f1)

/* I is forwarded as-is so that an argument expanding to a
   "high, low" pair (for instance _FP_ZEROFRAC_2) supplies the two
   trailing arguments of __FP_FRAC_SET_2.  */
#define _FP_FRAC_SET_2(X, I)    __FP_FRAC_SET_2 (X, I)

/* Word accessors; W in _FP_FRAC_WORD_2 is pasted, so it must be the
   literal token 0 or 1.  */
#define _FP_FRAC_HIGH_2(X)      (X##_f1)
#define _FP_FRAC_LOW_2(X)       (X##_f0)
#define _FP_FRAC_WORD_2(X, w)   (X##_f##w)
43 1.1 mrg
/* Shift the two-word fraction X left by N bits in place.

   N must satisfy 0 <= N < 2 * _FP_W_TYPE_SIZE; N is evaluated more
   than once.  A compile-time constant N of 1 is done with additions.

   The bits carried from the low word into the high word are obtained
   with a two-step shift, (X##_f0 >> 1) >> (_FP_W_TYPE_SIZE - 1 - N),
   so that N == 0 never produces a shift by the full word width
   (which is undefined in C) and simply leaves X unchanged.  This
   assumes _FP_W_TYPE is an unsigned type -- TODO confirm.  */
#define _FP_FRAC_SLL_2(X, N)                                            \
  (void) (((N) < _FP_W_TYPE_SIZE)                                       \
          ? ({                                                          \
              if (__builtin_constant_p (N) && (N) == 1)                 \
                {                                                       \
                  X##_f1 = (X##_f1 + X##_f1                             \
                            + (((_FP_WS_TYPE) (X##_f0)) < 0));          \
                  X##_f0 += X##_f0;                                     \
                }                                                       \
              else                                                      \
                {                                                       \
                  X##_f1 = (X##_f1 << (N)                               \
                            | ((X##_f0 >> 1)                            \
                               >> (_FP_W_TYPE_SIZE - 1 - (N))));        \
                  X##_f0 <<= (N);                                       \
                }                                                       \
              0;                                                        \
            })                                                          \
          : ({                                                          \
              X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);               \
              X##_f0 = 0;                                               \
            }))
63 1.1.1.2 mrg
64 1.1.1.2 mrg
/* Shift the two-word fraction X right by N bits in place, discarding
   the bits shifted out.

   N must satisfy 0 <= N < 2 * _FP_W_TYPE_SIZE; N is evaluated more
   than once.  The bits carried from the high word into the low word
   use the two-step shift (X##_f1 << 1) << (_FP_W_TYPE_SIZE - 1 - N),
   so N == 0 never shifts by the full word width (undefined in C) and
   leaves X unchanged.  Assumes _FP_W_TYPE is unsigned -- TODO
   confirm.  */
#define _FP_FRAC_SRL_2(X, N)                                            \
  (void) (((N) < _FP_W_TYPE_SIZE)                                       \
          ? ({                                                          \
              X##_f0 = (X##_f0 >> (N)                                   \
                        | ((X##_f1 << 1)                                \
                           << (_FP_W_TYPE_SIZE - 1 - (N))));            \
              X##_f1 >>= (N);                                           \
            })                                                          \
          : ({                                                          \
              X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);               \
              X##_f1 = 0;                                               \
            }))
75 1.1 mrg
/* Right shift with a separate sticky result.

   Shift the two-word fraction X right by N bits in place and store in
   S a nonzero value iff any of the bits shifted out was set.  Unlike
   _FP_FRAC_SRS_2, the sticky information is not ORed into X.  SZ is
   not used by this macro.

   N must satisfy 0 <= N < 2 * _FP_W_TYPE_SIZE; N is evaluated more
   than once.  Left shifts that would have a count of
   _FP_W_TYPE_SIZE - N are done in two steps ((v << 1) << (size - 1 - N))
   so that N == 0 is well defined: X is left unchanged and S is 0.
   Assumes _FP_W_TYPE is unsigned -- TODO confirm.  */
#define _FP_FRAC_SRST_2(X, S, N, sz)                                    \
  (void) (((N) < _FP_W_TYPE_SIZE)                                       \
          ? ({                                                          \
              S = (__builtin_constant_p (N) && (N) == 1                 \
                   ? X##_f0 & 1                                         \
                   : ((X##_f0 << 1)                                     \
                      << (_FP_W_TYPE_SIZE - 1 - (N))) != 0);            \
              X##_f0 = (((X##_f1 << 1)                                  \
                         << (_FP_W_TYPE_SIZE - 1 - (N)))                \
                        | X##_f0 >> (N));                               \
              X##_f1 >>= (N);                                           \
            })                                                          \
          : ({                                                          \
              /* For N == size nothing of the high word is lost.  */    \
              S = ((((N) == _FP_W_TYPE_SIZE                             \
                     ? 0                                                \
                     : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))           \
                    | X##_f0) != 0);                                    \
              X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE));             \
              X##_f1 = 0;                                               \
            }))
94 1.1.1.2 mrg
/* Right shift with sticky lsb.

   Shift the two-word fraction X right by N bits in place; if any of
   the bits shifted out was set, the least significant bit of the
   result is set.  SZ is not used by this macro.

   N must satisfy 0 <= N < 2 * _FP_W_TYPE_SIZE; N is evaluated more
   than once.  Left shifts that would have a count of
   _FP_W_TYPE_SIZE - N are done in two steps ((v << 1) << (size - 1 - N))
   so that N == 0 is well defined and leaves X unchanged.  Assumes
   _FP_W_TYPE is unsigned -- TODO confirm.  */
#define _FP_FRAC_SRS_2(X, N, sz)                                        \
  (void) (((N) < _FP_W_TYPE_SIZE)                                       \
          ? ({                                                          \
              X##_f0 = (((X##_f1 << 1)                                  \
                         << (_FP_W_TYPE_SIZE - 1 - (N)))                \
                        | X##_f0 >> (N)                                 \
                        | (__builtin_constant_p (N) && (N) == 1         \
                           ? X##_f0 & 1                                 \
                           : ((X##_f0 << 1)                             \
                              << (_FP_W_TYPE_SIZE - 1 - (N))) != 0));   \
              X##_f1 >>= (N);                                           \
            })                                                          \
          : ({                                                          \
              /* For N == size nothing of the high word is lost.  */    \
              X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)               \
                        | ((((N) == _FP_W_TYPE_SIZE                     \
                             ? 0                                        \
                             : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))   \
                            | X##_f0) != 0));                           \
              X##_f1 = 0;                                               \
            }))
112 1.1.1.2 mrg
/* Arithmetic on whole two-word fractions.  Each macro merely splits
   its fraction arguments into (high, low) word pairs and hands them
   to the matching __FP_FRAC_*_2 primitive.  */

/* X += I.  */
#define _FP_FRAC_ADDI_2(X, I)                                   \
  __FP_FRAC_ADDI_2 (X##_f1, X##_f0, I)

/* R = X + Y.  */
#define _FP_FRAC_ADD_2(R, X, Y)                                 \
  __FP_FRAC_ADD_2 (R##_f1, R##_f0,                              \
                   X##_f1, X##_f0,                              \
                   Y##_f1, Y##_f0)

/* R = X - Y.  */
#define _FP_FRAC_SUB_2(R, X, Y)                                 \
  __FP_FRAC_SUB_2 (R##_f1, R##_f0,                              \
                   X##_f1, X##_f0,                              \
                   Y##_f1, Y##_f0)

/* X -= Y.  */
#define _FP_FRAC_DEC_2(X, Y)                                    \
  __FP_FRAC_DEC_2 (X##_f1, X##_f0,                              \
                   Y##_f1, Y##_f0)
124 1.1.1.2 mrg
/* Store in R the number of leading zero bits of the two-word fraction
   X.  When the high word is zero the count comes from the low word,
   plus one full word.  __FP_CLZ is never applied to the high word
   when that word is zero.  */
#define _FP_FRAC_CLZ_2(R, X)                    \
  do                                            \
    {                                           \
      if (!X##_f1)                              \
        {                                       \
          __FP_CLZ ((R), X##_f0);               \
          (R) += _FP_W_TYPE_SIZE;               \
        }                                       \
      else                                      \
        __FP_CLZ ((R), X##_f1);                 \
    }                                           \
  while (0)
137 1.1 mrg
/* Predicates.  */

/* True iff the high word, reinterpreted as _FP_WS_TYPE, is negative.  */
#define _FP_FRAC_NEGP_2(X)      (((_FP_WS_TYPE) X##_f1) < 0)

/* True iff both words are zero.  */
#define _FP_FRAC_ZEROP_2(X)     (!(X##_f0 | X##_f1))

/* Test, respectively clear, the _FP_OVERFLOW_##fs bits in the high
   word as selected by _FP_FRAC_HIGH_##fs.  */
#define _FP_FRAC_OVERP_2(fs, X)                 \
  (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
#define _FP_FRAC_CLEAR_OVERP_2(fs, X)           \
  (_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)

/* Test the _FP_HIGHBIT_DW_##fs bits of a double-width fraction.  */
#define _FP_FRAC_HIGHBIT_DW_2(fs, X)            \
  (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)

/* Comparisons of two fractions as double-word numbers: the high
   words decide unless they are equal.  */
#define _FP_FRAC_EQ_2(X, Y)                     \
  (X##_f0 == Y##_f0 && X##_f1 == Y##_f1)
#define _FP_FRAC_GT_2(X, Y)                     \
  (X##_f1 != Y##_f1 ? X##_f1 > Y##_f1 : X##_f0 > Y##_f0)
#define _FP_FRAC_GE_2(X, Y)                     \
  (X##_f1 != Y##_f1 ? X##_f1 > Y##_f1 : X##_f0 >= Y##_f0)

/* "high, low" initializer pairs, in the argument order expected by
   __FP_FRAC_SET_2.  */
#define _FP_ZEROFRAC_2  0, 0
#define _FP_MINFRAC_2   0, 1
#define _FP_MAXFRAC_2   (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)
154 1.1 mrg
/* Internals.  */

/* Store I1 in the high word and I0 in the low word of X.  The low
   word is written first; the value of the expression is the high
   word.  */
#define __FP_FRAC_SET_2(X, I1, I0)              \
  (X##_f0 = (I0),                               \
   X##_f1 = (I1))
158 1.1.1.2 mrg
/* Store in R the number of leading zero bits of the double word
   XH:XL.  When XH is zero the count is taken from XL and one full
   word is added; __FP_CLZ is never applied to a zero XH.  */
#define __FP_CLZ_2(R, xh, xl)                   \
  do                                            \
    {                                           \
      if (!(xh))                                \
        {                                       \
          __FP_CLZ ((R), xl);                   \
          (R) += _FP_W_TYPE_SIZE;               \
        }                                       \
      else                                      \
        __FP_CLZ ((R), xh);                     \
    }                                           \
  while (0)
171 1.1 mrg
/* Double-word add/subtract primitives used by the _FP_FRAC_*_2
   wrappers.  Arguments are individual words, high word first.

   The "#if 0" branch below is never compiled.  It holds plain C
   versions that derive the carry/borrow from an unsigned comparison
   and that would only be defined if no definition already existed.
   The "#else" branch is the one in effect: it discards any earlier
   definition and maps every primitive onto add_ssaaaa / sub_ddmmss,
   which are not defined in this file.  */
172 1.1 mrg #if 0
173 1.1 mrg
174 1.1.1.2 mrg # ifndef __FP_FRAC_ADDI_2
175 1.1.1.2 mrg # define __FP_FRAC_ADDI_2(xh, xl, i) \
176 1.1 mrg (xh += ((xl += i) < i))
177 1.1.1.2 mrg # endif
178 1.1.1.2 mrg # ifndef __FP_FRAC_ADD_2
179 1.1.1.2 mrg # define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
180 1.1 mrg (rh = xh + yh + ((rl = xl + yl) < xl))
181 1.1.1.2 mrg # endif
182 1.1.1.2 mrg # ifndef __FP_FRAC_SUB_2
183 1.1.1.2 mrg # define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
184 1.1 mrg (rh = xh - yh - ((rl = xl - yl) > xl))
185 1.1.1.2 mrg # endif
186 1.1.1.2 mrg # ifndef __FP_FRAC_DEC_2
187 1.1.1.2 mrg # define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
188 1.1.1.2 mrg do \
189 1.1.1.2 mrg { \
190 1.1.1.2 mrg UWtype __FP_FRAC_DEC_2_t = xl; \
191 1.1.1.2 mrg xh -= yh + ((xl -= yl) > __FP_FRAC_DEC_2_t); \
192 1.1.1.2 mrg } \
193 1.1.1.2 mrg while (0)
194 1.1.1.2 mrg # endif
195 1.1 mrg
196 1.1 mrg #else
197 1.1 mrg
/* ADDI adds the single word I (with a zero high word) to xh:xl in
   place; DEC subtracts yh:yl from xh:xl in place.  ADD and SUB are
   plain aliases, so their argument lists are passed through
   unchanged.  */
198 1.1.1.2 mrg # undef __FP_FRAC_ADDI_2
199 1.1.1.2 mrg # define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa (xh, xl, xh, xl, 0, i)
200 1.1.1.2 mrg # undef __FP_FRAC_ADD_2
201 1.1.1.2 mrg # define __FP_FRAC_ADD_2 add_ssaaaa
202 1.1.1.2 mrg # undef __FP_FRAC_SUB_2
203 1.1.1.2 mrg # define __FP_FRAC_SUB_2 sub_ddmmss
204 1.1.1.2 mrg # undef __FP_FRAC_DEC_2
205 1.1.1.2 mrg # define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
206 1.1.1.2 mrg sub_ddmmss (xh, xl, xh, xl, yh, yl)
207 1.1 mrg
208 1.1 mrg #endif
209 1.1 mrg
/* Split the native floating-point value VAL into its raw fields
   without classifying or normalizing anything: the value is viewed
   through union _FP_UNION_##fs and its sign, exponent and two
   fraction words are copied to X##_s, X##_e, X##_f1 and X##_f0.  */

#define _FP_UNPACK_RAW_2(fs, X, val)                            \
  do                                                            \
    {                                                           \
      union _FP_UNION_##fs _FP_UNPACK_RAW_2_u;                  \
                                                                \
      _FP_UNPACK_RAW_2_u.flt = (val);                           \
      X##_s = _FP_UNPACK_RAW_2_u.bits.sign;                     \
      X##_e = _FP_UNPACK_RAW_2_u.bits.exp;                      \
      X##_f1 = _FP_UNPACK_RAW_2_u.bits.frac1;                   \
      X##_f0 = _FP_UNPACK_RAW_2_u.bits.frac0;                   \
    }                                                           \
  while (0)
225 1.1 mrg
/* Like _FP_UNPACK_RAW_2, but VAL is a pointer: it is cast to a
   pointer to union _FP_UNION_##fs and the fields are read through it
   without copying the value first.  */
#define _FP_UNPACK_RAW_2_P(fs, X, val)                          \
  do                                                            \
    {                                                           \
      union _FP_UNION_##fs *_FP_UNPACK_RAW_2_P_u                \
        = (union _FP_UNION_##fs *) (val);                       \
                                                                \
      X##_s = _FP_UNPACK_RAW_2_P_u->bits.sign;                  \
      X##_e = _FP_UNPACK_RAW_2_P_u->bits.exp;                   \
      X##_f1 = _FP_UNPACK_RAW_2_P_u->bits.frac1;                \
      X##_f0 = _FP_UNPACK_RAW_2_P_u->bits.frac0;                \
    }                                                           \
  while (0)
238 1.1.1.2 mrg
239 1.1.1.2 mrg
/* Reassemble a native floating-point value from raw fields: X##_s,
   X##_e, X##_f1 and X##_f0 are stored into the bit-fields of a
   temporary union _FP_UNION_##fs, whose floating-point member is then
   assigned to VAL.  */

#define _FP_PACK_RAW_2(fs, val, X)                              \
  do                                                            \
    {                                                           \
      union _FP_UNION_##fs _FP_PACK_RAW_2_u;                    \
                                                                \
      _FP_PACK_RAW_2_u.bits.sign = X##_s;                       \
      _FP_PACK_RAW_2_u.bits.exp = X##_e;                        \
      _FP_PACK_RAW_2_u.bits.frac1 = X##_f1;                     \
      _FP_PACK_RAW_2_u.bits.frac0 = X##_f0;                     \
                                                                \
      (val) = _FP_PACK_RAW_2_u.flt;                             \
    }                                                           \
  while (0)
255 1.1 mrg
/* Like _FP_PACK_RAW_2, but VAL is a pointer: it is cast to a pointer
   to union _FP_UNION_##fs and the bit-fields of the pointed-to object
   are overwritten directly.  */
#define _FP_PACK_RAW_2_P(fs, val, X)                            \
  do                                                            \
    {                                                           \
      union _FP_UNION_##fs *_FP_PACK_RAW_2_P_u                  \
        = (union _FP_UNION_##fs *) (val);                       \
                                                                \
      _FP_PACK_RAW_2_P_u->bits.sign = X##_s;                    \
      _FP_PACK_RAW_2_P_u->bits.exp = X##_e;                     \
      _FP_PACK_RAW_2_P_u->bits.frac1 = X##_f1;                  \
      _FP_PACK_RAW_2_P_u->bits.frac0 = X##_f0;                  \
    }                                                           \
  while (0)
268 1.1 mrg
269 1.1 mrg
270 1.1.1.2 mrg /* Multiplication algorithms: */
271 1.1 mrg
272 1.1 mrg /* Given a 1W * 1W => 2W primitive, do the extended multiplication.

   R names a four-word fraction (accessed with _FP_FRAC_WORD_4), X and
   Y are two-word fractions.  DOIT is invoked as
   doit (high, low, a, b); presumably it stores the double-word
   product a * b in high:low -- TODO confirm against the primitive
   the callers pass.  WFRACBITS is not used by this macro.

   Four partial products are formed first: X0 * Y0 goes to words 1:0
   of R, X1 * Y1 to words 3:2, and the cross products X0 * Y1 and
   X1 * Y0 to local temporaries.  Each cross product is then added
   into words 3..1 of R with __FP_FRAC_ADD_3 (assumed to take the
   result triple followed by the two operand triples, high word
   first).  */
273 1.1 mrg
274 1.1.1.2 mrg #define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit) \
275 1.1.1.2 mrg do \
276 1.1.1.2 mrg { \
277 1.1.1.2 mrg _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_b); \
278 1.1.1.2 mrg _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_c); \
279 1.1.1.2 mrg \
280 1.1.1.2 mrg doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0), \
281 1.1.1.2 mrg X##_f0, Y##_f0); \
282 1.1.1.2 mrg doit (_FP_MUL_MEAT_DW_2_wide_b_f1, _FP_MUL_MEAT_DW_2_wide_b_f0, \
283 1.1.1.2 mrg X##_f0, Y##_f1); \
284 1.1.1.2 mrg doit (_FP_MUL_MEAT_DW_2_wide_c_f1, _FP_MUL_MEAT_DW_2_wide_c_f0, \
285 1.1.1.2 mrg X##_f1, Y##_f0); \
286 1.1.1.2 mrg doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
287 1.1.1.2 mrg X##_f1, Y##_f1); \
288 1.1.1.2 mrg \
289 1.1.1.2 mrg __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
290 1.1.1.2 mrg _FP_FRAC_WORD_4 (R, 1), 0, \
291 1.1.1.2 mrg _FP_MUL_MEAT_DW_2_wide_b_f1, \
292 1.1.1.2 mrg _FP_MUL_MEAT_DW_2_wide_b_f0, \
293 1.1.1.2 mrg _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
294 1.1.1.2 mrg _FP_FRAC_WORD_4 (R, 1)); \
295 1.1.1.2 mrg __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
296 1.1.1.2 mrg _FP_FRAC_WORD_4 (R, 1), 0, \
297 1.1.1.2 mrg _FP_MUL_MEAT_DW_2_wide_c_f1, \
298 1.1.1.2 mrg _FP_MUL_MEAT_DW_2_wide_c_f0, \
299 1.1.1.2 mrg _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
300 1.1.1.2 mrg _FP_FRAC_WORD_4 (R, 1)); \
301 1.1.1.2 mrg } \
302 1.1.1.2 mrg while (0)
303 1.1.1.2 mrg
/* Multiply the two-word fractions X and Y into the two-word fraction
   R: form the full four-word product with _FP_MUL_MEAT_DW_2_wide,
   shift it right (keeping a sticky bit) by WFRACBITS - 1 and keep the
   two low words.  DOIT is handed on unchanged.  */
#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)                   \
  do                                                                    \
    {                                                                   \
      _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_prod);                       \
                                                                        \
      _FP_MUL_MEAT_DW_2_wide ((wfracbits), _FP_MUL_MEAT_2_wide_prod,    \
                              X, Y, doit);                              \
                                                                        \
      /* Normalize: since we know where the msb of the multiplicands    \
         was (bit B), we know that the msb of the product is at         \
         either 2B or 2B-1.  */                                         \
      _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_prod, (wfracbits)-1,          \
                      2*(wfracbits));                                   \
      R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_prod, 1);           \
      R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_prod, 0);           \
    }                                                                   \
  while (0)
321 1.1 mrg
322 1.1 mrg /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
323 1.1 mrg Do only 3 multiplications instead of four. This one is for machines
324 1.1 mrg where multiplication is much more expensive than subtraction.

   R names a four-word fraction (accessed with _FP_FRAC_WORD_4), X and
   Y are two-word fractions; WFRACBITS is not used by this macro.
   DOIT is invoked three times: on the low words, on the high words,
   and on the word-sized sums X0 + X1 and Y0 + Y1.  The carries out of
   those two sums are kept in the _c1 / _c2 temporaries and used to
   patch up the middle product, from which the low and high products
   are then subtracted before the high product is added into words
   3:2.  NOTE(review): this looks like a Karatsuba-style scheme --
   confirm before relying on that description.  */
325 1.1 mrg
326 1.1.1.2 mrg #define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit) \
327 1.1.1.2 mrg do \
328 1.1.1.2 mrg { \
329 1.1.1.2 mrg _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_b); \
330 1.1.1.2 mrg _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_c); \
331 1.1.1.2 mrg _FP_W_TYPE _FP_MUL_MEAT_DW_2_wide_3mul_d; \
332 1.1.1.2 mrg int _FP_MUL_MEAT_DW_2_wide_3mul_c1; \
333 1.1.1.2 mrg int _FP_MUL_MEAT_DW_2_wide_3mul_c2; \
334 1.1.1.2 mrg \
335 1.1.1.2 mrg _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 = X##_f0 + X##_f1; \
336 1.1.1.2 mrg _FP_MUL_MEAT_DW_2_wide_3mul_c1 \
337 1.1.1.2 mrg = _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 < X##_f0; \
338 1.1.1.2 mrg _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 = Y##_f0 + Y##_f1; \
339 1.1.1.2 mrg _FP_MUL_MEAT_DW_2_wide_3mul_c2 \
340 1.1.1.2 mrg = _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 < Y##_f0; \
341 1.1.1.2 mrg doit (_FP_MUL_MEAT_DW_2_wide_3mul_d, _FP_FRAC_WORD_4 (R, 0), \
342 1.1.1.2 mrg X##_f0, Y##_f0); \
343 1.1.1.2 mrg doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1), \
344 1.1.1.2 mrg _FP_MUL_MEAT_DW_2_wide_3mul_b_f0, \
345 1.1.1.2 mrg _FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
346 1.1.1.2 mrg doit (_FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
347 1.1.1.2 mrg _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, X##_f1, Y##_f1); \
348 1.1.1.2 mrg \
349 1.1.1.2 mrg _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 \
350 1.1.1.2 mrg &= -_FP_MUL_MEAT_DW_2_wide_3mul_c2; \
351 1.1.1.2 mrg _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 \
352 1.1.1.2 mrg &= -_FP_MUL_MEAT_DW_2_wide_3mul_c1; \
353 1.1.1.2 mrg __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
354 1.1.1.2 mrg _FP_FRAC_WORD_4 (R, 1), \
355 1.1.1.2 mrg (_FP_MUL_MEAT_DW_2_wide_3mul_c1 \
356 1.1.1.2 mrg & _FP_MUL_MEAT_DW_2_wide_3mul_c2), 0, \
357 1.1.1.2 mrg _FP_MUL_MEAT_DW_2_wide_3mul_d, \
358 1.1.1.2 mrg 0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \
359 1.1.1.2 mrg __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
360 1.1.1.2 mrg _FP_MUL_MEAT_DW_2_wide_3mul_b_f0); \
361 1.1.1.2 mrg __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
362 1.1.1.2 mrg _FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
363 1.1.1.2 mrg __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
364 1.1.1.2 mrg _FP_FRAC_WORD_4 (R, 1), \
365 1.1.1.2 mrg 0, _FP_MUL_MEAT_DW_2_wide_3mul_d, \
366 1.1.1.2 mrg _FP_FRAC_WORD_4 (R, 0)); \
367 1.1.1.2 mrg __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
368 1.1.1.2 mrg _FP_FRAC_WORD_4 (R, 1), 0, \
369 1.1.1.2 mrg _FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
370 1.1.1.2 mrg _FP_MUL_MEAT_DW_2_wide_3mul_c_f0); \
371 1.1.1.2 mrg __FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
372 1.1.1.2 mrg _FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
373 1.1.1.2 mrg _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, \
374 1.1.1.2 mrg _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2)); \
375 1.1.1.2 mrg } \
376 1.1.1.2 mrg while (0)
377 1.1.1.2 mrg
/* Multiply the two-word fractions X and Y into the two-word fraction
   R: form the full four-word product with
   _FP_MUL_MEAT_DW_2_wide_3mul, shift it right (keeping a sticky bit)
   by WFRACBITS - 1 and keep the two low words.  DOIT is handed on
   unchanged.  */
#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)              \
  do                                                                    \
    {                                                                   \
      _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_3mul_prod);                  \
                                                                        \
      _FP_MUL_MEAT_DW_2_wide_3mul ((wfracbits),                         \
                                   _FP_MUL_MEAT_2_wide_3mul_prod,       \
                                   X, Y, doit);                         \
                                                                        \
      /* Normalize: since we know where the msb of the multiplicands    \
         was (bit B), we know that the msb of the product is at         \
         either 2B or 2B-1.  */                                         \
      _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_3mul_prod,                    \
                      (wfracbits)-1, 2*(wfracbits));                    \
      R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_prod, 1);      \
      R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_prod, 0);      \
    }                                                                   \
  while (0)
396 1.1.1.2 mrg
/* Full-width multiplication through mpn_mul_n (not defined in this
   file).  The words of X and Y are copied, low word first, into two
   local two-element arrays, and mpn_mul_n is called with R##_f as the
   destination and a length of 2.  WFRACBITS is not used.  */
#define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y)                       \
  do                                                                    \
    {                                                                   \
      _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_x[2] = { X##_f0, X##_f1 };       \
      _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_y[2] = { Y##_f0, Y##_f1 };       \
                                                                        \
      mpn_mul_n (R##_f, _FP_MUL_MEAT_DW_2_gmp_x,                        \
                 _FP_MUL_MEAT_DW_2_gmp_y, 2);                           \
    }                                                                   \
  while (0)
411 1.1 mrg
/* Multiply the two-word fractions X and Y into the two-word fraction
   R: form the full four-word product with _FP_MUL_MEAT_DW_2_gmp,
   shift it right (keeping a sticky bit) by WFRACBITS - 1 and keep the
   two low words, which are read directly from the _f array of the
   temporary.  */
#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)                          \
  do                                                                    \
    {                                                                   \
      _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_gmp_prod);                        \
                                                                        \
      _FP_MUL_MEAT_DW_2_gmp ((wfracbits), _FP_MUL_MEAT_2_gmp_prod,      \
                             X, Y);                                     \
                                                                        \
      /* Normalize: since we know where the msb of the multiplicands    \
         was (bit B), we know that the msb of the product is at         \
         either 2B or 2B-1.  */                                         \
      _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_gmp_prod, (wfracbits)-1,           \
                      2*(wfracbits));                                   \
      R##_f1 = _FP_MUL_MEAT_2_gmp_prod_f[1];                            \
      R##_f0 = _FP_MUL_MEAT_2_gmp_prod_f[0];                            \
    }                                                                   \
  while (0)
428 1.1 mrg
/* Do at most 120x120=240 bits multiplication using double floating
   point multiplication.  This is useful if floating point
   multiplication has much bigger throughput than integer multiply.
   It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
   between 106 and 120 only (the range is checked with
   _FP_STATIC_ASSERT).
   Caller guarantees that X and Y have bit (wfracbits - 1) set.
   SETFETZ is a macro which will disable all FPU exceptions and set rounding
   towards zero, RESETFE should optionally reset it back.

   X and Y are cut into five pieces each (four of 24 bits plus the
   remaining top bits), converted to double and scaled; the piece
   products are summed per weight and the resulting digits are read
   back from the low 24 bits of the doubles' bit patterns.  Any
   nonzero contribution below the kept digits sets the lsb of R##_f0.

   WFRACBITS may be an arbitrary constant expression; it is always
   used parenthesized.  */

#define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
  do                                                                    \
    {                                                                   \
      static const double _const[] =                                    \
        {                                                               \
          /* 2^-24 */ 5.9604644775390625e-08,                           \
          /* 2^-48 */ 3.5527136788005009e-15,                           \
          /* 2^-72 */ 2.1175823681357508e-22,                           \
          /* 2^-96 */ 1.2621774483536189e-29,                           \
          /* 2^28 */ 2.68435456e+08,                                    \
          /* 2^4 */ 1.600000e+01,                                       \
          /* 2^-20 */ 9.5367431640625e-07,                              \
          /* 2^-44 */ 5.6843418860808015e-14,                           \
          /* 2^-68 */ 3.3881317890172014e-21,                           \
          /* 2^-92 */ 2.0194839173657902e-28,                           \
          /* 2^-116 */ 1.2037062152420224e-35                           \
        };                                                              \
      double _a240, _b240, _c240, _d240, _e240, _f240,                  \
        _g240, _h240, _i240, _j240, _k240;                              \
      union { double d; UDItype i; } _l240, _m240, _n240, _o240,        \
        _p240, _q240, _r240, _s240;                                     \
      UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0;             \
                                                                        \
      _FP_STATIC_ASSERT ((wfracbits) >= 106 && (wfracbits) <= 120,      \
                         "wfracbits out of range");                     \
                                                                        \
      setfetz;                                                          \
                                                                        \
      _e240 = (double) (long) (X##_f0 & 0xffffff);                      \
      _j240 = (double) (long) (Y##_f0 & 0xffffff);                      \
      _d240 = (double) (long) ((X##_f0 >> 24) & 0xffffff);              \
      _i240 = (double) (long) ((Y##_f0 >> 24) & 0xffffff);              \
      _c240 = (double) (long) (((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
      _h240 = (double) (long) (((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
      _b240 = (double) (long) ((X##_f1 >> 8) & 0xffffff);               \
      _g240 = (double) (long) ((Y##_f1 >> 8) & 0xffffff);               \
      _a240 = (double) (long) (X##_f1 >> 32);                           \
      _f240 = (double) (long) (Y##_f1 >> 32);                           \
      _e240 *= _const[3];                                               \
      _j240 *= _const[3];                                               \
      _d240 *= _const[2];                                               \
      _i240 *= _const[2];                                               \
      _c240 *= _const[1];                                               \
      _h240 *= _const[1];                                               \
      _b240 *= _const[0];                                               \
      _g240 *= _const[0];                                               \
      _s240.d = _e240*_j240;                                            \
      _r240.d = _d240*_j240 + _e240*_i240;                              \
      _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240;                \
      _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;  \
      _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240; \
      _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240;  \
      _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240;                \
      _l240.d = _a240*_g240 + _b240*_f240;                              \
      _k240 = _a240*_f240;                                              \
      _r240.d += _s240.d;                                               \
      _q240.d += _r240.d;                                               \
      _p240.d += _q240.d;                                               \
      _o240.d += _p240.d;                                               \
      _n240.d += _o240.d;                                               \
      _m240.d += _n240.d;                                               \
      _l240.d += _m240.d;                                               \
      _k240 += _l240.d;                                                 \
      _s240.d -= ((_const[10]+_s240.d)-_const[10]);                     \
      _r240.d -= ((_const[9]+_r240.d)-_const[9]);                       \
      _q240.d -= ((_const[8]+_q240.d)-_const[8]);                       \
      _p240.d -= ((_const[7]+_p240.d)-_const[7]);                       \
      _o240.d += _const[7];                                             \
      _n240.d += _const[6];                                             \
      _m240.d += _const[5];                                             \
      _l240.d += _const[4];                                             \
      if (_s240.d != 0.0)                                               \
        _y240 = 1;                                                      \
      if (_r240.d != 0.0)                                               \
        _y240 = 1;                                                      \
      if (_q240.d != 0.0)                                               \
        _y240 = 1;                                                      \
      if (_p240.d != 0.0)                                               \
        _y240 = 1;                                                      \
      _t240 = (DItype) _k240;                                           \
      _u240 = _l240.i;                                                  \
      _v240 = _m240.i;                                                  \
      _w240 = _n240.i;                                                  \
      _x240 = _o240.i;                                                  \
      R##_f1 = ((_t240 << (128 - ((wfracbits) - 1)))                    \
                | ((_u240 & 0xffffff) >> (((wfracbits) - 1) - 104)));   \
      R##_f0 = (((_u240 & 0xffffff) << (168 - ((wfracbits) - 1)))       \
                | ((_v240 & 0xffffff) << (144 - ((wfracbits) - 1)))     \
                | ((_w240 & 0xffffff) << (120 - ((wfracbits) - 1)))     \
                | ((_x240 & 0xffffff) >> (((wfracbits) - 1) - 96))      \
                | _y240);                                               \
      resetfe;                                                          \
    }                                                                   \
  while (0)
532 1.1.1.2 mrg
533 1.1.1.2 mrg /* Division algorithms: */
534 1.1 mrg
/* Divide the two-word fraction X by the two-word fraction Y using the
   udiv_qrnnd and umul_ppmm primitives (not defined in this file),
   leaving the quotient in R##_f1:R##_f0.  FS selects
   _FP_WFRACXBITS_##fs.

   Side effects besides R's fraction: R##_e is decremented when
   X < Y, and Y is shifted left in place by _FP_WFRACXBITS_##fs.

   The numerator is a three-word value: X shifted right by one bit
   when X >= Y, otherwise X placed in the two upper words.  Each
   quotient word is first estimated by dividing by Y##_f1 alone and is
   then corrected downwards at most twice after comparing against the
   product of the estimate and Y##_f0.  For the low word, if the final
   remainder differs from that product, _FP_WORK_STICKY is ORed into
   R##_f0.  */
535 1.1 mrg #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
536 1.1.1.2 mrg do \
537 1.1.1.2 mrg { \
538 1.1.1.2 mrg _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f2; \
539 1.1.1.2 mrg _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f1; \
540 1.1.1.2 mrg _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f0; \
541 1.1.1.2 mrg _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f1; \
542 1.1.1.2 mrg _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f0; \
543 1.1.1.2 mrg _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f1; \
544 1.1.1.2 mrg _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f0; \
545 1.1.1.2 mrg if (_FP_FRAC_GE_2 (X, Y)) \
546 1.1.1.2 mrg { \
547 1.1.1.2 mrg _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1 >> 1; \
548 1.1.1.2 mrg _FP_DIV_MEAT_2_udiv_n_f1 \
549 1.1.1.2 mrg = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
550 1.1.1.2 mrg _FP_DIV_MEAT_2_udiv_n_f0 \
551 1.1.1.2 mrg = X##_f0 << (_FP_W_TYPE_SIZE - 1); \
552 1.1.1.2 mrg } \
553 1.1.1.2 mrg else \
554 1.1.1.2 mrg { \
555 1.1.1.2 mrg R##_e--; \
556 1.1.1.2 mrg _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1; \
557 1.1.1.2 mrg _FP_DIV_MEAT_2_udiv_n_f1 = X##_f0; \
558 1.1.1.2 mrg _FP_DIV_MEAT_2_udiv_n_f0 = 0; \
559 1.1.1.2 mrg } \
560 1.1.1.2 mrg \
561 1.1.1.2 mrg /* Normalize, i.e. make the most significant bit of the \
562 1.1.1.2 mrg denominator set. */ \
563 1.1.1.2 mrg _FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs); \
564 1.1.1.2 mrg \
565 1.1.1.2 mrg udiv_qrnnd (R##_f1, _FP_DIV_MEAT_2_udiv_r_f1, \
566 1.1.1.2 mrg _FP_DIV_MEAT_2_udiv_n_f2, _FP_DIV_MEAT_2_udiv_n_f1, \
567 1.1.1.2 mrg Y##_f1); \
568 1.1.1.2 mrg umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, _FP_DIV_MEAT_2_udiv_m_f0, \
569 1.1.1.2 mrg R##_f1, Y##_f0); \
570 1.1.1.2 mrg _FP_DIV_MEAT_2_udiv_r_f0 = _FP_DIV_MEAT_2_udiv_n_f0; \
571 1.1.1.2 mrg if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, _FP_DIV_MEAT_2_udiv_r)) \
572 1.1.1.2 mrg { \
573 1.1.1.2 mrg R##_f1--; \
574 1.1.1.2 mrg _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
575 1.1.1.2 mrg _FP_DIV_MEAT_2_udiv_r); \
576 1.1.1.2 mrg if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
577 1.1.1.2 mrg && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
578 1.1.1.2 mrg _FP_DIV_MEAT_2_udiv_r)) \
579 1.1.1.2 mrg { \
580 1.1.1.2 mrg R##_f1--; \
581 1.1.1.2 mrg _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
582 1.1.1.2 mrg _FP_DIV_MEAT_2_udiv_r); \
583 1.1.1.2 mrg } \
584 1.1.1.2 mrg } \
585 1.1.1.2 mrg _FP_FRAC_DEC_2 (_FP_DIV_MEAT_2_udiv_r, _FP_DIV_MEAT_2_udiv_m); \
586 1.1.1.2 mrg \
587 1.1.1.2 mrg if (_FP_DIV_MEAT_2_udiv_r_f1 == Y##_f1) \
588 1.1.1.2 mrg { \
589 1.1.1.2 mrg /* This is a special case, not an optimization \
590 1.1.1.2 mrg (_FP_DIV_MEAT_2_udiv_r/Y##_f1 would not fit into UWtype). \
591 1.1.1.2 mrg As _FP_DIV_MEAT_2_udiv_r is guaranteed to be < Y, \
592 1.1.1.2 mrg R##_f0 can be either (UWtype)-1 or (UWtype)-2. But as we \
593 1.1.1.2 mrg know what kind of bits it is (sticky, guard, round), \
594 1.1.1.2 mrg we don't care. We also don't care what the remainder is, \
595 1.1.1.2 mrg because the guard bit will be set anyway. -jj */ \
596 1.1.1.2 mrg R##_f0 = -1; \
597 1.1.1.2 mrg } \
598 1.1.1.2 mrg else \
599 1.1.1.2 mrg { \
600 1.1.1.2 mrg udiv_qrnnd (R##_f0, _FP_DIV_MEAT_2_udiv_r_f1, \
601 1.1.1.2 mrg _FP_DIV_MEAT_2_udiv_r_f1, \
602 1.1.1.2 mrg _FP_DIV_MEAT_2_udiv_r_f0, Y##_f1); \
603 1.1.1.2 mrg umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, \
604 1.1.1.2 mrg _FP_DIV_MEAT_2_udiv_m_f0, R##_f0, Y##_f0); \
605 1.1.1.2 mrg _FP_DIV_MEAT_2_udiv_r_f0 = 0; \
606 1.1.1.2 mrg if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
607 1.1.1.2 mrg _FP_DIV_MEAT_2_udiv_r)) \
608 1.1.1.2 mrg { \
609 1.1.1.2 mrg R##_f0--; \
610 1.1.1.2 mrg _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
611 1.1.1.2 mrg _FP_DIV_MEAT_2_udiv_r); \
612 1.1.1.2 mrg if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
613 1.1.1.2 mrg && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
614 1.1.1.2 mrg _FP_DIV_MEAT_2_udiv_r)) \
615 1.1.1.2 mrg { \
616 1.1.1.2 mrg R##_f0--; \
617 1.1.1.2 mrg _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
618 1.1.1.2 mrg _FP_DIV_MEAT_2_udiv_r); \
619 1.1.1.2 mrg } \
620 1.1.1.2 mrg } \
621 1.1.1.2 mrg if (!_FP_FRAC_EQ_2 (_FP_DIV_MEAT_2_udiv_r, \
622 1.1.1.2 mrg _FP_DIV_MEAT_2_udiv_m)) \
623 1.1.1.2 mrg R##_f0 |= _FP_WORK_STICKY; \
624 1.1.1.2 mrg } \
625 1.1.1.2 mrg } \
626 1.1.1.2 mrg while (0)
627 1.1.1.2 mrg
628 1.1.1.2 mrg
629 1.1.1.2 mrg /* Square root algorithms:
630 1.1.1.2 mrg We have just one right now, maybe Newton approximation
631 1.1.1.2 mrg should be added for those machines where division is fast.

   _FP_SQRT_MEAT_2 produces the result one bit at a time.  R, S, T and
   X are two-word fractions and Q is a modifiable word-sized lvalue;
   all of them are updated in place.

   The first loop works on the high words only and runs until Q has
   been shifted down to zero.  Q is then reset to the top bit of a
   word and the second loop works on the full two-word values until Q
   equals _FP_WORK_ROUND.  In every step T = S + Q is compared with X;
   when T <= X, S and R are advanced by Q and T is subtracted from X.
   X is shifted left by one bit and Q right by one bit after each
   step.

   Afterwards, if X is nonzero, _FP_WORK_STICKY is set in R##_f0, and
   _FP_WORK_ROUND as well when S < X.  */
632 1.1.1.2 mrg
633 1.1.1.2 mrg #define _FP_SQRT_MEAT_2(R, S, T, X, q) \
634 1.1.1.2 mrg do \
635 1.1.1.2 mrg { \
636 1.1.1.2 mrg while (q) \
637 1.1.1.2 mrg { \
638 1.1.1.2 mrg T##_f1 = S##_f1 + (q); \
639 1.1.1.2 mrg if (T##_f1 <= X##_f1) \
640 1.1.1.2 mrg { \
641 1.1.1.2 mrg S##_f1 = T##_f1 + (q); \
642 1.1.1.2 mrg X##_f1 -= T##_f1; \
643 1.1.1.2 mrg R##_f1 += (q); \
644 1.1.1.2 mrg } \
645 1.1.1.2 mrg _FP_FRAC_SLL_2 (X, 1); \
646 1.1.1.2 mrg (q) >>= 1; \
647 1.1.1.2 mrg } \
648 1.1.1.2 mrg (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
649 1.1.1.2 mrg while ((q) != _FP_WORK_ROUND) \
650 1.1.1.2 mrg { \
651 1.1.1.2 mrg T##_f0 = S##_f0 + (q); \
652 1.1.1.2 mrg T##_f1 = S##_f1; \
653 1.1.1.2 mrg if (T##_f1 < X##_f1 \
654 1.1.1.2 mrg || (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
655 1.1.1.2 mrg { \
656 1.1.1.2 mrg S##_f0 = T##_f0 + (q); \
657 1.1.1.2 mrg S##_f1 += (T##_f0 > S##_f0); \
658 1.1.1.2 mrg _FP_FRAC_DEC_2 (X, T); \
659 1.1.1.2 mrg R##_f0 += (q); \
660 1.1.1.2 mrg } \
661 1.1.1.2 mrg _FP_FRAC_SLL_2 (X, 1); \
662 1.1.1.2 mrg (q) >>= 1; \
663 1.1.1.2 mrg } \
664 1.1.1.2 mrg if (X##_f0 | X##_f1) \
665 1.1.1.2 mrg { \
666 1.1.1.2 mrg if (S##_f1 < X##_f1 \
667 1.1.1.2 mrg || (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \
668 1.1.1.2 mrg R##_f0 |= _FP_WORK_ROUND; \
669 1.1.1.2 mrg R##_f0 |= _FP_WORK_STICKY; \
670 1.1.1.2 mrg } \
671 1.1.1.2 mrg } \
672 1.1.1.2 mrg while (0)
673 1.1.1.2 mrg
674 1.1.1.2 mrg
/* Assembly/disassembly for converting to/from integral types.
   No shifting or overflow handled here.  */

/* Store the fraction X in the integer lvalue R, whose width in bits
   is RSIZE.  When R is no wider than one word only the low word is
   used; otherwise R receives the high word shifted up by one word
   plus the low word.  R is evaluated more than once.  */
#define _FP_FRAC_ASSEMBLE_2(r, X, rsize)                        \
  (void) (((rsize) <= _FP_W_TYPE_SIZE)                          \
          ? ({ (r) = X##_f0; })                                 \
          : ({                                                  \
              (r) = X##_f1;                                     \
              (r) = ((r) << _FP_W_TYPE_SIZE) + X##_f0;          \
            }))
686 1.1.1.2 mrg
/* Load the fraction X from the integer R, whose width in bits is
   RSIZE.  The low word always receives R (converted to the word
   type); the high word receives R shifted right by one word when R is
   wider than a word and zero otherwise.  R is evaluated more than
   once.  */
#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)                     \
  do                                                            \
    {                                                           \
      X##_f0 = (r);                                             \
      X##_f1 = ((rsize) > _FP_W_TYPE_SIZE                       \
                ? (r) >> _FP_W_TYPE_SIZE                        \
                : 0);                                           \
    }                                                           \
  while (0)
696 1.1.1.2 mrg
/* Convert FP values between word sizes.  */

/* Two words to one: only the low word is kept.  */
#define _FP_FRAC_COPY_1_2(D, S)                 \
  (D##_f = S##_f0)

/* One word to two: the high word is cleared.  */
#define _FP_FRAC_COPY_2_1(D, S)                 \
  ((D##_f0 = S##_f),                            \
   (D##_f1 = 0))

/* Two words to two: a plain copy.  */
#define _FP_FRAC_COPY_2_2(D, S)                 \
  _FP_FRAC_COPY_2 (D, S)
704 1.1.1.3 mrg
705 1.1.1.3 mrg #endif /* !SOFT_FP_OP_2_H */
706