softfloat.c revision 1.2 1 1.2 christos /* $NetBSD: softfloat.c,v 1.2 2012/03/21 14:17:54 christos Exp $ */
2 1.1 bjh21
3 1.1 bjh21 /*
4 1.1 bjh21 * This version hacked for use with gcc -msoft-float by bjh21.
5 1.1 bjh21 * (Mostly a case of #ifdefing out things GCC doesn't need or provides
6 1.1 bjh21 * itself).
7 1.1 bjh21 */
8 1.1 bjh21
9 1.1 bjh21 /*
10 1.1 bjh21 * Things you may want to define:
11 1.1 bjh21 *
12 1.1 bjh21 * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
13 1.1 bjh21 * -msoft-float) to work. Include "softfloat-for-gcc.h" to get them
14 1.1 bjh21 * properly renamed.
15 1.1 bjh21 */
16 1.1 bjh21
17 1.1 bjh21 /*
18 1.1 bjh21 * This differs from the standard bits32/softfloat.c in that float64
19 1.1 bjh21 * is defined to be a 64-bit integer rather than a structure. The
20 1.1 bjh21 * structure is float64s, with translation between the two going via
21 1.1 bjh21 * float64u.
22 1.1 bjh21 */
23 1.1 bjh21
24 1.1 bjh21 /*
25 1.1 bjh21 ===============================================================================
26 1.1 bjh21
27 1.1 bjh21 This C source file is part of the SoftFloat IEC/IEEE Floating-Point
28 1.1 bjh21 Arithmetic Package, Release 2a.
29 1.1 bjh21
30 1.1 bjh21 Written by John R. Hauser. This work was made possible in part by the
31 1.1 bjh21 International Computer Science Institute, located at Suite 600, 1947 Center
32 1.1 bjh21 Street, Berkeley, California 94704. Funding was partially provided by the
33 1.1 bjh21 National Science Foundation under grant MIP-9311980. The original version
34 1.1 bjh21 of this code was written as part of a project to build a fixed-point vector
35 1.1 bjh21 processor in collaboration with the University of California at Berkeley,
36 1.1 bjh21 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
37 1.1 bjh21 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
38 1.1 bjh21 arithmetic/SoftFloat.html'.
39 1.1 bjh21
40 1.1 bjh21 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
41 1.1 bjh21 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
42 1.1 bjh21 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
43 1.1 bjh21 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
44 1.1 bjh21 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
45 1.1 bjh21
46 1.1 bjh21 Derivative works are acceptable, even for commercial purposes, so long as
47 1.1 bjh21 (1) they include prominent notice that the work is derivative, and (2) they
48 1.1 bjh21 include prominent notice akin to these four paragraphs for those parts of
49 1.1 bjh21 this code that are retained.
50 1.1 bjh21
51 1.1 bjh21 ===============================================================================
52 1.1 bjh21 */
53 1.1 bjh21
54 1.1 bjh21 #include <sys/cdefs.h>
55 1.1 bjh21 #if defined(LIBC_SCCS) && !defined(lint)
56 1.2 christos __RCSID("$NetBSD: softfloat.c,v 1.2 2012/03/21 14:17:54 christos Exp $");
57 1.1 bjh21 #endif /* LIBC_SCCS and not lint */
58 1.1 bjh21
59 1.1 bjh21 #ifdef SOFTFLOAT_FOR_GCC
60 1.1 bjh21 #include "softfloat-for-gcc.h"
61 1.1 bjh21 #endif
62 1.1 bjh21
63 1.1 bjh21 #include "milieu.h"
64 1.1 bjh21 #include "softfloat.h"
65 1.1 bjh21
66 1.1 bjh21 /*
67 1.1 bjh21 * Conversions between floats as stored in memory and floats as
68 1.1 bjh21 * SoftFloat uses them
69 1.1 bjh21 */
70 1.1 bjh21 #ifndef FLOAT64_DEMANGLE
71 1.1 bjh21 #define FLOAT64_DEMANGLE(a) (a)
72 1.1 bjh21 #endif
73 1.1 bjh21 #ifndef FLOAT64_MANGLE
74 1.1 bjh21 #define FLOAT64_MANGLE(a) (a)
75 1.1 bjh21 #endif
76 1.1 bjh21
77 1.1 bjh21 /*
78 1.1 bjh21 -------------------------------------------------------------------------------
79 1.1 bjh21 Floating-point rounding mode and exception flags.
80 1.1 bjh21 -------------------------------------------------------------------------------
81 1.1 bjh21 */
82 1.1 bjh21 fp_rnd float_rounding_mode = float_round_nearest_even;
83 1.1 bjh21 fp_except float_exception_flags = 0;
84 1.1 bjh21
85 1.1 bjh21 /*
86 1.1 bjh21 -------------------------------------------------------------------------------
87 1.1 bjh21 Primitive arithmetic functions, including multi-word arithmetic, and
88 1.1 bjh21 division and square root approximations. (Can be specialized to target if
89 1.1 bjh21 desired.)
90 1.1 bjh21 -------------------------------------------------------------------------------
91 1.1 bjh21 */
92 1.1 bjh21 #include "softfloat-macros"
93 1.1 bjh21
94 1.1 bjh21 /*
95 1.1 bjh21 -------------------------------------------------------------------------------
96 1.1 bjh21 Functions and definitions to determine: (1) whether tininess for underflow
97 1.1 bjh21 is detected before or after rounding by default, (2) what (if anything)
98 1.1 bjh21 happens when exceptions are raised, (3) how signaling NaNs are distinguished
99 1.1 bjh21 from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs
100 1.1 bjh21 are propagated from function inputs to output. These details are target-
101 1.1 bjh21 specific.
102 1.1 bjh21 -------------------------------------------------------------------------------
103 1.1 bjh21 */
104 1.1 bjh21 #include "softfloat-specialize"
105 1.1 bjh21
106 1.1 bjh21 /*
107 1.1 bjh21 -------------------------------------------------------------------------------
108 1.1 bjh21 Returns the fraction bits of the single-precision floating-point value `a'.
109 1.1 bjh21 -------------------------------------------------------------------------------
110 1.1 bjh21 */
111 1.1 bjh21 INLINE bits32 extractFloat32Frac( float32 a )
112 1.1 bjh21 {
113 1.1 bjh21
114 1.1 bjh21 return a & 0x007FFFFF;
115 1.1 bjh21
116 1.1 bjh21 }
117 1.1 bjh21
118 1.1 bjh21 /*
119 1.1 bjh21 -------------------------------------------------------------------------------
120 1.1 bjh21 Returns the exponent bits of the single-precision floating-point value `a'.
121 1.1 bjh21 -------------------------------------------------------------------------------
122 1.1 bjh21 */
123 1.1 bjh21 INLINE int16 extractFloat32Exp( float32 a )
124 1.1 bjh21 {
125 1.1 bjh21
126 1.1 bjh21 return ( a>>23 ) & 0xFF;
127 1.1 bjh21
128 1.1 bjh21 }
129 1.1 bjh21
130 1.1 bjh21 /*
131 1.1 bjh21 -------------------------------------------------------------------------------
132 1.1 bjh21 Returns the sign bit of the single-precision floating-point value `a'.
133 1.1 bjh21 -------------------------------------------------------------------------------
134 1.1 bjh21 */
135 1.1 bjh21 INLINE flag extractFloat32Sign( float32 a )
136 1.1 bjh21 {
137 1.1 bjh21
138 1.1 bjh21 return a>>31;
139 1.1 bjh21
140 1.1 bjh21 }
141 1.1 bjh21
142 1.1 bjh21 /*
143 1.1 bjh21 -------------------------------------------------------------------------------
144 1.1 bjh21 Normalizes the subnormal single-precision floating-point value represented
145 1.1 bjh21 by the denormalized significand `aSig'. The normalized exponent and
146 1.1 bjh21 significand are stored at the locations pointed to by `zExpPtr' and
147 1.1 bjh21 `zSigPtr', respectively.
148 1.1 bjh21 -------------------------------------------------------------------------------
149 1.1 bjh21 */
150 1.1 bjh21 static void
151 1.1 bjh21 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
152 1.1 bjh21 {
153 1.1 bjh21 int8 shiftCount;
154 1.1 bjh21
155 1.1 bjh21 shiftCount = countLeadingZeros32( aSig ) - 8;
156 1.1 bjh21 *zSigPtr = aSig<<shiftCount;
157 1.1 bjh21 *zExpPtr = 1 - shiftCount;
158 1.1 bjh21
159 1.1 bjh21 }
160 1.1 bjh21
161 1.1 bjh21 /*
162 1.1 bjh21 -------------------------------------------------------------------------------
163 1.1 bjh21 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
164 1.1 bjh21 single-precision floating-point value, returning the result. After being
165 1.1 bjh21 shifted into the proper positions, the three fields are simply added
166 1.1 bjh21 together to form the result. This means that any integer portion of `zSig'
167 1.1 bjh21 will be added into the exponent. Since a properly normalized significand
168 1.1 bjh21 will have an integer portion equal to 1, the `zExp' input should be 1 less
169 1.1 bjh21 than the desired result exponent whenever `zSig' is a complete, normalized
170 1.1 bjh21 significand.
171 1.1 bjh21 -------------------------------------------------------------------------------
172 1.1 bjh21 */
173 1.1 bjh21 INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
174 1.1 bjh21 {
175 1.1 bjh21
176 1.1 bjh21 return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
177 1.1 bjh21
178 1.1 bjh21 }
179 1.1 bjh21
180 1.1 bjh21 /*
181 1.1 bjh21 -------------------------------------------------------------------------------
182 1.1 bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
183 1.1 bjh21 and significand `zSig', and returns the proper single-precision floating-
184 1.1 bjh21 point value corresponding to the abstract input. Ordinarily, the abstract
185 1.1 bjh21 value is simply rounded and packed into the single-precision format, with
186 1.1 bjh21 the inexact exception raised if the abstract input cannot be represented
187 1.1 bjh21 exactly. However, if the abstract value is too large, the overflow and
188 1.1 bjh21 inexact exceptions are raised and an infinity or maximal finite value is
189 1.1 bjh21 returned. If the abstract value is too small, the input value is rounded to
190 1.1 bjh21 a subnormal number, and the underflow and inexact exceptions are raised if
191 1.1 bjh21 the abstract input cannot be represented exactly as a subnormal single-
192 1.1 bjh21 precision floating-point number.
193 1.1 bjh21 The input significand `zSig' has its binary point between bits 30
194 1.1 bjh21 and 29, which is 7 bits to the left of the usual location. This shifted
195 1.1 bjh21 significand must be normalized or smaller. If `zSig' is not normalized,
196 1.1 bjh21 `zExp' must be 0; in that case, the result returned is a subnormal number,
197 1.1 bjh21 and it must not require rounding. In the usual case that `zSig' is
198 1.1 bjh21 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
199 1.1 bjh21 The handling of underflow and overflow follows the IEC/IEEE Standard for
200 1.1 bjh21 Binary Floating-Point Arithmetic.
201 1.1 bjh21 -------------------------------------------------------------------------------
202 1.1 bjh21 */
203 1.1 bjh21 static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
204 1.1 bjh21 {
205 1.1 bjh21 int8 roundingMode;
206 1.1 bjh21 flag roundNearestEven;
207 1.1 bjh21 int8 roundIncrement, roundBits;
208 1.1 bjh21 flag isTiny;
209 1.1 bjh21
210 1.1 bjh21 roundingMode = float_rounding_mode;
211 1.1 bjh21 roundNearestEven = roundingMode == float_round_nearest_even;
212 1.1 bjh21 roundIncrement = 0x40;
213 1.1 bjh21 if ( ! roundNearestEven ) {
214 1.1 bjh21 if ( roundingMode == float_round_to_zero ) {
215 1.1 bjh21 roundIncrement = 0;
216 1.1 bjh21 }
217 1.1 bjh21 else {
218 1.1 bjh21 roundIncrement = 0x7F;
219 1.1 bjh21 if ( zSign ) {
220 1.1 bjh21 if ( roundingMode == float_round_up ) roundIncrement = 0;
221 1.1 bjh21 }
222 1.1 bjh21 else {
223 1.1 bjh21 if ( roundingMode == float_round_down ) roundIncrement = 0;
224 1.1 bjh21 }
225 1.1 bjh21 }
226 1.1 bjh21 }
227 1.1 bjh21 roundBits = zSig & 0x7F;
228 1.1 bjh21 if ( 0xFD <= (bits16) zExp ) {
229 1.1 bjh21 if ( ( 0xFD < zExp )
230 1.1 bjh21 || ( ( zExp == 0xFD )
231 1.1 bjh21 && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
232 1.1 bjh21 ) {
233 1.1 bjh21 float_raise( float_flag_overflow | float_flag_inexact );
234 1.1 bjh21 return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
235 1.1 bjh21 }
236 1.1 bjh21 if ( zExp < 0 ) {
237 1.1 bjh21 isTiny =
238 1.1 bjh21 ( float_detect_tininess == float_tininess_before_rounding )
239 1.1 bjh21 || ( zExp < -1 )
240 1.2 christos || ( zSig + roundIncrement < (uint32)0x80000000 );
241 1.1 bjh21 shift32RightJamming( zSig, - zExp, &zSig );
242 1.1 bjh21 zExp = 0;
243 1.1 bjh21 roundBits = zSig & 0x7F;
244 1.1 bjh21 if ( isTiny && roundBits ) float_raise( float_flag_underflow );
245 1.1 bjh21 }
246 1.1 bjh21 }
247 1.1 bjh21 if ( roundBits ) float_exception_flags |= float_flag_inexact;
248 1.1 bjh21 zSig = ( zSig + roundIncrement )>>7;
249 1.1 bjh21 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
250 1.1 bjh21 if ( zSig == 0 ) zExp = 0;
251 1.1 bjh21 return packFloat32( zSign, zExp, zSig );
252 1.1 bjh21
253 1.1 bjh21 }
254 1.1 bjh21
255 1.1 bjh21 /*
256 1.1 bjh21 -------------------------------------------------------------------------------
257 1.1 bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
258 1.1 bjh21 and significand `zSig', and returns the proper single-precision floating-
259 1.1 bjh21 point value corresponding to the abstract input. This routine is just like
260 1.1 bjh21 `roundAndPackFloat32' except that `zSig' does not have to be normalized.
261 1.1 bjh21 Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
262 1.1 bjh21 floating-point exponent.
263 1.1 bjh21 -------------------------------------------------------------------------------
264 1.1 bjh21 */
265 1.1 bjh21 static float32
266 1.1 bjh21 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
267 1.1 bjh21 {
268 1.1 bjh21 int8 shiftCount;
269 1.1 bjh21
270 1.1 bjh21 shiftCount = countLeadingZeros32( zSig ) - 1;
271 1.1 bjh21 return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
272 1.1 bjh21
273 1.1 bjh21 }
274 1.1 bjh21
275 1.1 bjh21 /*
276 1.1 bjh21 -------------------------------------------------------------------------------
277 1.1 bjh21 Returns the least-significant 32 fraction bits of the double-precision
278 1.1 bjh21 floating-point value `a'.
279 1.1 bjh21 -------------------------------------------------------------------------------
280 1.1 bjh21 */
281 1.1 bjh21 INLINE bits32 extractFloat64Frac1( float64 a )
282 1.1 bjh21 {
283 1.1 bjh21
284 1.2 christos return (bits32)(FLOAT64_DEMANGLE(a) & LIT64(0x00000000FFFFFFFF));
285 1.1 bjh21
286 1.1 bjh21 }
287 1.1 bjh21
288 1.1 bjh21 /*
289 1.1 bjh21 -------------------------------------------------------------------------------
290 1.1 bjh21 Returns the most-significant 20 fraction bits of the double-precision
291 1.1 bjh21 floating-point value `a'.
292 1.1 bjh21 -------------------------------------------------------------------------------
293 1.1 bjh21 */
294 1.1 bjh21 INLINE bits32 extractFloat64Frac0( float64 a )
295 1.1 bjh21 {
296 1.1 bjh21
297 1.2 christos return (bits32)((FLOAT64_DEMANGLE(a) >> 32) & 0x000FFFFF);
298 1.1 bjh21
299 1.1 bjh21 }
300 1.1 bjh21
301 1.1 bjh21 /*
302 1.1 bjh21 -------------------------------------------------------------------------------
303 1.1 bjh21 Returns the exponent bits of the double-precision floating-point value `a'.
304 1.1 bjh21 -------------------------------------------------------------------------------
305 1.1 bjh21 */
306 1.1 bjh21 INLINE int16 extractFloat64Exp( float64 a )
307 1.1 bjh21 {
308 1.1 bjh21
309 1.2 christos return (int16)((FLOAT64_DEMANGLE(a) >> 52) & 0x7FF);
310 1.1 bjh21
311 1.1 bjh21 }
312 1.1 bjh21
313 1.1 bjh21 /*
314 1.1 bjh21 -------------------------------------------------------------------------------
315 1.1 bjh21 Returns the sign bit of the double-precision floating-point value `a'.
316 1.1 bjh21 -------------------------------------------------------------------------------
317 1.1 bjh21 */
318 1.1 bjh21 INLINE flag extractFloat64Sign( float64 a )
319 1.1 bjh21 {
320 1.1 bjh21
321 1.2 christos return (flag)(FLOAT64_DEMANGLE(a) >> 63);
322 1.1 bjh21
323 1.1 bjh21 }
324 1.1 bjh21
325 1.1 bjh21 /*
326 1.1 bjh21 -------------------------------------------------------------------------------
327 1.1 bjh21 Normalizes the subnormal double-precision floating-point value represented
328 1.1 bjh21 by the denormalized significand formed by the concatenation of `aSig0' and
329 1.1 bjh21 `aSig1'. The normalized exponent is stored at the location pointed to by
330 1.1 bjh21 `zExpPtr'. The most significant 21 bits of the normalized significand are
331 1.1 bjh21 stored at the location pointed to by `zSig0Ptr', and the least significant
332 1.1 bjh21 32 bits of the normalized significand are stored at the location pointed to
333 1.1 bjh21 by `zSig1Ptr'.
334 1.1 bjh21 -------------------------------------------------------------------------------
335 1.1 bjh21 */
336 1.1 bjh21 static void
337 1.1 bjh21 normalizeFloat64Subnormal(
338 1.1 bjh21 bits32 aSig0,
339 1.1 bjh21 bits32 aSig1,
340 1.1 bjh21 int16 *zExpPtr,
341 1.1 bjh21 bits32 *zSig0Ptr,
342 1.1 bjh21 bits32 *zSig1Ptr
343 1.1 bjh21 )
344 1.1 bjh21 {
345 1.1 bjh21 int8 shiftCount;
346 1.1 bjh21
347 1.1 bjh21 if ( aSig0 == 0 ) {
348 1.1 bjh21 shiftCount = countLeadingZeros32( aSig1 ) - 11;
349 1.1 bjh21 if ( shiftCount < 0 ) {
350 1.1 bjh21 *zSig0Ptr = aSig1>>( - shiftCount );
351 1.1 bjh21 *zSig1Ptr = aSig1<<( shiftCount & 31 );
352 1.1 bjh21 }
353 1.1 bjh21 else {
354 1.1 bjh21 *zSig0Ptr = aSig1<<shiftCount;
355 1.1 bjh21 *zSig1Ptr = 0;
356 1.1 bjh21 }
357 1.1 bjh21 *zExpPtr = - shiftCount - 31;
358 1.1 bjh21 }
359 1.1 bjh21 else {
360 1.1 bjh21 shiftCount = countLeadingZeros32( aSig0 ) - 11;
361 1.1 bjh21 shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
362 1.1 bjh21 *zExpPtr = 1 - shiftCount;
363 1.1 bjh21 }
364 1.1 bjh21
365 1.1 bjh21 }
366 1.1 bjh21
367 1.1 bjh21 /*
368 1.1 bjh21 -------------------------------------------------------------------------------
369 1.1 bjh21 Packs the sign `zSign', the exponent `zExp', and the significand formed by
370 1.1 bjh21 the concatenation of `zSig0' and `zSig1' into a double-precision floating-
371 1.1 bjh21 point value, returning the result. After being shifted into the proper
372 1.1 bjh21 positions, the three fields `zSign', `zExp', and `zSig0' are simply added
373 1.1 bjh21 together to form the most significant 32 bits of the result. This means
374 1.1 bjh21 that any integer portion of `zSig0' will be added into the exponent. Since
375 1.1 bjh21 a properly normalized significand will have an integer portion equal to 1,
376 1.1 bjh21 the `zExp' input should be 1 less than the desired result exponent whenever
377 1.1 bjh21 `zSig0' and `zSig1' concatenated form a complete, normalized significand.
378 1.1 bjh21 -------------------------------------------------------------------------------
379 1.1 bjh21 */
380 1.1 bjh21 INLINE float64
381 1.1 bjh21 packFloat64( flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
382 1.1 bjh21 {
383 1.1 bjh21
384 1.1 bjh21 return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
385 1.1 bjh21 ( ( (bits64) zExp )<<52 ) +
386 1.1 bjh21 ( ( (bits64) zSig0 )<<32 ) + zSig1 );
387 1.1 bjh21
388 1.1 bjh21
389 1.1 bjh21 }
390 1.1 bjh21
391 1.1 bjh21 /*
392 1.1 bjh21 -------------------------------------------------------------------------------
393 1.1 bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
394 1.1 bjh21 and extended significand formed by the concatenation of `zSig0', `zSig1',
395 1.1 bjh21 and `zSig2', and returns the proper double-precision floating-point value
396 1.1 bjh21 corresponding to the abstract input. Ordinarily, the abstract value is
397 1.1 bjh21 simply rounded and packed into the double-precision format, with the inexact
398 1.1 bjh21 exception raised if the abstract input cannot be represented exactly.
399 1.1 bjh21 However, if the abstract value is too large, the overflow and inexact
400 1.1 bjh21 exceptions are raised and an infinity or maximal finite value is returned.
401 1.1 bjh21 If the abstract value is too small, the input value is rounded to a
402 1.1 bjh21 subnormal number, and the underflow and inexact exceptions are raised if the
403 1.1 bjh21 abstract input cannot be represented exactly as a subnormal double-precision
404 1.1 bjh21 floating-point number.
405 1.1 bjh21 The input significand must be normalized or smaller. If the input
406 1.1 bjh21 significand is not normalized, `zExp' must be 0; in that case, the result
407 1.1 bjh21 returned is a subnormal number, and it must not require rounding. In the
408 1.1 bjh21 usual case that the input significand is normalized, `zExp' must be 1 less
409 1.1 bjh21 than the ``true'' floating-point exponent. The handling of underflow and
410 1.1 bjh21 overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
411 1.1 bjh21 -------------------------------------------------------------------------------
412 1.1 bjh21 */
413 1.1 bjh21 static float64
414 1.1 bjh21 roundAndPackFloat64(
415 1.1 bjh21 flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 )
416 1.1 bjh21 {
417 1.1 bjh21 int8 roundingMode;
418 1.1 bjh21 flag roundNearestEven, increment, isTiny;
419 1.1 bjh21
420 1.1 bjh21 roundingMode = float_rounding_mode;
421 1.1 bjh21 roundNearestEven = ( roundingMode == float_round_nearest_even );
422 1.1 bjh21 increment = ( (sbits32) zSig2 < 0 );
423 1.1 bjh21 if ( ! roundNearestEven ) {
424 1.1 bjh21 if ( roundingMode == float_round_to_zero ) {
425 1.1 bjh21 increment = 0;
426 1.1 bjh21 }
427 1.1 bjh21 else {
428 1.1 bjh21 if ( zSign ) {
429 1.1 bjh21 increment = ( roundingMode == float_round_down ) && zSig2;
430 1.1 bjh21 }
431 1.1 bjh21 else {
432 1.1 bjh21 increment = ( roundingMode == float_round_up ) && zSig2;
433 1.1 bjh21 }
434 1.1 bjh21 }
435 1.1 bjh21 }
436 1.1 bjh21 if ( 0x7FD <= (bits16) zExp ) {
437 1.1 bjh21 if ( ( 0x7FD < zExp )
438 1.1 bjh21 || ( ( zExp == 0x7FD )
439 1.1 bjh21 && eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 )
440 1.1 bjh21 && increment
441 1.1 bjh21 )
442 1.1 bjh21 ) {
443 1.1 bjh21 float_raise( float_flag_overflow | float_flag_inexact );
444 1.1 bjh21 if ( ( roundingMode == float_round_to_zero )
445 1.1 bjh21 || ( zSign && ( roundingMode == float_round_up ) )
446 1.1 bjh21 || ( ! zSign && ( roundingMode == float_round_down ) )
447 1.1 bjh21 ) {
448 1.1 bjh21 return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF );
449 1.1 bjh21 }
450 1.1 bjh21 return packFloat64( zSign, 0x7FF, 0, 0 );
451 1.1 bjh21 }
452 1.1 bjh21 if ( zExp < 0 ) {
453 1.1 bjh21 isTiny =
454 1.1 bjh21 ( float_detect_tininess == float_tininess_before_rounding )
455 1.1 bjh21 || ( zExp < -1 )
456 1.1 bjh21 || ! increment
457 1.1 bjh21 || lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF );
458 1.1 bjh21 shift64ExtraRightJamming(
459 1.1 bjh21 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
460 1.1 bjh21 zExp = 0;
461 1.1 bjh21 if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
462 1.1 bjh21 if ( roundNearestEven ) {
463 1.1 bjh21 increment = ( (sbits32) zSig2 < 0 );
464 1.1 bjh21 }
465 1.1 bjh21 else {
466 1.1 bjh21 if ( zSign ) {
467 1.1 bjh21 increment = ( roundingMode == float_round_down ) && zSig2;
468 1.1 bjh21 }
469 1.1 bjh21 else {
470 1.1 bjh21 increment = ( roundingMode == float_round_up ) && zSig2;
471 1.1 bjh21 }
472 1.1 bjh21 }
473 1.1 bjh21 }
474 1.1 bjh21 }
475 1.1 bjh21 if ( zSig2 ) float_exception_flags |= float_flag_inexact;
476 1.1 bjh21 if ( increment ) {
477 1.1 bjh21 add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
478 1.1 bjh21 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
479 1.1 bjh21 }
480 1.1 bjh21 else {
481 1.1 bjh21 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
482 1.1 bjh21 }
483 1.1 bjh21 return packFloat64( zSign, zExp, zSig0, zSig1 );
484 1.1 bjh21
485 1.1 bjh21 }
486 1.1 bjh21
487 1.1 bjh21 /*
488 1.1 bjh21 -------------------------------------------------------------------------------
489 1.1 bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
490 1.1 bjh21 and significand formed by the concatenation of `zSig0' and `zSig1', and
491 1.1 bjh21 returns the proper double-precision floating-point value corresponding
492 1.1 bjh21 to the abstract input. This routine is just like `roundAndPackFloat64'
493 1.1 bjh21 except that the input significand has fewer bits and does not have to be
494 1.1 bjh21 normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
495 1.1 bjh21 point exponent.
496 1.1 bjh21 -------------------------------------------------------------------------------
497 1.1 bjh21 */
498 1.1 bjh21 static float64
499 1.1 bjh21 normalizeRoundAndPackFloat64(
500 1.1 bjh21 flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
501 1.1 bjh21 {
502 1.1 bjh21 int8 shiftCount;
503 1.1 bjh21 bits32 zSig2;
504 1.1 bjh21
505 1.1 bjh21 if ( zSig0 == 0 ) {
506 1.1 bjh21 zSig0 = zSig1;
507 1.1 bjh21 zSig1 = 0;
508 1.1 bjh21 zExp -= 32;
509 1.1 bjh21 }
510 1.1 bjh21 shiftCount = countLeadingZeros32( zSig0 ) - 11;
511 1.1 bjh21 if ( 0 <= shiftCount ) {
512 1.1 bjh21 zSig2 = 0;
513 1.1 bjh21 shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
514 1.1 bjh21 }
515 1.1 bjh21 else {
516 1.1 bjh21 shift64ExtraRightJamming(
517 1.1 bjh21 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
518 1.1 bjh21 }
519 1.1 bjh21 zExp -= shiftCount;
520 1.1 bjh21 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
521 1.1 bjh21
522 1.1 bjh21 }
523 1.1 bjh21
524 1.1 bjh21 /*
525 1.1 bjh21 -------------------------------------------------------------------------------
526 1.1 bjh21 Returns the result of converting the 32-bit two's complement integer `a' to
527 1.1 bjh21 the single-precision floating-point format. The conversion is performed
528 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
529 1.1 bjh21 -------------------------------------------------------------------------------
530 1.1 bjh21 */
531 1.1 bjh21 float32 int32_to_float32( int32 a )
532 1.1 bjh21 {
533 1.1 bjh21 flag zSign;
534 1.1 bjh21
535 1.1 bjh21 if ( a == 0 ) return 0;
536 1.1 bjh21 if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
537 1.1 bjh21 zSign = ( a < 0 );
538 1.2 christos return normalizeRoundAndPackFloat32(zSign, 0x9C, (uint32)(zSign ? - a : a));
539 1.1 bjh21
540 1.1 bjh21 }
541 1.1 bjh21
542 1.1 bjh21 /*
543 1.1 bjh21 -------------------------------------------------------------------------------
544 1.1 bjh21 Returns the result of converting the 32-bit two's complement integer `a' to
545 1.1 bjh21 the double-precision floating-point format. The conversion is performed
546 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
547 1.1 bjh21 -------------------------------------------------------------------------------
548 1.1 bjh21 */
549 1.1 bjh21 float64 int32_to_float64( int32 a )
550 1.1 bjh21 {
551 1.1 bjh21 flag zSign;
552 1.1 bjh21 bits32 absA;
553 1.1 bjh21 int8 shiftCount;
554 1.1 bjh21 bits32 zSig0, zSig1;
555 1.1 bjh21
556 1.1 bjh21 if ( a == 0 ) return packFloat64( 0, 0, 0, 0 );
557 1.1 bjh21 zSign = ( a < 0 );
558 1.1 bjh21 absA = zSign ? - a : a;
559 1.1 bjh21 shiftCount = countLeadingZeros32( absA ) - 11;
560 1.1 bjh21 if ( 0 <= shiftCount ) {
561 1.1 bjh21 zSig0 = absA<<shiftCount;
562 1.1 bjh21 zSig1 = 0;
563 1.1 bjh21 }
564 1.1 bjh21 else {
565 1.1 bjh21 shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 );
566 1.1 bjh21 }
567 1.1 bjh21 return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 );
568 1.1 bjh21
569 1.1 bjh21 }
570 1.1 bjh21
571 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC
572 1.1 bjh21 /*
573 1.1 bjh21 -------------------------------------------------------------------------------
574 1.1 bjh21 Returns the result of converting the single-precision floating-point value
575 1.1 bjh21 `a' to the 32-bit two's complement integer format. The conversion is
576 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
577 1.1 bjh21 Arithmetic---which means in particular that the conversion is rounded
578 1.1 bjh21 according to the current rounding mode. If `a' is a NaN, the largest
579 1.1 bjh21 positive integer is returned. Otherwise, if the conversion overflows, the
580 1.1 bjh21 largest integer with the same sign as `a' is returned.
581 1.1 bjh21 -------------------------------------------------------------------------------
582 1.1 bjh21 */
583 1.1 bjh21 int32 float32_to_int32( float32 a )
584 1.1 bjh21 {
585 1.1 bjh21 flag aSign;
586 1.1 bjh21 int16 aExp, shiftCount;
587 1.1 bjh21 bits32 aSig, aSigExtra;
588 1.1 bjh21 int32 z;
589 1.1 bjh21 int8 roundingMode;
590 1.1 bjh21
591 1.1 bjh21 aSig = extractFloat32Frac( a );
592 1.1 bjh21 aExp = extractFloat32Exp( a );
593 1.1 bjh21 aSign = extractFloat32Sign( a );
594 1.1 bjh21 shiftCount = aExp - 0x96;
595 1.1 bjh21 if ( 0 <= shiftCount ) {
596 1.1 bjh21 if ( 0x9E <= aExp ) {
597 1.1 bjh21 if ( a != 0xCF000000 ) {
598 1.1 bjh21 float_raise( float_flag_invalid );
599 1.1 bjh21 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
600 1.1 bjh21 return 0x7FFFFFFF;
601 1.1 bjh21 }
602 1.1 bjh21 }
603 1.1 bjh21 return (sbits32) 0x80000000;
604 1.1 bjh21 }
605 1.1 bjh21 z = ( aSig | 0x00800000 )<<shiftCount;
606 1.1 bjh21 if ( aSign ) z = - z;
607 1.1 bjh21 }
608 1.1 bjh21 else {
609 1.1 bjh21 if ( aExp < 0x7E ) {
610 1.1 bjh21 aSigExtra = aExp | aSig;
611 1.1 bjh21 z = 0;
612 1.1 bjh21 }
613 1.1 bjh21 else {
614 1.1 bjh21 aSig |= 0x00800000;
615 1.1 bjh21 aSigExtra = aSig<<( shiftCount & 31 );
616 1.1 bjh21 z = aSig>>( - shiftCount );
617 1.1 bjh21 }
618 1.1 bjh21 if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
619 1.1 bjh21 roundingMode = float_rounding_mode;
620 1.1 bjh21 if ( roundingMode == float_round_nearest_even ) {
621 1.1 bjh21 if ( (sbits32) aSigExtra < 0 ) {
622 1.1 bjh21 ++z;
623 1.1 bjh21 if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1;
624 1.1 bjh21 }
625 1.1 bjh21 if ( aSign ) z = - z;
626 1.1 bjh21 }
627 1.1 bjh21 else {
628 1.1 bjh21 aSigExtra = ( aSigExtra != 0 );
629 1.1 bjh21 if ( aSign ) {
630 1.1 bjh21 z += ( roundingMode == float_round_down ) & aSigExtra;
631 1.1 bjh21 z = - z;
632 1.1 bjh21 }
633 1.1 bjh21 else {
634 1.1 bjh21 z += ( roundingMode == float_round_up ) & aSigExtra;
635 1.1 bjh21 }
636 1.1 bjh21 }
637 1.1 bjh21 }
638 1.1 bjh21 return z;
639 1.1 bjh21
640 1.1 bjh21 }
641 1.1 bjh21 #endif
642 1.1 bjh21
643 1.1 bjh21 /*
644 1.1 bjh21 -------------------------------------------------------------------------------
645 1.1 bjh21 Returns the result of converting the single-precision floating-point value
646 1.1 bjh21 `a' to the 32-bit two's complement integer format. The conversion is
647 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
648 1.1 bjh21 Arithmetic, except that the conversion is always rounded toward zero.
649 1.1 bjh21 If `a' is a NaN, the largest positive integer is returned. Otherwise, if
650 1.1 bjh21 the conversion overflows, the largest integer with the same sign as `a' is
651 1.1 bjh21 returned.
652 1.1 bjh21 -------------------------------------------------------------------------------
653 1.1 bjh21 */
654 1.1 bjh21 int32 float32_to_int32_round_to_zero( float32 a )
655 1.1 bjh21 {
656 1.1 bjh21 flag aSign;
657 1.1 bjh21 int16 aExp, shiftCount;
658 1.1 bjh21 bits32 aSig;
659 1.1 bjh21 int32 z;
660 1.1 bjh21
661 1.1 bjh21 aSig = extractFloat32Frac( a );
662 1.1 bjh21 aExp = extractFloat32Exp( a );
663 1.1 bjh21 aSign = extractFloat32Sign( a );
664 1.1 bjh21 shiftCount = aExp - 0x9E;
665 1.1 bjh21 if ( 0 <= shiftCount ) {
666 1.1 bjh21 if ( a != 0xCF000000 ) {
667 1.1 bjh21 float_raise( float_flag_invalid );
668 1.1 bjh21 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
669 1.1 bjh21 }
670 1.1 bjh21 return (sbits32) 0x80000000;
671 1.1 bjh21 }
672 1.1 bjh21 else if ( aExp <= 0x7E ) {
673 1.1 bjh21 if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
674 1.1 bjh21 return 0;
675 1.1 bjh21 }
676 1.1 bjh21 aSig = ( aSig | 0x00800000 )<<8;
677 1.1 bjh21 z = aSig>>( - shiftCount );
678 1.1 bjh21 if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
679 1.1 bjh21 float_exception_flags |= float_flag_inexact;
680 1.1 bjh21 }
681 1.1 bjh21 if ( aSign ) z = - z;
682 1.1 bjh21 return z;
683 1.1 bjh21
684 1.1 bjh21 }
685 1.1 bjh21
686 1.1 bjh21 /*
687 1.1 bjh21 -------------------------------------------------------------------------------
688 1.1 bjh21 Returns the result of converting the single-precision floating-point value
689 1.1 bjh21 `a' to the double-precision floating-point format. The conversion is
690 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
691 1.1 bjh21 Arithmetic.
692 1.1 bjh21 -------------------------------------------------------------------------------
693 1.1 bjh21 */
694 1.1 bjh21 float64 float32_to_float64( float32 a )
695 1.1 bjh21 {
696 1.1 bjh21 flag aSign;
697 1.1 bjh21 int16 aExp;
698 1.1 bjh21 bits32 aSig, zSig0, zSig1;
699 1.1 bjh21
700 1.1 bjh21 aSig = extractFloat32Frac( a );
701 1.1 bjh21 aExp = extractFloat32Exp( a );
702 1.1 bjh21 aSign = extractFloat32Sign( a );
703 1.1 bjh21 if ( aExp == 0xFF ) {
704 1.1 bjh21 if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
705 1.1 bjh21 return packFloat64( aSign, 0x7FF, 0, 0 );
706 1.1 bjh21 }
707 1.1 bjh21 if ( aExp == 0 ) {
708 1.1 bjh21 if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 );
709 1.1 bjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
710 1.1 bjh21 --aExp;
711 1.1 bjh21 }
712 1.1 bjh21 shift64Right( aSig, 0, 3, &zSig0, &zSig1 );
713 1.1 bjh21 return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 );
714 1.1 bjh21
715 1.1 bjh21 }
716 1.1 bjh21
717 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC
718 1.1 bjh21 /*
719 1.1 bjh21 -------------------------------------------------------------------------------
720 1.1 bjh21 Rounds the single-precision floating-point value `a' to an integer,
721 1.1 bjh21 and returns the result as a single-precision floating-point value. The
722 1.1 bjh21 operation is performed according to the IEC/IEEE Standard for Binary
723 1.1 bjh21 Floating-Point Arithmetic.
724 1.1 bjh21 -------------------------------------------------------------------------------
725 1.1 bjh21 */
726 1.1 bjh21 float32 float32_round_to_int( float32 a )
727 1.1 bjh21 {
728 1.1 bjh21 flag aSign;
729 1.1 bjh21 int16 aExp;
730 1.1 bjh21 bits32 lastBitMask, roundBitsMask;
731 1.1 bjh21 int8 roundingMode;
732 1.1 bjh21 float32 z;
733 1.1 bjh21
734 1.1 bjh21 aExp = extractFloat32Exp( a );
735 1.1 bjh21 if ( 0x96 <= aExp ) {
736 1.1 bjh21 if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
737 1.1 bjh21 return propagateFloat32NaN( a, a );
738 1.1 bjh21 }
739 1.1 bjh21 return a;
740 1.1 bjh21 }
741 1.1 bjh21 if ( aExp <= 0x7E ) {
742 1.1 bjh21 if ( (bits32) ( a<<1 ) == 0 ) return a;
743 1.1 bjh21 float_exception_flags |= float_flag_inexact;
744 1.1 bjh21 aSign = extractFloat32Sign( a );
745 1.1 bjh21 switch ( float_rounding_mode ) {
746 1.1 bjh21 case float_round_nearest_even:
747 1.1 bjh21 if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
748 1.1 bjh21 return packFloat32( aSign, 0x7F, 0 );
749 1.1 bjh21 }
750 1.1 bjh21 break;
751 1.1 bjh21 case float_round_to_zero:
752 1.1 bjh21 break;
753 1.1 bjh21 case float_round_down:
754 1.1 bjh21 return aSign ? 0xBF800000 : 0;
755 1.1 bjh21 case float_round_up:
756 1.1 bjh21 return aSign ? 0x80000000 : 0x3F800000;
757 1.1 bjh21 }
758 1.1 bjh21 return packFloat32( aSign, 0, 0 );
759 1.1 bjh21 }
760 1.1 bjh21 lastBitMask = 1;
761 1.1 bjh21 lastBitMask <<= 0x96 - aExp;
762 1.1 bjh21 roundBitsMask = lastBitMask - 1;
763 1.1 bjh21 z = a;
764 1.1 bjh21 roundingMode = float_rounding_mode;
765 1.1 bjh21 if ( roundingMode == float_round_nearest_even ) {
766 1.1 bjh21 z += lastBitMask>>1;
767 1.1 bjh21 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
768 1.1 bjh21 }
769 1.1 bjh21 else if ( roundingMode != float_round_to_zero ) {
770 1.1 bjh21 if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
771 1.1 bjh21 z += roundBitsMask;
772 1.1 bjh21 }
773 1.1 bjh21 }
774 1.1 bjh21 z &= ~ roundBitsMask;
775 1.1 bjh21 if ( z != a ) float_exception_flags |= float_flag_inexact;
776 1.1 bjh21 return z;
777 1.1 bjh21
778 1.1 bjh21 }
779 1.1 bjh21 #endif
780 1.1 bjh21
781 1.1 bjh21 /*
782 1.1 bjh21 -------------------------------------------------------------------------------
783 1.1 bjh21 Returns the result of adding the absolute values of the single-precision
784 1.1 bjh21 floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
785 1.1 bjh21 before being returned. `zSign' is ignored if the result is a NaN.
786 1.1 bjh21 The addition is performed according to the IEC/IEEE Standard for Binary
787 1.1 bjh21 Floating-Point Arithmetic.
788 1.1 bjh21 -------------------------------------------------------------------------------
789 1.1 bjh21 */
790 1.1 bjh21 static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
791 1.1 bjh21 {
792 1.1 bjh21 int16 aExp, bExp, zExp;
793 1.1 bjh21 bits32 aSig, bSig, zSig;
794 1.1 bjh21 int16 expDiff;
795 1.1 bjh21
796 1.1 bjh21 aSig = extractFloat32Frac( a );
797 1.1 bjh21 aExp = extractFloat32Exp( a );
798 1.1 bjh21 bSig = extractFloat32Frac( b );
799 1.1 bjh21 bExp = extractFloat32Exp( b );
800 1.1 bjh21 expDiff = aExp - bExp;
801 1.1 bjh21 aSig <<= 6;
802 1.1 bjh21 bSig <<= 6;
803 1.1 bjh21 if ( 0 < expDiff ) {
804 1.1 bjh21 if ( aExp == 0xFF ) {
805 1.1 bjh21 if ( aSig ) return propagateFloat32NaN( a, b );
806 1.1 bjh21 return a;
807 1.1 bjh21 }
808 1.1 bjh21 if ( bExp == 0 ) {
809 1.1 bjh21 --expDiff;
810 1.1 bjh21 }
811 1.1 bjh21 else {
812 1.1 bjh21 bSig |= 0x20000000;
813 1.1 bjh21 }
814 1.1 bjh21 shift32RightJamming( bSig, expDiff, &bSig );
815 1.1 bjh21 zExp = aExp;
816 1.1 bjh21 }
817 1.1 bjh21 else if ( expDiff < 0 ) {
818 1.1 bjh21 if ( bExp == 0xFF ) {
819 1.1 bjh21 if ( bSig ) return propagateFloat32NaN( a, b );
820 1.1 bjh21 return packFloat32( zSign, 0xFF, 0 );
821 1.1 bjh21 }
822 1.1 bjh21 if ( aExp == 0 ) {
823 1.1 bjh21 ++expDiff;
824 1.1 bjh21 }
825 1.1 bjh21 else {
826 1.1 bjh21 aSig |= 0x20000000;
827 1.1 bjh21 }
828 1.1 bjh21 shift32RightJamming( aSig, - expDiff, &aSig );
829 1.1 bjh21 zExp = bExp;
830 1.1 bjh21 }
831 1.1 bjh21 else {
832 1.1 bjh21 if ( aExp == 0xFF ) {
833 1.1 bjh21 if ( aSig | bSig ) return propagateFloat32NaN( a, b );
834 1.1 bjh21 return a;
835 1.1 bjh21 }
836 1.1 bjh21 if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
837 1.1 bjh21 zSig = 0x40000000 + aSig + bSig;
838 1.1 bjh21 zExp = aExp;
839 1.1 bjh21 goto roundAndPack;
840 1.1 bjh21 }
841 1.1 bjh21 aSig |= 0x20000000;
842 1.1 bjh21 zSig = ( aSig + bSig )<<1;
843 1.1 bjh21 --zExp;
844 1.1 bjh21 if ( (sbits32) zSig < 0 ) {
845 1.1 bjh21 zSig = aSig + bSig;
846 1.1 bjh21 ++zExp;
847 1.1 bjh21 }
848 1.1 bjh21 roundAndPack:
849 1.1 bjh21 return roundAndPackFloat32( zSign, zExp, zSig );
850 1.1 bjh21
851 1.1 bjh21 }
852 1.1 bjh21
853 1.1 bjh21 /*
854 1.1 bjh21 -------------------------------------------------------------------------------
855 1.1 bjh21 Returns the result of subtracting the absolute values of the single-
856 1.1 bjh21 precision floating-point values `a' and `b'. If `zSign' is 1, the
857 1.1 bjh21 difference is negated before being returned. `zSign' is ignored if the
858 1.1 bjh21 result is a NaN. The subtraction is performed according to the IEC/IEEE
859 1.1 bjh21 Standard for Binary Floating-Point Arithmetic.
860 1.1 bjh21 -------------------------------------------------------------------------------
861 1.1 bjh21 */
862 1.1 bjh21 static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
863 1.1 bjh21 {
864 1.1 bjh21 int16 aExp, bExp, zExp;
865 1.1 bjh21 bits32 aSig, bSig, zSig;
866 1.1 bjh21 int16 expDiff;
867 1.1 bjh21
868 1.1 bjh21 aSig = extractFloat32Frac( a );
869 1.1 bjh21 aExp = extractFloat32Exp( a );
870 1.1 bjh21 bSig = extractFloat32Frac( b );
871 1.1 bjh21 bExp = extractFloat32Exp( b );
872 1.1 bjh21 expDiff = aExp - bExp;
873 1.1 bjh21 aSig <<= 7;
874 1.1 bjh21 bSig <<= 7;
875 1.1 bjh21 if ( 0 < expDiff ) goto aExpBigger;
876 1.1 bjh21 if ( expDiff < 0 ) goto bExpBigger;
877 1.1 bjh21 if ( aExp == 0xFF ) {
878 1.1 bjh21 if ( aSig | bSig ) return propagateFloat32NaN( a, b );
879 1.1 bjh21 float_raise( float_flag_invalid );
880 1.1 bjh21 return float32_default_nan;
881 1.1 bjh21 }
882 1.1 bjh21 if ( aExp == 0 ) {
883 1.1 bjh21 aExp = 1;
884 1.1 bjh21 bExp = 1;
885 1.1 bjh21 }
886 1.1 bjh21 if ( bSig < aSig ) goto aBigger;
887 1.1 bjh21 if ( aSig < bSig ) goto bBigger;
888 1.1 bjh21 return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
889 1.1 bjh21 bExpBigger:
890 1.1 bjh21 if ( bExp == 0xFF ) {
891 1.1 bjh21 if ( bSig ) return propagateFloat32NaN( a, b );
892 1.1 bjh21 return packFloat32( zSign ^ 1, 0xFF, 0 );
893 1.1 bjh21 }
894 1.1 bjh21 if ( aExp == 0 ) {
895 1.1 bjh21 ++expDiff;
896 1.1 bjh21 }
897 1.1 bjh21 else {
898 1.1 bjh21 aSig |= 0x40000000;
899 1.1 bjh21 }
900 1.1 bjh21 shift32RightJamming( aSig, - expDiff, &aSig );
901 1.1 bjh21 bSig |= 0x40000000;
902 1.1 bjh21 bBigger:
903 1.1 bjh21 zSig = bSig - aSig;
904 1.1 bjh21 zExp = bExp;
905 1.1 bjh21 zSign ^= 1;
906 1.1 bjh21 goto normalizeRoundAndPack;
907 1.1 bjh21 aExpBigger:
908 1.1 bjh21 if ( aExp == 0xFF ) {
909 1.1 bjh21 if ( aSig ) return propagateFloat32NaN( a, b );
910 1.1 bjh21 return a;
911 1.1 bjh21 }
912 1.1 bjh21 if ( bExp == 0 ) {
913 1.1 bjh21 --expDiff;
914 1.1 bjh21 }
915 1.1 bjh21 else {
916 1.1 bjh21 bSig |= 0x40000000;
917 1.1 bjh21 }
918 1.1 bjh21 shift32RightJamming( bSig, expDiff, &bSig );
919 1.1 bjh21 aSig |= 0x40000000;
920 1.1 bjh21 aBigger:
921 1.1 bjh21 zSig = aSig - bSig;
922 1.1 bjh21 zExp = aExp;
923 1.1 bjh21 normalizeRoundAndPack:
924 1.1 bjh21 --zExp;
925 1.1 bjh21 return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
926 1.1 bjh21
927 1.1 bjh21 }
928 1.1 bjh21
929 1.1 bjh21 /*
930 1.1 bjh21 -------------------------------------------------------------------------------
931 1.1 bjh21 Returns the result of adding the single-precision floating-point values `a'
932 1.1 bjh21 and `b'. The operation is performed according to the IEC/IEEE Standard for
933 1.1 bjh21 Binary Floating-Point Arithmetic.
934 1.1 bjh21 -------------------------------------------------------------------------------
935 1.1 bjh21 */
936 1.1 bjh21 float32 float32_add( float32 a, float32 b )
937 1.1 bjh21 {
938 1.1 bjh21 flag aSign, bSign;
939 1.1 bjh21
940 1.1 bjh21 aSign = extractFloat32Sign( a );
941 1.1 bjh21 bSign = extractFloat32Sign( b );
942 1.1 bjh21 if ( aSign == bSign ) {
943 1.1 bjh21 return addFloat32Sigs( a, b, aSign );
944 1.1 bjh21 }
945 1.1 bjh21 else {
946 1.1 bjh21 return subFloat32Sigs( a, b, aSign );
947 1.1 bjh21 }
948 1.1 bjh21
949 1.1 bjh21 }
950 1.1 bjh21
951 1.1 bjh21 /*
952 1.1 bjh21 -------------------------------------------------------------------------------
953 1.1 bjh21 Returns the result of subtracting the single-precision floating-point values
954 1.1 bjh21 `a' and `b'. The operation is performed according to the IEC/IEEE Standard
955 1.1 bjh21 for Binary Floating-Point Arithmetic.
956 1.1 bjh21 -------------------------------------------------------------------------------
957 1.1 bjh21 */
958 1.1 bjh21 float32 float32_sub( float32 a, float32 b )
959 1.1 bjh21 {
960 1.1 bjh21 flag aSign, bSign;
961 1.1 bjh21
962 1.1 bjh21 aSign = extractFloat32Sign( a );
963 1.1 bjh21 bSign = extractFloat32Sign( b );
964 1.1 bjh21 if ( aSign == bSign ) {
965 1.1 bjh21 return subFloat32Sigs( a, b, aSign );
966 1.1 bjh21 }
967 1.1 bjh21 else {
968 1.1 bjh21 return addFloat32Sigs( a, b, aSign );
969 1.1 bjh21 }
970 1.1 bjh21
971 1.1 bjh21 }
972 1.1 bjh21
973 1.1 bjh21 /*
974 1.1 bjh21 -------------------------------------------------------------------------------
975 1.1 bjh21 Returns the result of multiplying the single-precision floating-point values
976 1.1 bjh21 `a' and `b'. The operation is performed according to the IEC/IEEE Standard
977 1.1 bjh21 for Binary Floating-Point Arithmetic.
978 1.1 bjh21 -------------------------------------------------------------------------------
979 1.1 bjh21 */
980 1.1 bjh21 float32 float32_mul( float32 a, float32 b )
981 1.1 bjh21 {
982 1.1 bjh21 flag aSign, bSign, zSign;
983 1.1 bjh21 int16 aExp, bExp, zExp;
984 1.1 bjh21 bits32 aSig, bSig, zSig0, zSig1;
985 1.1 bjh21
986 1.1 bjh21 aSig = extractFloat32Frac( a );
987 1.1 bjh21 aExp = extractFloat32Exp( a );
988 1.1 bjh21 aSign = extractFloat32Sign( a );
989 1.1 bjh21 bSig = extractFloat32Frac( b );
990 1.1 bjh21 bExp = extractFloat32Exp( b );
991 1.1 bjh21 bSign = extractFloat32Sign( b );
992 1.1 bjh21 zSign = aSign ^ bSign;
993 1.1 bjh21 if ( aExp == 0xFF ) {
994 1.1 bjh21 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
995 1.1 bjh21 return propagateFloat32NaN( a, b );
996 1.1 bjh21 }
997 1.1 bjh21 if ( ( bExp | bSig ) == 0 ) {
998 1.1 bjh21 float_raise( float_flag_invalid );
999 1.1 bjh21 return float32_default_nan;
1000 1.1 bjh21 }
1001 1.1 bjh21 return packFloat32( zSign, 0xFF, 0 );
1002 1.1 bjh21 }
1003 1.1 bjh21 if ( bExp == 0xFF ) {
1004 1.1 bjh21 if ( bSig ) return propagateFloat32NaN( a, b );
1005 1.1 bjh21 if ( ( aExp | aSig ) == 0 ) {
1006 1.1 bjh21 float_raise( float_flag_invalid );
1007 1.1 bjh21 return float32_default_nan;
1008 1.1 bjh21 }
1009 1.1 bjh21 return packFloat32( zSign, 0xFF, 0 );
1010 1.1 bjh21 }
1011 1.1 bjh21 if ( aExp == 0 ) {
1012 1.1 bjh21 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1013 1.1 bjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1014 1.1 bjh21 }
1015 1.1 bjh21 if ( bExp == 0 ) {
1016 1.1 bjh21 if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1017 1.1 bjh21 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1018 1.1 bjh21 }
1019 1.1 bjh21 zExp = aExp + bExp - 0x7F;
1020 1.1 bjh21 aSig = ( aSig | 0x00800000 )<<7;
1021 1.1 bjh21 bSig = ( bSig | 0x00800000 )<<8;
1022 1.1 bjh21 mul32To64( aSig, bSig, &zSig0, &zSig1 );
1023 1.1 bjh21 zSig0 |= ( zSig1 != 0 );
1024 1.1 bjh21 if ( 0 <= (sbits32) ( zSig0<<1 ) ) {
1025 1.1 bjh21 zSig0 <<= 1;
1026 1.1 bjh21 --zExp;
1027 1.1 bjh21 }
1028 1.1 bjh21 return roundAndPackFloat32( zSign, zExp, zSig0 );
1029 1.1 bjh21
1030 1.1 bjh21 }
1031 1.1 bjh21
1032 1.1 bjh21 /*
1033 1.1 bjh21 -------------------------------------------------------------------------------
1034 1.1 bjh21 Returns the result of dividing the single-precision floating-point value `a'
1035 1.1 bjh21 by the corresponding value `b'. The operation is performed according to the
1036 1.1 bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1037 1.1 bjh21 -------------------------------------------------------------------------------
1038 1.1 bjh21 */
1039 1.1 bjh21 float32 float32_div( float32 a, float32 b )
1040 1.1 bjh21 {
1041 1.1 bjh21 flag aSign, bSign, zSign;
1042 1.1 bjh21 int16 aExp, bExp, zExp;
1043 1.1 bjh21 bits32 aSig, bSig, zSig, rem0, rem1, term0, term1;
1044 1.1 bjh21
1045 1.1 bjh21 aSig = extractFloat32Frac( a );
1046 1.1 bjh21 aExp = extractFloat32Exp( a );
1047 1.1 bjh21 aSign = extractFloat32Sign( a );
1048 1.1 bjh21 bSig = extractFloat32Frac( b );
1049 1.1 bjh21 bExp = extractFloat32Exp( b );
1050 1.1 bjh21 bSign = extractFloat32Sign( b );
1051 1.1 bjh21 zSign = aSign ^ bSign;
1052 1.1 bjh21 if ( aExp == 0xFF ) {
1053 1.1 bjh21 if ( aSig ) return propagateFloat32NaN( a, b );
1054 1.1 bjh21 if ( bExp == 0xFF ) {
1055 1.1 bjh21 if ( bSig ) return propagateFloat32NaN( a, b );
1056 1.1 bjh21 float_raise( float_flag_invalid );
1057 1.1 bjh21 return float32_default_nan;
1058 1.1 bjh21 }
1059 1.1 bjh21 return packFloat32( zSign, 0xFF, 0 );
1060 1.1 bjh21 }
1061 1.1 bjh21 if ( bExp == 0xFF ) {
1062 1.1 bjh21 if ( bSig ) return propagateFloat32NaN( a, b );
1063 1.1 bjh21 return packFloat32( zSign, 0, 0 );
1064 1.1 bjh21 }
1065 1.1 bjh21 if ( bExp == 0 ) {
1066 1.1 bjh21 if ( bSig == 0 ) {
1067 1.1 bjh21 if ( ( aExp | aSig ) == 0 ) {
1068 1.1 bjh21 float_raise( float_flag_invalid );
1069 1.1 bjh21 return float32_default_nan;
1070 1.1 bjh21 }
1071 1.1 bjh21 float_raise( float_flag_divbyzero );
1072 1.1 bjh21 return packFloat32( zSign, 0xFF, 0 );
1073 1.1 bjh21 }
1074 1.1 bjh21 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1075 1.1 bjh21 }
1076 1.1 bjh21 if ( aExp == 0 ) {
1077 1.1 bjh21 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1078 1.1 bjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1079 1.1 bjh21 }
1080 1.1 bjh21 zExp = aExp - bExp + 0x7D;
1081 1.1 bjh21 aSig = ( aSig | 0x00800000 )<<7;
1082 1.1 bjh21 bSig = ( bSig | 0x00800000 )<<8;
1083 1.1 bjh21 if ( bSig <= ( aSig + aSig ) ) {
1084 1.1 bjh21 aSig >>= 1;
1085 1.1 bjh21 ++zExp;
1086 1.1 bjh21 }
1087 1.1 bjh21 zSig = estimateDiv64To32( aSig, 0, bSig );
1088 1.1 bjh21 if ( ( zSig & 0x3F ) <= 2 ) {
1089 1.1 bjh21 mul32To64( bSig, zSig, &term0, &term1 );
1090 1.1 bjh21 sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1091 1.1 bjh21 while ( (sbits32) rem0 < 0 ) {
1092 1.1 bjh21 --zSig;
1093 1.1 bjh21 add64( rem0, rem1, 0, bSig, &rem0, &rem1 );
1094 1.1 bjh21 }
1095 1.1 bjh21 zSig |= ( rem1 != 0 );
1096 1.1 bjh21 }
1097 1.1 bjh21 return roundAndPackFloat32( zSign, zExp, zSig );
1098 1.1 bjh21
1099 1.1 bjh21 }
1100 1.1 bjh21
1101 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC
1102 1.1 bjh21 /*
1103 1.1 bjh21 -------------------------------------------------------------------------------
1104 1.1 bjh21 Returns the remainder of the single-precision floating-point value `a'
1105 1.1 bjh21 with respect to the corresponding value `b'. The operation is performed
1106 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1107 1.1 bjh21 -------------------------------------------------------------------------------
1108 1.1 bjh21 */
1109 1.1 bjh21 float32 float32_rem( float32 a, float32 b )
1110 1.1 bjh21 {
1111 1.1 bjh21 flag aSign, bSign, zSign;
1112 1.1 bjh21 int16 aExp, bExp, expDiff;
1113 1.1 bjh21 bits32 aSig, bSig, q, allZero, alternateASig;
1114 1.1 bjh21 sbits32 sigMean;
1115 1.1 bjh21
1116 1.1 bjh21 aSig = extractFloat32Frac( a );
1117 1.1 bjh21 aExp = extractFloat32Exp( a );
1118 1.1 bjh21 aSign = extractFloat32Sign( a );
1119 1.1 bjh21 bSig = extractFloat32Frac( b );
1120 1.1 bjh21 bExp = extractFloat32Exp( b );
1121 1.1 bjh21 bSign = extractFloat32Sign( b );
1122 1.1 bjh21 if ( aExp == 0xFF ) {
1123 1.1 bjh21 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1124 1.1 bjh21 return propagateFloat32NaN( a, b );
1125 1.1 bjh21 }
1126 1.1 bjh21 float_raise( float_flag_invalid );
1127 1.1 bjh21 return float32_default_nan;
1128 1.1 bjh21 }
1129 1.1 bjh21 if ( bExp == 0xFF ) {
1130 1.1 bjh21 if ( bSig ) return propagateFloat32NaN( a, b );
1131 1.1 bjh21 return a;
1132 1.1 bjh21 }
1133 1.1 bjh21 if ( bExp == 0 ) {
1134 1.1 bjh21 if ( bSig == 0 ) {
1135 1.1 bjh21 float_raise( float_flag_invalid );
1136 1.1 bjh21 return float32_default_nan;
1137 1.1 bjh21 }
1138 1.1 bjh21 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1139 1.1 bjh21 }
1140 1.1 bjh21 if ( aExp == 0 ) {
1141 1.1 bjh21 if ( aSig == 0 ) return a;
1142 1.1 bjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1143 1.1 bjh21 }
1144 1.1 bjh21 expDiff = aExp - bExp;
1145 1.1 bjh21 aSig = ( aSig | 0x00800000 )<<8;
1146 1.1 bjh21 bSig = ( bSig | 0x00800000 )<<8;
1147 1.1 bjh21 if ( expDiff < 0 ) {
1148 1.1 bjh21 if ( expDiff < -1 ) return a;
1149 1.1 bjh21 aSig >>= 1;
1150 1.1 bjh21 }
1151 1.1 bjh21 q = ( bSig <= aSig );
1152 1.1 bjh21 if ( q ) aSig -= bSig;
1153 1.1 bjh21 expDiff -= 32;
1154 1.1 bjh21 while ( 0 < expDiff ) {
1155 1.1 bjh21 q = estimateDiv64To32( aSig, 0, bSig );
1156 1.1 bjh21 q = ( 2 < q ) ? q - 2 : 0;
1157 1.1 bjh21 aSig = - ( ( bSig>>2 ) * q );
1158 1.1 bjh21 expDiff -= 30;
1159 1.1 bjh21 }
1160 1.1 bjh21 expDiff += 32;
1161 1.1 bjh21 if ( 0 < expDiff ) {
1162 1.1 bjh21 q = estimateDiv64To32( aSig, 0, bSig );
1163 1.1 bjh21 q = ( 2 < q ) ? q - 2 : 0;
1164 1.1 bjh21 q >>= 32 - expDiff;
1165 1.1 bjh21 bSig >>= 2;
1166 1.1 bjh21 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
1167 1.1 bjh21 }
1168 1.1 bjh21 else {
1169 1.1 bjh21 aSig >>= 2;
1170 1.1 bjh21 bSig >>= 2;
1171 1.1 bjh21 }
1172 1.1 bjh21 do {
1173 1.1 bjh21 alternateASig = aSig;
1174 1.1 bjh21 ++q;
1175 1.1 bjh21 aSig -= bSig;
1176 1.1 bjh21 } while ( 0 <= (sbits32) aSig );
1177 1.1 bjh21 sigMean = aSig + alternateASig;
1178 1.1 bjh21 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
1179 1.1 bjh21 aSig = alternateASig;
1180 1.1 bjh21 }
1181 1.1 bjh21 zSign = ( (sbits32) aSig < 0 );
1182 1.1 bjh21 if ( zSign ) aSig = - aSig;
1183 1.1 bjh21 return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
1184 1.1 bjh21
1185 1.1 bjh21 }
1186 1.1 bjh21 #endif
1187 1.1 bjh21
1188 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC
1189 1.1 bjh21 /*
1190 1.1 bjh21 -------------------------------------------------------------------------------
1191 1.1 bjh21 Returns the square root of the single-precision floating-point value `a'.
1192 1.1 bjh21 The operation is performed according to the IEC/IEEE Standard for Binary
1193 1.1 bjh21 Floating-Point Arithmetic.
1194 1.1 bjh21 -------------------------------------------------------------------------------
1195 1.1 bjh21 */
1196 1.1 bjh21 float32 float32_sqrt( float32 a )
1197 1.1 bjh21 {
1198 1.1 bjh21 flag aSign;
1199 1.1 bjh21 int16 aExp, zExp;
1200 1.1 bjh21 bits32 aSig, zSig, rem0, rem1, term0, term1;
1201 1.1 bjh21
1202 1.1 bjh21 aSig = extractFloat32Frac( a );
1203 1.1 bjh21 aExp = extractFloat32Exp( a );
1204 1.1 bjh21 aSign = extractFloat32Sign( a );
1205 1.1 bjh21 if ( aExp == 0xFF ) {
1206 1.1 bjh21 if ( aSig ) return propagateFloat32NaN( a, 0 );
1207 1.1 bjh21 if ( ! aSign ) return a;
1208 1.1 bjh21 float_raise( float_flag_invalid );
1209 1.1 bjh21 return float32_default_nan;
1210 1.1 bjh21 }
1211 1.1 bjh21 if ( aSign ) {
1212 1.1 bjh21 if ( ( aExp | aSig ) == 0 ) return a;
1213 1.1 bjh21 float_raise( float_flag_invalid );
1214 1.1 bjh21 return float32_default_nan;
1215 1.1 bjh21 }
1216 1.1 bjh21 if ( aExp == 0 ) {
1217 1.1 bjh21 if ( aSig == 0 ) return 0;
1218 1.1 bjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1219 1.1 bjh21 }
1220 1.1 bjh21 zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
1221 1.1 bjh21 aSig = ( aSig | 0x00800000 )<<8;
1222 1.1 bjh21 zSig = estimateSqrt32( aExp, aSig ) + 2;
1223 1.1 bjh21 if ( ( zSig & 0x7F ) <= 5 ) {
1224 1.1 bjh21 if ( zSig < 2 ) {
1225 1.1 bjh21 zSig = 0x7FFFFFFF;
1226 1.1 bjh21 goto roundAndPack;
1227 1.1 bjh21 }
1228 1.1 bjh21 else {
1229 1.1 bjh21 aSig >>= aExp & 1;
1230 1.1 bjh21 mul32To64( zSig, zSig, &term0, &term1 );
1231 1.1 bjh21 sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1232 1.1 bjh21 while ( (sbits32) rem0 < 0 ) {
1233 1.1 bjh21 --zSig;
1234 1.1 bjh21 shortShift64Left( 0, zSig, 1, &term0, &term1 );
1235 1.1 bjh21 term1 |= 1;
1236 1.1 bjh21 add64( rem0, rem1, term0, term1, &rem0, &rem1 );
1237 1.1 bjh21 }
1238 1.1 bjh21 zSig |= ( ( rem0 | rem1 ) != 0 );
1239 1.1 bjh21 }
1240 1.1 bjh21 }
1241 1.1 bjh21 shift32RightJamming( zSig, 1, &zSig );
1242 1.1 bjh21 roundAndPack:
1243 1.1 bjh21 return roundAndPackFloat32( 0, zExp, zSig );
1244 1.1 bjh21
1245 1.1 bjh21 }
1246 1.1 bjh21 #endif
1247 1.1 bjh21
1248 1.1 bjh21 /*
1249 1.1 bjh21 -------------------------------------------------------------------------------
1250 1.1 bjh21 Returns 1 if the single-precision floating-point value `a' is equal to
1251 1.1 bjh21 the corresponding value `b', and 0 otherwise. The comparison is performed
1252 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1253 1.1 bjh21 -------------------------------------------------------------------------------
1254 1.1 bjh21 */
1255 1.1 bjh21 flag float32_eq( float32 a, float32 b )
1256 1.1 bjh21 {
1257 1.1 bjh21
1258 1.1 bjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1259 1.1 bjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1260 1.1 bjh21 ) {
1261 1.1 bjh21 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1262 1.1 bjh21 float_raise( float_flag_invalid );
1263 1.1 bjh21 }
1264 1.1 bjh21 return 0;
1265 1.1 bjh21 }
1266 1.1 bjh21 return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1267 1.1 bjh21
1268 1.1 bjh21 }
1269 1.1 bjh21
1270 1.1 bjh21 /*
1271 1.1 bjh21 -------------------------------------------------------------------------------
1272 1.1 bjh21 Returns 1 if the single-precision floating-point value `a' is less than
1273 1.1 bjh21 or equal to the corresponding value `b', and 0 otherwise. The comparison
1274 1.1 bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point
1275 1.1 bjh21 Arithmetic.
1276 1.1 bjh21 -------------------------------------------------------------------------------
1277 1.1 bjh21 */
1278 1.1 bjh21 flag float32_le( float32 a, float32 b )
1279 1.1 bjh21 {
1280 1.1 bjh21 flag aSign, bSign;
1281 1.1 bjh21
1282 1.1 bjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1283 1.1 bjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1284 1.1 bjh21 ) {
1285 1.1 bjh21 float_raise( float_flag_invalid );
1286 1.1 bjh21 return 0;
1287 1.1 bjh21 }
1288 1.1 bjh21 aSign = extractFloat32Sign( a );
1289 1.1 bjh21 bSign = extractFloat32Sign( b );
1290 1.1 bjh21 if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1291 1.1 bjh21 return ( a == b ) || ( aSign ^ ( a < b ) );
1292 1.1 bjh21
1293 1.1 bjh21 }
1294 1.1 bjh21
1295 1.1 bjh21 /*
1296 1.1 bjh21 -------------------------------------------------------------------------------
1297 1.1 bjh21 Returns 1 if the single-precision floating-point value `a' is less than
1298 1.1 bjh21 the corresponding value `b', and 0 otherwise. The comparison is performed
1299 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1300 1.1 bjh21 -------------------------------------------------------------------------------
1301 1.1 bjh21 */
1302 1.1 bjh21 flag float32_lt( float32 a, float32 b )
1303 1.1 bjh21 {
1304 1.1 bjh21 flag aSign, bSign;
1305 1.1 bjh21
1306 1.1 bjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1307 1.1 bjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1308 1.1 bjh21 ) {
1309 1.1 bjh21 float_raise( float_flag_invalid );
1310 1.1 bjh21 return 0;
1311 1.1 bjh21 }
1312 1.1 bjh21 aSign = extractFloat32Sign( a );
1313 1.1 bjh21 bSign = extractFloat32Sign( b );
1314 1.1 bjh21 if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1315 1.1 bjh21 return ( a != b ) && ( aSign ^ ( a < b ) );
1316 1.1 bjh21
1317 1.1 bjh21 }
1318 1.1 bjh21
1319 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1320 1.1 bjh21 /*
1321 1.1 bjh21 -------------------------------------------------------------------------------
1322 1.1 bjh21 Returns 1 if the single-precision floating-point value `a' is equal to
1323 1.1 bjh21 the corresponding value `b', and 0 otherwise. The invalid exception is
1324 1.1 bjh21 raised if either operand is a NaN. Otherwise, the comparison is performed
1325 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1326 1.1 bjh21 -------------------------------------------------------------------------------
1327 1.1 bjh21 */
1328 1.1 bjh21 flag float32_eq_signaling( float32 a, float32 b )
1329 1.1 bjh21 {
1330 1.1 bjh21
1331 1.1 bjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1332 1.1 bjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1333 1.1 bjh21 ) {
1334 1.1 bjh21 float_raise( float_flag_invalid );
1335 1.1 bjh21 return 0;
1336 1.1 bjh21 }
1337 1.1 bjh21 return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1338 1.1 bjh21
1339 1.1 bjh21 }
1340 1.1 bjh21
1341 1.1 bjh21 /*
1342 1.1 bjh21 -------------------------------------------------------------------------------
1343 1.1 bjh21 Returns 1 if the single-precision floating-point value `a' is less than or
1344 1.1 bjh21 equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
1345 1.1 bjh21 cause an exception. Otherwise, the comparison is performed according to the
1346 1.1 bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1347 1.1 bjh21 -------------------------------------------------------------------------------
1348 1.1 bjh21 */
1349 1.1 bjh21 flag float32_le_quiet( float32 a, float32 b )
1350 1.1 bjh21 {
1351 1.1 bjh21 flag aSign, bSign;
1352 1.1 bjh21 int16 aExp, bExp;
1353 1.1 bjh21
1354 1.1 bjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1355 1.1 bjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1356 1.1 bjh21 ) {
1357 1.1 bjh21 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1358 1.1 bjh21 float_raise( float_flag_invalid );
1359 1.1 bjh21 }
1360 1.1 bjh21 return 0;
1361 1.1 bjh21 }
1362 1.1 bjh21 aSign = extractFloat32Sign( a );
1363 1.1 bjh21 bSign = extractFloat32Sign( b );
1364 1.1 bjh21 if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1365 1.1 bjh21 return ( a == b ) || ( aSign ^ ( a < b ) );
1366 1.1 bjh21
1367 1.1 bjh21 }
1368 1.1 bjh21
1369 1.1 bjh21 /*
1370 1.1 bjh21 -------------------------------------------------------------------------------
1371 1.1 bjh21 Returns 1 if the single-precision floating-point value `a' is less than
1372 1.1 bjh21 the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
1373 1.1 bjh21 exception. Otherwise, the comparison is performed according to the IEC/IEEE
1374 1.1 bjh21 Standard for Binary Floating-Point Arithmetic.
1375 1.1 bjh21 -------------------------------------------------------------------------------
1376 1.1 bjh21 */
1377 1.1 bjh21 flag float32_lt_quiet( float32 a, float32 b )
1378 1.1 bjh21 {
1379 1.1 bjh21 flag aSign, bSign;
1380 1.1 bjh21
1381 1.1 bjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1382 1.1 bjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1383 1.1 bjh21 ) {
1384 1.1 bjh21 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1385 1.1 bjh21 float_raise( float_flag_invalid );
1386 1.1 bjh21 }
1387 1.1 bjh21 return 0;
1388 1.1 bjh21 }
1389 1.1 bjh21 aSign = extractFloat32Sign( a );
1390 1.1 bjh21 bSign = extractFloat32Sign( b );
1391 1.1 bjh21 if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1392 1.1 bjh21 return ( a != b ) && ( aSign ^ ( a < b ) );
1393 1.1 bjh21
1394 1.1 bjh21 }
1395 1.1 bjh21 #endif /* !SOFTFLOAT_FOR_GCC */
1396 1.1 bjh21
1397 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1398 1.1 bjh21 /*
1399 1.1 bjh21 -------------------------------------------------------------------------------
1400 1.1 bjh21 Returns the result of converting the double-precision floating-point value
1401 1.1 bjh21 `a' to the 32-bit two's complement integer format. The conversion is
1402 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
1403 1.1 bjh21 Arithmetic---which means in particular that the conversion is rounded
1404 1.1 bjh21 according to the current rounding mode. If `a' is a NaN, the largest
1405 1.1 bjh21 positive integer is returned. Otherwise, if the conversion overflows, the
1406 1.1 bjh21 largest integer with the same sign as `a' is returned.
1407 1.1 bjh21 -------------------------------------------------------------------------------
1408 1.1 bjh21 */
1409 1.1 bjh21 int32 float64_to_int32( float64 a )
1410 1.1 bjh21 {
1411 1.1 bjh21 flag aSign;
1412 1.1 bjh21 int16 aExp, shiftCount;
1413 1.1 bjh21 bits32 aSig0, aSig1, absZ, aSigExtra;
1414 1.1 bjh21 int32 z;
1415 1.1 bjh21 int8 roundingMode;
1416 1.1 bjh21
1417 1.1 bjh21 aSig1 = extractFloat64Frac1( a );
1418 1.1 bjh21 aSig0 = extractFloat64Frac0( a );
1419 1.1 bjh21 aExp = extractFloat64Exp( a );
1420 1.1 bjh21 aSign = extractFloat64Sign( a );
1421 1.1 bjh21 shiftCount = aExp - 0x413;
1422 1.1 bjh21 if ( 0 <= shiftCount ) {
1423 1.1 bjh21 if ( 0x41E < aExp ) {
1424 1.1 bjh21 if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1425 1.1 bjh21 goto invalid;
1426 1.1 bjh21 }
1427 1.1 bjh21 shortShift64Left(
1428 1.1 bjh21 aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1429 1.1 bjh21 if ( 0x80000000 < absZ ) goto invalid;
1430 1.1 bjh21 }
1431 1.1 bjh21 else {
1432 1.1 bjh21 aSig1 = ( aSig1 != 0 );
1433 1.1 bjh21 if ( aExp < 0x3FE ) {
1434 1.1 bjh21 aSigExtra = aExp | aSig0 | aSig1;
1435 1.1 bjh21 absZ = 0;
1436 1.1 bjh21 }
1437 1.1 bjh21 else {
1438 1.1 bjh21 aSig0 |= 0x00100000;
1439 1.1 bjh21 aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1440 1.1 bjh21 absZ = aSig0>>( - shiftCount );
1441 1.1 bjh21 }
1442 1.1 bjh21 }
1443 1.1 bjh21 roundingMode = float_rounding_mode;
1444 1.1 bjh21 if ( roundingMode == float_round_nearest_even ) {
1445 1.1 bjh21 if ( (sbits32) aSigExtra < 0 ) {
1446 1.1 bjh21 ++absZ;
1447 1.1 bjh21 if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1;
1448 1.1 bjh21 }
1449 1.1 bjh21 z = aSign ? - absZ : absZ;
1450 1.1 bjh21 }
1451 1.1 bjh21 else {
1452 1.1 bjh21 aSigExtra = ( aSigExtra != 0 );
1453 1.1 bjh21 if ( aSign ) {
1454 1.1 bjh21 z = - ( absZ
1455 1.1 bjh21 + ( ( roundingMode == float_round_down ) & aSigExtra ) );
1456 1.1 bjh21 }
1457 1.1 bjh21 else {
1458 1.1 bjh21 z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra );
1459 1.1 bjh21 }
1460 1.1 bjh21 }
1461 1.1 bjh21 if ( ( aSign ^ ( z < 0 ) ) && z ) {
1462 1.1 bjh21 invalid:
1463 1.1 bjh21 float_raise( float_flag_invalid );
1464 1.1 bjh21 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1465 1.1 bjh21 }
1466 1.1 bjh21 if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
1467 1.1 bjh21 return z;
1468 1.1 bjh21
1469 1.1 bjh21 }
1470 1.1 bjh21 #endif /* !SOFTFLOAT_FOR_GCC */
1471 1.1 bjh21
1472 1.1 bjh21 /*
1473 1.1 bjh21 -------------------------------------------------------------------------------
1474 1.1 bjh21 Returns the result of converting the double-precision floating-point value
1475 1.1 bjh21 `a' to the 32-bit two's complement integer format. The conversion is
1476 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
1477 1.1 bjh21 Arithmetic, except that the conversion is always rounded toward zero.
1478 1.1 bjh21 If `a' is a NaN, the largest positive integer is returned. Otherwise, if
1479 1.1 bjh21 the conversion overflows, the largest integer with the same sign as `a' is
1480 1.1 bjh21 returned.
1481 1.1 bjh21 -------------------------------------------------------------------------------
1482 1.1 bjh21 */
1483 1.1 bjh21 int32 float64_to_int32_round_to_zero( float64 a )
1484 1.1 bjh21 {
1485 1.1 bjh21 flag aSign;
1486 1.1 bjh21 int16 aExp, shiftCount;
1487 1.1 bjh21 bits32 aSig0, aSig1, absZ, aSigExtra;
1488 1.1 bjh21 int32 z;
1489 1.1 bjh21
1490 1.1 bjh21 aSig1 = extractFloat64Frac1( a );
1491 1.1 bjh21 aSig0 = extractFloat64Frac0( a );
1492 1.1 bjh21 aExp = extractFloat64Exp( a );
1493 1.1 bjh21 aSign = extractFloat64Sign( a );
1494 1.1 bjh21 shiftCount = aExp - 0x413;
1495 1.1 bjh21 if ( 0 <= shiftCount ) {
1496 1.1 bjh21 if ( 0x41E < aExp ) {
1497 1.1 bjh21 if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1498 1.1 bjh21 goto invalid;
1499 1.1 bjh21 }
1500 1.1 bjh21 shortShift64Left(
1501 1.1 bjh21 aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1502 1.1 bjh21 }
1503 1.1 bjh21 else {
1504 1.1 bjh21 if ( aExp < 0x3FF ) {
1505 1.1 bjh21 if ( aExp | aSig0 | aSig1 ) {
1506 1.1 bjh21 float_exception_flags |= float_flag_inexact;
1507 1.1 bjh21 }
1508 1.1 bjh21 return 0;
1509 1.1 bjh21 }
1510 1.1 bjh21 aSig0 |= 0x00100000;
1511 1.1 bjh21 aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1512 1.1 bjh21 absZ = aSig0>>( - shiftCount );
1513 1.1 bjh21 }
1514 1.1 bjh21 z = aSign ? - absZ : absZ;
1515 1.1 bjh21 if ( ( aSign ^ ( z < 0 ) ) && z ) {
1516 1.1 bjh21 invalid:
1517 1.1 bjh21 float_raise( float_flag_invalid );
1518 1.1 bjh21 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1519 1.1 bjh21 }
1520 1.1 bjh21 if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
1521 1.1 bjh21 return z;
1522 1.1 bjh21
1523 1.1 bjh21 }
1524 1.1 bjh21
1525 1.1 bjh21 /*
1526 1.1 bjh21 -------------------------------------------------------------------------------
1527 1.1 bjh21 Returns the result of converting the double-precision floating-point value
1528 1.1 bjh21 `a' to the single-precision floating-point format. The conversion is
1529 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
1530 1.1 bjh21 Arithmetic.
1531 1.1 bjh21 -------------------------------------------------------------------------------
1532 1.1 bjh21 */
1533 1.1 bjh21 float32 float64_to_float32( float64 a )
1534 1.1 bjh21 {
1535 1.1 bjh21 flag aSign;
1536 1.1 bjh21 int16 aExp;
1537 1.1 bjh21 bits32 aSig0, aSig1, zSig;
1538 1.1 bjh21 bits32 allZero;
1539 1.1 bjh21
1540 1.1 bjh21 aSig1 = extractFloat64Frac1( a );
1541 1.1 bjh21 aSig0 = extractFloat64Frac0( a );
1542 1.1 bjh21 aExp = extractFloat64Exp( a );
1543 1.1 bjh21 aSign = extractFloat64Sign( a );
1544 1.1 bjh21 if ( aExp == 0x7FF ) {
1545 1.1 bjh21 if ( aSig0 | aSig1 ) {
1546 1.1 bjh21 return commonNaNToFloat32( float64ToCommonNaN( a ) );
1547 1.1 bjh21 }
1548 1.1 bjh21 return packFloat32( aSign, 0xFF, 0 );
1549 1.1 bjh21 }
1550 1.1 bjh21 shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig );
1551 1.1 bjh21 if ( aExp ) zSig |= 0x40000000;
1552 1.1 bjh21 return roundAndPackFloat32( aSign, aExp - 0x381, zSig );
1553 1.1 bjh21
1554 1.1 bjh21 }
1555 1.1 bjh21
1556 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC
1557 1.1 bjh21 /*
1558 1.1 bjh21 -------------------------------------------------------------------------------
1559 1.1 bjh21 Rounds the double-precision floating-point value `a' to an integer,
1560 1.1 bjh21 and returns the result as a double-precision floating-point value. The
1561 1.1 bjh21 operation is performed according to the IEC/IEEE Standard for Binary
1562 1.1 bjh21 Floating-Point Arithmetic.
1563 1.1 bjh21 -------------------------------------------------------------------------------
1564 1.1 bjh21 */
1565 1.1 bjh21 float64 float64_round_to_int( float64 a )
1566 1.1 bjh21 {
1567 1.1 bjh21 flag aSign;
1568 1.1 bjh21 int16 aExp;
1569 1.1 bjh21 bits32 lastBitMask, roundBitsMask;
1570 1.1 bjh21 int8 roundingMode;
1571 1.1 bjh21 float64 z;
1572 1.1 bjh21
1573 1.1 bjh21 aExp = extractFloat64Exp( a );
1574 1.1 bjh21 if ( 0x413 <= aExp ) {
1575 1.1 bjh21 if ( 0x433 <= aExp ) {
1576 1.1 bjh21 if ( ( aExp == 0x7FF )
1577 1.1 bjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) {
1578 1.1 bjh21 return propagateFloat64NaN( a, a );
1579 1.1 bjh21 }
1580 1.1 bjh21 return a;
1581 1.1 bjh21 }
1582 1.1 bjh21 lastBitMask = 1;
1583 1.1 bjh21 lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1;
1584 1.1 bjh21 roundBitsMask = lastBitMask - 1;
1585 1.1 bjh21 z = a;
1586 1.1 bjh21 roundingMode = float_rounding_mode;
1587 1.1 bjh21 if ( roundingMode == float_round_nearest_even ) {
1588 1.1 bjh21 if ( lastBitMask ) {
1589 1.1 bjh21 add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
1590 1.1 bjh21 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
1591 1.1 bjh21 }
1592 1.1 bjh21 else {
1593 1.1 bjh21 if ( (sbits32) z.low < 0 ) {
1594 1.1 bjh21 ++z.high;
1595 1.1 bjh21 if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1;
1596 1.1 bjh21 }
1597 1.1 bjh21 }
1598 1.1 bjh21 }
1599 1.1 bjh21 else if ( roundingMode != float_round_to_zero ) {
1600 1.1 bjh21 if ( extractFloat64Sign( z )
1601 1.1 bjh21 ^ ( roundingMode == float_round_up ) ) {
1602 1.1 bjh21 add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
1603 1.1 bjh21 }
1604 1.1 bjh21 }
1605 1.1 bjh21 z.low &= ~ roundBitsMask;
1606 1.1 bjh21 }
1607 1.1 bjh21 else {
1608 1.1 bjh21 if ( aExp <= 0x3FE ) {
1609 1.1 bjh21 if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
1610 1.1 bjh21 float_exception_flags |= float_flag_inexact;
1611 1.1 bjh21 aSign = extractFloat64Sign( a );
1612 1.1 bjh21 switch ( float_rounding_mode ) {
1613 1.1 bjh21 case float_round_nearest_even:
1614 1.1 bjh21 if ( ( aExp == 0x3FE )
1615 1.1 bjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) )
1616 1.1 bjh21 ) {
1617 1.1 bjh21 return packFloat64( aSign, 0x3FF, 0, 0 );
1618 1.1 bjh21 }
1619 1.1 bjh21 break;
1620 1.1 bjh21 case float_round_down:
1621 1.1 bjh21 return
1622 1.1 bjh21 aSign ? packFloat64( 1, 0x3FF, 0, 0 )
1623 1.1 bjh21 : packFloat64( 0, 0, 0, 0 );
1624 1.1 bjh21 case float_round_up:
1625 1.1 bjh21 return
1626 1.1 bjh21 aSign ? packFloat64( 1, 0, 0, 0 )
1627 1.1 bjh21 : packFloat64( 0, 0x3FF, 0, 0 );
1628 1.1 bjh21 }
1629 1.1 bjh21 return packFloat64( aSign, 0, 0, 0 );
1630 1.1 bjh21 }
1631 1.1 bjh21 lastBitMask = 1;
1632 1.1 bjh21 lastBitMask <<= 0x413 - aExp;
1633 1.1 bjh21 roundBitsMask = lastBitMask - 1;
1634 1.1 bjh21 z.low = 0;
1635 1.1 bjh21 z.high = a.high;
1636 1.1 bjh21 roundingMode = float_rounding_mode;
1637 1.1 bjh21 if ( roundingMode == float_round_nearest_even ) {
1638 1.1 bjh21 z.high += lastBitMask>>1;
1639 1.1 bjh21 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
1640 1.1 bjh21 z.high &= ~ lastBitMask;
1641 1.1 bjh21 }
1642 1.1 bjh21 }
1643 1.1 bjh21 else if ( roundingMode != float_round_to_zero ) {
1644 1.1 bjh21 if ( extractFloat64Sign( z )
1645 1.1 bjh21 ^ ( roundingMode == float_round_up ) ) {
1646 1.1 bjh21 z.high |= ( a.low != 0 );
1647 1.1 bjh21 z.high += roundBitsMask;
1648 1.1 bjh21 }
1649 1.1 bjh21 }
1650 1.1 bjh21 z.high &= ~ roundBitsMask;
1651 1.1 bjh21 }
1652 1.1 bjh21 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
1653 1.1 bjh21 float_exception_flags |= float_flag_inexact;
1654 1.1 bjh21 }
1655 1.1 bjh21 return z;
1656 1.1 bjh21
1657 1.1 bjh21 }
1658 1.1 bjh21 #endif
1659 1.1 bjh21
1660 1.1 bjh21 /*
1661 1.1 bjh21 -------------------------------------------------------------------------------
1662 1.1 bjh21 Returns the result of adding the absolute values of the double-precision
1663 1.1 bjh21 floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
1664 1.1 bjh21 before being returned. `zSign' is ignored if the result is a NaN.
1665 1.1 bjh21 The addition is performed according to the IEC/IEEE Standard for Binary
1666 1.1 bjh21 Floating-Point Arithmetic.
1667 1.1 bjh21 -------------------------------------------------------------------------------
1668 1.1 bjh21 */
1669 1.1 bjh21 static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
1670 1.1 bjh21 {
1671 1.1 bjh21 int16 aExp, bExp, zExp;
1672 1.1 bjh21 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1673 1.1 bjh21 int16 expDiff;
1674 1.1 bjh21
1675 1.1 bjh21 aSig1 = extractFloat64Frac1( a );
1676 1.1 bjh21 aSig0 = extractFloat64Frac0( a );
1677 1.1 bjh21 aExp = extractFloat64Exp( a );
1678 1.1 bjh21 bSig1 = extractFloat64Frac1( b );
1679 1.1 bjh21 bSig0 = extractFloat64Frac0( b );
1680 1.1 bjh21 bExp = extractFloat64Exp( b );
1681 1.1 bjh21 expDiff = aExp - bExp;
1682 1.1 bjh21 if ( 0 < expDiff ) {
1683 1.1 bjh21 if ( aExp == 0x7FF ) {
1684 1.1 bjh21 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1685 1.1 bjh21 return a;
1686 1.1 bjh21 }
1687 1.1 bjh21 if ( bExp == 0 ) {
1688 1.1 bjh21 --expDiff;
1689 1.1 bjh21 }
1690 1.1 bjh21 else {
1691 1.1 bjh21 bSig0 |= 0x00100000;
1692 1.1 bjh21 }
1693 1.1 bjh21 shift64ExtraRightJamming(
1694 1.1 bjh21 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
1695 1.1 bjh21 zExp = aExp;
1696 1.1 bjh21 }
1697 1.1 bjh21 else if ( expDiff < 0 ) {
1698 1.1 bjh21 if ( bExp == 0x7FF ) {
1699 1.1 bjh21 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1700 1.1 bjh21 return packFloat64( zSign, 0x7FF, 0, 0 );
1701 1.1 bjh21 }
1702 1.1 bjh21 if ( aExp == 0 ) {
1703 1.1 bjh21 ++expDiff;
1704 1.1 bjh21 }
1705 1.1 bjh21 else {
1706 1.1 bjh21 aSig0 |= 0x00100000;
1707 1.1 bjh21 }
1708 1.1 bjh21 shift64ExtraRightJamming(
1709 1.1 bjh21 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
1710 1.1 bjh21 zExp = bExp;
1711 1.1 bjh21 }
1712 1.1 bjh21 else {
1713 1.1 bjh21 if ( aExp == 0x7FF ) {
1714 1.1 bjh21 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1715 1.1 bjh21 return propagateFloat64NaN( a, b );
1716 1.1 bjh21 }
1717 1.1 bjh21 return a;
1718 1.1 bjh21 }
1719 1.1 bjh21 add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1720 1.1 bjh21 if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 );
1721 1.1 bjh21 zSig2 = 0;
1722 1.1 bjh21 zSig0 |= 0x00200000;
1723 1.1 bjh21 zExp = aExp;
1724 1.1 bjh21 goto shiftRight1;
1725 1.1 bjh21 }
1726 1.1 bjh21 aSig0 |= 0x00100000;
1727 1.1 bjh21 add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1728 1.1 bjh21 --zExp;
1729 1.1 bjh21 if ( zSig0 < 0x00200000 ) goto roundAndPack;
1730 1.1 bjh21 ++zExp;
1731 1.1 bjh21 shiftRight1:
1732 1.1 bjh21 shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1733 1.1 bjh21 roundAndPack:
1734 1.1 bjh21 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1735 1.1 bjh21
1736 1.1 bjh21 }
1737 1.1 bjh21
1738 1.1 bjh21 /*
1739 1.1 bjh21 -------------------------------------------------------------------------------
1740 1.1 bjh21 Returns the result of subtracting the absolute values of the double-
1741 1.1 bjh21 precision floating-point values `a' and `b'. If `zSign' is 1, the
1742 1.1 bjh21 difference is negated before being returned. `zSign' is ignored if the
1743 1.1 bjh21 result is a NaN. The subtraction is performed according to the IEC/IEEE
1744 1.1 bjh21 Standard for Binary Floating-Point Arithmetic.
1745 1.1 bjh21 -------------------------------------------------------------------------------
1746 1.1 bjh21 */
1747 1.1 bjh21 static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
1748 1.1 bjh21 {
1749 1.1 bjh21 int16 aExp, bExp, zExp;
1750 1.1 bjh21 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
1751 1.1 bjh21 int16 expDiff;
1752 1.1 bjh21
1753 1.1 bjh21 aSig1 = extractFloat64Frac1( a );
1754 1.1 bjh21 aSig0 = extractFloat64Frac0( a );
1755 1.1 bjh21 aExp = extractFloat64Exp( a );
1756 1.1 bjh21 bSig1 = extractFloat64Frac1( b );
1757 1.1 bjh21 bSig0 = extractFloat64Frac0( b );
1758 1.1 bjh21 bExp = extractFloat64Exp( b );
1759 1.1 bjh21 expDiff = aExp - bExp;
1760 1.1 bjh21 shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 );
1761 1.1 bjh21 shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 );
1762 1.1 bjh21 if ( 0 < expDiff ) goto aExpBigger;
1763 1.1 bjh21 if ( expDiff < 0 ) goto bExpBigger;
1764 1.1 bjh21 if ( aExp == 0x7FF ) {
1765 1.1 bjh21 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1766 1.1 bjh21 return propagateFloat64NaN( a, b );
1767 1.1 bjh21 }
1768 1.1 bjh21 float_raise( float_flag_invalid );
1769 1.1 bjh21 return float64_default_nan;
1770 1.1 bjh21 }
1771 1.1 bjh21 if ( aExp == 0 ) {
1772 1.1 bjh21 aExp = 1;
1773 1.1 bjh21 bExp = 1;
1774 1.1 bjh21 }
1775 1.1 bjh21 if ( bSig0 < aSig0 ) goto aBigger;
1776 1.1 bjh21 if ( aSig0 < bSig0 ) goto bBigger;
1777 1.1 bjh21 if ( bSig1 < aSig1 ) goto aBigger;
1778 1.1 bjh21 if ( aSig1 < bSig1 ) goto bBigger;
1779 1.1 bjh21 return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 );
1780 1.1 bjh21 bExpBigger:
1781 1.1 bjh21 if ( bExp == 0x7FF ) {
1782 1.1 bjh21 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1783 1.1 bjh21 return packFloat64( zSign ^ 1, 0x7FF, 0, 0 );
1784 1.1 bjh21 }
1785 1.1 bjh21 if ( aExp == 0 ) {
1786 1.1 bjh21 ++expDiff;
1787 1.1 bjh21 }
1788 1.1 bjh21 else {
1789 1.1 bjh21 aSig0 |= 0x40000000;
1790 1.1 bjh21 }
1791 1.1 bjh21 shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
1792 1.1 bjh21 bSig0 |= 0x40000000;
1793 1.1 bjh21 bBigger:
1794 1.1 bjh21 sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
1795 1.1 bjh21 zExp = bExp;
1796 1.1 bjh21 zSign ^= 1;
1797 1.1 bjh21 goto normalizeRoundAndPack;
1798 1.1 bjh21 aExpBigger:
1799 1.1 bjh21 if ( aExp == 0x7FF ) {
1800 1.1 bjh21 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1801 1.1 bjh21 return a;
1802 1.1 bjh21 }
1803 1.1 bjh21 if ( bExp == 0 ) {
1804 1.1 bjh21 --expDiff;
1805 1.1 bjh21 }
1806 1.1 bjh21 else {
1807 1.1 bjh21 bSig0 |= 0x40000000;
1808 1.1 bjh21 }
1809 1.1 bjh21 shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
1810 1.1 bjh21 aSig0 |= 0x40000000;
1811 1.1 bjh21 aBigger:
1812 1.1 bjh21 sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1813 1.1 bjh21 zExp = aExp;
1814 1.1 bjh21 normalizeRoundAndPack:
1815 1.1 bjh21 --zExp;
1816 1.1 bjh21 return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 );
1817 1.1 bjh21
1818 1.1 bjh21 }
1819 1.1 bjh21
1820 1.1 bjh21 /*
1821 1.1 bjh21 -------------------------------------------------------------------------------
1822 1.1 bjh21 Returns the result of adding the double-precision floating-point values `a'
1823 1.1 bjh21 and `b'. The operation is performed according to the IEC/IEEE Standard for
1824 1.1 bjh21 Binary Floating-Point Arithmetic.
1825 1.1 bjh21 -------------------------------------------------------------------------------
1826 1.1 bjh21 */
1827 1.1 bjh21 float64 float64_add( float64 a, float64 b )
1828 1.1 bjh21 {
1829 1.1 bjh21 flag aSign, bSign;
1830 1.1 bjh21
1831 1.1 bjh21 aSign = extractFloat64Sign( a );
1832 1.1 bjh21 bSign = extractFloat64Sign( b );
1833 1.1 bjh21 if ( aSign == bSign ) {
1834 1.1 bjh21 return addFloat64Sigs( a, b, aSign );
1835 1.1 bjh21 }
1836 1.1 bjh21 else {
1837 1.1 bjh21 return subFloat64Sigs( a, b, aSign );
1838 1.1 bjh21 }
1839 1.1 bjh21
1840 1.1 bjh21 }
1841 1.1 bjh21
1842 1.1 bjh21 /*
1843 1.1 bjh21 -------------------------------------------------------------------------------
1844 1.1 bjh21 Returns the result of subtracting the double-precision floating-point values
1845 1.1 bjh21 `a' and `b'. The operation is performed according to the IEC/IEEE Standard
1846 1.1 bjh21 for Binary Floating-Point Arithmetic.
1847 1.1 bjh21 -------------------------------------------------------------------------------
1848 1.1 bjh21 */
1849 1.1 bjh21 float64 float64_sub( float64 a, float64 b )
1850 1.1 bjh21 {
1851 1.1 bjh21 flag aSign, bSign;
1852 1.1 bjh21
1853 1.1 bjh21 aSign = extractFloat64Sign( a );
1854 1.1 bjh21 bSign = extractFloat64Sign( b );
1855 1.1 bjh21 if ( aSign == bSign ) {
1856 1.1 bjh21 return subFloat64Sigs( a, b, aSign );
1857 1.1 bjh21 }
1858 1.1 bjh21 else {
1859 1.1 bjh21 return addFloat64Sigs( a, b, aSign );
1860 1.1 bjh21 }
1861 1.1 bjh21
1862 1.1 bjh21 }
1863 1.1 bjh21
1864 1.1 bjh21 /*
1865 1.1 bjh21 -------------------------------------------------------------------------------
1866 1.1 bjh21 Returns the result of multiplying the double-precision floating-point values
1867 1.1 bjh21 `a' and `b'. The operation is performed according to the IEC/IEEE Standard
1868 1.1 bjh21 for Binary Floating-Point Arithmetic.
1869 1.1 bjh21 -------------------------------------------------------------------------------
1870 1.1 bjh21 */
1871 1.1 bjh21 float64 float64_mul( float64 a, float64 b )
1872 1.1 bjh21 {
1873 1.1 bjh21 flag aSign, bSign, zSign;
1874 1.1 bjh21 int16 aExp, bExp, zExp;
1875 1.1 bjh21 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
1876 1.1 bjh21
1877 1.1 bjh21 aSig1 = extractFloat64Frac1( a );
1878 1.1 bjh21 aSig0 = extractFloat64Frac0( a );
1879 1.1 bjh21 aExp = extractFloat64Exp( a );
1880 1.1 bjh21 aSign = extractFloat64Sign( a );
1881 1.1 bjh21 bSig1 = extractFloat64Frac1( b );
1882 1.1 bjh21 bSig0 = extractFloat64Frac0( b );
1883 1.1 bjh21 bExp = extractFloat64Exp( b );
1884 1.1 bjh21 bSign = extractFloat64Sign( b );
1885 1.1 bjh21 zSign = aSign ^ bSign;
1886 1.1 bjh21 if ( aExp == 0x7FF ) {
1887 1.1 bjh21 if ( ( aSig0 | aSig1 )
1888 1.1 bjh21 || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
1889 1.1 bjh21 return propagateFloat64NaN( a, b );
1890 1.1 bjh21 }
1891 1.1 bjh21 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
1892 1.1 bjh21 return packFloat64( zSign, 0x7FF, 0, 0 );
1893 1.1 bjh21 }
1894 1.1 bjh21 if ( bExp == 0x7FF ) {
1895 1.1 bjh21 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1896 1.1 bjh21 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1897 1.1 bjh21 invalid:
1898 1.1 bjh21 float_raise( float_flag_invalid );
1899 1.1 bjh21 return float64_default_nan;
1900 1.1 bjh21 }
1901 1.1 bjh21 return packFloat64( zSign, 0x7FF, 0, 0 );
1902 1.1 bjh21 }
1903 1.1 bjh21 if ( aExp == 0 ) {
1904 1.1 bjh21 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1905 1.1 bjh21 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1906 1.1 bjh21 }
1907 1.1 bjh21 if ( bExp == 0 ) {
1908 1.1 bjh21 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1909 1.1 bjh21 normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1910 1.1 bjh21 }
1911 1.1 bjh21 zExp = aExp + bExp - 0x400;
1912 1.1 bjh21 aSig0 |= 0x00100000;
1913 1.1 bjh21 shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 );
1914 1.1 bjh21 mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
1915 1.1 bjh21 add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
1916 1.1 bjh21 zSig2 |= ( zSig3 != 0 );
1917 1.1 bjh21 if ( 0x00200000 <= zSig0 ) {
1918 1.1 bjh21 shift64ExtraRightJamming(
1919 1.1 bjh21 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1920 1.1 bjh21 ++zExp;
1921 1.1 bjh21 }
1922 1.1 bjh21 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1923 1.1 bjh21
1924 1.1 bjh21 }
1925 1.1 bjh21
1926 1.1 bjh21 /*
1927 1.1 bjh21 -------------------------------------------------------------------------------
1928 1.1 bjh21 Returns the result of dividing the double-precision floating-point value `a'
1929 1.1 bjh21 by the corresponding value `b'. The operation is performed according to the
1930 1.1 bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1931 1.1 bjh21 -------------------------------------------------------------------------------
1932 1.1 bjh21 */
1933 1.1 bjh21 float64 float64_div( float64 a, float64 b )
1934 1.1 bjh21 {
1935 1.1 bjh21 flag aSign, bSign, zSign;
1936 1.1 bjh21 int16 aExp, bExp, zExp;
1937 1.1 bjh21 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1938 1.1 bjh21 bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
1939 1.1 bjh21
1940 1.1 bjh21 aSig1 = extractFloat64Frac1( a );
1941 1.1 bjh21 aSig0 = extractFloat64Frac0( a );
1942 1.1 bjh21 aExp = extractFloat64Exp( a );
1943 1.1 bjh21 aSign = extractFloat64Sign( a );
1944 1.1 bjh21 bSig1 = extractFloat64Frac1( b );
1945 1.1 bjh21 bSig0 = extractFloat64Frac0( b );
1946 1.1 bjh21 bExp = extractFloat64Exp( b );
1947 1.1 bjh21 bSign = extractFloat64Sign( b );
1948 1.1 bjh21 zSign = aSign ^ bSign;
1949 1.1 bjh21 if ( aExp == 0x7FF ) {
1950 1.1 bjh21 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1951 1.1 bjh21 if ( bExp == 0x7FF ) {
1952 1.1 bjh21 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1953 1.1 bjh21 goto invalid;
1954 1.1 bjh21 }
1955 1.1 bjh21 return packFloat64( zSign, 0x7FF, 0, 0 );
1956 1.1 bjh21 }
1957 1.1 bjh21 if ( bExp == 0x7FF ) {
1958 1.1 bjh21 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1959 1.1 bjh21 return packFloat64( zSign, 0, 0, 0 );
1960 1.1 bjh21 }
1961 1.1 bjh21 if ( bExp == 0 ) {
1962 1.1 bjh21 if ( ( bSig0 | bSig1 ) == 0 ) {
1963 1.1 bjh21 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1964 1.1 bjh21 invalid:
1965 1.1 bjh21 float_raise( float_flag_invalid );
1966 1.1 bjh21 return float64_default_nan;
1967 1.1 bjh21 }
1968 1.1 bjh21 float_raise( float_flag_divbyzero );
1969 1.1 bjh21 return packFloat64( zSign, 0x7FF, 0, 0 );
1970 1.1 bjh21 }
1971 1.1 bjh21 normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1972 1.1 bjh21 }
1973 1.1 bjh21 if ( aExp == 0 ) {
1974 1.1 bjh21 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1975 1.1 bjh21 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1976 1.1 bjh21 }
1977 1.1 bjh21 zExp = aExp - bExp + 0x3FD;
1978 1.1 bjh21 shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 );
1979 1.1 bjh21 shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
1980 1.1 bjh21 if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) {
1981 1.1 bjh21 shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
1982 1.1 bjh21 ++zExp;
1983 1.1 bjh21 }
1984 1.1 bjh21 zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 );
1985 1.1 bjh21 mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
1986 1.1 bjh21 sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
1987 1.1 bjh21 while ( (sbits32) rem0 < 0 ) {
1988 1.1 bjh21 --zSig0;
1989 1.1 bjh21 add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
1990 1.1 bjh21 }
1991 1.1 bjh21 zSig1 = estimateDiv64To32( rem1, rem2, bSig0 );
1992 1.1 bjh21 if ( ( zSig1 & 0x3FF ) <= 4 ) {
1993 1.1 bjh21 mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
1994 1.1 bjh21 sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
1995 1.1 bjh21 while ( (sbits32) rem1 < 0 ) {
1996 1.1 bjh21 --zSig1;
1997 1.1 bjh21 add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
1998 1.1 bjh21 }
1999 1.1 bjh21 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
2000 1.1 bjh21 }
2001 1.1 bjh21 shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 );
2002 1.1 bjh21 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
2003 1.1 bjh21
2004 1.1 bjh21 }
2005 1.1 bjh21
2006 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC
2007 1.1 bjh21 /*
2008 1.1 bjh21 -------------------------------------------------------------------------------
2009 1.1 bjh21 Returns the remainder of the double-precision floating-point value `a'
2010 1.1 bjh21 with respect to the corresponding value `b'. The operation is performed
2011 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2012 1.1 bjh21 -------------------------------------------------------------------------------
2013 1.1 bjh21 */
2014 1.1 bjh21 float64 float64_rem( float64 a, float64 b )
2015 1.1 bjh21 {
2016 1.1 bjh21 flag aSign, bSign, zSign;
2017 1.1 bjh21 int16 aExp, bExp, expDiff;
2018 1.1 bjh21 bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
2019 1.1 bjh21 bits32 allZero, alternateASig0, alternateASig1, sigMean1;
2020 1.1 bjh21 sbits32 sigMean0;
2021 1.1 bjh21 float64 z;
2022 1.1 bjh21
2023 1.1 bjh21 aSig1 = extractFloat64Frac1( a );
2024 1.1 bjh21 aSig0 = extractFloat64Frac0( a );
2025 1.1 bjh21 aExp = extractFloat64Exp( a );
2026 1.1 bjh21 aSign = extractFloat64Sign( a );
2027 1.1 bjh21 bSig1 = extractFloat64Frac1( b );
2028 1.1 bjh21 bSig0 = extractFloat64Frac0( b );
2029 1.1 bjh21 bExp = extractFloat64Exp( b );
2030 1.1 bjh21 bSign = extractFloat64Sign( b );
2031 1.1 bjh21 if ( aExp == 0x7FF ) {
2032 1.1 bjh21 if ( ( aSig0 | aSig1 )
2033 1.1 bjh21 || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
2034 1.1 bjh21 return propagateFloat64NaN( a, b );
2035 1.1 bjh21 }
2036 1.1 bjh21 goto invalid;
2037 1.1 bjh21 }
2038 1.1 bjh21 if ( bExp == 0x7FF ) {
2039 1.1 bjh21 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
2040 1.1 bjh21 return a;
2041 1.1 bjh21 }
2042 1.1 bjh21 if ( bExp == 0 ) {
2043 1.1 bjh21 if ( ( bSig0 | bSig1 ) == 0 ) {
2044 1.1 bjh21 invalid:
2045 1.1 bjh21 float_raise( float_flag_invalid );
2046 1.1 bjh21 return float64_default_nan;
2047 1.1 bjh21 }
2048 1.1 bjh21 normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
2049 1.1 bjh21 }
2050 1.1 bjh21 if ( aExp == 0 ) {
2051 1.1 bjh21 if ( ( aSig0 | aSig1 ) == 0 ) return a;
2052 1.1 bjh21 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2053 1.1 bjh21 }
2054 1.1 bjh21 expDiff = aExp - bExp;
2055 1.1 bjh21 if ( expDiff < -1 ) return a;
2056 1.1 bjh21 shortShift64Left(
2057 1.1 bjh21 aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 );
2058 1.1 bjh21 shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
2059 1.1 bjh21 q = le64( bSig0, bSig1, aSig0, aSig1 );
2060 1.1 bjh21 if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2061 1.1 bjh21 expDiff -= 32;
2062 1.1 bjh21 while ( 0 < expDiff ) {
2063 1.1 bjh21 q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2064 1.1 bjh21 q = ( 4 < q ) ? q - 4 : 0;
2065 1.1 bjh21 mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2066 1.1 bjh21 shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero );
2067 1.1 bjh21 shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero );
2068 1.1 bjh21 sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 );
2069 1.1 bjh21 expDiff -= 29;
2070 1.1 bjh21 }
2071 1.1 bjh21 if ( -32 < expDiff ) {
2072 1.1 bjh21 q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2073 1.1 bjh21 q = ( 4 < q ) ? q - 4 : 0;
2074 1.1 bjh21 q >>= - expDiff;
2075 1.1 bjh21 shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2076 1.1 bjh21 expDiff += 24;
2077 1.1 bjh21 if ( expDiff < 0 ) {
2078 1.1 bjh21 shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
2079 1.1 bjh21 }
2080 1.1 bjh21 else {
2081 1.1 bjh21 shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
2082 1.1 bjh21 }
2083 1.1 bjh21 mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2084 1.1 bjh21 sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
2085 1.1 bjh21 }
2086 1.1 bjh21 else {
2087 1.1 bjh21 shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 );
2088 1.1 bjh21 shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2089 1.1 bjh21 }
2090 1.1 bjh21 do {
2091 1.1 bjh21 alternateASig0 = aSig0;
2092 1.1 bjh21 alternateASig1 = aSig1;
2093 1.1 bjh21 ++q;
2094 1.1 bjh21 sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2095 1.1 bjh21 } while ( 0 <= (sbits32) aSig0 );
2096 1.1 bjh21 add64(
2097 1.1 bjh21 aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
2098 1.1 bjh21 if ( ( sigMean0 < 0 )
2099 1.1 bjh21 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
2100 1.1 bjh21 aSig0 = alternateASig0;
2101 1.1 bjh21 aSig1 = alternateASig1;
2102 1.1 bjh21 }
2103 1.1 bjh21 zSign = ( (sbits32) aSig0 < 0 );
2104 1.1 bjh21 if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
2105 1.1 bjh21 return
2106 1.1 bjh21 normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
2107 1.1 bjh21
2108 1.1 bjh21 }
2109 1.1 bjh21 #endif
2110 1.1 bjh21
2111 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC
2112 1.1 bjh21 /*
2113 1.1 bjh21 -------------------------------------------------------------------------------
2114 1.1 bjh21 Returns the square root of the double-precision floating-point value `a'.
2115 1.1 bjh21 The operation is performed according to the IEC/IEEE Standard for Binary
2116 1.1 bjh21 Floating-Point Arithmetic.
2117 1.1 bjh21 -------------------------------------------------------------------------------
2118 1.1 bjh21 */
2119 1.1 bjh21 float64 float64_sqrt( float64 a )
2120 1.1 bjh21 {
2121 1.1 bjh21 flag aSign;
2122 1.1 bjh21 int16 aExp, zExp;
2123 1.1 bjh21 bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
2124 1.1 bjh21 bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
2125 1.1 bjh21 float64 z;
2126 1.1 bjh21
2127 1.1 bjh21 aSig1 = extractFloat64Frac1( a );
2128 1.1 bjh21 aSig0 = extractFloat64Frac0( a );
2129 1.1 bjh21 aExp = extractFloat64Exp( a );
2130 1.1 bjh21 aSign = extractFloat64Sign( a );
2131 1.1 bjh21 if ( aExp == 0x7FF ) {
2132 1.1 bjh21 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a );
2133 1.1 bjh21 if ( ! aSign ) return a;
2134 1.1 bjh21 goto invalid;
2135 1.1 bjh21 }
2136 1.1 bjh21 if ( aSign ) {
2137 1.1 bjh21 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
2138 1.1 bjh21 invalid:
2139 1.1 bjh21 float_raise( float_flag_invalid );
2140 1.1 bjh21 return float64_default_nan;
2141 1.1 bjh21 }
2142 1.1 bjh21 if ( aExp == 0 ) {
2143 1.1 bjh21 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 );
2144 1.1 bjh21 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2145 1.1 bjh21 }
2146 1.1 bjh21 zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
2147 1.1 bjh21 aSig0 |= 0x00100000;
2148 1.1 bjh21 shortShift64Left( aSig0, aSig1, 11, &term0, &term1 );
2149 1.1 bjh21 zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1;
2150 1.1 bjh21 if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF;
2151 1.1 bjh21 doubleZSig0 = zSig0 + zSig0;
2152 1.1 bjh21 shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 );
2153 1.1 bjh21 mul32To64( zSig0, zSig0, &term0, &term1 );
2154 1.1 bjh21 sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 );
2155 1.1 bjh21 while ( (sbits32) rem0 < 0 ) {
2156 1.1 bjh21 --zSig0;
2157 1.1 bjh21 doubleZSig0 -= 2;
2158 1.1 bjh21 add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 );
2159 1.1 bjh21 }
2160 1.1 bjh21 zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 );
2161 1.1 bjh21 if ( ( zSig1 & 0x1FF ) <= 5 ) {
2162 1.1 bjh21 if ( zSig1 == 0 ) zSig1 = 1;
2163 1.1 bjh21 mul32To64( doubleZSig0, zSig1, &term1, &term2 );
2164 1.1 bjh21 sub64( rem1, 0, term1, term2, &rem1, &rem2 );
2165 1.1 bjh21 mul32To64( zSig1, zSig1, &term2, &term3 );
2166 1.1 bjh21 sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
2167 1.1 bjh21 while ( (sbits32) rem1 < 0 ) {
2168 1.1 bjh21 --zSig1;
2169 1.1 bjh21 shortShift64Left( 0, zSig1, 1, &term2, &term3 );
2170 1.1 bjh21 term3 |= 1;
2171 1.1 bjh21 term2 |= doubleZSig0;
2172 1.1 bjh21 add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
2173 1.1 bjh21 }
2174 1.1 bjh21 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
2175 1.1 bjh21 }
2176 1.1 bjh21 shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 );
2177 1.1 bjh21 return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 );
2178 1.1 bjh21
2179 1.1 bjh21 }
2180 1.1 bjh21 #endif
2181 1.1 bjh21
2182 1.1 bjh21 /*
2183 1.1 bjh21 -------------------------------------------------------------------------------
2184 1.1 bjh21 Returns 1 if the double-precision floating-point value `a' is equal to
2185 1.1 bjh21 the corresponding value `b', and 0 otherwise. The comparison is performed
2186 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2187 1.1 bjh21 -------------------------------------------------------------------------------
2188 1.1 bjh21 */
2189 1.1 bjh21 flag float64_eq( float64 a, float64 b )
2190 1.1 bjh21 {
2191 1.1 bjh21
2192 1.1 bjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF )
2193 1.1 bjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2194 1.1 bjh21 || ( ( extractFloat64Exp( b ) == 0x7FF )
2195 1.1 bjh21 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2196 1.1 bjh21 ) {
2197 1.1 bjh21 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2198 1.1 bjh21 float_raise( float_flag_invalid );
2199 1.1 bjh21 }
2200 1.1 bjh21 return 0;
2201 1.1 bjh21 }
2202 1.1 bjh21 return ( a == b ) ||
2203 1.1 bjh21 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
2204 1.1 bjh21
2205 1.1 bjh21 }
2206 1.1 bjh21
2207 1.1 bjh21 /*
2208 1.1 bjh21 -------------------------------------------------------------------------------
2209 1.1 bjh21 Returns 1 if the double-precision floating-point value `a' is less than
2210 1.1 bjh21 or equal to the corresponding value `b', and 0 otherwise. The comparison
2211 1.1 bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point
2212 1.1 bjh21 Arithmetic.
2213 1.1 bjh21 -------------------------------------------------------------------------------
2214 1.1 bjh21 */
2215 1.1 bjh21 flag float64_le( float64 a, float64 b )
2216 1.1 bjh21 {
2217 1.1 bjh21 flag aSign, bSign;
2218 1.1 bjh21
2219 1.1 bjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF )
2220 1.1 bjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2221 1.1 bjh21 || ( ( extractFloat64Exp( b ) == 0x7FF )
2222 1.1 bjh21 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2223 1.1 bjh21 ) {
2224 1.1 bjh21 float_raise( float_flag_invalid );
2225 1.1 bjh21 return 0;
2226 1.1 bjh21 }
2227 1.1 bjh21 aSign = extractFloat64Sign( a );
2228 1.1 bjh21 bSign = extractFloat64Sign( b );
2229 1.1 bjh21 if ( aSign != bSign )
2230 1.1 bjh21 return aSign ||
2231 1.1 bjh21 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
2232 1.1 bjh21 0 );
2233 1.1 bjh21 return ( a == b ) ||
2234 1.1 bjh21 ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2235 1.1 bjh21 }
2236 1.1 bjh21
2237 1.1 bjh21 /*
2238 1.1 bjh21 -------------------------------------------------------------------------------
2239 1.1 bjh21 Returns 1 if the double-precision floating-point value `a' is less than
2240 1.1 bjh21 the corresponding value `b', and 0 otherwise. The comparison is performed
2241 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2242 1.1 bjh21 -------------------------------------------------------------------------------
2243 1.1 bjh21 */
2244 1.1 bjh21 flag float64_lt( float64 a, float64 b )
2245 1.1 bjh21 {
2246 1.1 bjh21 flag aSign, bSign;
2247 1.1 bjh21
2248 1.1 bjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF )
2249 1.1 bjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2250 1.1 bjh21 || ( ( extractFloat64Exp( b ) == 0x7FF )
2251 1.1 bjh21 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2252 1.1 bjh21 ) {
2253 1.1 bjh21 float_raise( float_flag_invalid );
2254 1.1 bjh21 return 0;
2255 1.1 bjh21 }
2256 1.1 bjh21 aSign = extractFloat64Sign( a );
2257 1.1 bjh21 bSign = extractFloat64Sign( b );
2258 1.1 bjh21 if ( aSign != bSign )
2259 1.1 bjh21 return aSign &&
2260 1.1 bjh21 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
2261 1.1 bjh21 0 );
2262 1.1 bjh21 return ( a != b ) &&
2263 1.1 bjh21 ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2264 1.1 bjh21
2265 1.1 bjh21 }
2266 1.1 bjh21
2267 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC
2268 1.1 bjh21 /*
2269 1.1 bjh21 -------------------------------------------------------------------------------
2270 1.1 bjh21 Returns 1 if the double-precision floating-point value `a' is equal to
2271 1.1 bjh21 the corresponding value `b', and 0 otherwise. The invalid exception is
2272 1.1 bjh21 raised if either operand is a NaN. Otherwise, the comparison is performed
2273 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2274 1.1 bjh21 -------------------------------------------------------------------------------
2275 1.1 bjh21 */
2276 1.1 bjh21 flag float64_eq_signaling( float64 a, float64 b )
2277 1.1 bjh21 {
2278 1.1 bjh21
2279 1.1 bjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF )
2280 1.1 bjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2281 1.1 bjh21 || ( ( extractFloat64Exp( b ) == 0x7FF )
2282 1.1 bjh21 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2283 1.1 bjh21 ) {
2284 1.1 bjh21 float_raise( float_flag_invalid );
2285 1.1 bjh21 return 0;
2286 1.1 bjh21 }
2287 1.1 bjh21 return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
2288 1.1 bjh21
2289 1.1 bjh21 }
2290 1.1 bjh21
2291 1.1 bjh21 /*
2292 1.1 bjh21 -------------------------------------------------------------------------------
2293 1.1 bjh21 Returns 1 if the double-precision floating-point value `a' is less than or
2294 1.1 bjh21 equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
2295 1.1 bjh21 cause an exception. Otherwise, the comparison is performed according to the
2296 1.1 bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2297 1.1 bjh21 -------------------------------------------------------------------------------
2298 1.1 bjh21 */
2299 1.1 bjh21 flag float64_le_quiet( float64 a, float64 b )
2300 1.1 bjh21 {
2301 1.1 bjh21 flag aSign, bSign;
2302 1.1 bjh21
2303 1.1 bjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF )
2304 1.1 bjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2305 1.1 bjh21 || ( ( extractFloat64Exp( b ) == 0x7FF )
2306 1.1 bjh21 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2307 1.1 bjh21 ) {
2308 1.1 bjh21 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2309 1.1 bjh21 float_raise( float_flag_invalid );
2310 1.1 bjh21 }
2311 1.1 bjh21 return 0;
2312 1.1 bjh21 }
2313 1.1 bjh21 aSign = extractFloat64Sign( a );
2314 1.1 bjh21 bSign = extractFloat64Sign( b );
2315 1.1 bjh21 if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
2316 1.1 bjh21 return ( a == b ) || ( aSign ^ ( a < b ) );
2317 1.1 bjh21
2318 1.1 bjh21 }
2319 1.1 bjh21
2320 1.1 bjh21 /*
2321 1.1 bjh21 -------------------------------------------------------------------------------
2322 1.1 bjh21 Returns 1 if the double-precision floating-point value `a' is less than
2323 1.1 bjh21 the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
2324 1.1 bjh21 exception. Otherwise, the comparison is performed according to the IEC/IEEE
2325 1.1 bjh21 Standard for Binary Floating-Point Arithmetic.
2326 1.1 bjh21 -------------------------------------------------------------------------------
2327 1.1 bjh21 */
2328 1.1 bjh21 flag float64_lt_quiet( float64 a, float64 b )
2329 1.1 bjh21 {
2330 1.1 bjh21 flag aSign, bSign;
2331 1.1 bjh21
2332 1.1 bjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF )
2333 1.1 bjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2334 1.1 bjh21 || ( ( extractFloat64Exp( b ) == 0x7FF )
2335 1.1 bjh21 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2336 1.1 bjh21 ) {
2337 1.1 bjh21 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2338 1.1 bjh21 float_raise( float_flag_invalid );
2339 1.1 bjh21 }
2340 1.1 bjh21 return 0;
2341 1.1 bjh21 }
2342 1.1 bjh21 aSign = extractFloat64Sign( a );
2343 1.1 bjh21 bSign = extractFloat64Sign( b );
2344 1.1 bjh21 if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
2345 1.1 bjh21 return ( a != b ) && ( aSign ^ ( a < b ) );
2346 1.1 bjh21
2347 1.1 bjh21 }
2348 1.1 bjh21
2349 1.1 bjh21 #endif
2350