softfloat.c revision 1.3 1 1.3 matt /* $NetBSD: softfloat.c,v 1.3 2013/01/10 08:16:11 matt Exp $ */
2 1.1 bjh21
3 1.1 bjh21 /*
4 1.1 bjh21 * This version hacked for use with gcc -msoft-float by bjh21.
5 1.1 bjh21 * (Mostly a case of #ifdefing out things GCC doesn't need or provides
6 1.1 bjh21 * itself).
7 1.1 bjh21 */
8 1.1 bjh21
9 1.1 bjh21 /*
10 1.1 bjh21 * Things you may want to define:
11 1.1 bjh21 *
12 1.1 bjh21 * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
13 1.1 bjh21 * -msoft-float) to work. Include "softfloat-for-gcc.h" to get them
14 1.1 bjh21 * properly renamed.
15 1.1 bjh21 */
16 1.1 bjh21
17 1.1 bjh21 /*
18 1.1 bjh21 * This differs from the standard bits32/softfloat.c in that float64
19 1.1 bjh21 * is defined to be a 64-bit integer rather than a structure. The
20 1.1 bjh21 * structure is float64s, with translation between the two going via
21 1.1 bjh21 * float64u.
22 1.1 bjh21 */
23 1.1 bjh21
24 1.1 bjh21 /*
25 1.1 bjh21 ===============================================================================
26 1.1 bjh21
27 1.1 bjh21 This C source file is part of the SoftFloat IEC/IEEE Floating-Point
28 1.1 bjh21 Arithmetic Package, Release 2a.
29 1.1 bjh21
30 1.1 bjh21 Written by John R. Hauser. This work was made possible in part by the
31 1.1 bjh21 International Computer Science Institute, located at Suite 600, 1947 Center
32 1.1 bjh21 Street, Berkeley, California 94704. Funding was partially provided by the
33 1.1 bjh21 National Science Foundation under grant MIP-9311980. The original version
34 1.1 bjh21 of this code was written as part of a project to build a fixed-point vector
35 1.1 bjh21 processor in collaboration with the University of California at Berkeley,
36 1.1 bjh21 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
37 1.1 bjh21 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
38 1.1 bjh21 arithmetic/SoftFloat.html'.
39 1.1 bjh21
40 1.1 bjh21 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
41 1.1 bjh21 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
42 1.1 bjh21 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
43 1.1 bjh21 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
44 1.1 bjh21 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
45 1.1 bjh21
46 1.1 bjh21 Derivative works are acceptable, even for commercial purposes, so long as
47 1.1 bjh21 (1) they include prominent notice that the work is derivative, and (2) they
48 1.1 bjh21 include prominent notice akin to these four paragraphs for those parts of
49 1.1 bjh21 this code that are retained.
50 1.1 bjh21
51 1.1 bjh21 ===============================================================================
52 1.1 bjh21 */
53 1.1 bjh21
54 1.1 bjh21 #include <sys/cdefs.h>
55 1.1 bjh21 #if defined(LIBC_SCCS) && !defined(lint)
56 1.3 matt __RCSID("$NetBSD: softfloat.c,v 1.3 2013/01/10 08:16:11 matt Exp $");
57 1.1 bjh21 #endif /* LIBC_SCCS and not lint */
58 1.1 bjh21
59 1.1 bjh21 #ifdef SOFTFLOAT_FOR_GCC
60 1.1 bjh21 #include "softfloat-for-gcc.h"
61 1.1 bjh21 #endif
62 1.1 bjh21
63 1.1 bjh21 #include "milieu.h"
64 1.1 bjh21 #include "softfloat.h"
65 1.1 bjh21
66 1.1 bjh21 /*
67 1.1 bjh21 * Conversions between floats as stored in memory and floats as
68 1.1 bjh21 * SoftFloat uses them
69 1.1 bjh21 */
70 1.1 bjh21 #ifndef FLOAT64_DEMANGLE
71 1.1 bjh21 #define FLOAT64_DEMANGLE(a) (a)
72 1.1 bjh21 #endif
73 1.1 bjh21 #ifndef FLOAT64_MANGLE
74 1.1 bjh21 #define FLOAT64_MANGLE(a) (a)
75 1.1 bjh21 #endif
76 1.1 bjh21
77 1.1 bjh21 /*
78 1.1 bjh21 -------------------------------------------------------------------------------
79 1.1 bjh21 Floating-point rounding mode and exception flags.
80 1.1 bjh21 -------------------------------------------------------------------------------
81 1.1 bjh21 */
82 1.3 matt #ifndef set_float_rounding_mode
83 1.1 bjh21 fp_rnd float_rounding_mode = float_round_nearest_even;
84 1.1 bjh21 fp_except float_exception_flags = 0;
85 1.3 matt #endif
86 1.3 matt #ifndef set_float_exception_inexact_flag
87 1.3 matt #define set_float_exception_inexact_flag() \
88 1.3 matt ((void)(float_exception_flags |= float_flag_inexact))
89 1.3 matt #endif
90 1.1 bjh21
91 1.1 bjh21 /*
92 1.1 bjh21 -------------------------------------------------------------------------------
93 1.1 bjh21 Primitive arithmetic functions, including multi-word arithmetic, and
94 1.1 bjh21 division and square root approximations. (Can be specialized to target if
95 1.1 bjh21 desired.)
96 1.1 bjh21 -------------------------------------------------------------------------------
97 1.1 bjh21 */
98 1.1 bjh21 #include "softfloat-macros"
99 1.1 bjh21
100 1.1 bjh21 /*
101 1.1 bjh21 -------------------------------------------------------------------------------
102 1.1 bjh21 Functions and definitions to determine: (1) whether tininess for underflow
103 1.1 bjh21 is detected before or after rounding by default, (2) what (if anything)
104 1.1 bjh21 happens when exceptions are raised, (3) how signaling NaNs are distinguished
105 1.1 bjh21 from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs
106 1.1 bjh21 are propagated from function inputs to output. These details are target-
107 1.1 bjh21 specific.
108 1.1 bjh21 -------------------------------------------------------------------------------
109 1.1 bjh21 */
110 1.1 bjh21 #include "softfloat-specialize"
111 1.1 bjh21
112 1.1 bjh21 /*
113 1.1 bjh21 -------------------------------------------------------------------------------
114 1.1 bjh21 Returns the fraction bits of the single-precision floating-point value `a'.
115 1.1 bjh21 -------------------------------------------------------------------------------
116 1.1 bjh21 */
117 1.1 bjh21 INLINE bits32 extractFloat32Frac( float32 a )
118 1.1 bjh21 {
119 1.1 bjh21
120 1.1 bjh21 return a & 0x007FFFFF;
121 1.1 bjh21
122 1.1 bjh21 }
123 1.1 bjh21
124 1.1 bjh21 /*
125 1.1 bjh21 -------------------------------------------------------------------------------
126 1.1 bjh21 Returns the exponent bits of the single-precision floating-point value `a'.
127 1.1 bjh21 -------------------------------------------------------------------------------
128 1.1 bjh21 */
129 1.1 bjh21 INLINE int16 extractFloat32Exp( float32 a )
130 1.1 bjh21 {
131 1.1 bjh21
132 1.1 bjh21 return ( a>>23 ) & 0xFF;
133 1.1 bjh21
134 1.1 bjh21 }
135 1.1 bjh21
136 1.1 bjh21 /*
137 1.1 bjh21 -------------------------------------------------------------------------------
138 1.1 bjh21 Returns the sign bit of the single-precision floating-point value `a'.
139 1.1 bjh21 -------------------------------------------------------------------------------
140 1.1 bjh21 */
141 1.1 bjh21 INLINE flag extractFloat32Sign( float32 a )
142 1.1 bjh21 {
143 1.1 bjh21
144 1.1 bjh21 return a>>31;
145 1.1 bjh21
146 1.1 bjh21 }
147 1.1 bjh21
148 1.1 bjh21 /*
149 1.1 bjh21 -------------------------------------------------------------------------------
150 1.1 bjh21 Normalizes the subnormal single-precision floating-point value represented
151 1.1 bjh21 by the denormalized significand `aSig'. The normalized exponent and
152 1.1 bjh21 significand are stored at the locations pointed to by `zExpPtr' and
153 1.1 bjh21 `zSigPtr', respectively.
154 1.1 bjh21 -------------------------------------------------------------------------------
155 1.1 bjh21 */
156 1.1 bjh21 static void
157 1.1 bjh21 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
158 1.1 bjh21 {
159 1.1 bjh21 int8 shiftCount;
160 1.1 bjh21
161 1.1 bjh21 shiftCount = countLeadingZeros32( aSig ) - 8;
162 1.1 bjh21 *zSigPtr = aSig<<shiftCount;
163 1.1 bjh21 *zExpPtr = 1 - shiftCount;
164 1.1 bjh21
165 1.1 bjh21 }
166 1.1 bjh21
167 1.1 bjh21 /*
168 1.1 bjh21 -------------------------------------------------------------------------------
169 1.1 bjh21 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
170 1.1 bjh21 single-precision floating-point value, returning the result. After being
171 1.1 bjh21 shifted into the proper positions, the three fields are simply added
172 1.1 bjh21 together to form the result. This means that any integer portion of `zSig'
173 1.1 bjh21 will be added into the exponent. Since a properly normalized significand
174 1.1 bjh21 will have an integer portion equal to 1, the `zExp' input should be 1 less
175 1.1 bjh21 than the desired result exponent whenever `zSig' is a complete, normalized
176 1.1 bjh21 significand.
177 1.1 bjh21 -------------------------------------------------------------------------------
178 1.1 bjh21 */
179 1.1 bjh21 INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
180 1.1 bjh21 {
181 1.1 bjh21
182 1.1 bjh21 return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
183 1.1 bjh21
184 1.1 bjh21 }
185 1.1 bjh21
186 1.1 bjh21 /*
187 1.1 bjh21 -------------------------------------------------------------------------------
188 1.1 bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
189 1.1 bjh21 and significand `zSig', and returns the proper single-precision floating-
190 1.1 bjh21 point value corresponding to the abstract input. Ordinarily, the abstract
191 1.1 bjh21 value is simply rounded and packed into the single-precision format, with
192 1.1 bjh21 the inexact exception raised if the abstract input cannot be represented
193 1.1 bjh21 exactly. However, if the abstract value is too large, the overflow and
194 1.1 bjh21 inexact exceptions are raised and an infinity or maximal finite value is
195 1.1 bjh21 returned. If the abstract value is too small, the input value is rounded to
196 1.1 bjh21 a subnormal number, and the underflow and inexact exceptions are raised if
197 1.1 bjh21 the abstract input cannot be represented exactly as a subnormal single-
198 1.1 bjh21 precision floating-point number.
199 1.1 bjh21 The input significand `zSig' has its binary point between bits 30
200 1.1 bjh21 and 29, which is 7 bits to the left of the usual location. This shifted
201 1.1 bjh21 significand must be normalized or smaller. If `zSig' is not normalized,
202 1.1 bjh21 `zExp' must be 0; in that case, the result returned is a subnormal number,
203 1.1 bjh21 and it must not require rounding. In the usual case that `zSig' is
204 1.1 bjh21 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
205 1.1 bjh21 The handling of underflow and overflow follows the IEC/IEEE Standard for
206 1.1 bjh21 Binary Floating-Point Arithmetic.
207 1.1 bjh21 -------------------------------------------------------------------------------
208 1.1 bjh21 */
209 1.1 bjh21 static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
210 1.1 bjh21 {
211 1.1 bjh21 int8 roundingMode;
212 1.1 bjh21 flag roundNearestEven;
213 1.1 bjh21 int8 roundIncrement, roundBits;
214 1.1 bjh21 flag isTiny;
215 1.1 bjh21
216 1.1 bjh21 roundingMode = float_rounding_mode;
217 1.1 bjh21 roundNearestEven = roundingMode == float_round_nearest_even;
218 1.1 bjh21 roundIncrement = 0x40;
219 1.1 bjh21 if ( ! roundNearestEven ) {
220 1.1 bjh21 if ( roundingMode == float_round_to_zero ) {
221 1.1 bjh21 roundIncrement = 0;
222 1.1 bjh21 }
223 1.1 bjh21 else {
224 1.1 bjh21 roundIncrement = 0x7F;
225 1.1 bjh21 if ( zSign ) {
226 1.1 bjh21 if ( roundingMode == float_round_up ) roundIncrement = 0;
227 1.1 bjh21 }
228 1.1 bjh21 else {
229 1.1 bjh21 if ( roundingMode == float_round_down ) roundIncrement = 0;
230 1.1 bjh21 }
231 1.1 bjh21 }
232 1.1 bjh21 }
233 1.1 bjh21 roundBits = zSig & 0x7F;
234 1.1 bjh21 if ( 0xFD <= (bits16) zExp ) {
235 1.1 bjh21 if ( ( 0xFD < zExp )
236 1.1 bjh21 || ( ( zExp == 0xFD )
237 1.1 bjh21 && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
238 1.1 bjh21 ) {
239 1.1 bjh21 float_raise( float_flag_overflow | float_flag_inexact );
240 1.1 bjh21 return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
241 1.1 bjh21 }
242 1.1 bjh21 if ( zExp < 0 ) {
243 1.1 bjh21 isTiny =
244 1.1 bjh21 ( float_detect_tininess == float_tininess_before_rounding )
245 1.1 bjh21 || ( zExp < -1 )
246 1.2 christos || ( zSig + roundIncrement < (uint32)0x80000000 );
247 1.1 bjh21 shift32RightJamming( zSig, - zExp, &zSig );
248 1.1 bjh21 zExp = 0;
249 1.1 bjh21 roundBits = zSig & 0x7F;
250 1.1 bjh21 if ( isTiny && roundBits ) float_raise( float_flag_underflow );
251 1.1 bjh21 }
252 1.1 bjh21 }
253 1.3 matt if ( roundBits ) set_float_exception_inexact_flag();
254 1.1 bjh21 zSig = ( zSig + roundIncrement )>>7;
255 1.1 bjh21 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
256 1.1 bjh21 if ( zSig == 0 ) zExp = 0;
257 1.1 bjh21 return packFloat32( zSign, zExp, zSig );
258 1.1 bjh21
259 1.1 bjh21 }
260 1.1 bjh21
261 1.1 bjh21 /*
262 1.1 bjh21 -------------------------------------------------------------------------------
263 1.1 bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
264 1.1 bjh21 and significand `zSig', and returns the proper single-precision floating-
265 1.1 bjh21 point value corresponding to the abstract input. This routine is just like
266 1.1 bjh21 `roundAndPackFloat32' except that `zSig' does not have to be normalized.
267 1.1 bjh21 Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
268 1.1 bjh21 floating-point exponent.
269 1.1 bjh21 -------------------------------------------------------------------------------
270 1.1 bjh21 */
271 1.1 bjh21 static float32
272 1.1 bjh21 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
273 1.1 bjh21 {
274 1.1 bjh21 int8 shiftCount;
275 1.1 bjh21
276 1.1 bjh21 shiftCount = countLeadingZeros32( zSig ) - 1;
277 1.1 bjh21 return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
278 1.1 bjh21
279 1.1 bjh21 }
280 1.1 bjh21
281 1.1 bjh21 /*
282 1.1 bjh21 -------------------------------------------------------------------------------
283 1.1 bjh21 Returns the least-significant 32 fraction bits of the double-precision
284 1.1 bjh21 floating-point value `a'.
285 1.1 bjh21 -------------------------------------------------------------------------------
286 1.1 bjh21 */
287 1.1 bjh21 INLINE bits32 extractFloat64Frac1( float64 a )
288 1.1 bjh21 {
289 1.1 bjh21
290 1.2 christos return (bits32)(FLOAT64_DEMANGLE(a) & LIT64(0x00000000FFFFFFFF));
291 1.1 bjh21
292 1.1 bjh21 }
293 1.1 bjh21
294 1.1 bjh21 /*
295 1.1 bjh21 -------------------------------------------------------------------------------
296 1.1 bjh21 Returns the most-significant 20 fraction bits of the double-precision
297 1.1 bjh21 floating-point value `a'.
298 1.1 bjh21 -------------------------------------------------------------------------------
299 1.1 bjh21 */
300 1.1 bjh21 INLINE bits32 extractFloat64Frac0( float64 a )
301 1.1 bjh21 {
302 1.1 bjh21
303 1.2 christos return (bits32)((FLOAT64_DEMANGLE(a) >> 32) & 0x000FFFFF);
304 1.1 bjh21
305 1.1 bjh21 }
306 1.1 bjh21
307 1.1 bjh21 /*
308 1.1 bjh21 -------------------------------------------------------------------------------
309 1.1 bjh21 Returns the exponent bits of the double-precision floating-point value `a'.
310 1.1 bjh21 -------------------------------------------------------------------------------
311 1.1 bjh21 */
312 1.1 bjh21 INLINE int16 extractFloat64Exp( float64 a )
313 1.1 bjh21 {
314 1.1 bjh21
315 1.2 christos return (int16)((FLOAT64_DEMANGLE(a) >> 52) & 0x7FF);
316 1.1 bjh21
317 1.1 bjh21 }
318 1.1 bjh21
319 1.1 bjh21 /*
320 1.1 bjh21 -------------------------------------------------------------------------------
321 1.1 bjh21 Returns the sign bit of the double-precision floating-point value `a'.
322 1.1 bjh21 -------------------------------------------------------------------------------
323 1.1 bjh21 */
324 1.1 bjh21 INLINE flag extractFloat64Sign( float64 a )
325 1.1 bjh21 {
326 1.1 bjh21
327 1.2 christos return (flag)(FLOAT64_DEMANGLE(a) >> 63);
328 1.1 bjh21
329 1.1 bjh21 }
330 1.1 bjh21
331 1.1 bjh21 /*
332 1.1 bjh21 -------------------------------------------------------------------------------
333 1.1 bjh21 Normalizes the subnormal double-precision floating-point value represented
334 1.1 bjh21 by the denormalized significand formed by the concatenation of `aSig0' and
335 1.1 bjh21 `aSig1'. The normalized exponent is stored at the location pointed to by
336 1.1 bjh21 `zExpPtr'. The most significant 21 bits of the normalized significand are
337 1.1 bjh21 stored at the location pointed to by `zSig0Ptr', and the least significant
338 1.1 bjh21 32 bits of the normalized significand are stored at the location pointed to
339 1.1 bjh21 by `zSig1Ptr'.
340 1.1 bjh21 -------------------------------------------------------------------------------
341 1.1 bjh21 */
342 1.1 bjh21 static void
343 1.1 bjh21 normalizeFloat64Subnormal(
344 1.1 bjh21 bits32 aSig0,
345 1.1 bjh21 bits32 aSig1,
346 1.1 bjh21 int16 *zExpPtr,
347 1.1 bjh21 bits32 *zSig0Ptr,
348 1.1 bjh21 bits32 *zSig1Ptr
349 1.1 bjh21 )
350 1.1 bjh21 {
351 1.1 bjh21 int8 shiftCount;
352 1.1 bjh21
353 1.1 bjh21 if ( aSig0 == 0 ) {
354 1.1 bjh21 shiftCount = countLeadingZeros32( aSig1 ) - 11;
355 1.1 bjh21 if ( shiftCount < 0 ) {
356 1.1 bjh21 *zSig0Ptr = aSig1>>( - shiftCount );
357 1.1 bjh21 *zSig1Ptr = aSig1<<( shiftCount & 31 );
358 1.1 bjh21 }
359 1.1 bjh21 else {
360 1.1 bjh21 *zSig0Ptr = aSig1<<shiftCount;
361 1.1 bjh21 *zSig1Ptr = 0;
362 1.1 bjh21 }
363 1.1 bjh21 *zExpPtr = - shiftCount - 31;
364 1.1 bjh21 }
365 1.1 bjh21 else {
366 1.1 bjh21 shiftCount = countLeadingZeros32( aSig0 ) - 11;
367 1.1 bjh21 shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
368 1.1 bjh21 *zExpPtr = 1 - shiftCount;
369 1.1 bjh21 }
370 1.1 bjh21
371 1.1 bjh21 }
372 1.1 bjh21
373 1.1 bjh21 /*
374 1.1 bjh21 -------------------------------------------------------------------------------
375 1.1 bjh21 Packs the sign `zSign', the exponent `zExp', and the significand formed by
376 1.1 bjh21 the concatenation of `zSig0' and `zSig1' into a double-precision floating-
377 1.1 bjh21 point value, returning the result. After being shifted into the proper
378 1.1 bjh21 positions, the three fields `zSign', `zExp', and `zSig0' are simply added
379 1.1 bjh21 together to form the most significant 32 bits of the result. This means
380 1.1 bjh21 that any integer portion of `zSig0' will be added into the exponent. Since
381 1.1 bjh21 a properly normalized significand will have an integer portion equal to 1,
382 1.1 bjh21 the `zExp' input should be 1 less than the desired result exponent whenever
383 1.1 bjh21 `zSig0' and `zSig1' concatenated form a complete, normalized significand.
384 1.1 bjh21 -------------------------------------------------------------------------------
385 1.1 bjh21 */
386 1.1 bjh21 INLINE float64
387 1.1 bjh21 packFloat64( flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
388 1.1 bjh21 {
389 1.1 bjh21
390 1.1 bjh21 return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
391 1.1 bjh21 ( ( (bits64) zExp )<<52 ) +
392 1.1 bjh21 ( ( (bits64) zSig0 )<<32 ) + zSig1 );
393 1.1 bjh21
394 1.1 bjh21
395 1.1 bjh21 }
396 1.1 bjh21
397 1.1 bjh21 /*
398 1.1 bjh21 -------------------------------------------------------------------------------
399 1.1 bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
400 1.1 bjh21 and extended significand formed by the concatenation of `zSig0', `zSig1',
401 1.1 bjh21 and `zSig2', and returns the proper double-precision floating-point value
402 1.1 bjh21 corresponding to the abstract input. Ordinarily, the abstract value is
403 1.1 bjh21 simply rounded and packed into the double-precision format, with the inexact
404 1.1 bjh21 exception raised if the abstract input cannot be represented exactly.
405 1.1 bjh21 However, if the abstract value is too large, the overflow and inexact
406 1.1 bjh21 exceptions are raised and an infinity or maximal finite value is returned.
407 1.1 bjh21 If the abstract value is too small, the input value is rounded to a
408 1.1 bjh21 subnormal number, and the underflow and inexact exceptions are raised if the
409 1.1 bjh21 abstract input cannot be represented exactly as a subnormal double-precision
410 1.1 bjh21 floating-point number.
411 1.1 bjh21 The input significand must be normalized or smaller. If the input
412 1.1 bjh21 significand is not normalized, `zExp' must be 0; in that case, the result
413 1.1 bjh21 returned is a subnormal number, and it must not require rounding. In the
414 1.1 bjh21 usual case that the input significand is normalized, `zExp' must be 1 less
415 1.1 bjh21 than the ``true'' floating-point exponent. The handling of underflow and
416 1.1 bjh21 overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
417 1.1 bjh21 -------------------------------------------------------------------------------
418 1.1 bjh21 */
419 1.1 bjh21 static float64
420 1.1 bjh21 roundAndPackFloat64(
421 1.1 bjh21 flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 )
422 1.1 bjh21 {
423 1.1 bjh21 int8 roundingMode;
424 1.1 bjh21 flag roundNearestEven, increment, isTiny;
425 1.1 bjh21
426 1.1 bjh21 roundingMode = float_rounding_mode;
427 1.1 bjh21 roundNearestEven = ( roundingMode == float_round_nearest_even );
428 1.1 bjh21 increment = ( (sbits32) zSig2 < 0 );
429 1.1 bjh21 if ( ! roundNearestEven ) {
430 1.1 bjh21 if ( roundingMode == float_round_to_zero ) {
431 1.1 bjh21 increment = 0;
432 1.1 bjh21 }
433 1.1 bjh21 else {
434 1.1 bjh21 if ( zSign ) {
435 1.1 bjh21 increment = ( roundingMode == float_round_down ) && zSig2;
436 1.1 bjh21 }
437 1.1 bjh21 else {
438 1.1 bjh21 increment = ( roundingMode == float_round_up ) && zSig2;
439 1.1 bjh21 }
440 1.1 bjh21 }
441 1.1 bjh21 }
442 1.1 bjh21 if ( 0x7FD <= (bits16) zExp ) {
443 1.1 bjh21 if ( ( 0x7FD < zExp )
444 1.1 bjh21 || ( ( zExp == 0x7FD )
445 1.1 bjh21 && eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 )
446 1.1 bjh21 && increment
447 1.1 bjh21 )
448 1.1 bjh21 ) {
449 1.1 bjh21 float_raise( float_flag_overflow | float_flag_inexact );
450 1.1 bjh21 if ( ( roundingMode == float_round_to_zero )
451 1.1 bjh21 || ( zSign && ( roundingMode == float_round_up ) )
452 1.1 bjh21 || ( ! zSign && ( roundingMode == float_round_down ) )
453 1.1 bjh21 ) {
454 1.1 bjh21 return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF );
455 1.1 bjh21 }
456 1.1 bjh21 return packFloat64( zSign, 0x7FF, 0, 0 );
457 1.1 bjh21 }
458 1.1 bjh21 if ( zExp < 0 ) {
459 1.1 bjh21 isTiny =
460 1.1 bjh21 ( float_detect_tininess == float_tininess_before_rounding )
461 1.1 bjh21 || ( zExp < -1 )
462 1.1 bjh21 || ! increment
463 1.1 bjh21 || lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF );
464 1.1 bjh21 shift64ExtraRightJamming(
465 1.1 bjh21 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
466 1.1 bjh21 zExp = 0;
467 1.1 bjh21 if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
468 1.1 bjh21 if ( roundNearestEven ) {
469 1.1 bjh21 increment = ( (sbits32) zSig2 < 0 );
470 1.1 bjh21 }
471 1.1 bjh21 else {
472 1.1 bjh21 if ( zSign ) {
473 1.1 bjh21 increment = ( roundingMode == float_round_down ) && zSig2;
474 1.1 bjh21 }
475 1.1 bjh21 else {
476 1.1 bjh21 increment = ( roundingMode == float_round_up ) && zSig2;
477 1.1 bjh21 }
478 1.1 bjh21 }
479 1.1 bjh21 }
480 1.1 bjh21 }
481 1.3 matt if ( zSig2 ) set_float_exception_inexact_flag();
482 1.1 bjh21 if ( increment ) {
483 1.1 bjh21 add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
484 1.1 bjh21 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
485 1.1 bjh21 }
486 1.1 bjh21 else {
487 1.1 bjh21 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
488 1.1 bjh21 }
489 1.1 bjh21 return packFloat64( zSign, zExp, zSig0, zSig1 );
490 1.1 bjh21
491 1.1 bjh21 }
492 1.1 bjh21
493 1.1 bjh21 /*
494 1.1 bjh21 -------------------------------------------------------------------------------
495 1.1 bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
496 1.1 bjh21 and significand formed by the concatenation of `zSig0' and `zSig1', and
497 1.1 bjh21 returns the proper double-precision floating-point value corresponding
498 1.1 bjh21 to the abstract input. This routine is just like `roundAndPackFloat64'
499 1.1 bjh21 except that the input significand has fewer bits and does not have to be
500 1.1 bjh21 normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
501 1.1 bjh21 point exponent.
502 1.1 bjh21 -------------------------------------------------------------------------------
503 1.1 bjh21 */
504 1.1 bjh21 static float64
505 1.1 bjh21 normalizeRoundAndPackFloat64(
506 1.1 bjh21 flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
507 1.1 bjh21 {
508 1.1 bjh21 int8 shiftCount;
509 1.1 bjh21 bits32 zSig2;
510 1.1 bjh21
511 1.1 bjh21 if ( zSig0 == 0 ) {
512 1.1 bjh21 zSig0 = zSig1;
513 1.1 bjh21 zSig1 = 0;
514 1.1 bjh21 zExp -= 32;
515 1.1 bjh21 }
516 1.1 bjh21 shiftCount = countLeadingZeros32( zSig0 ) - 11;
517 1.1 bjh21 if ( 0 <= shiftCount ) {
518 1.1 bjh21 zSig2 = 0;
519 1.1 bjh21 shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
520 1.1 bjh21 }
521 1.1 bjh21 else {
522 1.1 bjh21 shift64ExtraRightJamming(
523 1.1 bjh21 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
524 1.1 bjh21 }
525 1.1 bjh21 zExp -= shiftCount;
526 1.1 bjh21 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
527 1.1 bjh21
528 1.1 bjh21 }
529 1.1 bjh21
530 1.1 bjh21 /*
531 1.1 bjh21 -------------------------------------------------------------------------------
532 1.1 bjh21 Returns the result of converting the 32-bit two's complement integer `a' to
533 1.1 bjh21 the single-precision floating-point format. The conversion is performed
534 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
535 1.1 bjh21 -------------------------------------------------------------------------------
536 1.1 bjh21 */
537 1.1 bjh21 float32 int32_to_float32( int32 a )
538 1.1 bjh21 {
539 1.1 bjh21 flag zSign;
540 1.1 bjh21
541 1.1 bjh21 if ( a == 0 ) return 0;
542 1.1 bjh21 if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
543 1.1 bjh21 zSign = ( a < 0 );
544 1.2 christos return normalizeRoundAndPackFloat32(zSign, 0x9C, (uint32)(zSign ? - a : a));
545 1.1 bjh21
546 1.1 bjh21 }
547 1.1 bjh21
548 1.1 bjh21 /*
549 1.1 bjh21 -------------------------------------------------------------------------------
550 1.1 bjh21 Returns the result of converting the 32-bit two's complement integer `a' to
551 1.1 bjh21 the double-precision floating-point format. The conversion is performed
552 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
553 1.1 bjh21 -------------------------------------------------------------------------------
554 1.1 bjh21 */
555 1.1 bjh21 float64 int32_to_float64( int32 a )
556 1.1 bjh21 {
557 1.1 bjh21 flag zSign;
558 1.1 bjh21 bits32 absA;
559 1.1 bjh21 int8 shiftCount;
560 1.1 bjh21 bits32 zSig0, zSig1;
561 1.1 bjh21
562 1.1 bjh21 if ( a == 0 ) return packFloat64( 0, 0, 0, 0 );
563 1.1 bjh21 zSign = ( a < 0 );
564 1.1 bjh21 absA = zSign ? - a : a;
565 1.1 bjh21 shiftCount = countLeadingZeros32( absA ) - 11;
566 1.1 bjh21 if ( 0 <= shiftCount ) {
567 1.1 bjh21 zSig0 = absA<<shiftCount;
568 1.1 bjh21 zSig1 = 0;
569 1.1 bjh21 }
570 1.1 bjh21 else {
571 1.1 bjh21 shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 );
572 1.1 bjh21 }
573 1.1 bjh21 return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 );
574 1.1 bjh21
575 1.1 bjh21 }
576 1.1 bjh21
577 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC
578 1.1 bjh21 /*
579 1.1 bjh21 -------------------------------------------------------------------------------
580 1.1 bjh21 Returns the result of converting the single-precision floating-point value
581 1.1 bjh21 `a' to the 32-bit two's complement integer format. The conversion is
582 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
583 1.1 bjh21 Arithmetic---which means in particular that the conversion is rounded
584 1.1 bjh21 according to the current rounding mode. If `a' is a NaN, the largest
585 1.1 bjh21 positive integer is returned. Otherwise, if the conversion overflows, the
586 1.1 bjh21 largest integer with the same sign as `a' is returned.
587 1.1 bjh21 -------------------------------------------------------------------------------
588 1.1 bjh21 */
589 1.1 bjh21 int32 float32_to_int32( float32 a )
590 1.1 bjh21 {
591 1.1 bjh21 flag aSign;
592 1.1 bjh21 int16 aExp, shiftCount;
593 1.1 bjh21 bits32 aSig, aSigExtra;
594 1.1 bjh21 int32 z;
595 1.1 bjh21 int8 roundingMode;
596 1.1 bjh21
597 1.1 bjh21 aSig = extractFloat32Frac( a );
598 1.1 bjh21 aExp = extractFloat32Exp( a );
599 1.1 bjh21 aSign = extractFloat32Sign( a );
600 1.1 bjh21 shiftCount = aExp - 0x96;
601 1.1 bjh21 if ( 0 <= shiftCount ) {
602 1.1 bjh21 if ( 0x9E <= aExp ) {
603 1.1 bjh21 if ( a != 0xCF000000 ) {
604 1.1 bjh21 float_raise( float_flag_invalid );
605 1.1 bjh21 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
606 1.1 bjh21 return 0x7FFFFFFF;
607 1.1 bjh21 }
608 1.1 bjh21 }
609 1.1 bjh21 return (sbits32) 0x80000000;
610 1.1 bjh21 }
611 1.1 bjh21 z = ( aSig | 0x00800000 )<<shiftCount;
612 1.1 bjh21 if ( aSign ) z = - z;
613 1.1 bjh21 }
614 1.1 bjh21 else {
615 1.1 bjh21 if ( aExp < 0x7E ) {
616 1.1 bjh21 aSigExtra = aExp | aSig;
617 1.1 bjh21 z = 0;
618 1.1 bjh21 }
619 1.1 bjh21 else {
620 1.1 bjh21 aSig |= 0x00800000;
621 1.1 bjh21 aSigExtra = aSig<<( shiftCount & 31 );
622 1.1 bjh21 z = aSig>>( - shiftCount );
623 1.1 bjh21 }
624 1.3 matt if ( aSigExtra ) set_float_exception_inexact_flag();
625 1.1 bjh21 roundingMode = float_rounding_mode;
626 1.1 bjh21 if ( roundingMode == float_round_nearest_even ) {
627 1.1 bjh21 if ( (sbits32) aSigExtra < 0 ) {
628 1.1 bjh21 ++z;
629 1.1 bjh21 if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1;
630 1.1 bjh21 }
631 1.1 bjh21 if ( aSign ) z = - z;
632 1.1 bjh21 }
633 1.1 bjh21 else {
634 1.1 bjh21 aSigExtra = ( aSigExtra != 0 );
635 1.1 bjh21 if ( aSign ) {
636 1.1 bjh21 z += ( roundingMode == float_round_down ) & aSigExtra;
637 1.1 bjh21 z = - z;
638 1.1 bjh21 }
639 1.1 bjh21 else {
640 1.1 bjh21 z += ( roundingMode == float_round_up ) & aSigExtra;
641 1.1 bjh21 }
642 1.1 bjh21 }
643 1.1 bjh21 }
644 1.1 bjh21 return z;
645 1.1 bjh21
646 1.1 bjh21 }
647 1.1 bjh21 #endif
648 1.1 bjh21
649 1.1 bjh21 /*
650 1.1 bjh21 -------------------------------------------------------------------------------
651 1.1 bjh21 Returns the result of converting the single-precision floating-point value
652 1.1 bjh21 `a' to the 32-bit two's complement integer format. The conversion is
653 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
654 1.1 bjh21 Arithmetic, except that the conversion is always rounded toward zero.
655 1.1 bjh21 If `a' is a NaN, the largest positive integer is returned. Otherwise, if
656 1.1 bjh21 the conversion overflows, the largest integer with the same sign as `a' is
657 1.1 bjh21 returned.
658 1.1 bjh21 -------------------------------------------------------------------------------
659 1.1 bjh21 */
660 1.1 bjh21 int32 float32_to_int32_round_to_zero( float32 a )
661 1.1 bjh21 {
662 1.1 bjh21 flag aSign;
663 1.1 bjh21 int16 aExp, shiftCount;
664 1.1 bjh21 bits32 aSig;
665 1.1 bjh21 int32 z;
666 1.1 bjh21
667 1.1 bjh21 aSig = extractFloat32Frac( a );
668 1.1 bjh21 aExp = extractFloat32Exp( a );
669 1.1 bjh21 aSign = extractFloat32Sign( a );
670 1.1 bjh21 shiftCount = aExp - 0x9E;
671 1.1 bjh21 if ( 0 <= shiftCount ) {
672 1.1 bjh21 if ( a != 0xCF000000 ) {
673 1.1 bjh21 float_raise( float_flag_invalid );
674 1.1 bjh21 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
675 1.1 bjh21 }
676 1.1 bjh21 return (sbits32) 0x80000000;
677 1.1 bjh21 }
678 1.1 bjh21 else if ( aExp <= 0x7E ) {
679 1.3 matt if ( aExp | aSig ) set_float_exception_inexact_flag();
680 1.1 bjh21 return 0;
681 1.1 bjh21 }
682 1.1 bjh21 aSig = ( aSig | 0x00800000 )<<8;
683 1.1 bjh21 z = aSig>>( - shiftCount );
684 1.1 bjh21 if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
685 1.3 matt set_float_exception_inexact_flag();
686 1.1 bjh21 }
687 1.1 bjh21 if ( aSign ) z = - z;
688 1.1 bjh21 return z;
689 1.1 bjh21
690 1.1 bjh21 }
691 1.1 bjh21
692 1.1 bjh21 /*
693 1.1 bjh21 -------------------------------------------------------------------------------
694 1.1 bjh21 Returns the result of converting the single-precision floating-point value
695 1.1 bjh21 `a' to the double-precision floating-point format. The conversion is
696 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
697 1.1 bjh21 Arithmetic.
698 1.1 bjh21 -------------------------------------------------------------------------------
699 1.1 bjh21 */
700 1.1 bjh21 float64 float32_to_float64( float32 a )
701 1.1 bjh21 {
702 1.1 bjh21 flag aSign;
703 1.1 bjh21 int16 aExp;
704 1.1 bjh21 bits32 aSig, zSig0, zSig1;
705 1.1 bjh21
706 1.1 bjh21 aSig = extractFloat32Frac( a );
707 1.1 bjh21 aExp = extractFloat32Exp( a );
708 1.1 bjh21 aSign = extractFloat32Sign( a );
709 1.1 bjh21 if ( aExp == 0xFF ) {
710 1.1 bjh21 if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
711 1.1 bjh21 return packFloat64( aSign, 0x7FF, 0, 0 );
712 1.1 bjh21 }
713 1.1 bjh21 if ( aExp == 0 ) {
714 1.1 bjh21 if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 );
715 1.1 bjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
716 1.1 bjh21 --aExp;
717 1.1 bjh21 }
718 1.1 bjh21 shift64Right( aSig, 0, 3, &zSig0, &zSig1 );
719 1.1 bjh21 return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 );
720 1.1 bjh21
721 1.1 bjh21 }
722 1.1 bjh21
723 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC
724 1.1 bjh21 /*
725 1.1 bjh21 -------------------------------------------------------------------------------
726 1.1 bjh21 Rounds the single-precision floating-point value `a' to an integer,
727 1.1 bjh21 and returns the result as a single-precision floating-point value. The
728 1.1 bjh21 operation is performed according to the IEC/IEEE Standard for Binary
729 1.1 bjh21 Floating-Point Arithmetic.
730 1.1 bjh21 -------------------------------------------------------------------------------
731 1.1 bjh21 */
732 1.1 bjh21 float32 float32_round_to_int( float32 a )
733 1.1 bjh21 {
734 1.1 bjh21 flag aSign;
735 1.1 bjh21 int16 aExp;
736 1.1 bjh21 bits32 lastBitMask, roundBitsMask;
737 1.1 bjh21 int8 roundingMode;
738 1.1 bjh21 float32 z;
739 1.1 bjh21
740 1.1 bjh21 aExp = extractFloat32Exp( a );
741 1.1 bjh21 if ( 0x96 <= aExp ) {
742 1.1 bjh21 if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
743 1.1 bjh21 return propagateFloat32NaN( a, a );
744 1.1 bjh21 }
745 1.1 bjh21 return a;
746 1.1 bjh21 }
747 1.1 bjh21 if ( aExp <= 0x7E ) {
748 1.1 bjh21 if ( (bits32) ( a<<1 ) == 0 ) return a;
749 1.3 matt set_float_exception_inexact_flag();
750 1.1 bjh21 aSign = extractFloat32Sign( a );
751 1.1 bjh21 switch ( float_rounding_mode ) {
752 1.1 bjh21 case float_round_nearest_even:
753 1.1 bjh21 if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
754 1.1 bjh21 return packFloat32( aSign, 0x7F, 0 );
755 1.1 bjh21 }
756 1.1 bjh21 break;
757 1.1 bjh21 case float_round_to_zero:
758 1.1 bjh21 break;
759 1.1 bjh21 case float_round_down:
760 1.1 bjh21 return aSign ? 0xBF800000 : 0;
761 1.1 bjh21 case float_round_up:
762 1.1 bjh21 return aSign ? 0x80000000 : 0x3F800000;
763 1.1 bjh21 }
764 1.1 bjh21 return packFloat32( aSign, 0, 0 );
765 1.1 bjh21 }
766 1.1 bjh21 lastBitMask = 1;
767 1.1 bjh21 lastBitMask <<= 0x96 - aExp;
768 1.1 bjh21 roundBitsMask = lastBitMask - 1;
769 1.1 bjh21 z = a;
770 1.1 bjh21 roundingMode = float_rounding_mode;
771 1.1 bjh21 if ( roundingMode == float_round_nearest_even ) {
772 1.1 bjh21 z += lastBitMask>>1;
773 1.1 bjh21 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
774 1.1 bjh21 }
775 1.1 bjh21 else if ( roundingMode != float_round_to_zero ) {
776 1.1 bjh21 if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
777 1.1 bjh21 z += roundBitsMask;
778 1.1 bjh21 }
779 1.1 bjh21 }
780 1.1 bjh21 z &= ~ roundBitsMask;
781 1.3 matt if ( z != a ) set_float_exception_inexact_flag();
782 1.1 bjh21 return z;
783 1.1 bjh21
784 1.1 bjh21 }
785 1.1 bjh21 #endif
786 1.1 bjh21
787 1.1 bjh21 /*
788 1.1 bjh21 -------------------------------------------------------------------------------
789 1.1 bjh21 Returns the result of adding the absolute values of the single-precision
790 1.1 bjh21 floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
791 1.1 bjh21 before being returned. `zSign' is ignored if the result is a NaN.
792 1.1 bjh21 The addition is performed according to the IEC/IEEE Standard for Binary
793 1.1 bjh21 Floating-Point Arithmetic.
794 1.1 bjh21 -------------------------------------------------------------------------------
795 1.1 bjh21 */
796 1.1 bjh21 static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
797 1.1 bjh21 {
798 1.1 bjh21 int16 aExp, bExp, zExp;
799 1.1 bjh21 bits32 aSig, bSig, zSig;
800 1.1 bjh21 int16 expDiff;
801 1.1 bjh21
802 1.1 bjh21 aSig = extractFloat32Frac( a );
803 1.1 bjh21 aExp = extractFloat32Exp( a );
804 1.1 bjh21 bSig = extractFloat32Frac( b );
805 1.1 bjh21 bExp = extractFloat32Exp( b );
806 1.1 bjh21 expDiff = aExp - bExp;
807 1.1 bjh21 aSig <<= 6;
808 1.1 bjh21 bSig <<= 6;
809 1.1 bjh21 if ( 0 < expDiff ) {
810 1.1 bjh21 if ( aExp == 0xFF ) {
811 1.1 bjh21 if ( aSig ) return propagateFloat32NaN( a, b );
812 1.1 bjh21 return a;
813 1.1 bjh21 }
814 1.1 bjh21 if ( bExp == 0 ) {
815 1.1 bjh21 --expDiff;
816 1.1 bjh21 }
817 1.1 bjh21 else {
818 1.1 bjh21 bSig |= 0x20000000;
819 1.1 bjh21 }
820 1.1 bjh21 shift32RightJamming( bSig, expDiff, &bSig );
821 1.1 bjh21 zExp = aExp;
822 1.1 bjh21 }
823 1.1 bjh21 else if ( expDiff < 0 ) {
824 1.1 bjh21 if ( bExp == 0xFF ) {
825 1.1 bjh21 if ( bSig ) return propagateFloat32NaN( a, b );
826 1.1 bjh21 return packFloat32( zSign, 0xFF, 0 );
827 1.1 bjh21 }
828 1.1 bjh21 if ( aExp == 0 ) {
829 1.1 bjh21 ++expDiff;
830 1.1 bjh21 }
831 1.1 bjh21 else {
832 1.1 bjh21 aSig |= 0x20000000;
833 1.1 bjh21 }
834 1.1 bjh21 shift32RightJamming( aSig, - expDiff, &aSig );
835 1.1 bjh21 zExp = bExp;
836 1.1 bjh21 }
837 1.1 bjh21 else {
838 1.1 bjh21 if ( aExp == 0xFF ) {
839 1.1 bjh21 if ( aSig | bSig ) return propagateFloat32NaN( a, b );
840 1.1 bjh21 return a;
841 1.1 bjh21 }
842 1.1 bjh21 if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
843 1.1 bjh21 zSig = 0x40000000 + aSig + bSig;
844 1.1 bjh21 zExp = aExp;
845 1.1 bjh21 goto roundAndPack;
846 1.1 bjh21 }
847 1.1 bjh21 aSig |= 0x20000000;
848 1.1 bjh21 zSig = ( aSig + bSig )<<1;
849 1.1 bjh21 --zExp;
850 1.1 bjh21 if ( (sbits32) zSig < 0 ) {
851 1.1 bjh21 zSig = aSig + bSig;
852 1.1 bjh21 ++zExp;
853 1.1 bjh21 }
854 1.1 bjh21 roundAndPack:
855 1.1 bjh21 return roundAndPackFloat32( zSign, zExp, zSig );
856 1.1 bjh21
857 1.1 bjh21 }
858 1.1 bjh21
859 1.1 bjh21 /*
860 1.1 bjh21 -------------------------------------------------------------------------------
861 1.1 bjh21 Returns the result of subtracting the absolute values of the single-
862 1.1 bjh21 precision floating-point values `a' and `b'. If `zSign' is 1, the
863 1.1 bjh21 difference is negated before being returned. `zSign' is ignored if the
864 1.1 bjh21 result is a NaN. The subtraction is performed according to the IEC/IEEE
865 1.1 bjh21 Standard for Binary Floating-Point Arithmetic.
866 1.1 bjh21 -------------------------------------------------------------------------------
867 1.1 bjh21 */
868 1.1 bjh21 static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
869 1.1 bjh21 {
870 1.1 bjh21 int16 aExp, bExp, zExp;
871 1.1 bjh21 bits32 aSig, bSig, zSig;
872 1.1 bjh21 int16 expDiff;
873 1.1 bjh21
874 1.1 bjh21 aSig = extractFloat32Frac( a );
875 1.1 bjh21 aExp = extractFloat32Exp( a );
876 1.1 bjh21 bSig = extractFloat32Frac( b );
877 1.1 bjh21 bExp = extractFloat32Exp( b );
878 1.1 bjh21 expDiff = aExp - bExp;
879 1.1 bjh21 aSig <<= 7;
880 1.1 bjh21 bSig <<= 7;
881 1.1 bjh21 if ( 0 < expDiff ) goto aExpBigger;
882 1.1 bjh21 if ( expDiff < 0 ) goto bExpBigger;
883 1.1 bjh21 if ( aExp == 0xFF ) {
884 1.1 bjh21 if ( aSig | bSig ) return propagateFloat32NaN( a, b );
885 1.1 bjh21 float_raise( float_flag_invalid );
886 1.1 bjh21 return float32_default_nan;
887 1.1 bjh21 }
888 1.1 bjh21 if ( aExp == 0 ) {
889 1.1 bjh21 aExp = 1;
890 1.1 bjh21 bExp = 1;
891 1.1 bjh21 }
892 1.1 bjh21 if ( bSig < aSig ) goto aBigger;
893 1.1 bjh21 if ( aSig < bSig ) goto bBigger;
894 1.1 bjh21 return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
895 1.1 bjh21 bExpBigger:
896 1.1 bjh21 if ( bExp == 0xFF ) {
897 1.1 bjh21 if ( bSig ) return propagateFloat32NaN( a, b );
898 1.1 bjh21 return packFloat32( zSign ^ 1, 0xFF, 0 );
899 1.1 bjh21 }
900 1.1 bjh21 if ( aExp == 0 ) {
901 1.1 bjh21 ++expDiff;
902 1.1 bjh21 }
903 1.1 bjh21 else {
904 1.1 bjh21 aSig |= 0x40000000;
905 1.1 bjh21 }
906 1.1 bjh21 shift32RightJamming( aSig, - expDiff, &aSig );
907 1.1 bjh21 bSig |= 0x40000000;
908 1.1 bjh21 bBigger:
909 1.1 bjh21 zSig = bSig - aSig;
910 1.1 bjh21 zExp = bExp;
911 1.1 bjh21 zSign ^= 1;
912 1.1 bjh21 goto normalizeRoundAndPack;
913 1.1 bjh21 aExpBigger:
914 1.1 bjh21 if ( aExp == 0xFF ) {
915 1.1 bjh21 if ( aSig ) return propagateFloat32NaN( a, b );
916 1.1 bjh21 return a;
917 1.1 bjh21 }
918 1.1 bjh21 if ( bExp == 0 ) {
919 1.1 bjh21 --expDiff;
920 1.1 bjh21 }
921 1.1 bjh21 else {
922 1.1 bjh21 bSig |= 0x40000000;
923 1.1 bjh21 }
924 1.1 bjh21 shift32RightJamming( bSig, expDiff, &bSig );
925 1.1 bjh21 aSig |= 0x40000000;
926 1.1 bjh21 aBigger:
927 1.1 bjh21 zSig = aSig - bSig;
928 1.1 bjh21 zExp = aExp;
929 1.1 bjh21 normalizeRoundAndPack:
930 1.1 bjh21 --zExp;
931 1.1 bjh21 return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
932 1.1 bjh21
933 1.1 bjh21 }
934 1.1 bjh21
935 1.1 bjh21 /*
936 1.1 bjh21 -------------------------------------------------------------------------------
937 1.1 bjh21 Returns the result of adding the single-precision floating-point values `a'
938 1.1 bjh21 and `b'. The operation is performed according to the IEC/IEEE Standard for
939 1.1 bjh21 Binary Floating-Point Arithmetic.
940 1.1 bjh21 -------------------------------------------------------------------------------
941 1.1 bjh21 */
942 1.1 bjh21 float32 float32_add( float32 a, float32 b )
943 1.1 bjh21 {
944 1.1 bjh21 flag aSign, bSign;
945 1.1 bjh21
946 1.1 bjh21 aSign = extractFloat32Sign( a );
947 1.1 bjh21 bSign = extractFloat32Sign( b );
948 1.1 bjh21 if ( aSign == bSign ) {
949 1.1 bjh21 return addFloat32Sigs( a, b, aSign );
950 1.1 bjh21 }
951 1.1 bjh21 else {
952 1.1 bjh21 return subFloat32Sigs( a, b, aSign );
953 1.1 bjh21 }
954 1.1 bjh21
955 1.1 bjh21 }
956 1.1 bjh21
957 1.1 bjh21 /*
958 1.1 bjh21 -------------------------------------------------------------------------------
959 1.1 bjh21 Returns the result of subtracting the single-precision floating-point values
960 1.1 bjh21 `a' and `b'. The operation is performed according to the IEC/IEEE Standard
961 1.1 bjh21 for Binary Floating-Point Arithmetic.
962 1.1 bjh21 -------------------------------------------------------------------------------
963 1.1 bjh21 */
964 1.1 bjh21 float32 float32_sub( float32 a, float32 b )
965 1.1 bjh21 {
966 1.1 bjh21 flag aSign, bSign;
967 1.1 bjh21
968 1.1 bjh21 aSign = extractFloat32Sign( a );
969 1.1 bjh21 bSign = extractFloat32Sign( b );
970 1.1 bjh21 if ( aSign == bSign ) {
971 1.1 bjh21 return subFloat32Sigs( a, b, aSign );
972 1.1 bjh21 }
973 1.1 bjh21 else {
974 1.1 bjh21 return addFloat32Sigs( a, b, aSign );
975 1.1 bjh21 }
976 1.1 bjh21
977 1.1 bjh21 }
978 1.1 bjh21
979 1.1 bjh21 /*
980 1.1 bjh21 -------------------------------------------------------------------------------
981 1.1 bjh21 Returns the result of multiplying the single-precision floating-point values
982 1.1 bjh21 `a' and `b'. The operation is performed according to the IEC/IEEE Standard
983 1.1 bjh21 for Binary Floating-Point Arithmetic.
984 1.1 bjh21 -------------------------------------------------------------------------------
985 1.1 bjh21 */
986 1.1 bjh21 float32 float32_mul( float32 a, float32 b )
987 1.1 bjh21 {
988 1.1 bjh21 flag aSign, bSign, zSign;
989 1.1 bjh21 int16 aExp, bExp, zExp;
990 1.1 bjh21 bits32 aSig, bSig, zSig0, zSig1;
991 1.1 bjh21
992 1.1 bjh21 aSig = extractFloat32Frac( a );
993 1.1 bjh21 aExp = extractFloat32Exp( a );
994 1.1 bjh21 aSign = extractFloat32Sign( a );
995 1.1 bjh21 bSig = extractFloat32Frac( b );
996 1.1 bjh21 bExp = extractFloat32Exp( b );
997 1.1 bjh21 bSign = extractFloat32Sign( b );
998 1.1 bjh21 zSign = aSign ^ bSign;
999 1.1 bjh21 if ( aExp == 0xFF ) {
1000 1.1 bjh21 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1001 1.1 bjh21 return propagateFloat32NaN( a, b );
1002 1.1 bjh21 }
1003 1.1 bjh21 if ( ( bExp | bSig ) == 0 ) {
1004 1.1 bjh21 float_raise( float_flag_invalid );
1005 1.1 bjh21 return float32_default_nan;
1006 1.1 bjh21 }
1007 1.1 bjh21 return packFloat32( zSign, 0xFF, 0 );
1008 1.1 bjh21 }
1009 1.1 bjh21 if ( bExp == 0xFF ) {
1010 1.1 bjh21 if ( bSig ) return propagateFloat32NaN( a, b );
1011 1.1 bjh21 if ( ( aExp | aSig ) == 0 ) {
1012 1.1 bjh21 float_raise( float_flag_invalid );
1013 1.1 bjh21 return float32_default_nan;
1014 1.1 bjh21 }
1015 1.1 bjh21 return packFloat32( zSign, 0xFF, 0 );
1016 1.1 bjh21 }
1017 1.1 bjh21 if ( aExp == 0 ) {
1018 1.1 bjh21 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1019 1.1 bjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1020 1.1 bjh21 }
1021 1.1 bjh21 if ( bExp == 0 ) {
1022 1.1 bjh21 if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1023 1.1 bjh21 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1024 1.1 bjh21 }
1025 1.1 bjh21 zExp = aExp + bExp - 0x7F;
1026 1.1 bjh21 aSig = ( aSig | 0x00800000 )<<7;
1027 1.1 bjh21 bSig = ( bSig | 0x00800000 )<<8;
1028 1.1 bjh21 mul32To64( aSig, bSig, &zSig0, &zSig1 );
1029 1.1 bjh21 zSig0 |= ( zSig1 != 0 );
1030 1.1 bjh21 if ( 0 <= (sbits32) ( zSig0<<1 ) ) {
1031 1.1 bjh21 zSig0 <<= 1;
1032 1.1 bjh21 --zExp;
1033 1.1 bjh21 }
1034 1.1 bjh21 return roundAndPackFloat32( zSign, zExp, zSig0 );
1035 1.1 bjh21
1036 1.1 bjh21 }
1037 1.1 bjh21
1038 1.1 bjh21 /*
1039 1.1 bjh21 -------------------------------------------------------------------------------
1040 1.1 bjh21 Returns the result of dividing the single-precision floating-point value `a'
1041 1.1 bjh21 by the corresponding value `b'. The operation is performed according to the
1042 1.1 bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1043 1.1 bjh21 -------------------------------------------------------------------------------
1044 1.1 bjh21 */
1045 1.1 bjh21 float32 float32_div( float32 a, float32 b )
1046 1.1 bjh21 {
1047 1.1 bjh21 flag aSign, bSign, zSign;
1048 1.1 bjh21 int16 aExp, bExp, zExp;
1049 1.1 bjh21 bits32 aSig, bSig, zSig, rem0, rem1, term0, term1;
1050 1.1 bjh21
1051 1.1 bjh21 aSig = extractFloat32Frac( a );
1052 1.1 bjh21 aExp = extractFloat32Exp( a );
1053 1.1 bjh21 aSign = extractFloat32Sign( a );
1054 1.1 bjh21 bSig = extractFloat32Frac( b );
1055 1.1 bjh21 bExp = extractFloat32Exp( b );
1056 1.1 bjh21 bSign = extractFloat32Sign( b );
1057 1.1 bjh21 zSign = aSign ^ bSign;
1058 1.1 bjh21 if ( aExp == 0xFF ) {
1059 1.1 bjh21 if ( aSig ) return propagateFloat32NaN( a, b );
1060 1.1 bjh21 if ( bExp == 0xFF ) {
1061 1.1 bjh21 if ( bSig ) return propagateFloat32NaN( a, b );
1062 1.1 bjh21 float_raise( float_flag_invalid );
1063 1.1 bjh21 return float32_default_nan;
1064 1.1 bjh21 }
1065 1.1 bjh21 return packFloat32( zSign, 0xFF, 0 );
1066 1.1 bjh21 }
1067 1.1 bjh21 if ( bExp == 0xFF ) {
1068 1.1 bjh21 if ( bSig ) return propagateFloat32NaN( a, b );
1069 1.1 bjh21 return packFloat32( zSign, 0, 0 );
1070 1.1 bjh21 }
1071 1.1 bjh21 if ( bExp == 0 ) {
1072 1.1 bjh21 if ( bSig == 0 ) {
1073 1.1 bjh21 if ( ( aExp | aSig ) == 0 ) {
1074 1.1 bjh21 float_raise( float_flag_invalid );
1075 1.1 bjh21 return float32_default_nan;
1076 1.1 bjh21 }
1077 1.1 bjh21 float_raise( float_flag_divbyzero );
1078 1.1 bjh21 return packFloat32( zSign, 0xFF, 0 );
1079 1.1 bjh21 }
1080 1.1 bjh21 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1081 1.1 bjh21 }
1082 1.1 bjh21 if ( aExp == 0 ) {
1083 1.1 bjh21 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1084 1.1 bjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1085 1.1 bjh21 }
1086 1.1 bjh21 zExp = aExp - bExp + 0x7D;
1087 1.1 bjh21 aSig = ( aSig | 0x00800000 )<<7;
1088 1.1 bjh21 bSig = ( bSig | 0x00800000 )<<8;
1089 1.1 bjh21 if ( bSig <= ( aSig + aSig ) ) {
1090 1.1 bjh21 aSig >>= 1;
1091 1.1 bjh21 ++zExp;
1092 1.1 bjh21 }
1093 1.1 bjh21 zSig = estimateDiv64To32( aSig, 0, bSig );
1094 1.1 bjh21 if ( ( zSig & 0x3F ) <= 2 ) {
1095 1.1 bjh21 mul32To64( bSig, zSig, &term0, &term1 );
1096 1.1 bjh21 sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1097 1.1 bjh21 while ( (sbits32) rem0 < 0 ) {
1098 1.1 bjh21 --zSig;
1099 1.1 bjh21 add64( rem0, rem1, 0, bSig, &rem0, &rem1 );
1100 1.1 bjh21 }
1101 1.1 bjh21 zSig |= ( rem1 != 0 );
1102 1.1 bjh21 }
1103 1.1 bjh21 return roundAndPackFloat32( zSign, zExp, zSig );
1104 1.1 bjh21
1105 1.1 bjh21 }
1106 1.1 bjh21
1107 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC
1108 1.1 bjh21 /*
1109 1.1 bjh21 -------------------------------------------------------------------------------
1110 1.1 bjh21 Returns the remainder of the single-precision floating-point value `a'
1111 1.1 bjh21 with respect to the corresponding value `b'. The operation is performed
1112 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1113 1.1 bjh21 -------------------------------------------------------------------------------
1114 1.1 bjh21 */
1115 1.1 bjh21 float32 float32_rem( float32 a, float32 b )
1116 1.1 bjh21 {
1117 1.1 bjh21 flag aSign, bSign, zSign;
1118 1.1 bjh21 int16 aExp, bExp, expDiff;
1119 1.1 bjh21 bits32 aSig, bSig, q, allZero, alternateASig;
1120 1.1 bjh21 sbits32 sigMean;
1121 1.1 bjh21
1122 1.1 bjh21 aSig = extractFloat32Frac( a );
1123 1.1 bjh21 aExp = extractFloat32Exp( a );
1124 1.1 bjh21 aSign = extractFloat32Sign( a );
1125 1.1 bjh21 bSig = extractFloat32Frac( b );
1126 1.1 bjh21 bExp = extractFloat32Exp( b );
1127 1.1 bjh21 bSign = extractFloat32Sign( b );
1128 1.1 bjh21 if ( aExp == 0xFF ) {
1129 1.1 bjh21 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1130 1.1 bjh21 return propagateFloat32NaN( a, b );
1131 1.1 bjh21 }
1132 1.1 bjh21 float_raise( float_flag_invalid );
1133 1.1 bjh21 return float32_default_nan;
1134 1.1 bjh21 }
1135 1.1 bjh21 if ( bExp == 0xFF ) {
1136 1.1 bjh21 if ( bSig ) return propagateFloat32NaN( a, b );
1137 1.1 bjh21 return a;
1138 1.1 bjh21 }
1139 1.1 bjh21 if ( bExp == 0 ) {
1140 1.1 bjh21 if ( bSig == 0 ) {
1141 1.1 bjh21 float_raise( float_flag_invalid );
1142 1.1 bjh21 return float32_default_nan;
1143 1.1 bjh21 }
1144 1.1 bjh21 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1145 1.1 bjh21 }
1146 1.1 bjh21 if ( aExp == 0 ) {
1147 1.1 bjh21 if ( aSig == 0 ) return a;
1148 1.1 bjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1149 1.1 bjh21 }
1150 1.1 bjh21 expDiff = aExp - bExp;
1151 1.1 bjh21 aSig = ( aSig | 0x00800000 )<<8;
1152 1.1 bjh21 bSig = ( bSig | 0x00800000 )<<8;
1153 1.1 bjh21 if ( expDiff < 0 ) {
1154 1.1 bjh21 if ( expDiff < -1 ) return a;
1155 1.1 bjh21 aSig >>= 1;
1156 1.1 bjh21 }
1157 1.1 bjh21 q = ( bSig <= aSig );
1158 1.1 bjh21 if ( q ) aSig -= bSig;
1159 1.1 bjh21 expDiff -= 32;
1160 1.1 bjh21 while ( 0 < expDiff ) {
1161 1.1 bjh21 q = estimateDiv64To32( aSig, 0, bSig );
1162 1.1 bjh21 q = ( 2 < q ) ? q - 2 : 0;
1163 1.1 bjh21 aSig = - ( ( bSig>>2 ) * q );
1164 1.1 bjh21 expDiff -= 30;
1165 1.1 bjh21 }
1166 1.1 bjh21 expDiff += 32;
1167 1.1 bjh21 if ( 0 < expDiff ) {
1168 1.1 bjh21 q = estimateDiv64To32( aSig, 0, bSig );
1169 1.1 bjh21 q = ( 2 < q ) ? q - 2 : 0;
1170 1.1 bjh21 q >>= 32 - expDiff;
1171 1.1 bjh21 bSig >>= 2;
1172 1.1 bjh21 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
1173 1.1 bjh21 }
1174 1.1 bjh21 else {
1175 1.1 bjh21 aSig >>= 2;
1176 1.1 bjh21 bSig >>= 2;
1177 1.1 bjh21 }
1178 1.1 bjh21 do {
1179 1.1 bjh21 alternateASig = aSig;
1180 1.1 bjh21 ++q;
1181 1.1 bjh21 aSig -= bSig;
1182 1.1 bjh21 } while ( 0 <= (sbits32) aSig );
1183 1.1 bjh21 sigMean = aSig + alternateASig;
1184 1.1 bjh21 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
1185 1.1 bjh21 aSig = alternateASig;
1186 1.1 bjh21 }
1187 1.1 bjh21 zSign = ( (sbits32) aSig < 0 );
1188 1.1 bjh21 if ( zSign ) aSig = - aSig;
1189 1.1 bjh21 return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
1190 1.1 bjh21
1191 1.1 bjh21 }
1192 1.1 bjh21 #endif
1193 1.1 bjh21
1194 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC
1195 1.1 bjh21 /*
1196 1.1 bjh21 -------------------------------------------------------------------------------
1197 1.1 bjh21 Returns the square root of the single-precision floating-point value `a'.
1198 1.1 bjh21 The operation is performed according to the IEC/IEEE Standard for Binary
1199 1.1 bjh21 Floating-Point Arithmetic.
1200 1.1 bjh21 -------------------------------------------------------------------------------
1201 1.1 bjh21 */
1202 1.1 bjh21 float32 float32_sqrt( float32 a )
1203 1.1 bjh21 {
1204 1.1 bjh21 flag aSign;
1205 1.1 bjh21 int16 aExp, zExp;
1206 1.1 bjh21 bits32 aSig, zSig, rem0, rem1, term0, term1;
1207 1.1 bjh21
1208 1.1 bjh21 aSig = extractFloat32Frac( a );
1209 1.1 bjh21 aExp = extractFloat32Exp( a );
1210 1.1 bjh21 aSign = extractFloat32Sign( a );
1211 1.1 bjh21 if ( aExp == 0xFF ) {
1212 1.1 bjh21 if ( aSig ) return propagateFloat32NaN( a, 0 );
1213 1.1 bjh21 if ( ! aSign ) return a;
1214 1.1 bjh21 float_raise( float_flag_invalid );
1215 1.1 bjh21 return float32_default_nan;
1216 1.1 bjh21 }
1217 1.1 bjh21 if ( aSign ) {
1218 1.1 bjh21 if ( ( aExp | aSig ) == 0 ) return a;
1219 1.1 bjh21 float_raise( float_flag_invalid );
1220 1.1 bjh21 return float32_default_nan;
1221 1.1 bjh21 }
1222 1.1 bjh21 if ( aExp == 0 ) {
1223 1.1 bjh21 if ( aSig == 0 ) return 0;
1224 1.1 bjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1225 1.1 bjh21 }
1226 1.1 bjh21 zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
1227 1.1 bjh21 aSig = ( aSig | 0x00800000 )<<8;
1228 1.1 bjh21 zSig = estimateSqrt32( aExp, aSig ) + 2;
1229 1.1 bjh21 if ( ( zSig & 0x7F ) <= 5 ) {
1230 1.1 bjh21 if ( zSig < 2 ) {
1231 1.1 bjh21 zSig = 0x7FFFFFFF;
1232 1.1 bjh21 goto roundAndPack;
1233 1.1 bjh21 }
1234 1.1 bjh21 else {
1235 1.1 bjh21 aSig >>= aExp & 1;
1236 1.1 bjh21 mul32To64( zSig, zSig, &term0, &term1 );
1237 1.1 bjh21 sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1238 1.1 bjh21 while ( (sbits32) rem0 < 0 ) {
1239 1.1 bjh21 --zSig;
1240 1.1 bjh21 shortShift64Left( 0, zSig, 1, &term0, &term1 );
1241 1.1 bjh21 term1 |= 1;
1242 1.1 bjh21 add64( rem0, rem1, term0, term1, &rem0, &rem1 );
1243 1.1 bjh21 }
1244 1.1 bjh21 zSig |= ( ( rem0 | rem1 ) != 0 );
1245 1.1 bjh21 }
1246 1.1 bjh21 }
1247 1.1 bjh21 shift32RightJamming( zSig, 1, &zSig );
1248 1.1 bjh21 roundAndPack:
1249 1.1 bjh21 return roundAndPackFloat32( 0, zExp, zSig );
1250 1.1 bjh21
1251 1.1 bjh21 }
1252 1.1 bjh21 #endif
1253 1.1 bjh21
1254 1.1 bjh21 /*
1255 1.1 bjh21 -------------------------------------------------------------------------------
1256 1.1 bjh21 Returns 1 if the single-precision floating-point value `a' is equal to
1257 1.1 bjh21 the corresponding value `b', and 0 otherwise. The comparison is performed
1258 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1259 1.1 bjh21 -------------------------------------------------------------------------------
1260 1.1 bjh21 */
1261 1.1 bjh21 flag float32_eq( float32 a, float32 b )
1262 1.1 bjh21 {
1263 1.1 bjh21
1264 1.1 bjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1265 1.1 bjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1266 1.1 bjh21 ) {
1267 1.1 bjh21 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1268 1.1 bjh21 float_raise( float_flag_invalid );
1269 1.1 bjh21 }
1270 1.1 bjh21 return 0;
1271 1.1 bjh21 }
1272 1.1 bjh21 return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1273 1.1 bjh21
1274 1.1 bjh21 }
1275 1.1 bjh21
1276 1.1 bjh21 /*
1277 1.1 bjh21 -------------------------------------------------------------------------------
1278 1.1 bjh21 Returns 1 if the single-precision floating-point value `a' is less than
1279 1.1 bjh21 or equal to the corresponding value `b', and 0 otherwise. The comparison
1280 1.1 bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point
1281 1.1 bjh21 Arithmetic.
1282 1.1 bjh21 -------------------------------------------------------------------------------
1283 1.1 bjh21 */
1284 1.1 bjh21 flag float32_le( float32 a, float32 b )
1285 1.1 bjh21 {
1286 1.1 bjh21 flag aSign, bSign;
1287 1.1 bjh21
1288 1.1 bjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1289 1.1 bjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1290 1.1 bjh21 ) {
1291 1.1 bjh21 float_raise( float_flag_invalid );
1292 1.1 bjh21 return 0;
1293 1.1 bjh21 }
1294 1.1 bjh21 aSign = extractFloat32Sign( a );
1295 1.1 bjh21 bSign = extractFloat32Sign( b );
1296 1.1 bjh21 if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1297 1.1 bjh21 return ( a == b ) || ( aSign ^ ( a < b ) );
1298 1.1 bjh21
1299 1.1 bjh21 }
1300 1.1 bjh21
1301 1.1 bjh21 /*
1302 1.1 bjh21 -------------------------------------------------------------------------------
1303 1.1 bjh21 Returns 1 if the single-precision floating-point value `a' is less than
1304 1.1 bjh21 the corresponding value `b', and 0 otherwise. The comparison is performed
1305 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1306 1.1 bjh21 -------------------------------------------------------------------------------
1307 1.1 bjh21 */
1308 1.1 bjh21 flag float32_lt( float32 a, float32 b )
1309 1.1 bjh21 {
1310 1.1 bjh21 flag aSign, bSign;
1311 1.1 bjh21
1312 1.1 bjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1313 1.1 bjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1314 1.1 bjh21 ) {
1315 1.1 bjh21 float_raise( float_flag_invalid );
1316 1.1 bjh21 return 0;
1317 1.1 bjh21 }
1318 1.1 bjh21 aSign = extractFloat32Sign( a );
1319 1.1 bjh21 bSign = extractFloat32Sign( b );
1320 1.1 bjh21 if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1321 1.1 bjh21 return ( a != b ) && ( aSign ^ ( a < b ) );
1322 1.1 bjh21
1323 1.1 bjh21 }
1324 1.1 bjh21
1325 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1326 1.1 bjh21 /*
1327 1.1 bjh21 -------------------------------------------------------------------------------
1328 1.1 bjh21 Returns 1 if the single-precision floating-point value `a' is equal to
1329 1.1 bjh21 the corresponding value `b', and 0 otherwise. The invalid exception is
1330 1.1 bjh21 raised if either operand is a NaN. Otherwise, the comparison is performed
1331 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1332 1.1 bjh21 -------------------------------------------------------------------------------
1333 1.1 bjh21 */
1334 1.1 bjh21 flag float32_eq_signaling( float32 a, float32 b )
1335 1.1 bjh21 {
1336 1.1 bjh21
1337 1.1 bjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1338 1.1 bjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1339 1.1 bjh21 ) {
1340 1.1 bjh21 float_raise( float_flag_invalid );
1341 1.1 bjh21 return 0;
1342 1.1 bjh21 }
1343 1.1 bjh21 return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1344 1.1 bjh21
1345 1.1 bjh21 }
1346 1.1 bjh21
1347 1.1 bjh21 /*
1348 1.1 bjh21 -------------------------------------------------------------------------------
1349 1.1 bjh21 Returns 1 if the single-precision floating-point value `a' is less than or
1350 1.1 bjh21 equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
1351 1.1 bjh21 cause an exception. Otherwise, the comparison is performed according to the
1352 1.1 bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1353 1.1 bjh21 -------------------------------------------------------------------------------
1354 1.1 bjh21 */
1355 1.1 bjh21 flag float32_le_quiet( float32 a, float32 b )
1356 1.1 bjh21 {
1357 1.1 bjh21 flag aSign, bSign;
1358 1.1 bjh21 int16 aExp, bExp;
1359 1.1 bjh21
1360 1.1 bjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1361 1.1 bjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1362 1.1 bjh21 ) {
1363 1.1 bjh21 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1364 1.1 bjh21 float_raise( float_flag_invalid );
1365 1.1 bjh21 }
1366 1.1 bjh21 return 0;
1367 1.1 bjh21 }
1368 1.1 bjh21 aSign = extractFloat32Sign( a );
1369 1.1 bjh21 bSign = extractFloat32Sign( b );
1370 1.1 bjh21 if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1371 1.1 bjh21 return ( a == b ) || ( aSign ^ ( a < b ) );
1372 1.1 bjh21
1373 1.1 bjh21 }
1374 1.1 bjh21
1375 1.1 bjh21 /*
1376 1.1 bjh21 -------------------------------------------------------------------------------
1377 1.1 bjh21 Returns 1 if the single-precision floating-point value `a' is less than
1378 1.1 bjh21 the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
1379 1.1 bjh21 exception. Otherwise, the comparison is performed according to the IEC/IEEE
1380 1.1 bjh21 Standard for Binary Floating-Point Arithmetic.
1381 1.1 bjh21 -------------------------------------------------------------------------------
1382 1.1 bjh21 */
1383 1.1 bjh21 flag float32_lt_quiet( float32 a, float32 b )
1384 1.1 bjh21 {
1385 1.1 bjh21 flag aSign, bSign;
1386 1.1 bjh21
1387 1.1 bjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1388 1.1 bjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1389 1.1 bjh21 ) {
1390 1.1 bjh21 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1391 1.1 bjh21 float_raise( float_flag_invalid );
1392 1.1 bjh21 }
1393 1.1 bjh21 return 0;
1394 1.1 bjh21 }
1395 1.1 bjh21 aSign = extractFloat32Sign( a );
1396 1.1 bjh21 bSign = extractFloat32Sign( b );
1397 1.1 bjh21 if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1398 1.1 bjh21 return ( a != b ) && ( aSign ^ ( a < b ) );
1399 1.1 bjh21
1400 1.1 bjh21 }
1401 1.1 bjh21 #endif /* !SOFTFLOAT_FOR_GCC */
1402 1.1 bjh21
1403 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1404 1.1 bjh21 /*
1405 1.1 bjh21 -------------------------------------------------------------------------------
1406 1.1 bjh21 Returns the result of converting the double-precision floating-point value
1407 1.1 bjh21 `a' to the 32-bit two's complement integer format. The conversion is
1408 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
1409 1.1 bjh21 Arithmetic---which means in particular that the conversion is rounded
1410 1.1 bjh21 according to the current rounding mode. If `a' is a NaN, the largest
1411 1.1 bjh21 positive integer is returned. Otherwise, if the conversion overflows, the
1412 1.1 bjh21 largest integer with the same sign as `a' is returned.
1413 1.1 bjh21 -------------------------------------------------------------------------------
1414 1.1 bjh21 */
1415 1.1 bjh21 int32 float64_to_int32( float64 a )
1416 1.1 bjh21 {
1417 1.1 bjh21 flag aSign;
1418 1.1 bjh21 int16 aExp, shiftCount;
1419 1.1 bjh21 bits32 aSig0, aSig1, absZ, aSigExtra;
1420 1.1 bjh21 int32 z;
1421 1.1 bjh21 int8 roundingMode;
1422 1.1 bjh21
1423 1.1 bjh21 aSig1 = extractFloat64Frac1( a );
1424 1.1 bjh21 aSig0 = extractFloat64Frac0( a );
1425 1.1 bjh21 aExp = extractFloat64Exp( a );
1426 1.1 bjh21 aSign = extractFloat64Sign( a );
1427 1.1 bjh21 shiftCount = aExp - 0x413;
1428 1.1 bjh21 if ( 0 <= shiftCount ) {
1429 1.1 bjh21 if ( 0x41E < aExp ) {
1430 1.1 bjh21 if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1431 1.1 bjh21 goto invalid;
1432 1.1 bjh21 }
1433 1.1 bjh21 shortShift64Left(
1434 1.1 bjh21 aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1435 1.1 bjh21 if ( 0x80000000 < absZ ) goto invalid;
1436 1.1 bjh21 }
1437 1.1 bjh21 else {
1438 1.1 bjh21 aSig1 = ( aSig1 != 0 );
1439 1.1 bjh21 if ( aExp < 0x3FE ) {
1440 1.1 bjh21 aSigExtra = aExp | aSig0 | aSig1;
1441 1.1 bjh21 absZ = 0;
1442 1.1 bjh21 }
1443 1.1 bjh21 else {
1444 1.1 bjh21 aSig0 |= 0x00100000;
1445 1.1 bjh21 aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1446 1.1 bjh21 absZ = aSig0>>( - shiftCount );
1447 1.1 bjh21 }
1448 1.1 bjh21 }
1449 1.1 bjh21 roundingMode = float_rounding_mode;
1450 1.1 bjh21 if ( roundingMode == float_round_nearest_even ) {
1451 1.1 bjh21 if ( (sbits32) aSigExtra < 0 ) {
1452 1.1 bjh21 ++absZ;
1453 1.1 bjh21 if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1;
1454 1.1 bjh21 }
1455 1.1 bjh21 z = aSign ? - absZ : absZ;
1456 1.1 bjh21 }
1457 1.1 bjh21 else {
1458 1.1 bjh21 aSigExtra = ( aSigExtra != 0 );
1459 1.1 bjh21 if ( aSign ) {
1460 1.1 bjh21 z = - ( absZ
1461 1.1 bjh21 + ( ( roundingMode == float_round_down ) & aSigExtra ) );
1462 1.1 bjh21 }
1463 1.1 bjh21 else {
1464 1.1 bjh21 z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra );
1465 1.1 bjh21 }
1466 1.1 bjh21 }
1467 1.1 bjh21 if ( ( aSign ^ ( z < 0 ) ) && z ) {
1468 1.1 bjh21 invalid:
1469 1.1 bjh21 float_raise( float_flag_invalid );
1470 1.1 bjh21 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1471 1.1 bjh21 }
1472 1.3 matt if ( aSigExtra ) set_float_exception_inexact_flag();
1473 1.1 bjh21 return z;
1474 1.1 bjh21
1475 1.1 bjh21 }
1476 1.1 bjh21 #endif /* !SOFTFLOAT_FOR_GCC */
1477 1.1 bjh21
1478 1.1 bjh21 /*
1479 1.1 bjh21 -------------------------------------------------------------------------------
1480 1.1 bjh21 Returns the result of converting the double-precision floating-point value
1481 1.1 bjh21 `a' to the 32-bit two's complement integer format. The conversion is
1482 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
1483 1.1 bjh21 Arithmetic, except that the conversion is always rounded toward zero.
1484 1.1 bjh21 If `a' is a NaN, the largest positive integer is returned. Otherwise, if
1485 1.1 bjh21 the conversion overflows, the largest integer with the same sign as `a' is
1486 1.1 bjh21 returned.
1487 1.1 bjh21 -------------------------------------------------------------------------------
1488 1.1 bjh21 */
1489 1.1 bjh21 int32 float64_to_int32_round_to_zero( float64 a )
1490 1.1 bjh21 {
1491 1.1 bjh21 flag aSign;
1492 1.1 bjh21 int16 aExp, shiftCount;
1493 1.1 bjh21 bits32 aSig0, aSig1, absZ, aSigExtra;
1494 1.1 bjh21 int32 z;
1495 1.1 bjh21
1496 1.1 bjh21 aSig1 = extractFloat64Frac1( a );
1497 1.1 bjh21 aSig0 = extractFloat64Frac0( a );
1498 1.1 bjh21 aExp = extractFloat64Exp( a );
1499 1.1 bjh21 aSign = extractFloat64Sign( a );
1500 1.1 bjh21 shiftCount = aExp - 0x413;
1501 1.1 bjh21 if ( 0 <= shiftCount ) {
1502 1.1 bjh21 if ( 0x41E < aExp ) {
1503 1.1 bjh21 if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1504 1.1 bjh21 goto invalid;
1505 1.1 bjh21 }
1506 1.1 bjh21 shortShift64Left(
1507 1.1 bjh21 aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1508 1.1 bjh21 }
1509 1.1 bjh21 else {
1510 1.1 bjh21 if ( aExp < 0x3FF ) {
1511 1.1 bjh21 if ( aExp | aSig0 | aSig1 ) {
1512 1.3 matt set_float_exception_inexact_flag();
1513 1.1 bjh21 }
1514 1.1 bjh21 return 0;
1515 1.1 bjh21 }
1516 1.1 bjh21 aSig0 |= 0x00100000;
1517 1.1 bjh21 aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1518 1.1 bjh21 absZ = aSig0>>( - shiftCount );
1519 1.1 bjh21 }
1520 1.1 bjh21 z = aSign ? - absZ : absZ;
1521 1.1 bjh21 if ( ( aSign ^ ( z < 0 ) ) && z ) {
1522 1.1 bjh21 invalid:
1523 1.1 bjh21 float_raise( float_flag_invalid );
1524 1.1 bjh21 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1525 1.1 bjh21 }
1526 1.3 matt if ( aSigExtra ) set_float_exception_inexact_flag();
1527 1.1 bjh21 return z;
1528 1.1 bjh21
1529 1.1 bjh21 }
1530 1.1 bjh21
1531 1.1 bjh21 /*
1532 1.1 bjh21 -------------------------------------------------------------------------------
1533 1.1 bjh21 Returns the result of converting the double-precision floating-point value
1534 1.1 bjh21 `a' to the single-precision floating-point format. The conversion is
1535 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
1536 1.1 bjh21 Arithmetic.
1537 1.1 bjh21 -------------------------------------------------------------------------------
1538 1.1 bjh21 */
1539 1.1 bjh21 float32 float64_to_float32( float64 a )
1540 1.1 bjh21 {
1541 1.1 bjh21 flag aSign;
1542 1.1 bjh21 int16 aExp;
1543 1.1 bjh21 bits32 aSig0, aSig1, zSig;
1544 1.1 bjh21 bits32 allZero;
1545 1.1 bjh21
1546 1.1 bjh21 aSig1 = extractFloat64Frac1( a );
1547 1.1 bjh21 aSig0 = extractFloat64Frac0( a );
1548 1.1 bjh21 aExp = extractFloat64Exp( a );
1549 1.1 bjh21 aSign = extractFloat64Sign( a );
1550 1.1 bjh21 if ( aExp == 0x7FF ) {
1551 1.1 bjh21 if ( aSig0 | aSig1 ) {
1552 1.1 bjh21 return commonNaNToFloat32( float64ToCommonNaN( a ) );
1553 1.1 bjh21 }
1554 1.1 bjh21 return packFloat32( aSign, 0xFF, 0 );
1555 1.1 bjh21 }
1556 1.1 bjh21 shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig );
1557 1.1 bjh21 if ( aExp ) zSig |= 0x40000000;
1558 1.1 bjh21 return roundAndPackFloat32( aSign, aExp - 0x381, zSig );
1559 1.1 bjh21
1560 1.1 bjh21 }
1561 1.1 bjh21
1562 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC
1563 1.1 bjh21 /*
1564 1.1 bjh21 -------------------------------------------------------------------------------
1565 1.1 bjh21 Rounds the double-precision floating-point value `a' to an integer,
1566 1.1 bjh21 and returns the result as a double-precision floating-point value. The
1567 1.1 bjh21 operation is performed according to the IEC/IEEE Standard for Binary
1568 1.1 bjh21 Floating-Point Arithmetic.
1569 1.1 bjh21 -------------------------------------------------------------------------------
1570 1.1 bjh21 */
1571 1.1 bjh21 float64 float64_round_to_int( float64 a )
1572 1.1 bjh21 {
1573 1.1 bjh21 flag aSign;
1574 1.1 bjh21 int16 aExp;
1575 1.1 bjh21 bits32 lastBitMask, roundBitsMask;
1576 1.1 bjh21 int8 roundingMode;
1577 1.1 bjh21 float64 z;
1578 1.1 bjh21
1579 1.1 bjh21 aExp = extractFloat64Exp( a );
1580 1.1 bjh21 if ( 0x413 <= aExp ) {
1581 1.1 bjh21 if ( 0x433 <= aExp ) {
1582 1.1 bjh21 if ( ( aExp == 0x7FF )
1583 1.1 bjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) {
1584 1.1 bjh21 return propagateFloat64NaN( a, a );
1585 1.1 bjh21 }
1586 1.1 bjh21 return a;
1587 1.1 bjh21 }
1588 1.1 bjh21 lastBitMask = 1;
1589 1.1 bjh21 lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1;
1590 1.1 bjh21 roundBitsMask = lastBitMask - 1;
1591 1.1 bjh21 z = a;
1592 1.1 bjh21 roundingMode = float_rounding_mode;
1593 1.1 bjh21 if ( roundingMode == float_round_nearest_even ) {
1594 1.1 bjh21 if ( lastBitMask ) {
1595 1.1 bjh21 add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
1596 1.1 bjh21 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
1597 1.1 bjh21 }
1598 1.1 bjh21 else {
1599 1.1 bjh21 if ( (sbits32) z.low < 0 ) {
1600 1.1 bjh21 ++z.high;
1601 1.1 bjh21 if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1;
1602 1.1 bjh21 }
1603 1.1 bjh21 }
1604 1.1 bjh21 }
1605 1.1 bjh21 else if ( roundingMode != float_round_to_zero ) {
1606 1.1 bjh21 if ( extractFloat64Sign( z )
1607 1.1 bjh21 ^ ( roundingMode == float_round_up ) ) {
1608 1.1 bjh21 add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
1609 1.1 bjh21 }
1610 1.1 bjh21 }
1611 1.1 bjh21 z.low &= ~ roundBitsMask;
1612 1.1 bjh21 }
1613 1.1 bjh21 else {
1614 1.1 bjh21 if ( aExp <= 0x3FE ) {
1615 1.1 bjh21 if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
1616 1.3 matt set_float_exception_inexact_flag();
1617 1.1 bjh21 aSign = extractFloat64Sign( a );
1618 1.1 bjh21 switch ( float_rounding_mode ) {
1619 1.1 bjh21 case float_round_nearest_even:
1620 1.1 bjh21 if ( ( aExp == 0x3FE )
1621 1.1 bjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) )
1622 1.1 bjh21 ) {
1623 1.1 bjh21 return packFloat64( aSign, 0x3FF, 0, 0 );
1624 1.1 bjh21 }
1625 1.1 bjh21 break;
1626 1.1 bjh21 case float_round_down:
1627 1.1 bjh21 return
1628 1.1 bjh21 aSign ? packFloat64( 1, 0x3FF, 0, 0 )
1629 1.1 bjh21 : packFloat64( 0, 0, 0, 0 );
1630 1.1 bjh21 case float_round_up:
1631 1.1 bjh21 return
1632 1.1 bjh21 aSign ? packFloat64( 1, 0, 0, 0 )
1633 1.1 bjh21 : packFloat64( 0, 0x3FF, 0, 0 );
1634 1.1 bjh21 }
1635 1.1 bjh21 return packFloat64( aSign, 0, 0, 0 );
1636 1.1 bjh21 }
1637 1.1 bjh21 lastBitMask = 1;
1638 1.1 bjh21 lastBitMask <<= 0x413 - aExp;
1639 1.1 bjh21 roundBitsMask = lastBitMask - 1;
1640 1.1 bjh21 z.low = 0;
1641 1.1 bjh21 z.high = a.high;
1642 1.1 bjh21 roundingMode = float_rounding_mode;
1643 1.1 bjh21 if ( roundingMode == float_round_nearest_even ) {
1644 1.1 bjh21 z.high += lastBitMask>>1;
1645 1.1 bjh21 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
1646 1.1 bjh21 z.high &= ~ lastBitMask;
1647 1.1 bjh21 }
1648 1.1 bjh21 }
1649 1.1 bjh21 else if ( roundingMode != float_round_to_zero ) {
1650 1.1 bjh21 if ( extractFloat64Sign( z )
1651 1.1 bjh21 ^ ( roundingMode == float_round_up ) ) {
1652 1.1 bjh21 z.high |= ( a.low != 0 );
1653 1.1 bjh21 z.high += roundBitsMask;
1654 1.1 bjh21 }
1655 1.1 bjh21 }
1656 1.1 bjh21 z.high &= ~ roundBitsMask;
1657 1.1 bjh21 }
1658 1.1 bjh21 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
1659 1.3 matt set_float_exception_inexact_flag();
1660 1.1 bjh21 }
1661 1.1 bjh21 return z;
1662 1.1 bjh21
1663 1.1 bjh21 }
1664 1.1 bjh21 #endif
1665 1.1 bjh21
1666 1.1 bjh21 /*
1667 1.1 bjh21 -------------------------------------------------------------------------------
1668 1.1 bjh21 Returns the result of adding the absolute values of the double-precision
1669 1.1 bjh21 floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
1670 1.1 bjh21 before being returned. `zSign' is ignored if the result is a NaN.
1671 1.1 bjh21 The addition is performed according to the IEC/IEEE Standard for Binary
1672 1.1 bjh21 Floating-Point Arithmetic.
1673 1.1 bjh21 -------------------------------------------------------------------------------
1674 1.1 bjh21 */
1675 1.1 bjh21 static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
1676 1.1 bjh21 {
1677 1.1 bjh21 int16 aExp, bExp, zExp;
1678 1.1 bjh21 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1679 1.1 bjh21 int16 expDiff;
1680 1.1 bjh21
1681 1.1 bjh21 aSig1 = extractFloat64Frac1( a );
1682 1.1 bjh21 aSig0 = extractFloat64Frac0( a );
1683 1.1 bjh21 aExp = extractFloat64Exp( a );
1684 1.1 bjh21 bSig1 = extractFloat64Frac1( b );
1685 1.1 bjh21 bSig0 = extractFloat64Frac0( b );
1686 1.1 bjh21 bExp = extractFloat64Exp( b );
1687 1.1 bjh21 expDiff = aExp - bExp;
1688 1.1 bjh21 if ( 0 < expDiff ) {
1689 1.1 bjh21 if ( aExp == 0x7FF ) {
1690 1.1 bjh21 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1691 1.1 bjh21 return a;
1692 1.1 bjh21 }
1693 1.1 bjh21 if ( bExp == 0 ) {
1694 1.1 bjh21 --expDiff;
1695 1.1 bjh21 }
1696 1.1 bjh21 else {
1697 1.1 bjh21 bSig0 |= 0x00100000;
1698 1.1 bjh21 }
1699 1.1 bjh21 shift64ExtraRightJamming(
1700 1.1 bjh21 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
1701 1.1 bjh21 zExp = aExp;
1702 1.1 bjh21 }
1703 1.1 bjh21 else if ( expDiff < 0 ) {
1704 1.1 bjh21 if ( bExp == 0x7FF ) {
1705 1.1 bjh21 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1706 1.1 bjh21 return packFloat64( zSign, 0x7FF, 0, 0 );
1707 1.1 bjh21 }
1708 1.1 bjh21 if ( aExp == 0 ) {
1709 1.1 bjh21 ++expDiff;
1710 1.1 bjh21 }
1711 1.1 bjh21 else {
1712 1.1 bjh21 aSig0 |= 0x00100000;
1713 1.1 bjh21 }
1714 1.1 bjh21 shift64ExtraRightJamming(
1715 1.1 bjh21 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
1716 1.1 bjh21 zExp = bExp;
1717 1.1 bjh21 }
1718 1.1 bjh21 else {
1719 1.1 bjh21 if ( aExp == 0x7FF ) {
1720 1.1 bjh21 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1721 1.1 bjh21 return propagateFloat64NaN( a, b );
1722 1.1 bjh21 }
1723 1.1 bjh21 return a;
1724 1.1 bjh21 }
1725 1.1 bjh21 add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1726 1.1 bjh21 if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 );
1727 1.1 bjh21 zSig2 = 0;
1728 1.1 bjh21 zSig0 |= 0x00200000;
1729 1.1 bjh21 zExp = aExp;
1730 1.1 bjh21 goto shiftRight1;
1731 1.1 bjh21 }
1732 1.1 bjh21 aSig0 |= 0x00100000;
1733 1.1 bjh21 add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1734 1.1 bjh21 --zExp;
1735 1.1 bjh21 if ( zSig0 < 0x00200000 ) goto roundAndPack;
1736 1.1 bjh21 ++zExp;
1737 1.1 bjh21 shiftRight1:
1738 1.1 bjh21 shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1739 1.1 bjh21 roundAndPack:
1740 1.1 bjh21 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1741 1.1 bjh21
1742 1.1 bjh21 }
1743 1.1 bjh21
1744 1.1 bjh21 /*
1745 1.1 bjh21 -------------------------------------------------------------------------------
1746 1.1 bjh21 Returns the result of subtracting the absolute values of the double-
1747 1.1 bjh21 precision floating-point values `a' and `b'. If `zSign' is 1, the
1748 1.1 bjh21 difference is negated before being returned. `zSign' is ignored if the
1749 1.1 bjh21 result is a NaN. The subtraction is performed according to the IEC/IEEE
1750 1.1 bjh21 Standard for Binary Floating-Point Arithmetic.
1751 1.1 bjh21 -------------------------------------------------------------------------------
1752 1.1 bjh21 */
1753 1.1 bjh21 static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
1754 1.1 bjh21 {
1755 1.1 bjh21 int16 aExp, bExp, zExp;
1756 1.1 bjh21 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
1757 1.1 bjh21 int16 expDiff;
1758 1.1 bjh21
1759 1.1 bjh21 aSig1 = extractFloat64Frac1( a );
1760 1.1 bjh21 aSig0 = extractFloat64Frac0( a );
1761 1.1 bjh21 aExp = extractFloat64Exp( a );
1762 1.1 bjh21 bSig1 = extractFloat64Frac1( b );
1763 1.1 bjh21 bSig0 = extractFloat64Frac0( b );
1764 1.1 bjh21 bExp = extractFloat64Exp( b );
1765 1.1 bjh21 expDiff = aExp - bExp;
1766 1.1 bjh21 shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 );
1767 1.1 bjh21 shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 );
1768 1.1 bjh21 if ( 0 < expDiff ) goto aExpBigger;
1769 1.1 bjh21 if ( expDiff < 0 ) goto bExpBigger;
1770 1.1 bjh21 if ( aExp == 0x7FF ) {
1771 1.1 bjh21 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1772 1.1 bjh21 return propagateFloat64NaN( a, b );
1773 1.1 bjh21 }
1774 1.1 bjh21 float_raise( float_flag_invalid );
1775 1.1 bjh21 return float64_default_nan;
1776 1.1 bjh21 }
1777 1.1 bjh21 if ( aExp == 0 ) {
1778 1.1 bjh21 aExp = 1;
1779 1.1 bjh21 bExp = 1;
1780 1.1 bjh21 }
1781 1.1 bjh21 if ( bSig0 < aSig0 ) goto aBigger;
1782 1.1 bjh21 if ( aSig0 < bSig0 ) goto bBigger;
1783 1.1 bjh21 if ( bSig1 < aSig1 ) goto aBigger;
1784 1.1 bjh21 if ( aSig1 < bSig1 ) goto bBigger;
1785 1.1 bjh21 return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 );
1786 1.1 bjh21 bExpBigger:
1787 1.1 bjh21 if ( bExp == 0x7FF ) {
1788 1.1 bjh21 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1789 1.1 bjh21 return packFloat64( zSign ^ 1, 0x7FF, 0, 0 );
1790 1.1 bjh21 }
1791 1.1 bjh21 if ( aExp == 0 ) {
1792 1.1 bjh21 ++expDiff;
1793 1.1 bjh21 }
1794 1.1 bjh21 else {
1795 1.1 bjh21 aSig0 |= 0x40000000;
1796 1.1 bjh21 }
1797 1.1 bjh21 shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
1798 1.1 bjh21 bSig0 |= 0x40000000;
1799 1.1 bjh21 bBigger:
1800 1.1 bjh21 sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
1801 1.1 bjh21 zExp = bExp;
1802 1.1 bjh21 zSign ^= 1;
1803 1.1 bjh21 goto normalizeRoundAndPack;
1804 1.1 bjh21 aExpBigger:
1805 1.1 bjh21 if ( aExp == 0x7FF ) {
1806 1.1 bjh21 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1807 1.1 bjh21 return a;
1808 1.1 bjh21 }
1809 1.1 bjh21 if ( bExp == 0 ) {
1810 1.1 bjh21 --expDiff;
1811 1.1 bjh21 }
1812 1.1 bjh21 else {
1813 1.1 bjh21 bSig0 |= 0x40000000;
1814 1.1 bjh21 }
1815 1.1 bjh21 shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
1816 1.1 bjh21 aSig0 |= 0x40000000;
1817 1.1 bjh21 aBigger:
1818 1.1 bjh21 sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1819 1.1 bjh21 zExp = aExp;
1820 1.1 bjh21 normalizeRoundAndPack:
1821 1.1 bjh21 --zExp;
1822 1.1 bjh21 return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 );
1823 1.1 bjh21
1824 1.1 bjh21 }
1825 1.1 bjh21
1826 1.1 bjh21 /*
1827 1.1 bjh21 -------------------------------------------------------------------------------
1828 1.1 bjh21 Returns the result of adding the double-precision floating-point values `a'
1829 1.1 bjh21 and `b'. The operation is performed according to the IEC/IEEE Standard for
1830 1.1 bjh21 Binary Floating-Point Arithmetic.
1831 1.1 bjh21 -------------------------------------------------------------------------------
1832 1.1 bjh21 */
1833 1.1 bjh21 float64 float64_add( float64 a, float64 b )
1834 1.1 bjh21 {
1835 1.1 bjh21 flag aSign, bSign;
1836 1.1 bjh21
1837 1.1 bjh21 aSign = extractFloat64Sign( a );
1838 1.1 bjh21 bSign = extractFloat64Sign( b );
1839 1.1 bjh21 if ( aSign == bSign ) {
1840 1.1 bjh21 return addFloat64Sigs( a, b, aSign );
1841 1.1 bjh21 }
1842 1.1 bjh21 else {
1843 1.1 bjh21 return subFloat64Sigs( a, b, aSign );
1844 1.1 bjh21 }
1845 1.1 bjh21
1846 1.1 bjh21 }
1847 1.1 bjh21
1848 1.1 bjh21 /*
1849 1.1 bjh21 -------------------------------------------------------------------------------
1850 1.1 bjh21 Returns the result of subtracting the double-precision floating-point values
1851 1.1 bjh21 `a' and `b'. The operation is performed according to the IEC/IEEE Standard
1852 1.1 bjh21 for Binary Floating-Point Arithmetic.
1853 1.1 bjh21 -------------------------------------------------------------------------------
1854 1.1 bjh21 */
1855 1.1 bjh21 float64 float64_sub( float64 a, float64 b )
1856 1.1 bjh21 {
1857 1.1 bjh21 flag aSign, bSign;
1858 1.1 bjh21
1859 1.1 bjh21 aSign = extractFloat64Sign( a );
1860 1.1 bjh21 bSign = extractFloat64Sign( b );
1861 1.1 bjh21 if ( aSign == bSign ) {
1862 1.1 bjh21 return subFloat64Sigs( a, b, aSign );
1863 1.1 bjh21 }
1864 1.1 bjh21 else {
1865 1.1 bjh21 return addFloat64Sigs( a, b, aSign );
1866 1.1 bjh21 }
1867 1.1 bjh21
1868 1.1 bjh21 }
1869 1.1 bjh21
1870 1.1 bjh21 /*
1871 1.1 bjh21 -------------------------------------------------------------------------------
1872 1.1 bjh21 Returns the result of multiplying the double-precision floating-point values
1873 1.1 bjh21 `a' and `b'. The operation is performed according to the IEC/IEEE Standard
1874 1.1 bjh21 for Binary Floating-Point Arithmetic.
1875 1.1 bjh21 -------------------------------------------------------------------------------
1876 1.1 bjh21 */
1877 1.1 bjh21 float64 float64_mul( float64 a, float64 b )
1878 1.1 bjh21 {
1879 1.1 bjh21 flag aSign, bSign, zSign;
1880 1.1 bjh21 int16 aExp, bExp, zExp;
1881 1.1 bjh21 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
1882 1.1 bjh21
1883 1.1 bjh21 aSig1 = extractFloat64Frac1( a );
1884 1.1 bjh21 aSig0 = extractFloat64Frac0( a );
1885 1.1 bjh21 aExp = extractFloat64Exp( a );
1886 1.1 bjh21 aSign = extractFloat64Sign( a );
1887 1.1 bjh21 bSig1 = extractFloat64Frac1( b );
1888 1.1 bjh21 bSig0 = extractFloat64Frac0( b );
1889 1.1 bjh21 bExp = extractFloat64Exp( b );
1890 1.1 bjh21 bSign = extractFloat64Sign( b );
1891 1.1 bjh21 zSign = aSign ^ bSign;
1892 1.1 bjh21 if ( aExp == 0x7FF ) {
1893 1.1 bjh21 if ( ( aSig0 | aSig1 )
1894 1.1 bjh21 || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
1895 1.1 bjh21 return propagateFloat64NaN( a, b );
1896 1.1 bjh21 }
1897 1.1 bjh21 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
1898 1.1 bjh21 return packFloat64( zSign, 0x7FF, 0, 0 );
1899 1.1 bjh21 }
1900 1.1 bjh21 if ( bExp == 0x7FF ) {
1901 1.1 bjh21 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1902 1.1 bjh21 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1903 1.1 bjh21 invalid:
1904 1.1 bjh21 float_raise( float_flag_invalid );
1905 1.1 bjh21 return float64_default_nan;
1906 1.1 bjh21 }
1907 1.1 bjh21 return packFloat64( zSign, 0x7FF, 0, 0 );
1908 1.1 bjh21 }
1909 1.1 bjh21 if ( aExp == 0 ) {
1910 1.1 bjh21 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1911 1.1 bjh21 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1912 1.1 bjh21 }
1913 1.1 bjh21 if ( bExp == 0 ) {
1914 1.1 bjh21 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1915 1.1 bjh21 normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1916 1.1 bjh21 }
1917 1.1 bjh21 zExp = aExp + bExp - 0x400;
1918 1.1 bjh21 aSig0 |= 0x00100000;
1919 1.1 bjh21 shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 );
1920 1.1 bjh21 mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
1921 1.1 bjh21 add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
1922 1.1 bjh21 zSig2 |= ( zSig3 != 0 );
1923 1.1 bjh21 if ( 0x00200000 <= zSig0 ) {
1924 1.1 bjh21 shift64ExtraRightJamming(
1925 1.1 bjh21 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1926 1.1 bjh21 ++zExp;
1927 1.1 bjh21 }
1928 1.1 bjh21 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1929 1.1 bjh21
1930 1.1 bjh21 }
1931 1.1 bjh21
1932 1.1 bjh21 /*
1933 1.1 bjh21 -------------------------------------------------------------------------------
1934 1.1 bjh21 Returns the result of dividing the double-precision floating-point value `a'
1935 1.1 bjh21 by the corresponding value `b'. The operation is performed according to the
1936 1.1 bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1937 1.1 bjh21 -------------------------------------------------------------------------------
1938 1.1 bjh21 */
1939 1.1 bjh21 float64 float64_div( float64 a, float64 b )
1940 1.1 bjh21 {
1941 1.1 bjh21 flag aSign, bSign, zSign;
1942 1.1 bjh21 int16 aExp, bExp, zExp;
1943 1.1 bjh21 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1944 1.1 bjh21 bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
1945 1.1 bjh21
1946 1.1 bjh21 aSig1 = extractFloat64Frac1( a );
1947 1.1 bjh21 aSig0 = extractFloat64Frac0( a );
1948 1.1 bjh21 aExp = extractFloat64Exp( a );
1949 1.1 bjh21 aSign = extractFloat64Sign( a );
1950 1.1 bjh21 bSig1 = extractFloat64Frac1( b );
1951 1.1 bjh21 bSig0 = extractFloat64Frac0( b );
1952 1.1 bjh21 bExp = extractFloat64Exp( b );
1953 1.1 bjh21 bSign = extractFloat64Sign( b );
1954 1.1 bjh21 zSign = aSign ^ bSign;
1955 1.1 bjh21 if ( aExp == 0x7FF ) {
1956 1.1 bjh21 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1957 1.1 bjh21 if ( bExp == 0x7FF ) {
1958 1.1 bjh21 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1959 1.1 bjh21 goto invalid;
1960 1.1 bjh21 }
1961 1.1 bjh21 return packFloat64( zSign, 0x7FF, 0, 0 );
1962 1.1 bjh21 }
1963 1.1 bjh21 if ( bExp == 0x7FF ) {
1964 1.1 bjh21 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1965 1.1 bjh21 return packFloat64( zSign, 0, 0, 0 );
1966 1.1 bjh21 }
1967 1.1 bjh21 if ( bExp == 0 ) {
1968 1.1 bjh21 if ( ( bSig0 | bSig1 ) == 0 ) {
1969 1.1 bjh21 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1970 1.1 bjh21 invalid:
1971 1.1 bjh21 float_raise( float_flag_invalid );
1972 1.1 bjh21 return float64_default_nan;
1973 1.1 bjh21 }
1974 1.1 bjh21 float_raise( float_flag_divbyzero );
1975 1.1 bjh21 return packFloat64( zSign, 0x7FF, 0, 0 );
1976 1.1 bjh21 }
1977 1.1 bjh21 normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1978 1.1 bjh21 }
1979 1.1 bjh21 if ( aExp == 0 ) {
1980 1.1 bjh21 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1981 1.1 bjh21 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1982 1.1 bjh21 }
1983 1.1 bjh21 zExp = aExp - bExp + 0x3FD;
1984 1.1 bjh21 shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 );
1985 1.1 bjh21 shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
1986 1.1 bjh21 if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) {
1987 1.1 bjh21 shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
1988 1.1 bjh21 ++zExp;
1989 1.1 bjh21 }
1990 1.1 bjh21 zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 );
1991 1.1 bjh21 mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
1992 1.1 bjh21 sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
1993 1.1 bjh21 while ( (sbits32) rem0 < 0 ) {
1994 1.1 bjh21 --zSig0;
1995 1.1 bjh21 add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
1996 1.1 bjh21 }
1997 1.1 bjh21 zSig1 = estimateDiv64To32( rem1, rem2, bSig0 );
1998 1.1 bjh21 if ( ( zSig1 & 0x3FF ) <= 4 ) {
1999 1.1 bjh21 mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
2000 1.1 bjh21 sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
2001 1.1 bjh21 while ( (sbits32) rem1 < 0 ) {
2002 1.1 bjh21 --zSig1;
2003 1.1 bjh21 add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
2004 1.1 bjh21 }
2005 1.1 bjh21 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
2006 1.1 bjh21 }
2007 1.1 bjh21 shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 );
2008 1.1 bjh21 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
2009 1.1 bjh21
2010 1.1 bjh21 }
2011 1.1 bjh21
2012 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC
2013 1.1 bjh21 /*
2014 1.1 bjh21 -------------------------------------------------------------------------------
2015 1.1 bjh21 Returns the remainder of the double-precision floating-point value `a'
2016 1.1 bjh21 with respect to the corresponding value `b'. The operation is performed
2017 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2018 1.1 bjh21 -------------------------------------------------------------------------------
2019 1.1 bjh21 */
2020 1.1 bjh21 float64 float64_rem( float64 a, float64 b )
2021 1.1 bjh21 {
2022 1.1 bjh21 flag aSign, bSign, zSign;
2023 1.1 bjh21 int16 aExp, bExp, expDiff;
2024 1.1 bjh21 bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
2025 1.1 bjh21 bits32 allZero, alternateASig0, alternateASig1, sigMean1;
2026 1.1 bjh21 sbits32 sigMean0;
2027 1.1 bjh21 float64 z;
2028 1.1 bjh21
2029 1.1 bjh21 aSig1 = extractFloat64Frac1( a );
2030 1.1 bjh21 aSig0 = extractFloat64Frac0( a );
2031 1.1 bjh21 aExp = extractFloat64Exp( a );
2032 1.1 bjh21 aSign = extractFloat64Sign( a );
2033 1.1 bjh21 bSig1 = extractFloat64Frac1( b );
2034 1.1 bjh21 bSig0 = extractFloat64Frac0( b );
2035 1.1 bjh21 bExp = extractFloat64Exp( b );
2036 1.1 bjh21 bSign = extractFloat64Sign( b );
2037 1.1 bjh21 if ( aExp == 0x7FF ) {
2038 1.1 bjh21 if ( ( aSig0 | aSig1 )
2039 1.1 bjh21 || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
2040 1.1 bjh21 return propagateFloat64NaN( a, b );
2041 1.1 bjh21 }
2042 1.1 bjh21 goto invalid;
2043 1.1 bjh21 }
2044 1.1 bjh21 if ( bExp == 0x7FF ) {
2045 1.1 bjh21 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
2046 1.1 bjh21 return a;
2047 1.1 bjh21 }
2048 1.1 bjh21 if ( bExp == 0 ) {
2049 1.1 bjh21 if ( ( bSig0 | bSig1 ) == 0 ) {
2050 1.1 bjh21 invalid:
2051 1.1 bjh21 float_raise( float_flag_invalid );
2052 1.1 bjh21 return float64_default_nan;
2053 1.1 bjh21 }
2054 1.1 bjh21 normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
2055 1.1 bjh21 }
2056 1.1 bjh21 if ( aExp == 0 ) {
2057 1.1 bjh21 if ( ( aSig0 | aSig1 ) == 0 ) return a;
2058 1.1 bjh21 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2059 1.1 bjh21 }
2060 1.1 bjh21 expDiff = aExp - bExp;
2061 1.1 bjh21 if ( expDiff < -1 ) return a;
2062 1.1 bjh21 shortShift64Left(
2063 1.1 bjh21 aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 );
2064 1.1 bjh21 shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
2065 1.1 bjh21 q = le64( bSig0, bSig1, aSig0, aSig1 );
2066 1.1 bjh21 if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2067 1.1 bjh21 expDiff -= 32;
2068 1.1 bjh21 while ( 0 < expDiff ) {
2069 1.1 bjh21 q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2070 1.1 bjh21 q = ( 4 < q ) ? q - 4 : 0;
2071 1.1 bjh21 mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2072 1.1 bjh21 shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero );
2073 1.1 bjh21 shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero );
2074 1.1 bjh21 sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 );
2075 1.1 bjh21 expDiff -= 29;
2076 1.1 bjh21 }
2077 1.1 bjh21 if ( -32 < expDiff ) {
2078 1.1 bjh21 q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2079 1.1 bjh21 q = ( 4 < q ) ? q - 4 : 0;
2080 1.1 bjh21 q >>= - expDiff;
2081 1.1 bjh21 shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2082 1.1 bjh21 expDiff += 24;
2083 1.1 bjh21 if ( expDiff < 0 ) {
2084 1.1 bjh21 shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
2085 1.1 bjh21 }
2086 1.1 bjh21 else {
2087 1.1 bjh21 shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
2088 1.1 bjh21 }
2089 1.1 bjh21 mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2090 1.1 bjh21 sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
2091 1.1 bjh21 }
2092 1.1 bjh21 else {
2093 1.1 bjh21 shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 );
2094 1.1 bjh21 shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2095 1.1 bjh21 }
2096 1.1 bjh21 do {
2097 1.1 bjh21 alternateASig0 = aSig0;
2098 1.1 bjh21 alternateASig1 = aSig1;
2099 1.1 bjh21 ++q;
2100 1.1 bjh21 sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2101 1.1 bjh21 } while ( 0 <= (sbits32) aSig0 );
2102 1.1 bjh21 add64(
2103 1.1 bjh21 aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
2104 1.1 bjh21 if ( ( sigMean0 < 0 )
2105 1.1 bjh21 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
2106 1.1 bjh21 aSig0 = alternateASig0;
2107 1.1 bjh21 aSig1 = alternateASig1;
2108 1.1 bjh21 }
2109 1.1 bjh21 zSign = ( (sbits32) aSig0 < 0 );
2110 1.1 bjh21 if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
2111 1.1 bjh21 return
2112 1.1 bjh21 normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
2113 1.1 bjh21
2114 1.1 bjh21 }
2115 1.1 bjh21 #endif
2116 1.1 bjh21
2117 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC
2118 1.1 bjh21 /*
2119 1.1 bjh21 -------------------------------------------------------------------------------
2120 1.1 bjh21 Returns the square root of the double-precision floating-point value `a'.
2121 1.1 bjh21 The operation is performed according to the IEC/IEEE Standard for Binary
2122 1.1 bjh21 Floating-Point Arithmetic.
2123 1.1 bjh21 -------------------------------------------------------------------------------
2124 1.1 bjh21 */
2125 1.1 bjh21 float64 float64_sqrt( float64 a )
2126 1.1 bjh21 {
2127 1.1 bjh21 flag aSign;
2128 1.1 bjh21 int16 aExp, zExp;
2129 1.1 bjh21 bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
2130 1.1 bjh21 bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
2131 1.1 bjh21 float64 z;
2132 1.1 bjh21
2133 1.1 bjh21 aSig1 = extractFloat64Frac1( a );
2134 1.1 bjh21 aSig0 = extractFloat64Frac0( a );
2135 1.1 bjh21 aExp = extractFloat64Exp( a );
2136 1.1 bjh21 aSign = extractFloat64Sign( a );
2137 1.1 bjh21 if ( aExp == 0x7FF ) {
2138 1.1 bjh21 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a );
2139 1.1 bjh21 if ( ! aSign ) return a;
2140 1.1 bjh21 goto invalid;
2141 1.1 bjh21 }
2142 1.1 bjh21 if ( aSign ) {
2143 1.1 bjh21 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
2144 1.1 bjh21 invalid:
2145 1.1 bjh21 float_raise( float_flag_invalid );
2146 1.1 bjh21 return float64_default_nan;
2147 1.1 bjh21 }
2148 1.1 bjh21 if ( aExp == 0 ) {
2149 1.1 bjh21 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 );
2150 1.1 bjh21 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2151 1.1 bjh21 }
2152 1.1 bjh21 zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
2153 1.1 bjh21 aSig0 |= 0x00100000;
2154 1.1 bjh21 shortShift64Left( aSig0, aSig1, 11, &term0, &term1 );
2155 1.1 bjh21 zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1;
2156 1.1 bjh21 if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF;
2157 1.1 bjh21 doubleZSig0 = zSig0 + zSig0;
2158 1.1 bjh21 shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 );
2159 1.1 bjh21 mul32To64( zSig0, zSig0, &term0, &term1 );
2160 1.1 bjh21 sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 );
2161 1.1 bjh21 while ( (sbits32) rem0 < 0 ) {
2162 1.1 bjh21 --zSig0;
2163 1.1 bjh21 doubleZSig0 -= 2;
2164 1.1 bjh21 add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 );
2165 1.1 bjh21 }
2166 1.1 bjh21 zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 );
2167 1.1 bjh21 if ( ( zSig1 & 0x1FF ) <= 5 ) {
2168 1.1 bjh21 if ( zSig1 == 0 ) zSig1 = 1;
2169 1.1 bjh21 mul32To64( doubleZSig0, zSig1, &term1, &term2 );
2170 1.1 bjh21 sub64( rem1, 0, term1, term2, &rem1, &rem2 );
2171 1.1 bjh21 mul32To64( zSig1, zSig1, &term2, &term3 );
2172 1.1 bjh21 sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
2173 1.1 bjh21 while ( (sbits32) rem1 < 0 ) {
2174 1.1 bjh21 --zSig1;
2175 1.1 bjh21 shortShift64Left( 0, zSig1, 1, &term2, &term3 );
2176 1.1 bjh21 term3 |= 1;
2177 1.1 bjh21 term2 |= doubleZSig0;
2178 1.1 bjh21 add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
2179 1.1 bjh21 }
2180 1.1 bjh21 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
2181 1.1 bjh21 }
2182 1.1 bjh21 shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 );
2183 1.1 bjh21 return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 );
2184 1.1 bjh21
2185 1.1 bjh21 }
2186 1.1 bjh21 #endif
2187 1.1 bjh21
2188 1.1 bjh21 /*
2189 1.1 bjh21 -------------------------------------------------------------------------------
2190 1.1 bjh21 Returns 1 if the double-precision floating-point value `a' is equal to
2191 1.1 bjh21 the corresponding value `b', and 0 otherwise. The comparison is performed
2192 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2193 1.1 bjh21 -------------------------------------------------------------------------------
2194 1.1 bjh21 */
2195 1.1 bjh21 flag float64_eq( float64 a, float64 b )
2196 1.1 bjh21 {
2197 1.1 bjh21
2198 1.1 bjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF )
2199 1.1 bjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2200 1.1 bjh21 || ( ( extractFloat64Exp( b ) == 0x7FF )
2201 1.1 bjh21 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2202 1.1 bjh21 ) {
2203 1.1 bjh21 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2204 1.1 bjh21 float_raise( float_flag_invalid );
2205 1.1 bjh21 }
2206 1.1 bjh21 return 0;
2207 1.1 bjh21 }
2208 1.1 bjh21 return ( a == b ) ||
2209 1.1 bjh21 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
2210 1.1 bjh21
2211 1.1 bjh21 }
2212 1.1 bjh21
2213 1.1 bjh21 /*
2214 1.1 bjh21 -------------------------------------------------------------------------------
2215 1.1 bjh21 Returns 1 if the double-precision floating-point value `a' is less than
2216 1.1 bjh21 or equal to the corresponding value `b', and 0 otherwise. The comparison
2217 1.1 bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point
2218 1.1 bjh21 Arithmetic.
2219 1.1 bjh21 -------------------------------------------------------------------------------
2220 1.1 bjh21 */
2221 1.1 bjh21 flag float64_le( float64 a, float64 b )
2222 1.1 bjh21 {
2223 1.1 bjh21 flag aSign, bSign;
2224 1.1 bjh21
2225 1.1 bjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF )
2226 1.1 bjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2227 1.1 bjh21 || ( ( extractFloat64Exp( b ) == 0x7FF )
2228 1.1 bjh21 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2229 1.1 bjh21 ) {
2230 1.1 bjh21 float_raise( float_flag_invalid );
2231 1.1 bjh21 return 0;
2232 1.1 bjh21 }
2233 1.1 bjh21 aSign = extractFloat64Sign( a );
2234 1.1 bjh21 bSign = extractFloat64Sign( b );
2235 1.1 bjh21 if ( aSign != bSign )
2236 1.1 bjh21 return aSign ||
2237 1.1 bjh21 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
2238 1.1 bjh21 0 );
2239 1.1 bjh21 return ( a == b ) ||
2240 1.1 bjh21 ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2241 1.1 bjh21 }
2242 1.1 bjh21
2243 1.1 bjh21 /*
2244 1.1 bjh21 -------------------------------------------------------------------------------
2245 1.1 bjh21 Returns 1 if the double-precision floating-point value `a' is less than
2246 1.1 bjh21 the corresponding value `b', and 0 otherwise. The comparison is performed
2247 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2248 1.1 bjh21 -------------------------------------------------------------------------------
2249 1.1 bjh21 */
2250 1.1 bjh21 flag float64_lt( float64 a, float64 b )
2251 1.1 bjh21 {
2252 1.1 bjh21 flag aSign, bSign;
2253 1.1 bjh21
2254 1.1 bjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF )
2255 1.1 bjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2256 1.1 bjh21 || ( ( extractFloat64Exp( b ) == 0x7FF )
2257 1.1 bjh21 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2258 1.1 bjh21 ) {
2259 1.1 bjh21 float_raise( float_flag_invalid );
2260 1.1 bjh21 return 0;
2261 1.1 bjh21 }
2262 1.1 bjh21 aSign = extractFloat64Sign( a );
2263 1.1 bjh21 bSign = extractFloat64Sign( b );
2264 1.1 bjh21 if ( aSign != bSign )
2265 1.1 bjh21 return aSign &&
2266 1.1 bjh21 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
2267 1.1 bjh21 0 );
2268 1.1 bjh21 return ( a != b ) &&
2269 1.1 bjh21 ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2270 1.1 bjh21
2271 1.1 bjh21 }
2272 1.1 bjh21
2273 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC
2274 1.1 bjh21 /*
2275 1.1 bjh21 -------------------------------------------------------------------------------
2276 1.1 bjh21 Returns 1 if the double-precision floating-point value `a' is equal to
2277 1.1 bjh21 the corresponding value `b', and 0 otherwise. The invalid exception is
2278 1.1 bjh21 raised if either operand is a NaN. Otherwise, the comparison is performed
2279 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2280 1.1 bjh21 -------------------------------------------------------------------------------
2281 1.1 bjh21 */
2282 1.1 bjh21 flag float64_eq_signaling( float64 a, float64 b )
2283 1.1 bjh21 {
2284 1.1 bjh21
2285 1.1 bjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF )
2286 1.1 bjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2287 1.1 bjh21 || ( ( extractFloat64Exp( b ) == 0x7FF )
2288 1.1 bjh21 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2289 1.1 bjh21 ) {
2290 1.1 bjh21 float_raise( float_flag_invalid );
2291 1.1 bjh21 return 0;
2292 1.1 bjh21 }
2293 1.1 bjh21 return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
2294 1.1 bjh21
2295 1.1 bjh21 }
2296 1.1 bjh21
2297 1.1 bjh21 /*
2298 1.1 bjh21 -------------------------------------------------------------------------------
2299 1.1 bjh21 Returns 1 if the double-precision floating-point value `a' is less than or
2300 1.1 bjh21 equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
2301 1.1 bjh21 cause an exception. Otherwise, the comparison is performed according to the
2302 1.1 bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2303 1.1 bjh21 -------------------------------------------------------------------------------
2304 1.1 bjh21 */
2305 1.1 bjh21 flag float64_le_quiet( float64 a, float64 b )
2306 1.1 bjh21 {
2307 1.1 bjh21 flag aSign, bSign;
2308 1.1 bjh21
2309 1.1 bjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF )
2310 1.1 bjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2311 1.1 bjh21 || ( ( extractFloat64Exp( b ) == 0x7FF )
2312 1.1 bjh21 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2313 1.1 bjh21 ) {
2314 1.1 bjh21 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2315 1.1 bjh21 float_raise( float_flag_invalid );
2316 1.1 bjh21 }
2317 1.1 bjh21 return 0;
2318 1.1 bjh21 }
2319 1.1 bjh21 aSign = extractFloat64Sign( a );
2320 1.1 bjh21 bSign = extractFloat64Sign( b );
2321 1.1 bjh21 if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
2322 1.1 bjh21 return ( a == b ) || ( aSign ^ ( a < b ) );
2323 1.1 bjh21
2324 1.1 bjh21 }
2325 1.1 bjh21
2326 1.1 bjh21 /*
2327 1.1 bjh21 -------------------------------------------------------------------------------
2328 1.1 bjh21 Returns 1 if the double-precision floating-point value `a' is less than
2329 1.1 bjh21 the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
2330 1.1 bjh21 exception. Otherwise, the comparison is performed according to the IEC/IEEE
2331 1.1 bjh21 Standard for Binary Floating-Point Arithmetic.
2332 1.1 bjh21 -------------------------------------------------------------------------------
2333 1.1 bjh21 */
2334 1.1 bjh21 flag float64_lt_quiet( float64 a, float64 b )
2335 1.1 bjh21 {
2336 1.1 bjh21 flag aSign, bSign;
2337 1.1 bjh21
2338 1.1 bjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF )
2339 1.1 bjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2340 1.1 bjh21 || ( ( extractFloat64Exp( b ) == 0x7FF )
2341 1.1 bjh21 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2342 1.1 bjh21 ) {
2343 1.1 bjh21 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2344 1.1 bjh21 float_raise( float_flag_invalid );
2345 1.1 bjh21 }
2346 1.1 bjh21 return 0;
2347 1.1 bjh21 }
2348 1.1 bjh21 aSign = extractFloat64Sign( a );
2349 1.1 bjh21 bSign = extractFloat64Sign( b );
2350 1.1 bjh21 if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
2351 1.1 bjh21 return ( a != b ) && ( aSign ^ ( a < b ) );
2352 1.1 bjh21
2353 1.1 bjh21 }
2354 1.1 bjh21
2355 1.1 bjh21 #endif
2356