1 1.3 matt /* $NetBSD: softfloat.c,v 1.3 2013/01/10 08:16:11 matt Exp $ */ 2 1.1 bjh21 3 1.1 bjh21 /* 4 1.1 bjh21 * This version hacked for use with gcc -msoft-float by bjh21. 5 1.1 bjh21 * (Mostly a case of #ifdefing out things GCC doesn't need or provides 6 1.1 bjh21 * itself). 7 1.1 bjh21 */ 8 1.1 bjh21 9 1.1 bjh21 /* 10 1.1 bjh21 * Things you may want to define: 11 1.1 bjh21 * 12 1.1 bjh21 * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with 13 1.1 bjh21 * -msoft-float) to work. Include "softfloat-for-gcc.h" to get them 14 1.1 bjh21 * properly renamed. 15 1.1 bjh21 */ 16 1.1 bjh21 17 1.1 bjh21 /* 18 1.1 bjh21 * This differs from the standard bits32/softfloat.c in that float64 19 1.1 bjh21 * is defined to be a 64-bit integer rather than a structure. The 20 1.1 bjh21 * structure is float64s, with translation between the two going via 21 1.1 bjh21 * float64u. 22 1.1 bjh21 */ 23 1.1 bjh21 24 1.1 bjh21 /* 25 1.1 bjh21 =============================================================================== 26 1.1 bjh21 27 1.1 bjh21 This C source file is part of the SoftFloat IEC/IEEE Floating-Point 28 1.1 bjh21 Arithmetic Package, Release 2a. 29 1.1 bjh21 30 1.1 bjh21 Written by John R. Hauser. This work was made possible in part by the 31 1.1 bjh21 International Computer Science Institute, located at Suite 600, 1947 Center 32 1.1 bjh21 Street, Berkeley, California 94704. Funding was partially provided by the 33 1.1 bjh21 National Science Foundation under grant MIP-9311980. The original version 34 1.1 bjh21 of this code was written as part of a project to build a fixed-point vector 35 1.1 bjh21 processor in collaboration with the University of California at Berkeley, 36 1.1 bjh21 overseen by Profs. Nelson Morgan and John Wawrzynek. More information 37 1.1 bjh21 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ 38 1.1 bjh21 arithmetic/SoftFloat.html'. 39 1.1 bjh21 40 1.1 bjh21 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort 41 1.1 bjh21 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT 42 1.1 bjh21 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO 43 1.1 bjh21 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY 44 1.1 bjh21 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 45 1.1 bjh21 46 1.1 bjh21 Derivative works are acceptable, even for commercial purposes, so long as 47 1.1 bjh21 (1) they include prominent notice that the work is derivative, and (2) they 48 1.1 bjh21 include prominent notice akin to these four paragraphs for those parts of 49 1.1 bjh21 this code that are retained. 50 1.1 bjh21 51 1.1 bjh21 =============================================================================== 52 1.1 bjh21 */ 53 1.1 bjh21 54 1.1 bjh21 #include <sys/cdefs.h> 55 1.1 bjh21 #if defined(LIBC_SCCS) && !defined(lint) 56 1.3 matt __RCSID("$NetBSD: softfloat.c,v 1.3 2013/01/10 08:16:11 matt Exp $"); 57 1.1 bjh21 #endif /* LIBC_SCCS and not lint */ 58 1.1 bjh21 59 1.1 bjh21 #ifdef SOFTFLOAT_FOR_GCC 60 1.1 bjh21 #include "softfloat-for-gcc.h" 61 1.1 bjh21 #endif 62 1.1 bjh21 63 1.1 bjh21 #include "milieu.h" 64 1.1 bjh21 #include "softfloat.h" 65 1.1 bjh21 66 1.1 bjh21 /* 67 1.1 bjh21 * Conversions between floats as stored in memory and floats as 68 1.1 bjh21 * SoftFloat uses them 69 1.1 bjh21 */ 70 1.1 bjh21 #ifndef FLOAT64_DEMANGLE 71 1.1 bjh21 #define FLOAT64_DEMANGLE(a) (a) 72 1.1 bjh21 #endif 73 1.1 bjh21 #ifndef FLOAT64_MANGLE 74 1.1 bjh21 #define FLOAT64_MANGLE(a) (a) 75 1.1 bjh21 #endif 76 1.1 bjh21 77 1.1 bjh21 /* 78 1.1 bjh21 ------------------------------------------------------------------------------- 79 1.1 bjh21 Floating-point rounding mode and exception flags. 80 1.1 bjh21 ------------------------------------------------------------------------------- 81 1.1 bjh21 */ 82 1.3 matt #ifndef set_float_rounding_mode 83 1.1 bjh21 fp_rnd float_rounding_mode = float_round_nearest_even; 84 1.1 bjh21 fp_except float_exception_flags = 0; 85 1.3 matt #endif 86 1.3 matt #ifndef set_float_exception_inexact_flag 87 1.3 matt #define set_float_exception_inexact_flag() \ 88 1.3 matt ((void)(float_exception_flags |= float_flag_inexact)) 89 1.3 matt #endif 90 1.1 bjh21 91 1.1 bjh21 /* 92 1.1 bjh21 ------------------------------------------------------------------------------- 93 1.1 bjh21 Primitive arithmetic functions, including multi-word arithmetic, and 94 1.1 bjh21 division and square root approximations. (Can be specialized to target if 95 1.1 bjh21 desired.) 96 1.1 bjh21 ------------------------------------------------------------------------------- 97 1.1 bjh21 */ 98 1.1 bjh21 #include "softfloat-macros" 99 1.1 bjh21 100 1.1 bjh21 /* 101 1.1 bjh21 ------------------------------------------------------------------------------- 102 1.1 bjh21 Functions and definitions to determine: (1) whether tininess for underflow 103 1.1 bjh21 is detected before or after rounding by default, (2) what (if anything) 104 1.1 bjh21 happens when exceptions are raised, (3) how signaling NaNs are distinguished 105 1.1 bjh21 from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs 106 1.1 bjh21 are propagated from function inputs to output. These details are target- 107 1.1 bjh21 specific. 108 1.1 bjh21 ------------------------------------------------------------------------------- 109 1.1 bjh21 */ 110 1.1 bjh21 #include "softfloat-specialize" 111 1.1 bjh21 112 1.1 bjh21 /* 113 1.1 bjh21 ------------------------------------------------------------------------------- 114 1.1 bjh21 Returns the fraction bits of the single-precision floating-point value `a'. 115 1.1 bjh21 ------------------------------------------------------------------------------- 116 1.1 bjh21 */ 117 1.1 bjh21 INLINE bits32 extractFloat32Frac( float32 a ) 118 1.1 bjh21 { 119 1.1 bjh21 120 1.1 bjh21 return a & 0x007FFFFF; 121 1.1 bjh21 122 1.1 bjh21 } 123 1.1 bjh21 124 1.1 bjh21 /* 125 1.1 bjh21 ------------------------------------------------------------------------------- 126 1.1 bjh21 Returns the exponent bits of the single-precision floating-point value `a'. 127 1.1 bjh21 ------------------------------------------------------------------------------- 128 1.1 bjh21 */ 129 1.1 bjh21 INLINE int16 extractFloat32Exp( float32 a ) 130 1.1 bjh21 { 131 1.1 bjh21 132 1.1 bjh21 return ( a>>23 ) & 0xFF; 133 1.1 bjh21 134 1.1 bjh21 } 135 1.1 bjh21 136 1.1 bjh21 /* 137 1.1 bjh21 ------------------------------------------------------------------------------- 138 1.1 bjh21 Returns the sign bit of the single-precision floating-point value `a'. 139 1.1 bjh21 ------------------------------------------------------------------------------- 140 1.1 bjh21 */ 141 1.1 bjh21 INLINE flag extractFloat32Sign( float32 a ) 142 1.1 bjh21 { 143 1.1 bjh21 144 1.1 bjh21 return a>>31; 145 1.1 bjh21 146 1.1 bjh21 } 147 1.1 bjh21 148 1.1 bjh21 /* 149 1.1 bjh21 ------------------------------------------------------------------------------- 150 1.1 bjh21 Normalizes the subnormal single-precision floating-point value represented 151 1.1 bjh21 by the denormalized significand `aSig'. The normalized exponent and 152 1.1 bjh21 significand are stored at the locations pointed to by `zExpPtr' and 153 1.1 bjh21 `zSigPtr', respectively. 154 1.1 bjh21 ------------------------------------------------------------------------------- 155 1.1 bjh21 */ 156 1.1 bjh21 static void 157 1.1 bjh21 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr ) 158 1.1 bjh21 { 159 1.1 bjh21 int8 shiftCount; 160 1.1 bjh21 161 1.1 bjh21 shiftCount = countLeadingZeros32( aSig ) - 8; 162 1.1 bjh21 *zSigPtr = aSig<<shiftCount; 163 1.1 bjh21 *zExpPtr = 1 - shiftCount; 164 1.1 bjh21 165 1.1 bjh21 } 166 1.1 bjh21 167 1.1 bjh21 /* 168 1.1 bjh21 ------------------------------------------------------------------------------- 169 1.1 bjh21 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a 170 1.1 bjh21 single-precision floating-point value, returning the result. After being 171 1.1 bjh21 shifted into the proper positions, the three fields are simply added 172 1.1 bjh21 together to form the result. This means that any integer portion of `zSig' 173 1.1 bjh21 will be added into the exponent. Since a properly normalized significand 174 1.1 bjh21 will have an integer portion equal to 1, the `zExp' input should be 1 less 175 1.1 bjh21 than the desired result exponent whenever `zSig' is a complete, normalized 176 1.1 bjh21 significand. 177 1.1 bjh21 ------------------------------------------------------------------------------- 178 1.1 bjh21 */ 179 1.1 bjh21 INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig ) 180 1.1 bjh21 { 181 1.1 bjh21 182 1.1 bjh21 return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig; 183 1.1 bjh21 184 1.1 bjh21 } 185 1.1 bjh21 186 1.1 bjh21 /* 187 1.1 bjh21 ------------------------------------------------------------------------------- 188 1.1 bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp', 189 1.1 bjh21 and significand `zSig', and returns the proper single-precision floating- 190 1.1 bjh21 point value corresponding to the abstract input. Ordinarily, the abstract 191 1.1 bjh21 value is simply rounded and packed into the single-precision format, with 192 1.1 bjh21 the inexact exception raised if the abstract input cannot be represented 193 1.1 bjh21 exactly. However, if the abstract value is too large, the overflow and 194 1.1 bjh21 inexact exceptions are raised and an infinity or maximal finite value is 195 1.1 bjh21 returned. If the abstract value is too small, the input value is rounded to 196 1.1 bjh21 a subnormal number, and the underflow and inexact exceptions are raised if 197 1.1 bjh21 the abstract input cannot be represented exactly as a subnormal single- 198 1.1 bjh21 precision floating-point number. 199 1.1 bjh21 The input significand `zSig' has its binary point between bits 30 200 1.1 bjh21 and 29, which is 7 bits to the left of the usual location. This shifted 201 1.1 bjh21 significand must be normalized or smaller. If `zSig' is not normalized, 202 1.1 bjh21 `zExp' must be 0; in that case, the result returned is a subnormal number, 203 1.1 bjh21 and it must not require rounding. In the usual case that `zSig' is 204 1.1 bjh21 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. 205 1.1 bjh21 The handling of underflow and overflow follows the IEC/IEEE Standard for 206 1.1 bjh21 Binary Floating-Point Arithmetic. 207 1.1 bjh21 ------------------------------------------------------------------------------- 208 1.1 bjh21 */ 209 1.1 bjh21 static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) 210 1.1 bjh21 { 211 1.1 bjh21 int8 roundingMode; 212 1.1 bjh21 flag roundNearestEven; 213 1.1 bjh21 int8 roundIncrement, roundBits; 214 1.1 bjh21 flag isTiny; 215 1.1 bjh21 216 1.1 bjh21 roundingMode = float_rounding_mode; 217 1.1 bjh21 roundNearestEven = roundingMode == float_round_nearest_even; 218 1.1 bjh21 roundIncrement = 0x40; 219 1.1 bjh21 if ( ! roundNearestEven ) { 220 1.1 bjh21 if ( roundingMode == float_round_to_zero ) { 221 1.1 bjh21 roundIncrement = 0; 222 1.1 bjh21 } 223 1.1 bjh21 else { 224 1.1 bjh21 roundIncrement = 0x7F; 225 1.1 bjh21 if ( zSign ) { 226 1.1 bjh21 if ( roundingMode == float_round_up ) roundIncrement = 0; 227 1.1 bjh21 } 228 1.1 bjh21 else { 229 1.1 bjh21 if ( roundingMode == float_round_down ) roundIncrement = 0; 230 1.1 bjh21 } 231 1.1 bjh21 } 232 1.1 bjh21 } 233 1.1 bjh21 roundBits = zSig & 0x7F; 234 1.1 bjh21 if ( 0xFD <= (bits16) zExp ) { 235 1.1 bjh21 if ( ( 0xFD < zExp ) 236 1.1 bjh21 || ( ( zExp == 0xFD ) 237 1.1 bjh21 && ( (sbits32) ( zSig + roundIncrement ) < 0 ) ) 238 1.1 bjh21 ) { 239 1.1 bjh21 float_raise( float_flag_overflow | float_flag_inexact ); 240 1.1 bjh21 return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 ); 241 1.1 bjh21 } 242 1.1 bjh21 if ( zExp < 0 ) { 243 1.1 bjh21 isTiny = 244 1.1 bjh21 ( float_detect_tininess == float_tininess_before_rounding ) 245 1.1 bjh21 || ( zExp < -1 ) 246 1.2 christos || ( zSig + roundIncrement < (uint32)0x80000000 ); 247 1.1 bjh21 shift32RightJamming( zSig, - zExp, &zSig ); 248 1.1 bjh21 zExp = 0; 249 1.1 bjh21 roundBits = zSig & 0x7F; 250 1.1 bjh21 if ( isTiny && roundBits ) float_raise( float_flag_underflow ); 251 1.1 bjh21 } 252 1.1 bjh21 } 253 1.3 matt if ( roundBits ) set_float_exception_inexact_flag(); 254 1.1 bjh21 zSig = ( zSig + roundIncrement )>>7; 255 1.1 bjh21 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); 256 1.1 bjh21 if ( zSig == 0 ) zExp = 0; 257 1.1 bjh21 return packFloat32( zSign, zExp, zSig ); 258 1.1 bjh21 259 1.1 bjh21 } 260 1.1 bjh21 261 1.1 bjh21 /* 262 1.1 bjh21 ------------------------------------------------------------------------------- 263 1.1 bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp', 264 1.1 bjh21 and significand `zSig', and returns the proper single-precision floating- 265 1.1 bjh21 point value corresponding to the abstract input. This routine is just like 266 1.1 bjh21 `roundAndPackFloat32' except that `zSig' does not have to be normalized. 267 1.1 bjh21 Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' 268 1.1 bjh21 floating-point exponent. 269 1.1 bjh21 ------------------------------------------------------------------------------- 270 1.1 bjh21 */ 271 1.1 bjh21 static float32 272 1.1 bjh21 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) 273 1.1 bjh21 { 274 1.1 bjh21 int8 shiftCount; 275 1.1 bjh21 276 1.1 bjh21 shiftCount = countLeadingZeros32( zSig ) - 1; 277 1.1 bjh21 return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount ); 278 1.1 bjh21 279 1.1 bjh21 } 280 1.1 bjh21 281 1.1 bjh21 /* 282 1.1 bjh21 ------------------------------------------------------------------------------- 283 1.1 bjh21 Returns the least-significant 32 fraction bits of the double-precision 284 1.1 bjh21 floating-point value `a'. 285 1.1 bjh21 ------------------------------------------------------------------------------- 286 1.1 bjh21 */ 287 1.1 bjh21 INLINE bits32 extractFloat64Frac1( float64 a ) 288 1.1 bjh21 { 289 1.1 bjh21 290 1.2 christos return (bits32)(FLOAT64_DEMANGLE(a) & LIT64(0x00000000FFFFFFFF)); 291 1.1 bjh21 292 1.1 bjh21 } 293 1.1 bjh21 294 1.1 bjh21 /* 295 1.1 bjh21 ------------------------------------------------------------------------------- 296 1.1 bjh21 Returns the most-significant 20 fraction bits of the double-precision 297 1.1 bjh21 floating-point value `a'. 298 1.1 bjh21 ------------------------------------------------------------------------------- 299 1.1 bjh21 */ 300 1.1 bjh21 INLINE bits32 extractFloat64Frac0( float64 a ) 301 1.1 bjh21 { 302 1.1 bjh21 303 1.2 christos return (bits32)((FLOAT64_DEMANGLE(a) >> 32) & 0x000FFFFF); 304 1.1 bjh21 305 1.1 bjh21 } 306 1.1 bjh21 307 1.1 bjh21 /* 308 1.1 bjh21 ------------------------------------------------------------------------------- 309 1.1 bjh21 Returns the exponent bits of the double-precision floating-point value `a'. 310 1.1 bjh21 ------------------------------------------------------------------------------- 311 1.1 bjh21 */ 312 1.1 bjh21 INLINE int16 extractFloat64Exp( float64 a ) 313 1.1 bjh21 { 314 1.1 bjh21 315 1.2 christos return (int16)((FLOAT64_DEMANGLE(a) >> 52) & 0x7FF); 316 1.1 bjh21 317 1.1 bjh21 } 318 1.1 bjh21 319 1.1 bjh21 /* 320 1.1 bjh21 ------------------------------------------------------------------------------- 321 1.1 bjh21 Returns the sign bit of the double-precision floating-point value `a'. 322 1.1 bjh21 ------------------------------------------------------------------------------- 323 1.1 bjh21 */ 324 1.1 bjh21 INLINE flag extractFloat64Sign( float64 a ) 325 1.1 bjh21 { 326 1.1 bjh21 327 1.2 christos return (flag)(FLOAT64_DEMANGLE(a) >> 63); 328 1.1 bjh21 329 1.1 bjh21 } 330 1.1 bjh21 331 1.1 bjh21 /* 332 1.1 bjh21 ------------------------------------------------------------------------------- 333 1.1 bjh21 Normalizes the subnormal double-precision floating-point value represented 334 1.1 bjh21 by the denormalized significand formed by the concatenation of `aSig0' and 335 1.1 bjh21 `aSig1'. The normalized exponent is stored at the location pointed to by 336 1.1 bjh21 `zExpPtr'. The most significant 21 bits of the normalized significand are 337 1.1 bjh21 stored at the location pointed to by `zSig0Ptr', and the least significant 338 1.1 bjh21 32 bits of the normalized significand are stored at the location pointed to 339 1.1 bjh21 by `zSig1Ptr'. 340 1.1 bjh21 ------------------------------------------------------------------------------- 341 1.1 bjh21 */ 342 1.1 bjh21 static void 343 1.1 bjh21 normalizeFloat64Subnormal( 344 1.1 bjh21 bits32 aSig0, 345 1.1 bjh21 bits32 aSig1, 346 1.1 bjh21 int16 *zExpPtr, 347 1.1 bjh21 bits32 *zSig0Ptr, 348 1.1 bjh21 bits32 *zSig1Ptr 349 1.1 bjh21 ) 350 1.1 bjh21 { 351 1.1 bjh21 int8 shiftCount; 352 1.1 bjh21 353 1.1 bjh21 if ( aSig0 == 0 ) { 354 1.1 bjh21 shiftCount = countLeadingZeros32( aSig1 ) - 11; 355 1.1 bjh21 if ( shiftCount < 0 ) { 356 1.1 bjh21 *zSig0Ptr = aSig1>>( - shiftCount ); 357 1.1 bjh21 *zSig1Ptr = aSig1<<( shiftCount & 31 ); 358 1.1 bjh21 } 359 1.1 bjh21 else { 360 1.1 bjh21 *zSig0Ptr = aSig1<<shiftCount; 361 1.1 bjh21 *zSig1Ptr = 0; 362 1.1 bjh21 } 363 1.1 bjh21 *zExpPtr = - shiftCount - 31; 364 1.1 bjh21 } 365 1.1 bjh21 else { 366 1.1 bjh21 shiftCount = countLeadingZeros32( aSig0 ) - 11; 367 1.1 bjh21 shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr ); 368 1.1 bjh21 *zExpPtr = 1 - shiftCount; 369 1.1 bjh21 } 370 1.1 bjh21 371 1.1 bjh21 } 372 1.1 bjh21 373 1.1 bjh21 /* 374 1.1 bjh21 ------------------------------------------------------------------------------- 375 1.1 bjh21 Packs the sign `zSign', the exponent `zExp', and the significand formed by 376 1.1 bjh21 the concatenation of `zSig0' and `zSig1' into a double-precision floating- 377 1.1 bjh21 point value, returning the result. After being shifted into the proper 378 1.1 bjh21 positions, the three fields `zSign', `zExp', and `zSig0' are simply added 379 1.1 bjh21 together to form the most significant 32 bits of the result. This means 380 1.1 bjh21 that any integer portion of `zSig0' will be added into the exponent. Since 381 1.1 bjh21 a properly normalized significand will have an integer portion equal to 1, 382 1.1 bjh21 the `zExp' input should be 1 less than the desired result exponent whenever 383 1.1 bjh21 `zSig0' and `zSig1' concatenated form a complete, normalized significand. 384 1.1 bjh21 ------------------------------------------------------------------------------- 385 1.1 bjh21 */ 386 1.1 bjh21 INLINE float64 387 1.1 bjh21 packFloat64( flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 ) 388 1.1 bjh21 { 389 1.1 bjh21 390 1.1 bjh21 return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) + 391 1.1 bjh21 ( ( (bits64) zExp )<<52 ) + 392 1.1 bjh21 ( ( (bits64) zSig0 )<<32 ) + zSig1 ); 393 1.1 bjh21 394 1.1 bjh21 395 1.1 bjh21 } 396 1.1 bjh21 397 1.1 bjh21 /* 398 1.1 bjh21 ------------------------------------------------------------------------------- 399 1.1 bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp', 400 1.1 bjh21 and extended significand formed by the concatenation of `zSig0', `zSig1', 401 1.1 bjh21 and `zSig2', and returns the proper double-precision floating-point value 402 1.1 bjh21 corresponding to the abstract input. Ordinarily, the abstract value is 403 1.1 bjh21 simply rounded and packed into the double-precision format, with the inexact 404 1.1 bjh21 exception raised if the abstract input cannot be represented exactly. 405 1.1 bjh21 However, if the abstract value is too large, the overflow and inexact 406 1.1 bjh21 exceptions are raised and an infinity or maximal finite value is returned. 407 1.1 bjh21 If the abstract value is too small, the input value is rounded to a 408 1.1 bjh21 subnormal number, and the underflow and inexact exceptions are raised if the 409 1.1 bjh21 abstract input cannot be represented exactly as a subnormal double-precision 410 1.1 bjh21 floating-point number. 411 1.1 bjh21 The input significand must be normalized or smaller. If the input 412 1.1 bjh21 significand is not normalized, `zExp' must be 0; in that case, the result 413 1.1 bjh21 returned is a subnormal number, and it must not require rounding. In the 414 1.1 bjh21 usual case that the input significand is normalized, `zExp' must be 1 less 415 1.1 bjh21 than the ``true'' floating-point exponent. The handling of underflow and 416 1.1 bjh21 overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 417 1.1 bjh21 ------------------------------------------------------------------------------- 418 1.1 bjh21 */ 419 1.1 bjh21 static float64 420 1.1 bjh21 roundAndPackFloat64( 421 1.1 bjh21 flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 ) 422 1.1 bjh21 { 423 1.1 bjh21 int8 roundingMode; 424 1.1 bjh21 flag roundNearestEven, increment, isTiny; 425 1.1 bjh21 426 1.1 bjh21 roundingMode = float_rounding_mode; 427 1.1 bjh21 roundNearestEven = ( roundingMode == float_round_nearest_even ); 428 1.1 bjh21 increment = ( (sbits32) zSig2 < 0 ); 429 1.1 bjh21 if ( ! roundNearestEven ) { 430 1.1 bjh21 if ( roundingMode == float_round_to_zero ) { 431 1.1 bjh21 increment = 0; 432 1.1 bjh21 } 433 1.1 bjh21 else { 434 1.1 bjh21 if ( zSign ) { 435 1.1 bjh21 increment = ( roundingMode == float_round_down ) && zSig2; 436 1.1 bjh21 } 437 1.1 bjh21 else { 438 1.1 bjh21 increment = ( roundingMode == float_round_up ) && zSig2; 439 1.1 bjh21 } 440 1.1 bjh21 } 441 1.1 bjh21 } 442 1.1 bjh21 if ( 0x7FD <= (bits16) zExp ) { 443 1.1 bjh21 if ( ( 0x7FD < zExp ) 444 1.1 bjh21 || ( ( zExp == 0x7FD ) 445 1.1 bjh21 && eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 ) 446 1.1 bjh21 && increment 447 1.1 bjh21 ) 448 1.1 bjh21 ) { 449 1.1 bjh21 float_raise( float_flag_overflow | float_flag_inexact ); 450 1.1 bjh21 if ( ( roundingMode == float_round_to_zero ) 451 1.1 bjh21 || ( zSign && ( roundingMode == float_round_up ) ) 452 1.1 bjh21 || ( ! zSign && ( roundingMode == float_round_down ) ) 453 1.1 bjh21 ) { 454 1.1 bjh21 return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF ); 455 1.1 bjh21 } 456 1.1 bjh21 return packFloat64( zSign, 0x7FF, 0, 0 ); 457 1.1 bjh21 } 458 1.1 bjh21 if ( zExp < 0 ) { 459 1.1 bjh21 isTiny = 460 1.1 bjh21 ( float_detect_tininess == float_tininess_before_rounding ) 461 1.1 bjh21 || ( zExp < -1 ) 462 1.1 bjh21 || ! increment 463 1.1 bjh21 || lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF ); 464 1.1 bjh21 shift64ExtraRightJamming( 465 1.1 bjh21 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); 466 1.1 bjh21 zExp = 0; 467 1.1 bjh21 if ( isTiny && zSig2 ) float_raise( float_flag_underflow ); 468 1.1 bjh21 if ( roundNearestEven ) { 469 1.1 bjh21 increment = ( (sbits32) zSig2 < 0 ); 470 1.1 bjh21 } 471 1.1 bjh21 else { 472 1.1 bjh21 if ( zSign ) { 473 1.1 bjh21 increment = ( roundingMode == float_round_down ) && zSig2; 474 1.1 bjh21 } 475 1.1 bjh21 else { 476 1.1 bjh21 increment = ( roundingMode == float_round_up ) && zSig2; 477 1.1 bjh21 } 478 1.1 bjh21 } 479 1.1 bjh21 } 480 1.1 bjh21 } 481 1.3 matt if ( zSig2 ) set_float_exception_inexact_flag(); 482 1.1 bjh21 if ( increment ) { 483 1.1 bjh21 add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); 484 1.1 bjh21 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven ); 485 1.1 bjh21 } 486 1.1 bjh21 else { 487 1.1 bjh21 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; 488 1.1 bjh21 } 489 1.1 bjh21 return packFloat64( zSign, zExp, zSig0, zSig1 ); 490 1.1 bjh21 491 1.1 bjh21 } 492 1.1 bjh21 493 1.1 bjh21 /* 494 1.1 bjh21 ------------------------------------------------------------------------------- 495 1.1 bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp', 496 1.1 bjh21 and significand formed by the concatenation of `zSig0' and `zSig1', and 497 1.1 bjh21 returns the proper double-precision floating-point value corresponding 498 1.1 bjh21 to the abstract input. This routine is just like `roundAndPackFloat64' 499 1.1 bjh21 except that the input significand has fewer bits and does not have to be 500 1.1 bjh21 normalized. In all cases, `zExp' must be 1 less than the ``true'' floating- 501 1.1 bjh21 point exponent. 502 1.1 bjh21 ------------------------------------------------------------------------------- 503 1.1 bjh21 */ 504 1.1 bjh21 static float64 505 1.1 bjh21 normalizeRoundAndPackFloat64( 506 1.1 bjh21 flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 ) 507 1.1 bjh21 { 508 1.1 bjh21 int8 shiftCount; 509 1.1 bjh21 bits32 zSig2; 510 1.1 bjh21 511 1.1 bjh21 if ( zSig0 == 0 ) { 512 1.1 bjh21 zSig0 = zSig1; 513 1.1 bjh21 zSig1 = 0; 514 1.1 bjh21 zExp -= 32; 515 1.1 bjh21 } 516 1.1 bjh21 shiftCount = countLeadingZeros32( zSig0 ) - 11; 517 1.1 bjh21 if ( 0 <= shiftCount ) { 518 1.1 bjh21 zSig2 = 0; 519 1.1 bjh21 shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); 520 1.1 bjh21 } 521 1.1 bjh21 else { 522 1.1 bjh21 shift64ExtraRightJamming( 523 1.1 bjh21 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 ); 524 1.1 bjh21 } 525 1.1 bjh21 zExp -= shiftCount; 526 1.1 bjh21 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); 527 1.1 bjh21 528 1.1 bjh21 } 529 1.1 bjh21 530 1.1 bjh21 /* 531 1.1 bjh21 ------------------------------------------------------------------------------- 532 1.1 bjh21 Returns the result of converting the 32-bit two's complement integer `a' to 533 1.1 bjh21 the single-precision floating-point format. The conversion is performed 534 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 535 1.1 bjh21 ------------------------------------------------------------------------------- 536 1.1 bjh21 */ 537 1.1 bjh21 float32 int32_to_float32( int32 a ) 538 1.1 bjh21 { 539 1.1 bjh21 flag zSign; 540 1.1 bjh21 541 1.1 bjh21 if ( a == 0 ) return 0; 542 1.1 bjh21 if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); 543 1.1 bjh21 zSign = ( a < 0 ); 544 1.2 christos return normalizeRoundAndPackFloat32(zSign, 0x9C, (uint32)(zSign ? - a : a)); 545 1.1 bjh21 546 1.1 bjh21 } 547 1.1 bjh21 548 1.1 bjh21 /* 549 1.1 bjh21 ------------------------------------------------------------------------------- 550 1.1 bjh21 Returns the result of converting the 32-bit two's complement integer `a' to 551 1.1 bjh21 the double-precision floating-point format. The conversion is performed 552 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 553 1.1 bjh21 ------------------------------------------------------------------------------- 554 1.1 bjh21 */ 555 1.1 bjh21 float64 int32_to_float64( int32 a ) 556 1.1 bjh21 { 557 1.1 bjh21 flag zSign; 558 1.1 bjh21 bits32 absA; 559 1.1 bjh21 int8 shiftCount; 560 1.1 bjh21 bits32 zSig0, zSig1; 561 1.1 bjh21 562 1.1 bjh21 if ( a == 0 ) return packFloat64( 0, 0, 0, 0 ); 563 1.1 bjh21 zSign = ( a < 0 ); 564 1.1 bjh21 absA = zSign ? - a : a; 565 1.1 bjh21 shiftCount = countLeadingZeros32( absA ) - 11; 566 1.1 bjh21 if ( 0 <= shiftCount ) { 567 1.1 bjh21 zSig0 = absA<<shiftCount; 568 1.1 bjh21 zSig1 = 0; 569 1.1 bjh21 } 570 1.1 bjh21 else { 571 1.1 bjh21 shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 ); 572 1.1 bjh21 } 573 1.1 bjh21 return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 ); 574 1.1 bjh21 575 1.1 bjh21 } 576 1.1 bjh21 577 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC 578 1.1 bjh21 /* 579 1.1 bjh21 ------------------------------------------------------------------------------- 580 1.1 bjh21 Returns the result of converting the single-precision floating-point value 581 1.1 bjh21 `a' to the 32-bit two's complement integer format. The conversion is 582 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 583 1.1 bjh21 Arithmetic---which means in particular that the conversion is rounded 584 1.1 bjh21 according to the current rounding mode. If `a' is a NaN, the largest 585 1.1 bjh21 positive integer is returned. Otherwise, if the conversion overflows, the 586 1.1 bjh21 largest integer with the same sign as `a' is returned. 587 1.1 bjh21 ------------------------------------------------------------------------------- 588 1.1 bjh21 */ 589 1.1 bjh21 int32 float32_to_int32( float32 a ) 590 1.1 bjh21 { 591 1.1 bjh21 flag aSign; 592 1.1 bjh21 int16 aExp, shiftCount; 593 1.1 bjh21 bits32 aSig, aSigExtra; 594 1.1 bjh21 int32 z; 595 1.1 bjh21 int8 roundingMode; 596 1.1 bjh21 597 1.1 bjh21 aSig = extractFloat32Frac( a ); 598 1.1 bjh21 aExp = extractFloat32Exp( a ); 599 1.1 bjh21 aSign = extractFloat32Sign( a ); 600 1.1 bjh21 shiftCount = aExp - 0x96; 601 1.1 bjh21 if ( 0 <= shiftCount ) { 602 1.1 bjh21 if ( 0x9E <= aExp ) { 603 1.1 bjh21 if ( a != 0xCF000000 ) { 604 1.1 bjh21 float_raise( float_flag_invalid ); 605 1.1 bjh21 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { 606 1.1 bjh21 return 0x7FFFFFFF; 607 1.1 bjh21 } 608 1.1 bjh21 } 609 1.1 bjh21 return (sbits32) 0x80000000; 610 1.1 bjh21 } 611 1.1 bjh21 z = ( aSig | 0x00800000 )<<shiftCount; 612 1.1 bjh21 if ( aSign ) z = - z; 613 1.1 bjh21 } 614 1.1 bjh21 else { 615 1.1 bjh21 if ( aExp < 0x7E ) { 616 1.1 bjh21 aSigExtra = aExp | aSig; 617 1.1 bjh21 z = 0; 618 1.1 bjh21 } 619 1.1 bjh21 else { 620 1.1 bjh21 aSig |= 0x00800000; 621 1.1 bjh21 aSigExtra = aSig<<( shiftCount & 31 ); 622 1.1 bjh21 z = aSig>>( - shiftCount ); 623 1.1 bjh21 } 624 1.3 matt if ( aSigExtra ) set_float_exception_inexact_flag(); 625 1.1 bjh21 roundingMode = float_rounding_mode; 626 1.1 bjh21 if ( roundingMode == float_round_nearest_even ) { 627 1.1 bjh21 if ( (sbits32) aSigExtra < 0 ) { 628 1.1 bjh21 ++z; 629 1.1 bjh21 if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1; 630 1.1 bjh21 } 631 1.1 bjh21 if ( aSign ) z = - z; 632 1.1 bjh21 } 633 1.1 bjh21 else { 634 1.1 bjh21 aSigExtra = ( aSigExtra != 0 ); 635 1.1 bjh21 if ( aSign ) { 636 1.1 bjh21 z += ( roundingMode == float_round_down ) & aSigExtra; 637 1.1 bjh21 z = - z; 638 1.1 bjh21 } 639 1.1 bjh21 else { 640 1.1 bjh21 z += ( roundingMode == float_round_up ) & aSigExtra; 641 1.1 bjh21 } 642 1.1 bjh21 } 643 1.1 bjh21 } 644 1.1 bjh21 return z; 645 1.1 bjh21 646 1.1 bjh21 } 647 1.1 bjh21 #endif 648 1.1 bjh21 649 1.1 bjh21 /* 650 1.1 bjh21 ------------------------------------------------------------------------------- 651 1.1 bjh21 Returns the result of converting the single-precision floating-point value 652 1.1 bjh21 `a' to the 32-bit two's complement integer format. The conversion is 653 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 654 1.1 bjh21 Arithmetic, except that the conversion is always rounded toward zero. 655 1.1 bjh21 If `a' is a NaN, the largest positive integer is returned. Otherwise, if 656 1.1 bjh21 the conversion overflows, the largest integer with the same sign as `a' is 657 1.1 bjh21 returned. 658 1.1 bjh21 ------------------------------------------------------------------------------- 659 1.1 bjh21 */ 660 1.1 bjh21 int32 float32_to_int32_round_to_zero( float32 a ) 661 1.1 bjh21 { 662 1.1 bjh21 flag aSign; 663 1.1 bjh21 int16 aExp, shiftCount; 664 1.1 bjh21 bits32 aSig; 665 1.1 bjh21 int32 z; 666 1.1 bjh21 667 1.1 bjh21 aSig = extractFloat32Frac( a ); 668 1.1 bjh21 aExp = extractFloat32Exp( a ); 669 1.1 bjh21 aSign = extractFloat32Sign( a ); 670 1.1 bjh21 shiftCount = aExp - 0x9E; 671 1.1 bjh21 if ( 0 <= shiftCount ) { 672 1.1 bjh21 if ( a != 0xCF000000 ) { 673 1.1 bjh21 float_raise( float_flag_invalid ); 674 1.1 bjh21 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; 675 1.1 bjh21 } 676 1.1 bjh21 return (sbits32) 0x80000000; 677 1.1 bjh21 } 678 1.1 bjh21 else if ( aExp <= 0x7E ) { 679 1.3 matt if ( aExp | aSig ) set_float_exception_inexact_flag(); 680 1.1 bjh21 return 0; 681 1.1 bjh21 } 682 1.1 bjh21 aSig = ( aSig | 0x00800000 )<<8; 683 1.1 bjh21 z = aSig>>( - shiftCount ); 684 1.1 bjh21 if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { 685 1.3 matt set_float_exception_inexact_flag(); 686 1.1 bjh21 } 687 1.1 bjh21 if ( aSign ) z = - z; 688 1.1 bjh21 return z; 689 1.1 bjh21 690 1.1 bjh21 } 691 1.1 bjh21 692 1.1 bjh21 /* 693 1.1 bjh21 ------------------------------------------------------------------------------- 694 1.1 bjh21 Returns the result of converting the single-precision floating-point value 695 1.1 bjh21 `a' to the double-precision floating-point format. The conversion is 696 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 697 1.1 bjh21 Arithmetic. 698 1.1 bjh21 ------------------------------------------------------------------------------- 699 1.1 bjh21 */ 700 1.1 bjh21 float64 float32_to_float64( float32 a ) 701 1.1 bjh21 { 702 1.1 bjh21 flag aSign; 703 1.1 bjh21 int16 aExp; 704 1.1 bjh21 bits32 aSig, zSig0, zSig1; 705 1.1 bjh21 706 1.1 bjh21 aSig = extractFloat32Frac( a ); 707 1.1 bjh21 aExp = extractFloat32Exp( a ); 708 1.1 bjh21 aSign = extractFloat32Sign( a ); 709 1.1 bjh21 if ( aExp == 0xFF ) { 710 1.1 bjh21 if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) ); 711 1.1 bjh21 return packFloat64( aSign, 0x7FF, 0, 0 ); 712 1.1 bjh21 } 713 1.1 bjh21 if ( aExp == 0 ) { 714 1.1 bjh21 if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 ); 715 1.1 bjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 716 1.1 bjh21 --aExp; 717 1.1 bjh21 } 718 1.1 bjh21 shift64Right( aSig, 0, 3, &zSig0, &zSig1 ); 719 1.1 bjh21 return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 ); 720 1.1 bjh21 721 1.1 bjh21 } 722 1.1 bjh21 723 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC 724 1.1 bjh21 /* 725 1.1 bjh21 ------------------------------------------------------------------------------- 726 1.1 bjh21 Rounds the single-precision floating-point value `a' to an integer, 727 1.1 bjh21 and returns the result as a single-precision floating-point value. The 728 1.1 bjh21 operation is performed according to the IEC/IEEE Standard for Binary 729 1.1 bjh21 Floating-Point Arithmetic. 730 1.1 bjh21 ------------------------------------------------------------------------------- 731 1.1 bjh21 */ 732 1.1 bjh21 float32 float32_round_to_int( float32 a ) 733 1.1 bjh21 { 734 1.1 bjh21 flag aSign; 735 1.1 bjh21 int16 aExp; 736 1.1 bjh21 bits32 lastBitMask, roundBitsMask; 737 1.1 bjh21 int8 roundingMode; 738 1.1 bjh21 float32 z; 739 1.1 bjh21 740 1.1 bjh21 aExp = extractFloat32Exp( a ); 741 1.1 bjh21 if ( 0x96 <= aExp ) { 742 1.1 bjh21 if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) { 743 1.1 bjh21 return propagateFloat32NaN( a, a ); 744 1.1 bjh21 } 745 1.1 bjh21 return a; 746 1.1 bjh21 } 747 1.1 bjh21 if ( aExp <= 0x7E ) { 748 1.1 bjh21 if ( (bits32) ( a<<1 ) == 0 ) return a; 749 1.3 matt set_float_exception_inexact_flag(); 750 1.1 bjh21 aSign = extractFloat32Sign( a ); 751 1.1 bjh21 switch ( float_rounding_mode ) { 752 1.1 bjh21 case float_round_nearest_even: 753 1.1 bjh21 if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { 754 1.1 bjh21 return packFloat32( aSign, 0x7F, 0 ); 755 1.1 bjh21 } 756 1.1 bjh21 break; 757 1.1 bjh21 case float_round_to_zero: 758 1.1 bjh21 break; 759 1.1 bjh21 case float_round_down: 760 1.1 bjh21 return aSign ? 0xBF800000 : 0; 761 1.1 bjh21 case float_round_up: 762 1.1 bjh21 return aSign ? 0x80000000 : 0x3F800000; 763 1.1 bjh21 } 764 1.1 bjh21 return packFloat32( aSign, 0, 0 ); 765 1.1 bjh21 } 766 1.1 bjh21 lastBitMask = 1; 767 1.1 bjh21 lastBitMask <<= 0x96 - aExp; 768 1.1 bjh21 roundBitsMask = lastBitMask - 1; 769 1.1 bjh21 z = a; 770 1.1 bjh21 roundingMode = float_rounding_mode; 771 1.1 bjh21 if ( roundingMode == float_round_nearest_even ) { 772 1.1 bjh21 z += lastBitMask>>1; 773 1.1 bjh21 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; 774 1.1 bjh21 } 775 1.1 bjh21 else if ( roundingMode != float_round_to_zero ) { 776 1.1 bjh21 if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) { 777 1.1 bjh21 z += roundBitsMask; 778 1.1 bjh21 } 779 1.1 bjh21 } 780 1.1 bjh21 z &= ~ roundBitsMask; 781 1.3 matt if ( z != a ) set_float_exception_inexact_flag(); 782 1.1 bjh21 return z; 783 1.1 bjh21 784 1.1 bjh21 } 785 1.1 bjh21 #endif 786 1.1 bjh21 787 1.1 bjh21 /* 788 1.1 bjh21 ------------------------------------------------------------------------------- 789 1.1 bjh21 Returns the result of adding the absolute values of the single-precision 790 1.1 bjh21 floating-point values `a' and `b'. If `zSign' is 1, the sum is negated 791 1.1 bjh21 before being returned. `zSign' is ignored if the result is a NaN. 792 1.1 bjh21 The addition is performed according to the IEC/IEEE Standard for Binary 793 1.1 bjh21 Floating-Point Arithmetic. 794 1.1 bjh21 ------------------------------------------------------------------------------- 795 1.1 bjh21 */ 796 1.1 bjh21 static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ) 797 1.1 bjh21 { 798 1.1 bjh21 int16 aExp, bExp, zExp; 799 1.1 bjh21 bits32 aSig, bSig, zSig; 800 1.1 bjh21 int16 expDiff; 801 1.1 bjh21 802 1.1 bjh21 aSig = extractFloat32Frac( a ); 803 1.1 bjh21 aExp = extractFloat32Exp( a ); 804 1.1 bjh21 bSig = extractFloat32Frac( b ); 805 1.1 bjh21 bExp = extractFloat32Exp( b ); 806 1.1 bjh21 expDiff = aExp - bExp; 807 1.1 bjh21 aSig <<= 6; 808 1.1 bjh21 bSig <<= 6; 809 1.1 bjh21 if ( 0 < expDiff ) { 810 1.1 bjh21 if ( aExp == 0xFF ) { 811 1.1 bjh21 if ( aSig ) return propagateFloat32NaN( a, b ); 812 1.1 bjh21 return a; 813 1.1 bjh21 } 814 1.1 bjh21 if ( bExp == 0 ) { 815 1.1 bjh21 --expDiff; 816 1.1 bjh21 } 817 1.1 bjh21 else { 818 1.1 bjh21 bSig |= 0x20000000; 819 1.1 bjh21 } 820 1.1 bjh21 shift32RightJamming( bSig, expDiff, &bSig ); 821 1.1 bjh21 zExp = aExp; 822 1.1 bjh21 } 823 1.1 bjh21 else if ( expDiff < 0 ) { 824 1.1 bjh21 if ( bExp == 0xFF ) { 825 1.1 bjh21 if ( bSig ) return propagateFloat32NaN( a, b ); 826 1.1 bjh21 return packFloat32( zSign, 0xFF, 0 ); 827 1.1 bjh21 } 828 1.1 bjh21 if ( aExp == 0 ) { 829 1.1 bjh21 ++expDiff; 830 1.1 bjh21 } 831 1.1 bjh21 else { 832 1.1 bjh21 aSig |= 0x20000000; 833 1.1 bjh21 } 834 1.1 bjh21 shift32RightJamming( aSig, - expDiff, &aSig ); 835 1.1 bjh21 zExp = bExp; 836 1.1 bjh21 } 837 1.1 bjh21 else { 838 1.1 bjh21 if ( aExp == 0xFF ) { 839 1.1 bjh21 if ( aSig | bSig ) return propagateFloat32NaN( a, b ); 840 1.1 bjh21 return a; 841 1.1 bjh21 } 842 1.1 bjh21 if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 ); 843 1.1 bjh21 zSig = 0x40000000 + aSig + bSig; 844 1.1 bjh21 zExp = aExp; 845 1.1 bjh21 goto roundAndPack; 846 1.1 bjh21 } 847 1.1 bjh21 aSig |= 0x20000000; 848 1.1 bjh21 zSig = ( aSig + bSig )<<1; 849 1.1 bjh21 --zExp; 850 1.1 bjh21 if ( (sbits32) zSig < 0 ) { 851 1.1 bjh21 zSig = aSig + bSig; 852 1.1 bjh21 ++zExp; 853 1.1 bjh21 } 854 1.1 bjh21 roundAndPack: 855 1.1 bjh21 return roundAndPackFloat32( zSign, zExp, zSig ); 856 1.1 bjh21 857 1.1 bjh21 } 858 1.1 bjh21 859 1.1 bjh21 /* 860 1.1 bjh21 ------------------------------------------------------------------------------- 861 1.1 bjh21 Returns the result of subtracting the absolute values of the single- 862 1.1 bjh21 precision floating-point values `a' and `b'. If `zSign' is 1, the 863 1.1 bjh21 difference is negated before being returned. `zSign' is ignored if the 864 1.1 bjh21 result is a NaN. The subtraction is performed according to the IEC/IEEE 865 1.1 bjh21 Standard for Binary Floating-Point Arithmetic. 866 1.1 bjh21 ------------------------------------------------------------------------------- 867 1.1 bjh21 */ 868 1.1 bjh21 static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) 869 1.1 bjh21 { 870 1.1 bjh21 int16 aExp, bExp, zExp; 871 1.1 bjh21 bits32 aSig, bSig, zSig; 872 1.1 bjh21 int16 expDiff; 873 1.1 bjh21 874 1.1 bjh21 aSig = extractFloat32Frac( a ); 875 1.1 bjh21 aExp = extractFloat32Exp( a ); 876 1.1 bjh21 bSig = extractFloat32Frac( b ); 877 1.1 bjh21 bExp = extractFloat32Exp( b ); 878 1.1 bjh21 expDiff = aExp - bExp; 879 1.1 bjh21 aSig <<= 7; 880 1.1 bjh21 bSig <<= 7; 881 1.1 bjh21 if ( 0 < expDiff ) goto aExpBigger; 882 1.1 bjh21 if ( expDiff < 0 ) goto bExpBigger; 883 1.1 bjh21 if ( aExp == 0xFF ) { 884 1.1 bjh21 if ( aSig | bSig ) return propagateFloat32NaN( a, b ); 885 1.1 bjh21 float_raise( float_flag_invalid ); 886 1.1 bjh21 return float32_default_nan; 887 1.1 bjh21 } 888 1.1 bjh21 if ( aExp == 0 ) { 889 1.1 bjh21 aExp = 1; 890 1.1 bjh21 bExp = 1; 891 1.1 bjh21 } 892 1.1 bjh21 if ( bSig < aSig ) goto aBigger; 893 1.1 bjh21 if ( aSig < bSig ) goto bBigger; 894 1.1 bjh21 return packFloat32( float_rounding_mode == float_round_down, 0, 0 ); 895 1.1 bjh21 bExpBigger: 896 1.1 bjh21 if ( bExp == 0xFF ) { 897 1.1 bjh21 if ( bSig ) return propagateFloat32NaN( a, b ); 898 1.1 bjh21 return packFloat32( zSign ^ 1, 0xFF, 0 ); 899 1.1 bjh21 } 900 1.1 bjh21 if ( aExp == 0 ) { 901 1.1 bjh21 ++expDiff; 902 1.1 bjh21 } 903 1.1 bjh21 else { 904 1.1 bjh21 aSig |= 0x40000000; 905 1.1 bjh21 } 906 1.1 bjh21 shift32RightJamming( aSig, - expDiff, &aSig ); 907 1.1 bjh21 bSig |= 0x40000000; 908 1.1 bjh21 bBigger: 909 1.1 bjh21 zSig = bSig - aSig; 910 1.1 bjh21 zExp = bExp; 911 1.1 bjh21 zSign ^= 1; 912 1.1 bjh21 goto normalizeRoundAndPack; 913 1.1 bjh21 aExpBigger: 914 1.1 bjh21 if ( aExp == 0xFF ) { 915 1.1 bjh21 if ( aSig ) return propagateFloat32NaN( a, b ); 916 1.1 bjh21 return a; 917 1.1 bjh21 } 918 1.1 bjh21 if ( bExp == 0 ) { 919 1.1 bjh21 --expDiff; 920 1.1 bjh21 } 921 1.1 bjh21 else { 922 1.1 bjh21 bSig |= 0x40000000; 923 1.1 bjh21 } 924 1.1 bjh21 shift32RightJamming( bSig, expDiff, &bSig ); 925 1.1 bjh21 aSig |= 0x40000000; 926 1.1 bjh21 aBigger: 927 1.1 bjh21 zSig = aSig - bSig; 928 1.1 bjh21 zExp = aExp; 929 1.1 bjh21 normalizeRoundAndPack: 930 1.1 bjh21 --zExp; 931 1.1 bjh21 return normalizeRoundAndPackFloat32( zSign, zExp, zSig ); 932 1.1 bjh21 933 1.1 bjh21 } 934 1.1 bjh21 935 1.1 bjh21 /* 936 1.1 bjh21 ------------------------------------------------------------------------------- 937 1.1 bjh21 Returns the result of adding the single-precision floating-point values `a' 938 1.1 bjh21 and `b'. The operation is performed according to the IEC/IEEE Standard for 939 1.1 bjh21 Binary Floating-Point Arithmetic. 940 1.1 bjh21 ------------------------------------------------------------------------------- 941 1.1 bjh21 */ 942 1.1 bjh21 float32 float32_add( float32 a, float32 b ) 943 1.1 bjh21 { 944 1.1 bjh21 flag aSign, bSign; 945 1.1 bjh21 946 1.1 bjh21 aSign = extractFloat32Sign( a ); 947 1.1 bjh21 bSign = extractFloat32Sign( b ); 948 1.1 bjh21 if ( aSign == bSign ) { 949 1.1 bjh21 return addFloat32Sigs( a, b, aSign ); 950 1.1 bjh21 } 951 1.1 bjh21 else { 952 1.1 bjh21 return subFloat32Sigs( a, b, aSign ); 953 1.1 bjh21 } 954 1.1 bjh21 955 1.1 bjh21 } 956 1.1 bjh21 957 1.1 bjh21 /* 958 1.1 bjh21 ------------------------------------------------------------------------------- 959 1.1 bjh21 Returns the result of subtracting the single-precision floating-point values 960 1.1 bjh21 `a' and `b'. The operation is performed according to the IEC/IEEE Standard 961 1.1 bjh21 for Binary Floating-Point Arithmetic. 962 1.1 bjh21 ------------------------------------------------------------------------------- 963 1.1 bjh21 */ 964 1.1 bjh21 float32 float32_sub( float32 a, float32 b ) 965 1.1 bjh21 { 966 1.1 bjh21 flag aSign, bSign; 967 1.1 bjh21 968 1.1 bjh21 aSign = extractFloat32Sign( a ); 969 1.1 bjh21 bSign = extractFloat32Sign( b ); 970 1.1 bjh21 if ( aSign == bSign ) { 971 1.1 bjh21 return subFloat32Sigs( a, b, aSign ); 972 1.1 bjh21 } 973 1.1 bjh21 else { 974 1.1 bjh21 return addFloat32Sigs( a, b, aSign ); 975 1.1 bjh21 } 976 1.1 bjh21 977 1.1 bjh21 } 978 1.1 bjh21 979 1.1 bjh21 /* 980 1.1 bjh21 ------------------------------------------------------------------------------- 981 1.1 bjh21 Returns the result of multiplying the single-precision floating-point values 982 1.1 bjh21 `a' and `b'. The operation is performed according to the IEC/IEEE Standard 983 1.1 bjh21 for Binary Floating-Point Arithmetic. 984 1.1 bjh21 ------------------------------------------------------------------------------- 985 1.1 bjh21 */ 986 1.1 bjh21 float32 float32_mul( float32 a, float32 b ) 987 1.1 bjh21 { 988 1.1 bjh21 flag aSign, bSign, zSign; 989 1.1 bjh21 int16 aExp, bExp, zExp; 990 1.1 bjh21 bits32 aSig, bSig, zSig0, zSig1; 991 1.1 bjh21 992 1.1 bjh21 aSig = extractFloat32Frac( a ); 993 1.1 bjh21 aExp = extractFloat32Exp( a ); 994 1.1 bjh21 aSign = extractFloat32Sign( a ); 995 1.1 bjh21 bSig = extractFloat32Frac( b ); 996 1.1 bjh21 bExp = extractFloat32Exp( b ); 997 1.1 bjh21 bSign = extractFloat32Sign( b ); 998 1.1 bjh21 zSign = aSign ^ bSign; 999 1.1 bjh21 if ( aExp == 0xFF ) { 1000 1.1 bjh21 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { 1001 1.1 bjh21 return propagateFloat32NaN( a, b ); 1002 1.1 bjh21 } 1003 1.1 bjh21 if ( ( bExp | bSig ) == 0 ) { 1004 1.1 bjh21 float_raise( float_flag_invalid ); 1005 1.1 bjh21 return float32_default_nan; 1006 1.1 bjh21 } 1007 1.1 bjh21 return packFloat32( zSign, 0xFF, 0 ); 1008 1.1 bjh21 } 1009 1.1 bjh21 if ( bExp == 0xFF ) { 1010 1.1 bjh21 if ( bSig ) return propagateFloat32NaN( a, b ); 1011 1.1 bjh21 if ( ( aExp | aSig ) == 0 ) { 1012 1.1 bjh21 float_raise( float_flag_invalid ); 1013 1.1 bjh21 return float32_default_nan; 1014 1.1 bjh21 } 1015 1.1 bjh21 return packFloat32( zSign, 0xFF, 0 ); 1016 1.1 bjh21 } 1017 1.1 bjh21 if ( aExp == 0 ) { 1018 1.1 bjh21 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); 1019 1.1 bjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1020 1.1 bjh21 } 1021 1.1 bjh21 if ( bExp == 0 ) { 1022 1.1 bjh21 if ( bSig == 0 ) return packFloat32( zSign, 0, 0 ); 1023 1.1 bjh21 normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 1024 1.1 bjh21 } 1025 1.1 bjh21 zExp = aExp + bExp - 0x7F; 1026 1.1 bjh21 aSig = ( aSig | 0x00800000 )<<7; 1027 1.1 bjh21 bSig = ( bSig | 0x00800000 )<<8; 1028 1.1 bjh21 mul32To64( aSig, bSig, &zSig0, &zSig1 ); 1029 1.1 bjh21 zSig0 |= ( zSig1 != 0 ); 1030 1.1 bjh21 if ( 0 <= (sbits32) ( zSig0<<1 ) ) { 1031 1.1 bjh21 zSig0 <<= 1; 1032 1.1 bjh21 --zExp; 1033 1.1 bjh21 } 1034 1.1 bjh21 return roundAndPackFloat32( zSign, zExp, zSig0 ); 1035 1.1 bjh21 1036 1.1 bjh21 } 1037 1.1 bjh21 1038 1.1 bjh21 /* 1039 1.1 bjh21 ------------------------------------------------------------------------------- 1040 1.1 bjh21 Returns the result of dividing the single-precision floating-point value `a' 1041 1.1 bjh21 by the corresponding value `b'. The operation is performed according to the 1042 1.1 bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1043 1.1 bjh21 ------------------------------------------------------------------------------- 1044 1.1 bjh21 */ 1045 1.1 bjh21 float32 float32_div( float32 a, float32 b ) 1046 1.1 bjh21 { 1047 1.1 bjh21 flag aSign, bSign, zSign; 1048 1.1 bjh21 int16 aExp, bExp, zExp; 1049 1.1 bjh21 bits32 aSig, bSig, zSig, rem0, rem1, term0, term1; 1050 1.1 bjh21 1051 1.1 bjh21 aSig = extractFloat32Frac( a ); 1052 1.1 bjh21 aExp = extractFloat32Exp( a ); 1053 1.1 bjh21 aSign = extractFloat32Sign( a ); 1054 1.1 bjh21 bSig = extractFloat32Frac( b ); 1055 1.1 bjh21 bExp = extractFloat32Exp( b ); 1056 1.1 bjh21 bSign = extractFloat32Sign( b ); 1057 1.1 bjh21 zSign = aSign ^ bSign; 1058 1.1 bjh21 if ( aExp == 0xFF ) { 1059 1.1 bjh21 if ( aSig ) return propagateFloat32NaN( a, b ); 1060 1.1 bjh21 if ( bExp == 0xFF ) { 1061 1.1 bjh21 if ( bSig ) return propagateFloat32NaN( a, b ); 1062 1.1 bjh21 float_raise( float_flag_invalid ); 1063 1.1 bjh21 return float32_default_nan; 1064 1.1 bjh21 } 1065 1.1 bjh21 return packFloat32( zSign, 0xFF, 0 ); 1066 1.1 bjh21 } 1067 1.1 bjh21 if ( bExp == 0xFF ) { 1068 1.1 bjh21 if ( bSig ) return propagateFloat32NaN( a, b ); 1069 1.1 bjh21 return packFloat32( zSign, 0, 0 ); 1070 1.1 bjh21 } 1071 1.1 bjh21 if ( bExp == 0 ) { 1072 1.1 bjh21 if ( bSig == 0 ) { 1073 1.1 bjh21 if ( ( aExp | aSig ) == 0 ) { 1074 1.1 bjh21 float_raise( float_flag_invalid ); 1075 1.1 bjh21 return float32_default_nan; 1076 1.1 bjh21 } 1077 1.1 bjh21 float_raise( float_flag_divbyzero ); 1078 1.1 bjh21 return packFloat32( zSign, 0xFF, 0 ); 1079 1.1 bjh21 } 1080 1.1 bjh21 normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 1081 1.1 bjh21 } 1082 1.1 bjh21 if ( aExp == 0 ) { 1083 1.1 bjh21 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); 1084 1.1 bjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1085 1.1 bjh21 } 1086 1.1 bjh21 zExp = aExp - bExp + 0x7D; 1087 1.1 bjh21 aSig = ( aSig | 0x00800000 )<<7; 1088 1.1 bjh21 bSig = ( bSig | 0x00800000 )<<8; 1089 1.1 bjh21 if ( bSig <= ( aSig + aSig ) ) { 1090 1.1 bjh21 aSig >>= 1; 1091 1.1 bjh21 ++zExp; 1092 1.1 bjh21 } 1093 1.1 bjh21 zSig = estimateDiv64To32( aSig, 0, bSig ); 1094 1.1 bjh21 if ( ( zSig & 0x3F ) <= 2 ) { 1095 1.1 bjh21 mul32To64( bSig, zSig, &term0, &term1 ); 1096 1.1 bjh21 sub64( aSig, 0, term0, term1, &rem0, &rem1 ); 1097 1.1 bjh21 while ( (sbits32) rem0 < 0 ) { 1098 1.1 bjh21 --zSig; 1099 1.1 bjh21 add64( rem0, rem1, 0, bSig, &rem0, &rem1 ); 1100 1.1 bjh21 } 1101 1.1 bjh21 zSig |= ( rem1 != 0 ); 1102 1.1 bjh21 } 1103 1.1 bjh21 return roundAndPackFloat32( zSign, zExp, zSig ); 1104 1.1 bjh21 1105 1.1 bjh21 } 1106 1.1 bjh21 1107 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC 1108 1.1 bjh21 /* 1109 1.1 bjh21 ------------------------------------------------------------------------------- 1110 1.1 bjh21 Returns the remainder of the single-precision floating-point value `a' 1111 1.1 bjh21 with respect to the corresponding value `b'. The operation is performed 1112 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1113 1.1 bjh21 ------------------------------------------------------------------------------- 1114 1.1 bjh21 */ 1115 1.1 bjh21 float32 float32_rem( float32 a, float32 b ) 1116 1.1 bjh21 { 1117 1.1 bjh21 flag aSign, bSign, zSign; 1118 1.1 bjh21 int16 aExp, bExp, expDiff; 1119 1.1 bjh21 bits32 aSig, bSig, q, allZero, alternateASig; 1120 1.1 bjh21 sbits32 sigMean; 1121 1.1 bjh21 1122 1.1 bjh21 aSig = extractFloat32Frac( a ); 1123 1.1 bjh21 aExp = extractFloat32Exp( a ); 1124 1.1 bjh21 aSign = extractFloat32Sign( a ); 1125 1.1 bjh21 bSig = extractFloat32Frac( b ); 1126 1.1 bjh21 bExp = extractFloat32Exp( b ); 1127 1.1 bjh21 bSign = extractFloat32Sign( b ); 1128 1.1 bjh21 if ( aExp == 0xFF ) { 1129 1.1 bjh21 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { 1130 1.1 bjh21 return propagateFloat32NaN( a, b ); 1131 1.1 bjh21 } 1132 1.1 bjh21 float_raise( float_flag_invalid ); 1133 1.1 bjh21 return float32_default_nan; 1134 1.1 bjh21 } 1135 1.1 bjh21 if ( bExp == 0xFF ) { 1136 1.1 bjh21 if ( bSig ) return propagateFloat32NaN( a, b ); 1137 1.1 bjh21 return a; 1138 1.1 bjh21 } 1139 1.1 bjh21 if ( bExp == 0 ) { 1140 1.1 bjh21 if ( bSig == 0 ) { 1141 1.1 bjh21 float_raise( float_flag_invalid ); 1142 1.1 bjh21 return float32_default_nan; 1143 1.1 bjh21 } 1144 1.1 bjh21 normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 1145 1.1 bjh21 } 1146 1.1 bjh21 if ( aExp == 0 ) { 1147 1.1 bjh21 if ( aSig == 0 ) return a; 1148 1.1 bjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1149 1.1 bjh21 } 1150 1.1 bjh21 expDiff = aExp - bExp; 1151 1.1 bjh21 aSig = ( aSig | 0x00800000 )<<8; 1152 1.1 bjh21 bSig = ( bSig | 0x00800000 )<<8; 1153 1.1 bjh21 if ( expDiff < 0 ) { 1154 1.1 bjh21 if ( expDiff < -1 ) return a; 1155 1.1 bjh21 aSig >>= 1; 1156 1.1 bjh21 } 1157 1.1 bjh21 q = ( bSig <= aSig ); 1158 1.1 bjh21 if ( q ) aSig -= bSig; 1159 1.1 bjh21 expDiff -= 32; 1160 1.1 bjh21 while ( 0 < expDiff ) { 1161 1.1 bjh21 q = estimateDiv64To32( aSig, 0, bSig ); 1162 1.1 bjh21 q = ( 2 < q ) ? q - 2 : 0; 1163 1.1 bjh21 aSig = - ( ( bSig>>2 ) * q ); 1164 1.1 bjh21 expDiff -= 30; 1165 1.1 bjh21 } 1166 1.1 bjh21 expDiff += 32; 1167 1.1 bjh21 if ( 0 < expDiff ) { 1168 1.1 bjh21 q = estimateDiv64To32( aSig, 0, bSig ); 1169 1.1 bjh21 q = ( 2 < q ) ? q - 2 : 0; 1170 1.1 bjh21 q >>= 32 - expDiff; 1171 1.1 bjh21 bSig >>= 2; 1172 1.1 bjh21 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; 1173 1.1 bjh21 } 1174 1.1 bjh21 else { 1175 1.1 bjh21 aSig >>= 2; 1176 1.1 bjh21 bSig >>= 2; 1177 1.1 bjh21 } 1178 1.1 bjh21 do { 1179 1.1 bjh21 alternateASig = aSig; 1180 1.1 bjh21 ++q; 1181 1.1 bjh21 aSig -= bSig; 1182 1.1 bjh21 } while ( 0 <= (sbits32) aSig ); 1183 1.1 bjh21 sigMean = aSig + alternateASig; 1184 1.1 bjh21 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { 1185 1.1 bjh21 aSig = alternateASig; 1186 1.1 bjh21 } 1187 1.1 bjh21 zSign = ( (sbits32) aSig < 0 ); 1188 1.1 bjh21 if ( zSign ) aSig = - aSig; 1189 1.1 bjh21 return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig ); 1190 1.1 bjh21 1191 1.1 bjh21 } 1192 1.1 bjh21 #endif 1193 1.1 bjh21 1194 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC 1195 1.1 bjh21 /* 1196 1.1 bjh21 ------------------------------------------------------------------------------- 1197 1.1 bjh21 Returns the square root of the single-precision floating-point value `a'. 1198 1.1 bjh21 The operation is performed according to the IEC/IEEE Standard for Binary 1199 1.1 bjh21 Floating-Point Arithmetic. 1200 1.1 bjh21 ------------------------------------------------------------------------------- 1201 1.1 bjh21 */ 1202 1.1 bjh21 float32 float32_sqrt( float32 a ) 1203 1.1 bjh21 { 1204 1.1 bjh21 flag aSign; 1205 1.1 bjh21 int16 aExp, zExp; 1206 1.1 bjh21 bits32 aSig, zSig, rem0, rem1, term0, term1; 1207 1.1 bjh21 1208 1.1 bjh21 aSig = extractFloat32Frac( a ); 1209 1.1 bjh21 aExp = extractFloat32Exp( a ); 1210 1.1 bjh21 aSign = extractFloat32Sign( a ); 1211 1.1 bjh21 if ( aExp == 0xFF ) { 1212 1.1 bjh21 if ( aSig ) return propagateFloat32NaN( a, 0 ); 1213 1.1 bjh21 if ( ! aSign ) return a; 1214 1.1 bjh21 float_raise( float_flag_invalid ); 1215 1.1 bjh21 return float32_default_nan; 1216 1.1 bjh21 } 1217 1.1 bjh21 if ( aSign ) { 1218 1.1 bjh21 if ( ( aExp | aSig ) == 0 ) return a; 1219 1.1 bjh21 float_raise( float_flag_invalid ); 1220 1.1 bjh21 return float32_default_nan; 1221 1.1 bjh21 } 1222 1.1 bjh21 if ( aExp == 0 ) { 1223 1.1 bjh21 if ( aSig == 0 ) return 0; 1224 1.1 bjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1225 1.1 bjh21 } 1226 1.1 bjh21 zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E; 1227 1.1 bjh21 aSig = ( aSig | 0x00800000 )<<8; 1228 1.1 bjh21 zSig = estimateSqrt32( aExp, aSig ) + 2; 1229 1.1 bjh21 if ( ( zSig & 0x7F ) <= 5 ) { 1230 1.1 bjh21 if ( zSig < 2 ) { 1231 1.1 bjh21 zSig = 0x7FFFFFFF; 1232 1.1 bjh21 goto roundAndPack; 1233 1.1 bjh21 } 1234 1.1 bjh21 else { 1235 1.1 bjh21 aSig >>= aExp & 1; 1236 1.1 bjh21 mul32To64( zSig, zSig, &term0, &term1 ); 1237 1.1 bjh21 sub64( aSig, 0, term0, term1, &rem0, &rem1 ); 1238 1.1 bjh21 while ( (sbits32) rem0 < 0 ) { 1239 1.1 bjh21 --zSig; 1240 1.1 bjh21 shortShift64Left( 0, zSig, 1, &term0, &term1 ); 1241 1.1 bjh21 term1 |= 1; 1242 1.1 bjh21 add64( rem0, rem1, term0, term1, &rem0, &rem1 ); 1243 1.1 bjh21 } 1244 1.1 bjh21 zSig |= ( ( rem0 | rem1 ) != 0 ); 1245 1.1 bjh21 } 1246 1.1 bjh21 } 1247 1.1 bjh21 shift32RightJamming( zSig, 1, &zSig ); 1248 1.1 bjh21 roundAndPack: 1249 1.1 bjh21 return roundAndPackFloat32( 0, zExp, zSig ); 1250 1.1 bjh21 1251 1.1 bjh21 } 1252 1.1 bjh21 #endif 1253 1.1 bjh21 1254 1.1 bjh21 /* 1255 1.1 bjh21 ------------------------------------------------------------------------------- 1256 1.1 bjh21 Returns 1 if the single-precision floating-point value `a' is equal to 1257 1.1 bjh21 the corresponding value `b', and 0 otherwise. The comparison is performed 1258 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1259 1.1 bjh21 ------------------------------------------------------------------------------- 1260 1.1 bjh21 */ 1261 1.1 bjh21 flag float32_eq( float32 a, float32 b ) 1262 1.1 bjh21 { 1263 1.1 bjh21 1264 1.1 bjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1265 1.1 bjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1266 1.1 bjh21 ) { 1267 1.1 bjh21 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { 1268 1.1 bjh21 float_raise( float_flag_invalid ); 1269 1.1 bjh21 } 1270 1.1 bjh21 return 0; 1271 1.1 bjh21 } 1272 1.1 bjh21 return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); 1273 1.1 bjh21 1274 1.1 bjh21 } 1275 1.1 bjh21 1276 1.1 bjh21 /* 1277 1.1 bjh21 ------------------------------------------------------------------------------- 1278 1.1 bjh21 Returns 1 if the single-precision floating-point value `a' is less than 1279 1.1 bjh21 or equal to the corresponding value `b', and 0 otherwise. The comparison 1280 1.1 bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point 1281 1.1 bjh21 Arithmetic. 1282 1.1 bjh21 ------------------------------------------------------------------------------- 1283 1.1 bjh21 */ 1284 1.1 bjh21 flag float32_le( float32 a, float32 b ) 1285 1.1 bjh21 { 1286 1.1 bjh21 flag aSign, bSign; 1287 1.1 bjh21 1288 1.1 bjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1289 1.1 bjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1290 1.1 bjh21 ) { 1291 1.1 bjh21 float_raise( float_flag_invalid ); 1292 1.1 bjh21 return 0; 1293 1.1 bjh21 } 1294 1.1 bjh21 aSign = extractFloat32Sign( a ); 1295 1.1 bjh21 bSign = extractFloat32Sign( b ); 1296 1.1 bjh21 if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); 1297 1.1 bjh21 return ( a == b ) || ( aSign ^ ( a < b ) ); 1298 1.1 bjh21 1299 1.1 bjh21 } 1300 1.1 bjh21 1301 1.1 bjh21 /* 1302 1.1 bjh21 ------------------------------------------------------------------------------- 1303 1.1 bjh21 Returns 1 if the single-precision floating-point value `a' is less than 1304 1.1 bjh21 the corresponding value `b', and 0 otherwise. The comparison is performed 1305 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1306 1.1 bjh21 ------------------------------------------------------------------------------- 1307 1.1 bjh21 */ 1308 1.1 bjh21 flag float32_lt( float32 a, float32 b ) 1309 1.1 bjh21 { 1310 1.1 bjh21 flag aSign, bSign; 1311 1.1 bjh21 1312 1.1 bjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1313 1.1 bjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1314 1.1 bjh21 ) { 1315 1.1 bjh21 float_raise( float_flag_invalid ); 1316 1.1 bjh21 return 0; 1317 1.1 bjh21 } 1318 1.1 bjh21 aSign = extractFloat32Sign( a ); 1319 1.1 bjh21 bSign = extractFloat32Sign( b ); 1320 1.1 bjh21 if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); 1321 1.1 bjh21 return ( a != b ) && ( aSign ^ ( a < b ) ); 1322 1.1 bjh21 1323 1.1 bjh21 } 1324 1.1 bjh21 1325 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 1326 1.1 bjh21 /* 1327 1.1 bjh21 ------------------------------------------------------------------------------- 1328 1.1 bjh21 Returns 1 if the single-precision floating-point value `a' is equal to 1329 1.1 bjh21 the corresponding value `b', and 0 otherwise. The invalid exception is 1330 1.1 bjh21 raised if either operand is a NaN. Otherwise, the comparison is performed 1331 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1332 1.1 bjh21 ------------------------------------------------------------------------------- 1333 1.1 bjh21 */ 1334 1.1 bjh21 flag float32_eq_signaling( float32 a, float32 b ) 1335 1.1 bjh21 { 1336 1.1 bjh21 1337 1.1 bjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1338 1.1 bjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1339 1.1 bjh21 ) { 1340 1.1 bjh21 float_raise( float_flag_invalid ); 1341 1.1 bjh21 return 0; 1342 1.1 bjh21 } 1343 1.1 bjh21 return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); 1344 1.1 bjh21 1345 1.1 bjh21 } 1346 1.1 bjh21 1347 1.1 bjh21 /* 1348 1.1 bjh21 ------------------------------------------------------------------------------- 1349 1.1 bjh21 Returns 1 if the single-precision floating-point value `a' is less than or 1350 1.1 bjh21 equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not 1351 1.1 bjh21 cause an exception. Otherwise, the comparison is performed according to the 1352 1.1 bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1353 1.1 bjh21 ------------------------------------------------------------------------------- 1354 1.1 bjh21 */ 1355 1.1 bjh21 flag float32_le_quiet( float32 a, float32 b ) 1356 1.1 bjh21 { 1357 1.1 bjh21 flag aSign, bSign; 1358 1.1 bjh21 int16 aExp, bExp; 1359 1.1 bjh21 1360 1.1 bjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1361 1.1 bjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1362 1.1 bjh21 ) { 1363 1.1 bjh21 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { 1364 1.1 bjh21 float_raise( float_flag_invalid ); 1365 1.1 bjh21 } 1366 1.1 bjh21 return 0; 1367 1.1 bjh21 } 1368 1.1 bjh21 aSign = extractFloat32Sign( a ); 1369 1.1 bjh21 bSign = extractFloat32Sign( b ); 1370 1.1 bjh21 if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); 1371 1.1 bjh21 return ( a == b ) || ( aSign ^ ( a < b ) ); 1372 1.1 bjh21 1373 1.1 bjh21 } 1374 1.1 bjh21 1375 1.1 bjh21 /* 1376 1.1 bjh21 ------------------------------------------------------------------------------- 1377 1.1 bjh21 Returns 1 if the single-precision floating-point value `a' is less than 1378 1.1 bjh21 the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an 1379 1.1 bjh21 exception. Otherwise, the comparison is performed according to the IEC/IEEE 1380 1.1 bjh21 Standard for Binary Floating-Point Arithmetic. 1381 1.1 bjh21 ------------------------------------------------------------------------------- 1382 1.1 bjh21 */ 1383 1.1 bjh21 flag float32_lt_quiet( float32 a, float32 b ) 1384 1.1 bjh21 { 1385 1.1 bjh21 flag aSign, bSign; 1386 1.1 bjh21 1387 1.1 bjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1388 1.1 bjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1389 1.1 bjh21 ) { 1390 1.1 bjh21 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { 1391 1.1 bjh21 float_raise( float_flag_invalid ); 1392 1.1 bjh21 } 1393 1.1 bjh21 return 0; 1394 1.1 bjh21 } 1395 1.1 bjh21 aSign = extractFloat32Sign( a ); 1396 1.1 bjh21 bSign = extractFloat32Sign( b ); 1397 1.1 bjh21 if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); 1398 1.1 bjh21 return ( a != b ) && ( aSign ^ ( a < b ) ); 1399 1.1 bjh21 1400 1.1 bjh21 } 1401 1.1 bjh21 #endif /* !SOFTFLOAT_FOR_GCC */ 1402 1.1 bjh21 1403 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 1404 1.1 bjh21 /* 1405 1.1 bjh21 ------------------------------------------------------------------------------- 1406 1.1 bjh21 Returns the result of converting the double-precision floating-point value 1407 1.1 bjh21 `a' to the 32-bit two's complement integer format. The conversion is 1408 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 1409 1.1 bjh21 Arithmetic---which means in particular that the conversion is rounded 1410 1.1 bjh21 according to the current rounding mode. If `a' is a NaN, the largest 1411 1.1 bjh21 positive integer is returned. Otherwise, if the conversion overflows, the 1412 1.1 bjh21 largest integer with the same sign as `a' is returned. 1413 1.1 bjh21 ------------------------------------------------------------------------------- 1414 1.1 bjh21 */ 1415 1.1 bjh21 int32 float64_to_int32( float64 a ) 1416 1.1 bjh21 { 1417 1.1 bjh21 flag aSign; 1418 1.1 bjh21 int16 aExp, shiftCount; 1419 1.1 bjh21 bits32 aSig0, aSig1, absZ, aSigExtra; 1420 1.1 bjh21 int32 z; 1421 1.1 bjh21 int8 roundingMode; 1422 1.1 bjh21 1423 1.1 bjh21 aSig1 = extractFloat64Frac1( a ); 1424 1.1 bjh21 aSig0 = extractFloat64Frac0( a ); 1425 1.1 bjh21 aExp = extractFloat64Exp( a ); 1426 1.1 bjh21 aSign = extractFloat64Sign( a ); 1427 1.1 bjh21 shiftCount = aExp - 0x413; 1428 1.1 bjh21 if ( 0 <= shiftCount ) { 1429 1.1 bjh21 if ( 0x41E < aExp ) { 1430 1.1 bjh21 if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0; 1431 1.1 bjh21 goto invalid; 1432 1.1 bjh21 } 1433 1.1 bjh21 shortShift64Left( 1434 1.1 bjh21 aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra ); 1435 1.1 bjh21 if ( 0x80000000 < absZ ) goto invalid; 1436 1.1 bjh21 } 1437 1.1 bjh21 else { 1438 1.1 bjh21 aSig1 = ( aSig1 != 0 ); 1439 1.1 bjh21 if ( aExp < 0x3FE ) { 1440 1.1 bjh21 aSigExtra = aExp | aSig0 | aSig1; 1441 1.1 bjh21 absZ = 0; 1442 1.1 bjh21 } 1443 1.1 bjh21 else { 1444 1.1 bjh21 aSig0 |= 0x00100000; 1445 1.1 bjh21 aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1; 1446 1.1 bjh21 absZ = aSig0>>( - shiftCount ); 1447 1.1 bjh21 } 1448 1.1 bjh21 } 1449 1.1 bjh21 roundingMode = float_rounding_mode; 1450 1.1 bjh21 if ( roundingMode == float_round_nearest_even ) { 1451 1.1 bjh21 if ( (sbits32) aSigExtra < 0 ) { 1452 1.1 bjh21 ++absZ; 1453 1.1 bjh21 if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1; 1454 1.1 bjh21 } 1455 1.1 bjh21 z = aSign ? - absZ : absZ; 1456 1.1 bjh21 } 1457 1.1 bjh21 else { 1458 1.1 bjh21 aSigExtra = ( aSigExtra != 0 ); 1459 1.1 bjh21 if ( aSign ) { 1460 1.1 bjh21 z = - ( absZ 1461 1.1 bjh21 + ( ( roundingMode == float_round_down ) & aSigExtra ) ); 1462 1.1 bjh21 } 1463 1.1 bjh21 else { 1464 1.1 bjh21 z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra ); 1465 1.1 bjh21 } 1466 1.1 bjh21 } 1467 1.1 bjh21 if ( ( aSign ^ ( z < 0 ) ) && z ) { 1468 1.1 bjh21 invalid: 1469 1.1 bjh21 float_raise( float_flag_invalid ); 1470 1.1 bjh21 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 1471 1.1 bjh21 } 1472 1.3 matt if ( aSigExtra ) set_float_exception_inexact_flag(); 1473 1.1 bjh21 return z; 1474 1.1 bjh21 1475 1.1 bjh21 } 1476 1.1 bjh21 #endif /* !SOFTFLOAT_FOR_GCC */ 1477 1.1 bjh21 1478 1.1 bjh21 /* 1479 1.1 bjh21 ------------------------------------------------------------------------------- 1480 1.1 bjh21 Returns the result of converting the double-precision floating-point value 1481 1.1 bjh21 `a' to the 32-bit two's complement integer format. The conversion is 1482 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 1483 1.1 bjh21 Arithmetic, except that the conversion is always rounded toward zero. 1484 1.1 bjh21 If `a' is a NaN, the largest positive integer is returned. Otherwise, if 1485 1.1 bjh21 the conversion overflows, the largest integer with the same sign as `a' is 1486 1.1 bjh21 returned. 1487 1.1 bjh21 ------------------------------------------------------------------------------- 1488 1.1 bjh21 */ 1489 1.1 bjh21 int32 float64_to_int32_round_to_zero( float64 a ) 1490 1.1 bjh21 { 1491 1.1 bjh21 flag aSign; 1492 1.1 bjh21 int16 aExp, shiftCount; 1493 1.1 bjh21 bits32 aSig0, aSig1, absZ, aSigExtra; 1494 1.1 bjh21 int32 z; 1495 1.1 bjh21 1496 1.1 bjh21 aSig1 = extractFloat64Frac1( a ); 1497 1.1 bjh21 aSig0 = extractFloat64Frac0( a ); 1498 1.1 bjh21 aExp = extractFloat64Exp( a ); 1499 1.1 bjh21 aSign = extractFloat64Sign( a ); 1500 1.1 bjh21 shiftCount = aExp - 0x413; 1501 1.1 bjh21 if ( 0 <= shiftCount ) { 1502 1.1 bjh21 if ( 0x41E < aExp ) { 1503 1.1 bjh21 if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0; 1504 1.1 bjh21 goto invalid; 1505 1.1 bjh21 } 1506 1.1 bjh21 shortShift64Left( 1507 1.1 bjh21 aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra ); 1508 1.1 bjh21 } 1509 1.1 bjh21 else { 1510 1.1 bjh21 if ( aExp < 0x3FF ) { 1511 1.1 bjh21 if ( aExp | aSig0 | aSig1 ) { 1512 1.3 matt set_float_exception_inexact_flag(); 1513 1.1 bjh21 } 1514 1.1 bjh21 return 0; 1515 1.1 bjh21 } 1516 1.1 bjh21 aSig0 |= 0x00100000; 1517 1.1 bjh21 aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1; 1518 1.1 bjh21 absZ = aSig0>>( - shiftCount ); 1519 1.1 bjh21 } 1520 1.1 bjh21 z = aSign ? - absZ : absZ; 1521 1.1 bjh21 if ( ( aSign ^ ( z < 0 ) ) && z ) { 1522 1.1 bjh21 invalid: 1523 1.1 bjh21 float_raise( float_flag_invalid ); 1524 1.1 bjh21 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 1525 1.1 bjh21 } 1526 1.3 matt if ( aSigExtra ) set_float_exception_inexact_flag(); 1527 1.1 bjh21 return z; 1528 1.1 bjh21 1529 1.1 bjh21 } 1530 1.1 bjh21 1531 1.1 bjh21 /* 1532 1.1 bjh21 ------------------------------------------------------------------------------- 1533 1.1 bjh21 Returns the result of converting the double-precision floating-point value 1534 1.1 bjh21 `a' to the single-precision floating-point format. The conversion is 1535 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 1536 1.1 bjh21 Arithmetic. 1537 1.1 bjh21 ------------------------------------------------------------------------------- 1538 1.1 bjh21 */ 1539 1.1 bjh21 float32 float64_to_float32( float64 a ) 1540 1.1 bjh21 { 1541 1.1 bjh21 flag aSign; 1542 1.1 bjh21 int16 aExp; 1543 1.1 bjh21 bits32 aSig0, aSig1, zSig; 1544 1.1 bjh21 bits32 allZero; 1545 1.1 bjh21 1546 1.1 bjh21 aSig1 = extractFloat64Frac1( a ); 1547 1.1 bjh21 aSig0 = extractFloat64Frac0( a ); 1548 1.1 bjh21 aExp = extractFloat64Exp( a ); 1549 1.1 bjh21 aSign = extractFloat64Sign( a ); 1550 1.1 bjh21 if ( aExp == 0x7FF ) { 1551 1.1 bjh21 if ( aSig0 | aSig1 ) { 1552 1.1 bjh21 return commonNaNToFloat32( float64ToCommonNaN( a ) ); 1553 1.1 bjh21 } 1554 1.1 bjh21 return packFloat32( aSign, 0xFF, 0 ); 1555 1.1 bjh21 } 1556 1.1 bjh21 shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig ); 1557 1.1 bjh21 if ( aExp ) zSig |= 0x40000000; 1558 1.1 bjh21 return roundAndPackFloat32( aSign, aExp - 0x381, zSig ); 1559 1.1 bjh21 1560 1.1 bjh21 } 1561 1.1 bjh21 1562 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC 1563 1.1 bjh21 /* 1564 1.1 bjh21 ------------------------------------------------------------------------------- 1565 1.1 bjh21 Rounds the double-precision floating-point value `a' to an integer, 1566 1.1 bjh21 and returns the result as a double-precision floating-point value. The 1567 1.1 bjh21 operation is performed according to the IEC/IEEE Standard for Binary 1568 1.1 bjh21 Floating-Point Arithmetic. 1569 1.1 bjh21 ------------------------------------------------------------------------------- 1570 1.1 bjh21 */ 1571 1.1 bjh21 float64 float64_round_to_int( float64 a ) 1572 1.1 bjh21 { 1573 1.1 bjh21 flag aSign; 1574 1.1 bjh21 int16 aExp; 1575 1.1 bjh21 bits32 lastBitMask, roundBitsMask; 1576 1.1 bjh21 int8 roundingMode; 1577 1.1 bjh21 float64 z; 1578 1.1 bjh21 1579 1.1 bjh21 aExp = extractFloat64Exp( a ); 1580 1.1 bjh21 if ( 0x413 <= aExp ) { 1581 1.1 bjh21 if ( 0x433 <= aExp ) { 1582 1.1 bjh21 if ( ( aExp == 0x7FF ) 1583 1.1 bjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) { 1584 1.1 bjh21 return propagateFloat64NaN( a, a ); 1585 1.1 bjh21 } 1586 1.1 bjh21 return a; 1587 1.1 bjh21 } 1588 1.1 bjh21 lastBitMask = 1; 1589 1.1 bjh21 lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1; 1590 1.1 bjh21 roundBitsMask = lastBitMask - 1; 1591 1.1 bjh21 z = a; 1592 1.1 bjh21 roundingMode = float_rounding_mode; 1593 1.1 bjh21 if ( roundingMode == float_round_nearest_even ) { 1594 1.1 bjh21 if ( lastBitMask ) { 1595 1.1 bjh21 add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); 1596 1.1 bjh21 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; 1597 1.1 bjh21 } 1598 1.1 bjh21 else { 1599 1.1 bjh21 if ( (sbits32) z.low < 0 ) { 1600 1.1 bjh21 ++z.high; 1601 1.1 bjh21 if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1; 1602 1.1 bjh21 } 1603 1.1 bjh21 } 1604 1.1 bjh21 } 1605 1.1 bjh21 else if ( roundingMode != float_round_to_zero ) { 1606 1.1 bjh21 if ( extractFloat64Sign( z ) 1607 1.1 bjh21 ^ ( roundingMode == float_round_up ) ) { 1608 1.1 bjh21 add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low ); 1609 1.1 bjh21 } 1610 1.1 bjh21 } 1611 1.1 bjh21 z.low &= ~ roundBitsMask; 1612 1.1 bjh21 } 1613 1.1 bjh21 else { 1614 1.1 bjh21 if ( aExp <= 0x3FE ) { 1615 1.1 bjh21 if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a; 1616 1.3 matt set_float_exception_inexact_flag(); 1617 1.1 bjh21 aSign = extractFloat64Sign( a ); 1618 1.1 bjh21 switch ( float_rounding_mode ) { 1619 1.1 bjh21 case float_round_nearest_even: 1620 1.1 bjh21 if ( ( aExp == 0x3FE ) 1621 1.1 bjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) 1622 1.1 bjh21 ) { 1623 1.1 bjh21 return packFloat64( aSign, 0x3FF, 0, 0 ); 1624 1.1 bjh21 } 1625 1.1 bjh21 break; 1626 1.1 bjh21 case float_round_down: 1627 1.1 bjh21 return 1628 1.1 bjh21 aSign ? packFloat64( 1, 0x3FF, 0, 0 ) 1629 1.1 bjh21 : packFloat64( 0, 0, 0, 0 ); 1630 1.1 bjh21 case float_round_up: 1631 1.1 bjh21 return 1632 1.1 bjh21 aSign ? packFloat64( 1, 0, 0, 0 ) 1633 1.1 bjh21 : packFloat64( 0, 0x3FF, 0, 0 ); 1634 1.1 bjh21 } 1635 1.1 bjh21 return packFloat64( aSign, 0, 0, 0 ); 1636 1.1 bjh21 } 1637 1.1 bjh21 lastBitMask = 1; 1638 1.1 bjh21 lastBitMask <<= 0x413 - aExp; 1639 1.1 bjh21 roundBitsMask = lastBitMask - 1; 1640 1.1 bjh21 z.low = 0; 1641 1.1 bjh21 z.high = a.high; 1642 1.1 bjh21 roundingMode = float_rounding_mode; 1643 1.1 bjh21 if ( roundingMode == float_round_nearest_even ) { 1644 1.1 bjh21 z.high += lastBitMask>>1; 1645 1.1 bjh21 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { 1646 1.1 bjh21 z.high &= ~ lastBitMask; 1647 1.1 bjh21 } 1648 1.1 bjh21 } 1649 1.1 bjh21 else if ( roundingMode != float_round_to_zero ) { 1650 1.1 bjh21 if ( extractFloat64Sign( z ) 1651 1.1 bjh21 ^ ( roundingMode == float_round_up ) ) { 1652 1.1 bjh21 z.high |= ( a.low != 0 ); 1653 1.1 bjh21 z.high += roundBitsMask; 1654 1.1 bjh21 } 1655 1.1 bjh21 } 1656 1.1 bjh21 z.high &= ~ roundBitsMask; 1657 1.1 bjh21 } 1658 1.1 bjh21 if ( ( z.low != a.low ) || ( z.high != a.high ) ) { 1659 1.3 matt set_float_exception_inexact_flag(); 1660 1.1 bjh21 } 1661 1.1 bjh21 return z; 1662 1.1 bjh21 1663 1.1 bjh21 } 1664 1.1 bjh21 #endif 1665 1.1 bjh21 1666 1.1 bjh21 /* 1667 1.1 bjh21 ------------------------------------------------------------------------------- 1668 1.1 bjh21 Returns the result of adding the absolute values of the double-precision 1669 1.1 bjh21 floating-point values `a' and `b'. If `zSign' is 1, the sum is negated 1670 1.1 bjh21 before being returned. `zSign' is ignored if the result is a NaN. 1671 1.1 bjh21 The addition is performed according to the IEC/IEEE Standard for Binary 1672 1.1 bjh21 Floating-Point Arithmetic. 1673 1.1 bjh21 ------------------------------------------------------------------------------- 1674 1.1 bjh21 */ 1675 1.1 bjh21 static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) 1676 1.1 bjh21 { 1677 1.1 bjh21 int16 aExp, bExp, zExp; 1678 1.1 bjh21 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; 1679 1.1 bjh21 int16 expDiff; 1680 1.1 bjh21 1681 1.1 bjh21 aSig1 = extractFloat64Frac1( a ); 1682 1.1 bjh21 aSig0 = extractFloat64Frac0( a ); 1683 1.1 bjh21 aExp = extractFloat64Exp( a ); 1684 1.1 bjh21 bSig1 = extractFloat64Frac1( b ); 1685 1.1 bjh21 bSig0 = extractFloat64Frac0( b ); 1686 1.1 bjh21 bExp = extractFloat64Exp( b ); 1687 1.1 bjh21 expDiff = aExp - bExp; 1688 1.1 bjh21 if ( 0 < expDiff ) { 1689 1.1 bjh21 if ( aExp == 0x7FF ) { 1690 1.1 bjh21 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); 1691 1.1 bjh21 return a; 1692 1.1 bjh21 } 1693 1.1 bjh21 if ( bExp == 0 ) { 1694 1.1 bjh21 --expDiff; 1695 1.1 bjh21 } 1696 1.1 bjh21 else { 1697 1.1 bjh21 bSig0 |= 0x00100000; 1698 1.1 bjh21 } 1699 1.1 bjh21 shift64ExtraRightJamming( 1700 1.1 bjh21 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 ); 1701 1.1 bjh21 zExp = aExp; 1702 1.1 bjh21 } 1703 1.1 bjh21 else if ( expDiff < 0 ) { 1704 1.1 bjh21 if ( bExp == 0x7FF ) { 1705 1.1 bjh21 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 1706 1.1 bjh21 return packFloat64( zSign, 0x7FF, 0, 0 ); 1707 1.1 bjh21 } 1708 1.1 bjh21 if ( aExp == 0 ) { 1709 1.1 bjh21 ++expDiff; 1710 1.1 bjh21 } 1711 1.1 bjh21 else { 1712 1.1 bjh21 aSig0 |= 0x00100000; 1713 1.1 bjh21 } 1714 1.1 bjh21 shift64ExtraRightJamming( 1715 1.1 bjh21 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 ); 1716 1.1 bjh21 zExp = bExp; 1717 1.1 bjh21 } 1718 1.1 bjh21 else { 1719 1.1 bjh21 if ( aExp == 0x7FF ) { 1720 1.1 bjh21 if ( aSig0 | aSig1 | bSig0 | bSig1 ) { 1721 1.1 bjh21 return propagateFloat64NaN( a, b ); 1722 1.1 bjh21 } 1723 1.1 bjh21 return a; 1724 1.1 bjh21 } 1725 1.1 bjh21 add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 1726 1.1 bjh21 if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 ); 1727 1.1 bjh21 zSig2 = 0; 1728 1.1 bjh21 zSig0 |= 0x00200000; 1729 1.1 bjh21 zExp = aExp; 1730 1.1 bjh21 goto shiftRight1; 1731 1.1 bjh21 } 1732 1.1 bjh21 aSig0 |= 0x00100000; 1733 1.1 bjh21 add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 1734 1.1 bjh21 --zExp; 1735 1.1 bjh21 if ( zSig0 < 0x00200000 ) goto roundAndPack; 1736 1.1 bjh21 ++zExp; 1737 1.1 bjh21 shiftRight1: 1738 1.1 bjh21 shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); 1739 1.1 bjh21 roundAndPack: 1740 1.1 bjh21 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); 1741 1.1 bjh21 1742 1.1 bjh21 } 1743 1.1 bjh21 1744 1.1 bjh21 /* 1745 1.1 bjh21 ------------------------------------------------------------------------------- 1746 1.1 bjh21 Returns the result of subtracting the absolute values of the double- 1747 1.1 bjh21 precision floating-point values `a' and `b'. If `zSign' is 1, the 1748 1.1 bjh21 difference is negated before being returned. `zSign' is ignored if the 1749 1.1 bjh21 result is a NaN. The subtraction is performed according to the IEC/IEEE 1750 1.1 bjh21 Standard for Binary Floating-Point Arithmetic. 1751 1.1 bjh21 ------------------------------------------------------------------------------- 1752 1.1 bjh21 */ 1753 1.1 bjh21 static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) 1754 1.1 bjh21 { 1755 1.1 bjh21 int16 aExp, bExp, zExp; 1756 1.1 bjh21 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; 1757 1.1 bjh21 int16 expDiff; 1758 1.1 bjh21 1759 1.1 bjh21 aSig1 = extractFloat64Frac1( a ); 1760 1.1 bjh21 aSig0 = extractFloat64Frac0( a ); 1761 1.1 bjh21 aExp = extractFloat64Exp( a ); 1762 1.1 bjh21 bSig1 = extractFloat64Frac1( b ); 1763 1.1 bjh21 bSig0 = extractFloat64Frac0( b ); 1764 1.1 bjh21 bExp = extractFloat64Exp( b ); 1765 1.1 bjh21 expDiff = aExp - bExp; 1766 1.1 bjh21 shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 ); 1767 1.1 bjh21 shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 ); 1768 1.1 bjh21 if ( 0 < expDiff ) goto aExpBigger; 1769 1.1 bjh21 if ( expDiff < 0 ) goto bExpBigger; 1770 1.1 bjh21 if ( aExp == 0x7FF ) { 1771 1.1 bjh21 if ( aSig0 | aSig1 | bSig0 | bSig1 ) { 1772 1.1 bjh21 return propagateFloat64NaN( a, b ); 1773 1.1 bjh21 } 1774 1.1 bjh21 float_raise( float_flag_invalid ); 1775 1.1 bjh21 return float64_default_nan; 1776 1.1 bjh21 } 1777 1.1 bjh21 if ( aExp == 0 ) { 1778 1.1 bjh21 aExp = 1; 1779 1.1 bjh21 bExp = 1; 1780 1.1 bjh21 } 1781 1.1 bjh21 if ( bSig0 < aSig0 ) goto aBigger; 1782 1.1 bjh21 if ( aSig0 < bSig0 ) goto bBigger; 1783 1.1 bjh21 if ( bSig1 < aSig1 ) goto aBigger; 1784 1.1 bjh21 if ( aSig1 < bSig1 ) goto bBigger; 1785 1.1 bjh21 return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 ); 1786 1.1 bjh21 bExpBigger: 1787 1.1 bjh21 if ( bExp == 0x7FF ) { 1788 1.1 bjh21 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 1789 1.1 bjh21 return packFloat64( zSign ^ 1, 0x7FF, 0, 0 ); 1790 1.1 bjh21 } 1791 1.1 bjh21 if ( aExp == 0 ) { 1792 1.1 bjh21 ++expDiff; 1793 1.1 bjh21 } 1794 1.1 bjh21 else { 1795 1.1 bjh21 aSig0 |= 0x40000000; 1796 1.1 bjh21 } 1797 1.1 bjh21 shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); 1798 1.1 bjh21 bSig0 |= 0x40000000; 1799 1.1 bjh21 bBigger: 1800 1.1 bjh21 sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 ); 1801 1.1 bjh21 zExp = bExp; 1802 1.1 bjh21 zSign ^= 1; 1803 1.1 bjh21 goto normalizeRoundAndPack; 1804 1.1 bjh21 aExpBigger: 1805 1.1 bjh21 if ( aExp == 0x7FF ) { 1806 1.1 bjh21 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); 1807 1.1 bjh21 return a; 1808 1.1 bjh21 } 1809 1.1 bjh21 if ( bExp == 0 ) { 1810 1.1 bjh21 --expDiff; 1811 1.1 bjh21 } 1812 1.1 bjh21 else { 1813 1.1 bjh21 bSig0 |= 0x40000000; 1814 1.1 bjh21 } 1815 1.1 bjh21 shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 ); 1816 1.1 bjh21 aSig0 |= 0x40000000; 1817 1.1 bjh21 aBigger: 1818 1.1 bjh21 sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 1819 1.1 bjh21 zExp = aExp; 1820 1.1 bjh21 normalizeRoundAndPack: 1821 1.1 bjh21 --zExp; 1822 1.1 bjh21 return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 ); 1823 1.1 bjh21 1824 1.1 bjh21 } 1825 1.1 bjh21 1826 1.1 bjh21 /* 1827 1.1 bjh21 ------------------------------------------------------------------------------- 1828 1.1 bjh21 Returns the result of adding the double-precision floating-point values `a' 1829 1.1 bjh21 and `b'. The operation is performed according to the IEC/IEEE Standard for 1830 1.1 bjh21 Binary Floating-Point Arithmetic. 1831 1.1 bjh21 ------------------------------------------------------------------------------- 1832 1.1 bjh21 */ 1833 1.1 bjh21 float64 float64_add( float64 a, float64 b ) 1834 1.1 bjh21 { 1835 1.1 bjh21 flag aSign, bSign; 1836 1.1 bjh21 1837 1.1 bjh21 aSign = extractFloat64Sign( a ); 1838 1.1 bjh21 bSign = extractFloat64Sign( b ); 1839 1.1 bjh21 if ( aSign == bSign ) { 1840 1.1 bjh21 return addFloat64Sigs( a, b, aSign ); 1841 1.1 bjh21 } 1842 1.1 bjh21 else { 1843 1.1 bjh21 return subFloat64Sigs( a, b, aSign ); 1844 1.1 bjh21 } 1845 1.1 bjh21 1846 1.1 bjh21 } 1847 1.1 bjh21 1848 1.1 bjh21 /* 1849 1.1 bjh21 ------------------------------------------------------------------------------- 1850 1.1 bjh21 Returns the result of subtracting the double-precision floating-point values 1851 1.1 bjh21 `a' and `b'. The operation is performed according to the IEC/IEEE Standard 1852 1.1 bjh21 for Binary Floating-Point Arithmetic. 1853 1.1 bjh21 ------------------------------------------------------------------------------- 1854 1.1 bjh21 */ 1855 1.1 bjh21 float64 float64_sub( float64 a, float64 b ) 1856 1.1 bjh21 { 1857 1.1 bjh21 flag aSign, bSign; 1858 1.1 bjh21 1859 1.1 bjh21 aSign = extractFloat64Sign( a ); 1860 1.1 bjh21 bSign = extractFloat64Sign( b ); 1861 1.1 bjh21 if ( aSign == bSign ) { 1862 1.1 bjh21 return subFloat64Sigs( a, b, aSign ); 1863 1.1 bjh21 } 1864 1.1 bjh21 else { 1865 1.1 bjh21 return addFloat64Sigs( a, b, aSign ); 1866 1.1 bjh21 } 1867 1.1 bjh21 1868 1.1 bjh21 } 1869 1.1 bjh21 1870 1.1 bjh21 /* 1871 1.1 bjh21 ------------------------------------------------------------------------------- 1872 1.1 bjh21 Returns the result of multiplying the double-precision floating-point values 1873 1.1 bjh21 `a' and `b'. The operation is performed according to the IEC/IEEE Standard 1874 1.1 bjh21 for Binary Floating-Point Arithmetic. 1875 1.1 bjh21 ------------------------------------------------------------------------------- 1876 1.1 bjh21 */ 1877 1.1 bjh21 float64 float64_mul( float64 a, float64 b ) 1878 1.1 bjh21 { 1879 1.1 bjh21 flag aSign, bSign, zSign; 1880 1.1 bjh21 int16 aExp, bExp, zExp; 1881 1.1 bjh21 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; 1882 1.1 bjh21 1883 1.1 bjh21 aSig1 = extractFloat64Frac1( a ); 1884 1.1 bjh21 aSig0 = extractFloat64Frac0( a ); 1885 1.1 bjh21 aExp = extractFloat64Exp( a ); 1886 1.1 bjh21 aSign = extractFloat64Sign( a ); 1887 1.1 bjh21 bSig1 = extractFloat64Frac1( b ); 1888 1.1 bjh21 bSig0 = extractFloat64Frac0( b ); 1889 1.1 bjh21 bExp = extractFloat64Exp( b ); 1890 1.1 bjh21 bSign = extractFloat64Sign( b ); 1891 1.1 bjh21 zSign = aSign ^ bSign; 1892 1.1 bjh21 if ( aExp == 0x7FF ) { 1893 1.1 bjh21 if ( ( aSig0 | aSig1 ) 1894 1.1 bjh21 || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) { 1895 1.1 bjh21 return propagateFloat64NaN( a, b ); 1896 1.1 bjh21 } 1897 1.1 bjh21 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid; 1898 1.1 bjh21 return packFloat64( zSign, 0x7FF, 0, 0 ); 1899 1.1 bjh21 } 1900 1.1 bjh21 if ( bExp == 0x7FF ) { 1901 1.1 bjh21 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 1902 1.1 bjh21 if ( ( aExp | aSig0 | aSig1 ) == 0 ) { 1903 1.1 bjh21 invalid: 1904 1.1 bjh21 float_raise( float_flag_invalid ); 1905 1.1 bjh21 return float64_default_nan; 1906 1.1 bjh21 } 1907 1.1 bjh21 return packFloat64( zSign, 0x7FF, 0, 0 ); 1908 1.1 bjh21 } 1909 1.1 bjh21 if ( aExp == 0 ) { 1910 1.1 bjh21 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); 1911 1.1 bjh21 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 1912 1.1 bjh21 } 1913 1.1 bjh21 if ( bExp == 0 ) { 1914 1.1 bjh21 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); 1915 1.1 bjh21 normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 1916 1.1 bjh21 } 1917 1.1 bjh21 zExp = aExp + bExp - 0x400; 1918 1.1 bjh21 aSig0 |= 0x00100000; 1919 1.1 bjh21 shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 ); 1920 1.1 bjh21 mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 ); 1921 1.1 bjh21 add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 ); 1922 1.1 bjh21 zSig2 |= ( zSig3 != 0 ); 1923 1.1 bjh21 if ( 0x00200000 <= zSig0 ) { 1924 1.1 bjh21 shift64ExtraRightJamming( 1925 1.1 bjh21 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); 1926 1.1 bjh21 ++zExp; 1927 1.1 bjh21 } 1928 1.1 bjh21 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); 1929 1.1 bjh21 1930 1.1 bjh21 } 1931 1.1 bjh21 1932 1.1 bjh21 /* 1933 1.1 bjh21 ------------------------------------------------------------------------------- 1934 1.1 bjh21 Returns the result of dividing the double-precision floating-point value `a' 1935 1.1 bjh21 by the corresponding value `b'. The operation is performed according to the 1936 1.1 bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1937 1.1 bjh21 ------------------------------------------------------------------------------- 1938 1.1 bjh21 */ 1939 1.1 bjh21 float64 float64_div( float64 a, float64 b ) 1940 1.1 bjh21 { 1941 1.1 bjh21 flag aSign, bSign, zSign; 1942 1.1 bjh21 int16 aExp, bExp, zExp; 1943 1.1 bjh21 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; 1944 1.1 bjh21 bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3; 1945 1.1 bjh21 1946 1.1 bjh21 aSig1 = extractFloat64Frac1( a ); 1947 1.1 bjh21 aSig0 = extractFloat64Frac0( a ); 1948 1.1 bjh21 aExp = extractFloat64Exp( a ); 1949 1.1 bjh21 aSign = extractFloat64Sign( a ); 1950 1.1 bjh21 bSig1 = extractFloat64Frac1( b ); 1951 1.1 bjh21 bSig0 = extractFloat64Frac0( b ); 1952 1.1 bjh21 bExp = extractFloat64Exp( b ); 1953 1.1 bjh21 bSign = extractFloat64Sign( b ); 1954 1.1 bjh21 zSign = aSign ^ bSign; 1955 1.1 bjh21 if ( aExp == 0x7FF ) { 1956 1.1 bjh21 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); 1957 1.1 bjh21 if ( bExp == 0x7FF ) { 1958 1.1 bjh21 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 1959 1.1 bjh21 goto invalid; 1960 1.1 bjh21 } 1961 1.1 bjh21 return packFloat64( zSign, 0x7FF, 0, 0 ); 1962 1.1 bjh21 } 1963 1.1 bjh21 if ( bExp == 0x7FF ) { 1964 1.1 bjh21 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 1965 1.1 bjh21 return packFloat64( zSign, 0, 0, 0 ); 1966 1.1 bjh21 } 1967 1.1 bjh21 if ( bExp == 0 ) { 1968 1.1 bjh21 if ( ( bSig0 | bSig1 ) == 0 ) { 1969 1.1 bjh21 if ( ( aExp | aSig0 | aSig1 ) == 0 ) { 1970 1.1 bjh21 invalid: 1971 1.1 bjh21 float_raise( float_flag_invalid ); 1972 1.1 bjh21 return float64_default_nan; 1973 1.1 bjh21 } 1974 1.1 bjh21 float_raise( float_flag_divbyzero ); 1975 1.1 bjh21 return packFloat64( zSign, 0x7FF, 0, 0 ); 1976 1.1 bjh21 } 1977 1.1 bjh21 normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 1978 1.1 bjh21 } 1979 1.1 bjh21 if ( aExp == 0 ) { 1980 1.1 bjh21 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); 1981 1.1 bjh21 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 1982 1.1 bjh21 } 1983 1.1 bjh21 zExp = aExp - bExp + 0x3FD; 1984 1.1 bjh21 shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 ); 1985 1.1 bjh21 shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 ); 1986 1.1 bjh21 if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) { 1987 1.1 bjh21 shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 ); 1988 1.1 bjh21 ++zExp; 1989 1.1 bjh21 } 1990 1.1 bjh21 zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 ); 1991 1.1 bjh21 mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 ); 1992 1.1 bjh21 sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 ); 1993 1.1 bjh21 while ( (sbits32) rem0 < 0 ) { 1994 1.1 bjh21 --zSig0; 1995 1.1 bjh21 add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 ); 1996 1.1 bjh21 } 1997 1.1 bjh21 zSig1 = estimateDiv64To32( rem1, rem2, bSig0 ); 1998 1.1 bjh21 if ( ( zSig1 & 0x3FF ) <= 4 ) { 1999 1.1 bjh21 mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 ); 2000 1.1 bjh21 sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 ); 2001 1.1 bjh21 while ( (sbits32) rem1 < 0 ) { 2002 1.1 bjh21 --zSig1; 2003 1.1 bjh21 add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 ); 2004 1.1 bjh21 } 2005 1.1 bjh21 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); 2006 1.1 bjh21 } 2007 1.1 bjh21 shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 ); 2008 1.1 bjh21 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); 2009 1.1 bjh21 2010 1.1 bjh21 } 2011 1.1 bjh21 2012 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC 2013 1.1 bjh21 /* 2014 1.1 bjh21 ------------------------------------------------------------------------------- 2015 1.1 bjh21 Returns the remainder of the double-precision floating-point value `a' 2016 1.1 bjh21 with respect to the corresponding value `b'. The operation is performed 2017 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2018 1.1 bjh21 ------------------------------------------------------------------------------- 2019 1.1 bjh21 */ 2020 1.1 bjh21 float64 float64_rem( float64 a, float64 b ) 2021 1.1 bjh21 { 2022 1.1 bjh21 flag aSign, bSign, zSign; 2023 1.1 bjh21 int16 aExp, bExp, expDiff; 2024 1.1 bjh21 bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2; 2025 1.1 bjh21 bits32 allZero, alternateASig0, alternateASig1, sigMean1; 2026 1.1 bjh21 sbits32 sigMean0; 2027 1.1 bjh21 float64 z; 2028 1.1 bjh21 2029 1.1 bjh21 aSig1 = extractFloat64Frac1( a ); 2030 1.1 bjh21 aSig0 = extractFloat64Frac0( a ); 2031 1.1 bjh21 aExp = extractFloat64Exp( a ); 2032 1.1 bjh21 aSign = extractFloat64Sign( a ); 2033 1.1 bjh21 bSig1 = extractFloat64Frac1( b ); 2034 1.1 bjh21 bSig0 = extractFloat64Frac0( b ); 2035 1.1 bjh21 bExp = extractFloat64Exp( b ); 2036 1.1 bjh21 bSign = extractFloat64Sign( b ); 2037 1.1 bjh21 if ( aExp == 0x7FF ) { 2038 1.1 bjh21 if ( ( aSig0 | aSig1 ) 2039 1.1 bjh21 || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) { 2040 1.1 bjh21 return propagateFloat64NaN( a, b ); 2041 1.1 bjh21 } 2042 1.1 bjh21 goto invalid; 2043 1.1 bjh21 } 2044 1.1 bjh21 if ( bExp == 0x7FF ) { 2045 1.1 bjh21 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 2046 1.1 bjh21 return a; 2047 1.1 bjh21 } 2048 1.1 bjh21 if ( bExp == 0 ) { 2049 1.1 bjh21 if ( ( bSig0 | bSig1 ) == 0 ) { 2050 1.1 bjh21 invalid: 2051 1.1 bjh21 float_raise( float_flag_invalid ); 2052 1.1 bjh21 return float64_default_nan; 2053 1.1 bjh21 } 2054 1.1 bjh21 normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 2055 1.1 bjh21 } 2056 1.1 bjh21 if ( aExp == 0 ) { 2057 1.1 bjh21 if ( ( aSig0 | aSig1 ) == 0 ) return a; 2058 1.1 bjh21 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 2059 1.1 bjh21 } 2060 1.1 bjh21 expDiff = aExp - bExp; 2061 1.1 bjh21 if ( expDiff < -1 ) return a; 2062 1.1 bjh21 shortShift64Left( 2063 1.1 bjh21 aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 ); 2064 1.1 bjh21 shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 ); 2065 1.1 bjh21 q = le64( bSig0, bSig1, aSig0, aSig1 ); 2066 1.1 bjh21 if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); 2067 1.1 bjh21 expDiff -= 32; 2068 1.1 bjh21 while ( 0 < expDiff ) { 2069 1.1 bjh21 q = estimateDiv64To32( aSig0, aSig1, bSig0 ); 2070 1.1 bjh21 q = ( 4 < q ) ? q - 4 : 0; 2071 1.1 bjh21 mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 ); 2072 1.1 bjh21 shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero ); 2073 1.1 bjh21 shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero ); 2074 1.1 bjh21 sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 ); 2075 1.1 bjh21 expDiff -= 29; 2076 1.1 bjh21 } 2077 1.1 bjh21 if ( -32 < expDiff ) { 2078 1.1 bjh21 q = estimateDiv64To32( aSig0, aSig1, bSig0 ); 2079 1.1 bjh21 q = ( 4 < q ) ? q - 4 : 0; 2080 1.1 bjh21 q >>= - expDiff; 2081 1.1 bjh21 shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 ); 2082 1.1 bjh21 expDiff += 24; 2083 1.1 bjh21 if ( expDiff < 0 ) { 2084 1.1 bjh21 shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); 2085 1.1 bjh21 } 2086 1.1 bjh21 else { 2087 1.1 bjh21 shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 ); 2088 1.1 bjh21 } 2089 1.1 bjh21 mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 ); 2090 1.1 bjh21 sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 ); 2091 1.1 bjh21 } 2092 1.1 bjh21 else { 2093 1.1 bjh21 shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 ); 2094 1.1 bjh21 shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 ); 2095 1.1 bjh21 } 2096 1.1 bjh21 do { 2097 1.1 bjh21 alternateASig0 = aSig0; 2098 1.1 bjh21 alternateASig1 = aSig1; 2099 1.1 bjh21 ++q; 2100 1.1 bjh21 sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); 2101 1.1 bjh21 } while ( 0 <= (sbits32) aSig0 ); 2102 1.1 bjh21 add64( 2103 1.1 bjh21 aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 ); 2104 1.1 bjh21 if ( ( sigMean0 < 0 ) 2105 1.1 bjh21 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) { 2106 1.1 bjh21 aSig0 = alternateASig0; 2107 1.1 bjh21 aSig1 = alternateASig1; 2108 1.1 bjh21 } 2109 1.1 bjh21 zSign = ( (sbits32) aSig0 < 0 ); 2110 1.1 bjh21 if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 ); 2111 1.1 bjh21 return 2112 1.1 bjh21 normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 ); 2113 1.1 bjh21 2114 1.1 bjh21 } 2115 1.1 bjh21 #endif 2116 1.1 bjh21 2117 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC 2118 1.1 bjh21 /* 2119 1.1 bjh21 ------------------------------------------------------------------------------- 2120 1.1 bjh21 Returns the square root of the double-precision floating-point value `a'. 2121 1.1 bjh21 The operation is performed according to the IEC/IEEE Standard for Binary 2122 1.1 bjh21 Floating-Point Arithmetic. 2123 1.1 bjh21 ------------------------------------------------------------------------------- 2124 1.1 bjh21 */ 2125 1.1 bjh21 float64 float64_sqrt( float64 a ) 2126 1.1 bjh21 { 2127 1.1 bjh21 flag aSign; 2128 1.1 bjh21 int16 aExp, zExp; 2129 1.1 bjh21 bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0; 2130 1.1 bjh21 bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3; 2131 1.1 bjh21 float64 z; 2132 1.1 bjh21 2133 1.1 bjh21 aSig1 = extractFloat64Frac1( a ); 2134 1.1 bjh21 aSig0 = extractFloat64Frac0( a ); 2135 1.1 bjh21 aExp = extractFloat64Exp( a ); 2136 1.1 bjh21 aSign = extractFloat64Sign( a ); 2137 1.1 bjh21 if ( aExp == 0x7FF ) { 2138 1.1 bjh21 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a ); 2139 1.1 bjh21 if ( ! aSign ) return a; 2140 1.1 bjh21 goto invalid; 2141 1.1 bjh21 } 2142 1.1 bjh21 if ( aSign ) { 2143 1.1 bjh21 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a; 2144 1.1 bjh21 invalid: 2145 1.1 bjh21 float_raise( float_flag_invalid ); 2146 1.1 bjh21 return float64_default_nan; 2147 1.1 bjh21 } 2148 1.1 bjh21 if ( aExp == 0 ) { 2149 1.1 bjh21 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 ); 2150 1.1 bjh21 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 2151 1.1 bjh21 } 2152 1.1 bjh21 zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE; 2153 1.1 bjh21 aSig0 |= 0x00100000; 2154 1.1 bjh21 shortShift64Left( aSig0, aSig1, 11, &term0, &term1 ); 2155 1.1 bjh21 zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1; 2156 1.1 bjh21 if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF; 2157 1.1 bjh21 doubleZSig0 = zSig0 + zSig0; 2158 1.1 bjh21 shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 ); 2159 1.1 bjh21 mul32To64( zSig0, zSig0, &term0, &term1 ); 2160 1.1 bjh21 sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 ); 2161 1.1 bjh21 while ( (sbits32) rem0 < 0 ) { 2162 1.1 bjh21 --zSig0; 2163 1.1 bjh21 doubleZSig0 -= 2; 2164 1.1 bjh21 add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 ); 2165 1.1 bjh21 } 2166 1.1 bjh21 zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 ); 2167 1.1 bjh21 if ( ( zSig1 & 0x1FF ) <= 5 ) { 2168 1.1 bjh21 if ( zSig1 == 0 ) zSig1 = 1; 2169 1.1 bjh21 mul32To64( doubleZSig0, zSig1, &term1, &term2 ); 2170 1.1 bjh21 sub64( rem1, 0, term1, term2, &rem1, &rem2 ); 2171 1.1 bjh21 mul32To64( zSig1, zSig1, &term2, &term3 ); 2172 1.1 bjh21 sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); 2173 1.1 bjh21 while ( (sbits32) rem1 < 0 ) { 2174 1.1 bjh21 --zSig1; 2175 1.1 bjh21 shortShift64Left( 0, zSig1, 1, &term2, &term3 ); 2176 1.1 bjh21 term3 |= 1; 2177 1.1 bjh21 term2 |= doubleZSig0; 2178 1.1 bjh21 add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); 2179 1.1 bjh21 } 2180 1.1 bjh21 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); 2181 1.1 bjh21 } 2182 1.1 bjh21 shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 ); 2183 1.1 bjh21 return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 ); 2184 1.1 bjh21 2185 1.1 bjh21 } 2186 1.1 bjh21 #endif 2187 1.1 bjh21 2188 1.1 bjh21 /* 2189 1.1 bjh21 ------------------------------------------------------------------------------- 2190 1.1 bjh21 Returns 1 if the double-precision floating-point value `a' is equal to 2191 1.1 bjh21 the corresponding value `b', and 0 otherwise. The comparison is performed 2192 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2193 1.1 bjh21 ------------------------------------------------------------------------------- 2194 1.1 bjh21 */ 2195 1.1 bjh21 flag float64_eq( float64 a, float64 b ) 2196 1.1 bjh21 { 2197 1.1 bjh21 2198 1.1 bjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2199 1.1 bjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2200 1.1 bjh21 || ( ( extractFloat64Exp( b ) == 0x7FF ) 2201 1.1 bjh21 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2202 1.1 bjh21 ) { 2203 1.1 bjh21 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 2204 1.1 bjh21 float_raise( float_flag_invalid ); 2205 1.1 bjh21 } 2206 1.1 bjh21 return 0; 2207 1.1 bjh21 } 2208 1.1 bjh21 return ( a == b ) || 2209 1.1 bjh21 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 ); 2210 1.1 bjh21 2211 1.1 bjh21 } 2212 1.1 bjh21 2213 1.1 bjh21 /* 2214 1.1 bjh21 ------------------------------------------------------------------------------- 2215 1.1 bjh21 Returns 1 if the double-precision floating-point value `a' is less than 2216 1.1 bjh21 or equal to the corresponding value `b', and 0 otherwise. The comparison 2217 1.1 bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point 2218 1.1 bjh21 Arithmetic. 2219 1.1 bjh21 ------------------------------------------------------------------------------- 2220 1.1 bjh21 */ 2221 1.1 bjh21 flag float64_le( float64 a, float64 b ) 2222 1.1 bjh21 { 2223 1.1 bjh21 flag aSign, bSign; 2224 1.1 bjh21 2225 1.1 bjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2226 1.1 bjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2227 1.1 bjh21 || ( ( extractFloat64Exp( b ) == 0x7FF ) 2228 1.1 bjh21 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2229 1.1 bjh21 ) { 2230 1.1 bjh21 float_raise( float_flag_invalid ); 2231 1.1 bjh21 return 0; 2232 1.1 bjh21 } 2233 1.1 bjh21 aSign = extractFloat64Sign( a ); 2234 1.1 bjh21 bSign = extractFloat64Sign( b ); 2235 1.1 bjh21 if ( aSign != bSign ) 2236 1.1 bjh21 return aSign || 2237 1.1 bjh21 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 2238 1.1 bjh21 0 ); 2239 1.1 bjh21 return ( a == b ) || 2240 1.1 bjh21 ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); 2241 1.1 bjh21 } 2242 1.1 bjh21 2243 1.1 bjh21 /* 2244 1.1 bjh21 ------------------------------------------------------------------------------- 2245 1.1 bjh21 Returns 1 if the double-precision floating-point value `a' is less than 2246 1.1 bjh21 the corresponding value `b', and 0 otherwise. The comparison is performed 2247 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2248 1.1 bjh21 ------------------------------------------------------------------------------- 2249 1.1 bjh21 */ 2250 1.1 bjh21 flag float64_lt( float64 a, float64 b ) 2251 1.1 bjh21 { 2252 1.1 bjh21 flag aSign, bSign; 2253 1.1 bjh21 2254 1.1 bjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2255 1.1 bjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2256 1.1 bjh21 || ( ( extractFloat64Exp( b ) == 0x7FF ) 2257 1.1 bjh21 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2258 1.1 bjh21 ) { 2259 1.1 bjh21 float_raise( float_flag_invalid ); 2260 1.1 bjh21 return 0; 2261 1.1 bjh21 } 2262 1.1 bjh21 aSign = extractFloat64Sign( a ); 2263 1.1 bjh21 bSign = extractFloat64Sign( b ); 2264 1.1 bjh21 if ( aSign != bSign ) 2265 1.1 bjh21 return aSign && 2266 1.1 bjh21 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) != 2267 1.1 bjh21 0 ); 2268 1.1 bjh21 return ( a != b ) && 2269 1.1 bjh21 ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); 2270 1.1 bjh21 2271 1.1 bjh21 } 2272 1.1 bjh21 2273 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC 2274 1.1 bjh21 /* 2275 1.1 bjh21 ------------------------------------------------------------------------------- 2276 1.1 bjh21 Returns 1 if the double-precision floating-point value `a' is equal to 2277 1.1 bjh21 the corresponding value `b', and 0 otherwise. The invalid exception is 2278 1.1 bjh21 raised if either operand is a NaN. Otherwise, the comparison is performed 2279 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2280 1.1 bjh21 ------------------------------------------------------------------------------- 2281 1.1 bjh21 */ 2282 1.1 bjh21 flag float64_eq_signaling( float64 a, float64 b ) 2283 1.1 bjh21 { 2284 1.1 bjh21 2285 1.1 bjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2286 1.1 bjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2287 1.1 bjh21 || ( ( extractFloat64Exp( b ) == 0x7FF ) 2288 1.1 bjh21 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2289 1.1 bjh21 ) { 2290 1.1 bjh21 float_raise( float_flag_invalid ); 2291 1.1 bjh21 return 0; 2292 1.1 bjh21 } 2293 1.1 bjh21 return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); 2294 1.1 bjh21 2295 1.1 bjh21 } 2296 1.1 bjh21 2297 1.1 bjh21 /* 2298 1.1 bjh21 ------------------------------------------------------------------------------- 2299 1.1 bjh21 Returns 1 if the double-precision floating-point value `a' is less than or 2300 1.1 bjh21 equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not 2301 1.1 bjh21 cause an exception. Otherwise, the comparison is performed according to the 2302 1.1 bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2303 1.1 bjh21 ------------------------------------------------------------------------------- 2304 1.1 bjh21 */ 2305 1.1 bjh21 flag float64_le_quiet( float64 a, float64 b ) 2306 1.1 bjh21 { 2307 1.1 bjh21 flag aSign, bSign; 2308 1.1 bjh21 2309 1.1 bjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2310 1.1 bjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2311 1.1 bjh21 || ( ( extractFloat64Exp( b ) == 0x7FF ) 2312 1.1 bjh21 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2313 1.1 bjh21 ) { 2314 1.1 bjh21 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 2315 1.1 bjh21 float_raise( float_flag_invalid ); 2316 1.1 bjh21 } 2317 1.1 bjh21 return 0; 2318 1.1 bjh21 } 2319 1.1 bjh21 aSign = extractFloat64Sign( a ); 2320 1.1 bjh21 bSign = extractFloat64Sign( b ); 2321 1.1 bjh21 if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); 2322 1.1 bjh21 return ( a == b ) || ( aSign ^ ( a < b ) ); 2323 1.1 bjh21 2324 1.1 bjh21 } 2325 1.1 bjh21 2326 1.1 bjh21 /* 2327 1.1 bjh21 ------------------------------------------------------------------------------- 2328 1.1 bjh21 Returns 1 if the double-precision floating-point value `a' is less than 2329 1.1 bjh21 the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an 2330 1.1 bjh21 exception. Otherwise, the comparison is performed according to the IEC/IEEE 2331 1.1 bjh21 Standard for Binary Floating-Point Arithmetic. 2332 1.1 bjh21 ------------------------------------------------------------------------------- 2333 1.1 bjh21 */ 2334 1.1 bjh21 flag float64_lt_quiet( float64 a, float64 b ) 2335 1.1 bjh21 { 2336 1.1 bjh21 flag aSign, bSign; 2337 1.1 bjh21 2338 1.1 bjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2339 1.1 bjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2340 1.1 bjh21 || ( ( extractFloat64Exp( b ) == 0x7FF ) 2341 1.1 bjh21 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2342 1.1 bjh21 ) { 2343 1.1 bjh21 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 2344 1.1 bjh21 float_raise( float_flag_invalid ); 2345 1.1 bjh21 } 2346 1.1 bjh21 return 0; 2347 1.1 bjh21 } 2348 1.1 bjh21 aSign = extractFloat64Sign( a ); 2349 1.1 bjh21 bSign = extractFloat64Sign( b ); 2350 1.1 bjh21 if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); 2351 1.1 bjh21 return ( a != b ) && ( aSign ^ ( a < b ) ); 2352 1.1 bjh21 2353 1.1 bjh21 } 2354 1.1 bjh21 2355 1.1 bjh21 #endif 2356