1 1.16 nat /* $NetBSD: softfloat.c,v 1.16 2025/09/17 11:37:38 nat Exp $ */ 2 1.1 bjh21 3 1.1 bjh21 /* 4 1.1 bjh21 * This version hacked for use with gcc -msoft-float by bjh21. 5 1.1 bjh21 * (Mostly a case of #ifdefing out things GCC doesn't need or provides 6 1.1 bjh21 * itself). 7 1.1 bjh21 */ 8 1.1 bjh21 9 1.1 bjh21 /* 10 1.1 bjh21 * Things you may want to define: 11 1.1 bjh21 * 12 1.1 bjh21 * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with 13 1.1 bjh21 * -msoft-float) to work. Include "softfloat-for-gcc.h" to get them 14 1.1 bjh21 * properly renamed. 15 1.1 bjh21 */ 16 1.1 bjh21 17 1.1 bjh21 /* 18 1.1 bjh21 =============================================================================== 19 1.1 bjh21 20 1.1 bjh21 This C source file is part of the SoftFloat IEC/IEEE Floating-point 21 1.1 bjh21 Arithmetic Package, Release 2a. 22 1.1 bjh21 23 1.1 bjh21 Written by John R. Hauser. This work was made possible in part by the 24 1.1 bjh21 International Computer Science Institute, located at Suite 600, 1947 Center 25 1.1 bjh21 Street, Berkeley, California 94704. Funding was partially provided by the 26 1.1 bjh21 National Science Foundation under grant MIP-9311980. The original version 27 1.1 bjh21 of this code was written as part of a project to build a fixed-point vector 28 1.1 bjh21 processor in collaboration with the University of California at Berkeley, 29 1.1 bjh21 overseen by Profs. Nelson Morgan and John Wawrzynek. More information 30 1.1 bjh21 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ 31 1.1 bjh21 arithmetic/SoftFloat.html'. 32 1.1 bjh21 33 1.1 bjh21 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort 34 1.1 bjh21 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT 35 1.1 bjh21 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO 36 1.1 bjh21 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY 37 1.1 bjh21 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 38 1.1 bjh21 39 1.1 bjh21 Derivative works are acceptable, even for commercial purposes, so long as 40 1.1 bjh21 (1) they include prominent notice that the work is derivative, and (2) they 41 1.1 bjh21 include prominent notice akin to these four paragraphs for those parts of 42 1.1 bjh21 this code that are retained. 43 1.1 bjh21 44 1.1 bjh21 =============================================================================== 45 1.1 bjh21 */ 46 1.1 bjh21 47 1.1 bjh21 #include <sys/cdefs.h> 48 1.1 bjh21 #if defined(LIBC_SCCS) && !defined(lint) 49 1.16 nat __RCSID("$NetBSD: softfloat.c,v 1.16 2025/09/17 11:37:38 nat Exp $"); 50 1.1 bjh21 #endif /* LIBC_SCCS and not lint */ 51 1.1 bjh21 52 1.1 bjh21 #ifdef SOFTFLOAT_FOR_GCC 53 1.1 bjh21 #include "softfloat-for-gcc.h" 54 1.1 bjh21 #endif 55 1.1 bjh21 56 1.1 bjh21 #include "milieu.h" 57 1.1 bjh21 #include "softfloat.h" 58 1.1 bjh21 59 1.1 bjh21 /* 60 1.1 bjh21 * Conversions between floats as stored in memory and floats as 61 1.1 bjh21 * SoftFloat uses them 62 1.1 bjh21 */ 63 1.1 bjh21 #ifndef FLOAT64_DEMANGLE 64 1.1 bjh21 #define FLOAT64_DEMANGLE(a) (a) 65 1.1 bjh21 #endif 66 1.1 bjh21 #ifndef FLOAT64_MANGLE 67 1.1 bjh21 #define FLOAT64_MANGLE(a) (a) 68 1.1 bjh21 #endif 69 1.1 bjh21 70 1.16 nat #ifndef X80SHIFT 71 1.16 nat #define X80SHIFT 0 72 1.16 nat #endif 73 1.1 bjh21 /* 74 1.1 bjh21 ------------------------------------------------------------------------------- 75 1.1 bjh21 Floating-point rounding mode, extended double-precision rounding precision, 76 1.1 bjh21 and exception flags. 77 1.1 bjh21 ------------------------------------------------------------------------------- 78 1.1 bjh21 */ 79 1.12 matt #ifndef set_float_rounding_mode 80 1.1 bjh21 fp_rnd float_rounding_mode = float_round_nearest_even; 81 1.1 bjh21 fp_except float_exception_flags = 0; 82 1.12 matt #endif 83 1.12 matt #ifndef set_float_exception_inexact_flag 84 1.12 matt #define set_float_exception_inexact_flag() \ 85 1.12 matt ((void)(float_exception_flags |= float_flag_inexact)) 86 1.12 matt #endif 87 1.1 bjh21 #ifdef FLOATX80 88 1.1 bjh21 int8 floatx80_rounding_precision = 80; 89 1.1 bjh21 #endif 90 1.1 bjh21 91 1.1 bjh21 /* 92 1.1 bjh21 ------------------------------------------------------------------------------- 93 1.1 bjh21 Primitive arithmetic functions, including multi-word arithmetic, and 94 1.1 bjh21 division and square root approximations. (Can be specialized to target if 95 1.1 bjh21 desired.) 96 1.1 bjh21 ------------------------------------------------------------------------------- 97 1.1 bjh21 */ 98 1.1 bjh21 #include "softfloat-macros" 99 1.1 bjh21 100 1.1 bjh21 /* 101 1.1 bjh21 ------------------------------------------------------------------------------- 102 1.1 bjh21 Functions and definitions to determine: (1) whether tininess for underflow 103 1.1 bjh21 is detected before or after rounding by default, (2) what (if anything) 104 1.1 bjh21 happens when exceptions are raised, (3) how signaling NaNs are distinguished 105 1.1 bjh21 from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs 106 1.1 bjh21 are propagated from function inputs to output. These details are target- 107 1.1 bjh21 specific. 108 1.1 bjh21 ------------------------------------------------------------------------------- 109 1.1 bjh21 */ 110 1.1 bjh21 #include "softfloat-specialize" 111 1.1 bjh21 112 1.1 bjh21 #if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128) 113 1.1 bjh21 /* 114 1.1 bjh21 ------------------------------------------------------------------------------- 115 1.1 bjh21 Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 116 1.1 bjh21 and 7, and returns the properly rounded 32-bit integer corresponding to the 117 1.1 bjh21 input. If `zSign' is 1, the input is negated before being converted to an 118 1.1 bjh21 integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input 119 1.1 bjh21 is simply rounded to an integer, with the inexact exception raised if the 120 1.1 bjh21 input cannot be represented exactly as an integer. However, if the fixed- 121 1.1 bjh21 point input is too large, the invalid exception is raised and the largest 122 1.1 bjh21 positive or negative integer is returned. 123 1.1 bjh21 ------------------------------------------------------------------------------- 124 1.1 bjh21 */ 125 1.1 bjh21 static int32 roundAndPackInt32( flag zSign, bits64 absZ ) 126 1.1 bjh21 { 127 1.1 bjh21 int8 roundingMode; 128 1.1 bjh21 flag roundNearestEven; 129 1.1 bjh21 int8 roundIncrement, roundBits; 130 1.1 bjh21 int32 z; 131 1.1 bjh21 132 1.1 bjh21 roundingMode = float_rounding_mode; 133 1.1 bjh21 roundNearestEven = ( roundingMode == float_round_nearest_even ); 134 1.1 bjh21 roundIncrement = 0x40; 135 1.1 bjh21 if ( ! roundNearestEven ) { 136 1.1 bjh21 if ( roundingMode == float_round_to_zero ) { 137 1.1 bjh21 roundIncrement = 0; 138 1.1 bjh21 } 139 1.1 bjh21 else { 140 1.1 bjh21 roundIncrement = 0x7F; 141 1.1 bjh21 if ( zSign ) { 142 1.1 bjh21 if ( roundingMode == float_round_up ) roundIncrement = 0; 143 1.1 bjh21 } 144 1.1 bjh21 else { 145 1.1 bjh21 if ( roundingMode == float_round_down ) roundIncrement = 0; 146 1.1 bjh21 } 147 1.1 bjh21 } 148 1.1 bjh21 } 149 1.10 christos roundBits = (int8)(absZ & 0x7F); 150 1.1 bjh21 absZ = ( absZ + roundIncrement )>>7; 151 1.1 bjh21 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); 152 1.10 christos z = (int32)absZ; 153 1.1 bjh21 if ( zSign ) z = - z; 154 1.1 bjh21 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { 155 1.1 bjh21 float_raise( float_flag_invalid ); 156 1.1 bjh21 return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 157 1.1 bjh21 } 158 1.12 matt if ( roundBits ) set_float_exception_inexact_flag(); 159 1.1 bjh21 return z; 160 1.1 bjh21 161 1.1 bjh21 } 162 1.1 bjh21 163 1.1 bjh21 /* 164 1.1 bjh21 ------------------------------------------------------------------------------- 165 1.1 bjh21 Takes the 128-bit fixed-point value formed by concatenating `absZ0' and 166 1.1 bjh21 `absZ1', with binary point between bits 63 and 64 (between the input words), 167 1.1 bjh21 and returns the properly rounded 64-bit integer corresponding to the input. 168 1.1 bjh21 If `zSign' is 1, the input is negated before being converted to an integer. 169 1.1 bjh21 Ordinarily, the fixed-point input is simply rounded to an integer, with 170 1.1 bjh21 the inexact exception raised if the input cannot be represented exactly as 171 1.1 bjh21 an integer. However, if the fixed-point input is too large, the invalid 172 1.1 bjh21 exception is raised and the largest positive or negative integer is 173 1.1 bjh21 returned. 174 1.1 bjh21 ------------------------------------------------------------------------------- 175 1.1 bjh21 */ 176 1.1 bjh21 static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 ) 177 1.1 bjh21 { 178 1.1 bjh21 int8 roundingMode; 179 1.1 bjh21 flag roundNearestEven, increment; 180 1.1 bjh21 int64 z; 181 1.1 bjh21 182 1.1 bjh21 roundingMode = float_rounding_mode; 183 1.1 bjh21 roundNearestEven = ( roundingMode == float_round_nearest_even ); 184 1.1 bjh21 increment = ( (sbits64) absZ1 < 0 ); 185 1.1 bjh21 if ( ! roundNearestEven ) { 186 1.1 bjh21 if ( roundingMode == float_round_to_zero ) { 187 1.1 bjh21 increment = 0; 188 1.1 bjh21 } 189 1.1 bjh21 else { 190 1.1 bjh21 if ( zSign ) { 191 1.1 bjh21 increment = ( roundingMode == float_round_down ) && absZ1; 192 1.1 bjh21 } 193 1.1 bjh21 else { 194 1.1 bjh21 increment = ( roundingMode == float_round_up ) && absZ1; 195 1.1 bjh21 } 196 1.1 bjh21 } 197 1.1 bjh21 } 198 1.1 bjh21 if ( increment ) { 199 1.1 bjh21 ++absZ0; 200 1.1 bjh21 if ( absZ0 == 0 ) goto overflow; 201 1.1 bjh21 absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven ); 202 1.1 bjh21 } 203 1.1 bjh21 z = absZ0; 204 1.1 bjh21 if ( zSign ) z = - z; 205 1.1 bjh21 if ( z && ( ( z < 0 ) ^ zSign ) ) { 206 1.1 bjh21 overflow: 207 1.1 bjh21 float_raise( float_flag_invalid ); 208 1.1 bjh21 return 209 1.1 bjh21 zSign ? (sbits64) LIT64( 0x8000000000000000 ) 210 1.1 bjh21 : LIT64( 0x7FFFFFFFFFFFFFFF ); 211 1.1 bjh21 } 212 1.12 matt if ( absZ1 ) set_float_exception_inexact_flag(); 213 1.1 bjh21 return z; 214 1.1 bjh21 215 1.1 bjh21 } 216 1.1 bjh21 #endif 217 1.1 bjh21 218 1.1 bjh21 /* 219 1.1 bjh21 ------------------------------------------------------------------------------- 220 1.1 bjh21 Returns the fraction bits of the single-precision floating-point value `a'. 221 1.1 bjh21 ------------------------------------------------------------------------------- 222 1.1 bjh21 */ 223 1.1 bjh21 INLINE bits32 extractFloat32Frac( float32 a ) 224 1.1 bjh21 { 225 1.1 bjh21 226 1.1 bjh21 return a & 0x007FFFFF; 227 1.1 bjh21 228 1.1 bjh21 } 229 1.1 bjh21 230 1.1 bjh21 /* 231 1.1 bjh21 ------------------------------------------------------------------------------- 232 1.1 bjh21 Returns the exponent bits of the single-precision floating-point value `a'. 233 1.1 bjh21 ------------------------------------------------------------------------------- 234 1.1 bjh21 */ 235 1.1 bjh21 INLINE int16 extractFloat32Exp( float32 a ) 236 1.1 bjh21 { 237 1.1 bjh21 238 1.1 bjh21 return ( a>>23 ) & 0xFF; 239 1.1 bjh21 240 1.1 bjh21 } 241 1.1 bjh21 242 1.1 bjh21 /* 243 1.1 bjh21 ------------------------------------------------------------------------------- 244 1.1 bjh21 Returns the sign bit of the single-precision floating-point value `a'. 245 1.1 bjh21 ------------------------------------------------------------------------------- 246 1.1 bjh21 */ 247 1.1 bjh21 INLINE flag extractFloat32Sign( float32 a ) 248 1.1 bjh21 { 249 1.1 bjh21 250 1.1 bjh21 return a>>31; 251 1.1 bjh21 252 1.1 bjh21 } 253 1.1 bjh21 254 1.1 bjh21 /* 255 1.1 bjh21 ------------------------------------------------------------------------------- 256 1.1 bjh21 Normalizes the subnormal single-precision floating-point value represented 257 1.1 bjh21 by the denormalized significand `aSig'. The normalized exponent and 258 1.1 bjh21 significand are stored at the locations pointed to by `zExpPtr' and 259 1.1 bjh21 `zSigPtr', respectively. 260 1.1 bjh21 ------------------------------------------------------------------------------- 261 1.1 bjh21 */ 262 1.1 bjh21 static void 263 1.1 bjh21 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr ) 264 1.1 bjh21 { 265 1.1 bjh21 int8 shiftCount; 266 1.1 bjh21 267 1.1 bjh21 shiftCount = countLeadingZeros32( aSig ) - 8; 268 1.1 bjh21 *zSigPtr = aSig<<shiftCount; 269 1.1 bjh21 *zExpPtr = 1 - shiftCount; 270 1.1 bjh21 271 1.1 bjh21 } 272 1.1 bjh21 273 1.1 bjh21 /* 274 1.1 bjh21 ------------------------------------------------------------------------------- 275 1.1 bjh21 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a 276 1.1 bjh21 single-precision floating-point value, returning the result. After being 277 1.1 bjh21 shifted into the proper positions, the three fields are simply added 278 1.1 bjh21 together to form the result. This means that any integer portion of `zSig' 279 1.1 bjh21 will be added into the exponent. Since a properly normalized significand 280 1.1 bjh21 will have an integer portion equal to 1, the `zExp' input should be 1 less 281 1.1 bjh21 than the desired result exponent whenever `zSig' is a complete, normalized 282 1.1 bjh21 significand. 283 1.1 bjh21 ------------------------------------------------------------------------------- 284 1.1 bjh21 */ 285 1.1 bjh21 INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig ) 286 1.1 bjh21 { 287 1.1 bjh21 288 1.1 bjh21 return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig; 289 1.1 bjh21 290 1.1 bjh21 } 291 1.1 bjh21 292 1.1 bjh21 /* 293 1.1 bjh21 ------------------------------------------------------------------------------- 294 1.1 bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp', 295 1.1 bjh21 and significand `zSig', and returns the proper single-precision floating- 296 1.1 bjh21 point value corresponding to the abstract input. Ordinarily, the abstract 297 1.1 bjh21 value is simply rounded and packed into the single-precision format, with 298 1.1 bjh21 the inexact exception raised if the abstract input cannot be represented 299 1.1 bjh21 exactly. However, if the abstract value is too large, the overflow and 300 1.1 bjh21 inexact exceptions are raised and an infinity or maximal finite value is 301 1.1 bjh21 returned. If the abstract value is too small, the input value is rounded to 302 1.1 bjh21 a subnormal number, and the underflow and inexact exceptions are raised if 303 1.1 bjh21 the abstract input cannot be represented exactly as a subnormal single- 304 1.1 bjh21 precision floating-point number. 305 1.1 bjh21 The input significand `zSig' has its binary point between bits 30 306 1.1 bjh21 and 29, which is 7 bits to the left of the usual location. This shifted 307 1.1 bjh21 significand must be normalized or smaller. If `zSig' is not normalized, 308 1.1 bjh21 `zExp' must be 0; in that case, the result returned is a subnormal number, 309 1.1 bjh21 and it must not require rounding. In the usual case that `zSig' is 310 1.1 bjh21 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. 311 1.1 bjh21 The handling of underflow and overflow follows the IEC/IEEE Standard for 312 1.1 bjh21 Binary Floating-Point Arithmetic. 313 1.1 bjh21 ------------------------------------------------------------------------------- 314 1.1 bjh21 */ 315 1.1 bjh21 static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) 316 1.1 bjh21 { 317 1.1 bjh21 int8 roundingMode; 318 1.1 bjh21 flag roundNearestEven; 319 1.1 bjh21 int8 roundIncrement, roundBits; 320 1.1 bjh21 flag isTiny; 321 1.1 bjh21 322 1.1 bjh21 roundingMode = float_rounding_mode; 323 1.1 bjh21 roundNearestEven = ( roundingMode == float_round_nearest_even ); 324 1.1 bjh21 roundIncrement = 0x40; 325 1.1 bjh21 if ( ! roundNearestEven ) { 326 1.1 bjh21 if ( roundingMode == float_round_to_zero ) { 327 1.1 bjh21 roundIncrement = 0; 328 1.1 bjh21 } 329 1.1 bjh21 else { 330 1.1 bjh21 roundIncrement = 0x7F; 331 1.1 bjh21 if ( zSign ) { 332 1.1 bjh21 if ( roundingMode == float_round_up ) roundIncrement = 0; 333 1.1 bjh21 } 334 1.1 bjh21 else { 335 1.1 bjh21 if ( roundingMode == float_round_down ) roundIncrement = 0; 336 1.1 bjh21 } 337 1.1 bjh21 } 338 1.1 bjh21 } 339 1.1 bjh21 roundBits = zSig & 0x7F; 340 1.1 bjh21 if ( 0xFD <= (bits16) zExp ) { 341 1.1 bjh21 if ( ( 0xFD < zExp ) 342 1.1 bjh21 || ( ( zExp == 0xFD ) 343 1.1 bjh21 && ( (sbits32) ( zSig + roundIncrement ) < 0 ) ) 344 1.1 bjh21 ) { 345 1.1 bjh21 float_raise( float_flag_overflow | float_flag_inexact ); 346 1.1 bjh21 return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 ); 347 1.1 bjh21 } 348 1.1 bjh21 if ( zExp < 0 ) { 349 1.1 bjh21 isTiny = 350 1.1 bjh21 ( float_detect_tininess == float_tininess_before_rounding ) 351 1.1 bjh21 || ( zExp < -1 ) 352 1.10 christos || ( zSig + roundIncrement < 0x80000000U ); 353 1.1 bjh21 shift32RightJamming( zSig, - zExp, &zSig ); 354 1.1 bjh21 zExp = 0; 355 1.1 bjh21 roundBits = zSig & 0x7F; 356 1.1 bjh21 if ( isTiny && roundBits ) float_raise( float_flag_underflow ); 357 1.1 bjh21 } 358 1.1 bjh21 } 359 1.12 matt if ( roundBits ) set_float_exception_inexact_flag(); 360 1.1 bjh21 zSig = ( zSig + roundIncrement )>>7; 361 1.1 bjh21 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); 362 1.1 bjh21 if ( zSig == 0 ) zExp = 0; 363 1.1 bjh21 return packFloat32( zSign, zExp, zSig ); 364 1.1 bjh21 365 1.1 bjh21 } 366 1.1 bjh21 367 1.1 bjh21 /* 368 1.1 bjh21 ------------------------------------------------------------------------------- 369 1.1 bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp', 370 1.1 bjh21 and significand `zSig', and returns the proper single-precision floating- 371 1.1 bjh21 point value corresponding to the abstract input. This routine is just like 372 1.1 bjh21 `roundAndPackFloat32' except that `zSig' does not have to be normalized. 373 1.1 bjh21 Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' 374 1.1 bjh21 floating-point exponent. 375 1.1 bjh21 ------------------------------------------------------------------------------- 376 1.1 bjh21 */ 377 1.1 bjh21 static float32 378 1.1 bjh21 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) 379 1.1 bjh21 { 380 1.1 bjh21 int8 shiftCount; 381 1.1 bjh21 382 1.1 bjh21 shiftCount = countLeadingZeros32( zSig ) - 1; 383 1.1 bjh21 return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount ); 384 1.1 bjh21 385 1.1 bjh21 } 386 1.1 bjh21 387 1.1 bjh21 /* 388 1.1 bjh21 ------------------------------------------------------------------------------- 389 1.1 bjh21 Returns the fraction bits of the double-precision floating-point value `a'. 390 1.1 bjh21 ------------------------------------------------------------------------------- 391 1.1 bjh21 */ 392 1.1 bjh21 INLINE bits64 extractFloat64Frac( float64 a ) 393 1.1 bjh21 { 394 1.1 bjh21 395 1.1 bjh21 return FLOAT64_DEMANGLE(a) & LIT64( 0x000FFFFFFFFFFFFF ); 396 1.1 bjh21 397 1.1 bjh21 } 398 1.1 bjh21 399 1.1 bjh21 /* 400 1.1 bjh21 ------------------------------------------------------------------------------- 401 1.1 bjh21 Returns the exponent bits of the double-precision floating-point value `a'. 402 1.1 bjh21 ------------------------------------------------------------------------------- 403 1.1 bjh21 */ 404 1.1 bjh21 INLINE int16 extractFloat64Exp( float64 a ) 405 1.1 bjh21 { 406 1.1 bjh21 407 1.10 christos return (int16)((FLOAT64_DEMANGLE(a) >> 52) & 0x7FF); 408 1.1 bjh21 409 1.1 bjh21 } 410 1.1 bjh21 411 1.1 bjh21 /* 412 1.1 bjh21 ------------------------------------------------------------------------------- 413 1.1 bjh21 Returns the sign bit of the double-precision floating-point value `a'. 414 1.1 bjh21 ------------------------------------------------------------------------------- 415 1.1 bjh21 */ 416 1.1 bjh21 INLINE flag extractFloat64Sign( float64 a ) 417 1.1 bjh21 { 418 1.1 bjh21 419 1.10 christos return (flag)(FLOAT64_DEMANGLE(a) >> 63); 420 1.1 bjh21 421 1.1 bjh21 } 422 1.1 bjh21 423 1.1 bjh21 /* 424 1.1 bjh21 ------------------------------------------------------------------------------- 425 1.1 bjh21 Normalizes the subnormal double-precision floating-point value represented 426 1.1 bjh21 by the denormalized significand `aSig'. The normalized exponent and 427 1.1 bjh21 significand are stored at the locations pointed to by `zExpPtr' and 428 1.1 bjh21 `zSigPtr', respectively. 429 1.1 bjh21 ------------------------------------------------------------------------------- 430 1.1 bjh21 */ 431 1.1 bjh21 static void 432 1.1 bjh21 normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr ) 433 1.1 bjh21 { 434 1.1 bjh21 int8 shiftCount; 435 1.1 bjh21 436 1.1 bjh21 shiftCount = countLeadingZeros64( aSig ) - 11; 437 1.1 bjh21 *zSigPtr = aSig<<shiftCount; 438 1.1 bjh21 *zExpPtr = 1 - shiftCount; 439 1.1 bjh21 440 1.1 bjh21 } 441 1.1 bjh21 442 1.1 bjh21 /* 443 1.1 bjh21 ------------------------------------------------------------------------------- 444 1.1 bjh21 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a 445 1.1 bjh21 double-precision floating-point value, returning the result. After being 446 1.1 bjh21 shifted into the proper positions, the three fields are simply added 447 1.1 bjh21 together to form the result. This means that any integer portion of `zSig' 448 1.1 bjh21 will be added into the exponent. Since a properly normalized significand 449 1.1 bjh21 will have an integer portion equal to 1, the `zExp' input should be 1 less 450 1.1 bjh21 than the desired result exponent whenever `zSig' is a complete, normalized 451 1.1 bjh21 significand. 452 1.1 bjh21 ------------------------------------------------------------------------------- 453 1.1 bjh21 */ 454 1.1 bjh21 INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig ) 455 1.1 bjh21 { 456 1.1 bjh21 457 1.1 bjh21 return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) + 458 1.1 bjh21 ( ( (bits64) zExp )<<52 ) + zSig ); 459 1.1 bjh21 460 1.1 bjh21 } 461 1.1 bjh21 462 1.1 bjh21 /* 463 1.1 bjh21 ------------------------------------------------------------------------------- 464 1.1 bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp', 465 1.1 bjh21 and significand `zSig', and returns the proper double-precision floating- 466 1.1 bjh21 point value corresponding to the abstract input. Ordinarily, the abstract 467 1.1 bjh21 value is simply rounded and packed into the double-precision format, with 468 1.1 bjh21 the inexact exception raised if the abstract input cannot be represented 469 1.1 bjh21 exactly. However, if the abstract value is too large, the overflow and 470 1.1 bjh21 inexact exceptions are raised and an infinity or maximal finite value is 471 1.1 bjh21 returned. If the abstract value is too small, the input value is rounded to 472 1.1 bjh21 a subnormal number, and the underflow and inexact exceptions are raised if 473 1.1 bjh21 the abstract input cannot be represented exactly as a subnormal double- 474 1.1 bjh21 precision floating-point number. 475 1.1 bjh21 The input significand `zSig' has its binary point between bits 62 476 1.1 bjh21 and 61, which is 10 bits to the left of the usual location. This shifted 477 1.1 bjh21 significand must be normalized or smaller. If `zSig' is not normalized, 478 1.1 bjh21 `zExp' must be 0; in that case, the result returned is a subnormal number, 479 1.1 bjh21 and it must not require rounding. In the usual case that `zSig' is 480 1.1 bjh21 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. 481 1.1 bjh21 The handling of underflow and overflow follows the IEC/IEEE Standard for 482 1.1 bjh21 Binary Floating-Point Arithmetic. 483 1.1 bjh21 ------------------------------------------------------------------------------- 484 1.1 bjh21 */ 485 1.1 bjh21 static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) 486 1.1 bjh21 { 487 1.1 bjh21 int8 roundingMode; 488 1.1 bjh21 flag roundNearestEven; 489 1.1 bjh21 int16 roundIncrement, roundBits; 490 1.1 bjh21 flag isTiny; 491 1.1 bjh21 492 1.1 bjh21 roundingMode = float_rounding_mode; 493 1.1 bjh21 roundNearestEven = ( roundingMode == float_round_nearest_even ); 494 1.1 bjh21 roundIncrement = 0x200; 495 1.1 bjh21 if ( ! roundNearestEven ) { 496 1.1 bjh21 if ( roundingMode == float_round_to_zero ) { 497 1.1 bjh21 roundIncrement = 0; 498 1.1 bjh21 } 499 1.1 bjh21 else { 500 1.1 bjh21 roundIncrement = 0x3FF; 501 1.1 bjh21 if ( zSign ) { 502 1.1 bjh21 if ( roundingMode == float_round_up ) roundIncrement = 0; 503 1.1 bjh21 } 504 1.1 bjh21 else { 505 1.1 bjh21 if ( roundingMode == float_round_down ) roundIncrement = 0; 506 1.1 bjh21 } 507 1.1 bjh21 } 508 1.1 bjh21 } 509 1.10 christos roundBits = (int16)(zSig & 0x3FF); 510 1.1 bjh21 if ( 0x7FD <= (bits16) zExp ) { 511 1.1 bjh21 if ( ( 0x7FD < zExp ) 512 1.1 bjh21 || ( ( zExp == 0x7FD ) 513 1.1 bjh21 && ( (sbits64) ( zSig + roundIncrement ) < 0 ) ) 514 1.1 bjh21 ) { 515 1.1 bjh21 float_raise( float_flag_overflow | float_flag_inexact ); 516 1.1 bjh21 return FLOAT64_MANGLE( 517 1.1 bjh21 FLOAT64_DEMANGLE(packFloat64( zSign, 0x7FF, 0 )) - 518 1.1 bjh21 ( roundIncrement == 0 )); 519 1.1 bjh21 } 520 1.1 bjh21 if ( zExp < 0 ) { 521 1.1 bjh21 isTiny = 522 1.1 bjh21 ( float_detect_tininess == float_tininess_before_rounding ) 523 1.1 bjh21 || ( zExp < -1 ) 524 1.10 christos || ( zSig + roundIncrement < (bits64)LIT64( 0x8000000000000000 ) ); 525 1.1 bjh21 shift64RightJamming( zSig, - zExp, &zSig ); 526 1.1 bjh21 zExp = 0; 527 1.10 christos roundBits = (int16)(zSig & 0x3FF); 528 1.1 bjh21 if ( isTiny && roundBits ) float_raise( float_flag_underflow ); 529 1.1 bjh21 } 530 1.1 bjh21 } 531 1.12 matt if ( roundBits ) set_float_exception_inexact_flag(); 532 1.1 bjh21 zSig = ( zSig + roundIncrement )>>10; 533 1.1 bjh21 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); 534 1.1 bjh21 if ( zSig == 0 ) zExp = 0; 535 1.1 bjh21 return packFloat64( zSign, zExp, zSig ); 536 1.1 bjh21 537 1.1 bjh21 } 538 1.1 bjh21 539 1.1 bjh21 /* 540 1.1 bjh21 ------------------------------------------------------------------------------- 541 1.1 bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp', 542 1.1 bjh21 and significand `zSig', and returns the proper double-precision floating- 543 1.1 bjh21 point value corresponding to the abstract input. This routine is just like 544 1.1 bjh21 `roundAndPackFloat64' except that `zSig' does not have to be normalized. 545 1.1 bjh21 Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' 546 1.1 bjh21 floating-point exponent. 547 1.1 bjh21 ------------------------------------------------------------------------------- 548 1.1 bjh21 */ 549 1.1 bjh21 static float64 550 1.1 bjh21 normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) 551 1.1 bjh21 { 552 1.1 bjh21 int8 shiftCount; 553 1.1 bjh21 554 1.1 bjh21 shiftCount = countLeadingZeros64( zSig ) - 1; 555 1.1 bjh21 return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount ); 556 1.1 bjh21 557 1.1 bjh21 } 558 1.1 bjh21 559 1.1 bjh21 #ifdef FLOATX80 560 1.1 bjh21 561 1.1 bjh21 /* 562 1.1 bjh21 ------------------------------------------------------------------------------- 563 1.1 bjh21 Returns the fraction bits of the extended double-precision floating-point 564 1.1 bjh21 value `a'. 565 1.1 bjh21 ------------------------------------------------------------------------------- 566 1.1 bjh21 */ 567 1.1 bjh21 INLINE bits64 extractFloatx80Frac( floatx80 a ) 568 1.1 bjh21 { 569 1.1 bjh21 570 1.1 bjh21 return a.low; 571 1.1 bjh21 572 1.1 bjh21 } 573 1.1 bjh21 574 1.1 bjh21 /* 575 1.1 bjh21 ------------------------------------------------------------------------------- 576 1.1 bjh21 Returns the exponent bits of the extended double-precision floating-point 577 1.1 bjh21 value `a'. 578 1.1 bjh21 ------------------------------------------------------------------------------- 579 1.1 bjh21 */ 580 1.1 bjh21 INLINE int32 extractFloatx80Exp( floatx80 a ) 581 1.1 bjh21 { 582 1.1 bjh21 583 1.16 nat return (a.high>>X80SHIFT) & 0x7FFF; 584 1.1 bjh21 585 1.1 bjh21 } 586 1.1 bjh21 587 1.1 bjh21 /* 588 1.1 bjh21 ------------------------------------------------------------------------------- 589 1.1 bjh21 Returns the sign bit of the extended double-precision floating-point value 590 1.1 bjh21 `a'. 591 1.1 bjh21 ------------------------------------------------------------------------------- 592 1.1 bjh21 */ 593 1.1 bjh21 INLINE flag extractFloatx80Sign( floatx80 a ) 594 1.1 bjh21 { 595 1.1 bjh21 596 1.16 nat return a.high>>(15 + X80SHIFT); 597 1.1 bjh21 598 1.1 bjh21 } 599 1.1 bjh21 600 1.1 bjh21 /* 601 1.1 bjh21 ------------------------------------------------------------------------------- 602 1.1 bjh21 Normalizes the subnormal extended double-precision floating-point value 603 1.1 bjh21 represented by the denormalized significand `aSig'. The normalized exponent 604 1.1 bjh21 and significand are stored at the locations pointed to by `zExpPtr' and 605 1.1 bjh21 `zSigPtr', respectively. 606 1.1 bjh21 ------------------------------------------------------------------------------- 607 1.1 bjh21 */ 608 1.1 bjh21 static void 609 1.1 bjh21 normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr ) 610 1.1 bjh21 { 611 1.1 bjh21 int8 shiftCount; 612 1.1 bjh21 613 1.1 bjh21 shiftCount = countLeadingZeros64( aSig ); 614 1.1 bjh21 *zSigPtr = aSig<<shiftCount; 615 1.1 bjh21 *zExpPtr = 1 - shiftCount; 616 1.1 bjh21 617 1.1 bjh21 } 618 1.1 bjh21 619 1.1 bjh21 /* 620 1.1 bjh21 ------------------------------------------------------------------------------- 621 1.1 bjh21 Packs the sign `zSign', exponent `zExp', and significand `zSig' into an 622 1.1 bjh21 extended double-precision floating-point value, returning the result. 623 1.1 bjh21 ------------------------------------------------------------------------------- 624 1.1 bjh21 */ 625 1.1 bjh21 INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig ) 626 1.1 bjh21 { 627 1.1 bjh21 floatx80 z; 628 1.1 bjh21 629 1.16 nat z.low = (bits64)zSig; 630 1.16 nat z.high = (bits32)( ( ( (bits32) zSign )<<15 ) + zExp )<<X80SHIFT; 631 1.1 bjh21 return z; 632 1.1 bjh21 633 1.1 bjh21 } 634 1.1 bjh21 635 1.1 bjh21 /* 636 1.1 bjh21 ------------------------------------------------------------------------------- 637 1.1 bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp', 638 1.1 bjh21 and extended significand formed by the concatenation of `zSig0' and `zSig1', 639 1.1 bjh21 and returns the proper extended double-precision floating-point value 640 1.1 bjh21 corresponding to the abstract input. Ordinarily, the abstract value is 641 1.1 bjh21 rounded and packed into the extended double-precision format, with the 642 1.1 bjh21 inexact exception raised if the abstract input cannot be represented 643 1.1 bjh21 exactly. However, if the abstract value is too large, the overflow and 644 1.1 bjh21 inexact exceptions are raised and an infinity or maximal finite value is 645 1.1 bjh21 returned. If the abstract value is too small, the input value is rounded to 646 1.1 bjh21 a subnormal number, and the underflow and inexact exceptions are raised if 647 1.1 bjh21 the abstract input cannot be represented exactly as a subnormal extended 648 1.1 bjh21 double-precision floating-point number. 649 1.1 bjh21 If `roundingPrecision' is 32 or 64, the result is rounded to the same 650 1.1 bjh21 number of bits as single or double precision, respectively. Otherwise, the 651 1.1 bjh21 result is rounded to the full precision of the extended double-precision 652 1.1 bjh21 format. 653 1.1 bjh21 The input significand must be normalized or smaller. If the input 654 1.1 bjh21 significand is not normalized, `zExp' must be 0; in that case, the result 655 1.1 bjh21 returned is a subnormal number, and it must not require rounding. The 656 1.1 bjh21 handling of underflow and overflow follows the IEC/IEEE Standard for Binary 657 1.1 bjh21 Floating-Point Arithmetic. 658 1.1 bjh21 ------------------------------------------------------------------------------- 659 1.1 bjh21 */ 660 1.1 bjh21 static floatx80 661 1.1 bjh21 roundAndPackFloatx80( 662 1.1 bjh21 int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 663 1.1 bjh21 ) 664 1.1 bjh21 { 665 1.1 bjh21 int8 roundingMode; 666 1.1 bjh21 flag roundNearestEven, increment, isTiny; 667 1.14 martin uint64 roundIncrement, roundMask, roundBits; 668 1.1 bjh21 669 1.1 bjh21 roundingMode = float_rounding_mode; 670 1.1 bjh21 roundNearestEven = ( roundingMode == float_round_nearest_even ); 671 1.1 bjh21 if ( roundingPrecision == 80 ) goto precision80; 672 1.1 bjh21 if ( roundingPrecision == 64 ) { 673 1.1 bjh21 roundIncrement = LIT64( 0x0000000000000400 ); 674 1.1 bjh21 roundMask = LIT64( 0x00000000000007FF ); 675 1.1 bjh21 } 676 1.1 bjh21 else if ( roundingPrecision == 32 ) { 677 1.1 bjh21 roundIncrement = LIT64( 0x0000008000000000 ); 678 1.1 bjh21 roundMask = LIT64( 0x000000FFFFFFFFFF ); 679 1.1 bjh21 } 680 1.1 bjh21 else { 681 1.1 bjh21 goto precision80; 682 1.1 bjh21 } 683 1.1 bjh21 zSig0 |= ( zSig1 != 0 ); 684 1.1 bjh21 if ( ! roundNearestEven ) { 685 1.1 bjh21 if ( roundingMode == float_round_to_zero ) { 686 1.1 bjh21 roundIncrement = 0; 687 1.1 bjh21 } 688 1.1 bjh21 else { 689 1.1 bjh21 roundIncrement = roundMask; 690 1.1 bjh21 if ( zSign ) { 691 1.1 bjh21 if ( roundingMode == float_round_up ) roundIncrement = 0; 692 1.1 bjh21 } 693 1.1 bjh21 else { 694 1.1 bjh21 if ( roundingMode == float_round_down ) roundIncrement = 0; 695 1.1 bjh21 } 696 1.1 bjh21 } 697 1.1 bjh21 } 698 1.1 bjh21 roundBits = zSig0 & roundMask; 699 1.1 bjh21 if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { 700 1.1 bjh21 if ( ( 0x7FFE < zExp ) 701 1.1 bjh21 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) ) 702 1.1 bjh21 ) { 703 1.1 bjh21 goto overflow; 704 1.1 bjh21 } 705 1.1 bjh21 if ( zExp <= 0 ) { 706 1.1 bjh21 isTiny = 707 1.1 bjh21 ( float_detect_tininess == float_tininess_before_rounding ) 708 1.1 bjh21 || ( zExp < 0 ) 709 1.1 bjh21 || ( zSig0 <= zSig0 + roundIncrement ); 710 1.1 bjh21 shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); 711 1.1 bjh21 zExp = 0; 712 1.1 bjh21 roundBits = zSig0 & roundMask; 713 1.1 bjh21 if ( isTiny && roundBits ) float_raise( float_flag_underflow ); 714 1.12 matt if ( roundBits ) set_float_exception_inexact_flag(); 715 1.1 bjh21 zSig0 += roundIncrement; 716 1.1 bjh21 if ( (sbits64) zSig0 < 0 ) zExp = 1; 717 1.1 bjh21 roundIncrement = roundMask + 1; 718 1.1 bjh21 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { 719 1.1 bjh21 roundMask |= roundIncrement; 720 1.1 bjh21 } 721 1.1 bjh21 zSig0 &= ~ roundMask; 722 1.1 bjh21 return packFloatx80( zSign, zExp, zSig0 ); 723 1.1 bjh21 } 724 1.1 bjh21 } 725 1.12 matt if ( roundBits ) set_float_exception_inexact_flag(); 726 1.1 bjh21 zSig0 += roundIncrement; 727 1.1 bjh21 if ( zSig0 < roundIncrement ) { 728 1.1 bjh21 ++zExp; 729 1.1 bjh21 zSig0 = LIT64( 0x8000000000000000 ); 730 1.1 bjh21 } 731 1.1 bjh21 roundIncrement = roundMask + 1; 732 1.1 bjh21 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { 733 1.1 bjh21 roundMask |= roundIncrement; 734 1.1 bjh21 } 735 1.1 bjh21 zSig0 &= ~ roundMask; 736 1.1 bjh21 if ( zSig0 == 0 ) zExp = 0; 737 1.1 bjh21 return packFloatx80( zSign, zExp, zSig0 ); 738 1.1 bjh21 precision80: 739 1.1 bjh21 increment = ( (sbits64) zSig1 < 0 ); 740 1.1 bjh21 if ( ! roundNearestEven ) { 741 1.1 bjh21 if ( roundingMode == float_round_to_zero ) { 742 1.1 bjh21 increment = 0; 743 1.1 bjh21 } 744 1.1 bjh21 else { 745 1.1 bjh21 if ( zSign ) { 746 1.1 bjh21 increment = ( roundingMode == float_round_down ) && zSig1; 747 1.1 bjh21 } 748 1.1 bjh21 else { 749 1.1 bjh21 increment = ( roundingMode == float_round_up ) && zSig1; 750 1.1 bjh21 } 751 1.1 bjh21 } 752 1.1 bjh21 } 753 1.1 bjh21 if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { 754 1.1 bjh21 if ( ( 0x7FFE < zExp ) 755 1.1 bjh21 || ( ( zExp == 0x7FFE ) 756 1.1 bjh21 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) ) 757 1.1 bjh21 && increment 758 1.1 bjh21 ) 759 1.1 bjh21 ) { 760 1.1 bjh21 roundMask = 0; 761 1.1 bjh21 overflow: 762 1.1 bjh21 float_raise( float_flag_overflow | float_flag_inexact ); 763 1.1 bjh21 if ( ( roundingMode == float_round_to_zero ) 764 1.1 bjh21 || ( zSign && ( roundingMode == float_round_up ) ) 765 1.1 bjh21 || ( ! zSign && ( roundingMode == float_round_down ) ) 766 1.1 bjh21 ) { 767 1.1 bjh21 return packFloatx80( zSign, 0x7FFE, ~ roundMask ); 768 1.1 bjh21 } 769 1.1 bjh21 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 770 1.1 bjh21 } 771 1.1 bjh21 if ( zExp <= 0 ) { 772 1.1 bjh21 isTiny = 773 1.1 bjh21 ( float_detect_tininess == float_tininess_before_rounding ) 774 1.1 bjh21 || ( zExp < 0 ) 775 1.1 bjh21 || ! increment 776 1.1 bjh21 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) ); 777 1.1 bjh21 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); 778 1.1 bjh21 zExp = 0; 779 1.1 bjh21 if ( isTiny && zSig1 ) float_raise( float_flag_underflow ); 780 1.12 matt if ( zSig1 ) set_float_exception_inexact_flag(); 781 1.1 bjh21 if ( roundNearestEven ) { 782 1.1 bjh21 increment = ( (sbits64) zSig1 < 0 ); 783 1.1 bjh21 } 784 1.1 bjh21 else { 785 1.1 bjh21 if ( zSign ) { 786 1.1 bjh21 increment = ( roundingMode == float_round_down ) && zSig1; 787 1.1 bjh21 } 788 1.1 bjh21 else { 789 1.1 bjh21 increment = ( roundingMode == float_round_up ) && zSig1; 790 1.1 bjh21 } 791 1.1 bjh21 } 792 1.1 bjh21 if ( increment ) { 793 1.1 bjh21 ++zSig0; 794 1.1 bjh21 zSig0 &= 795 1.1 bjh21 ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven ); 796 1.1 bjh21 if ( (sbits64) zSig0 < 0 ) zExp = 1; 797 1.1 bjh21 } 798 1.1 bjh21 return packFloatx80( zSign, zExp, zSig0 ); 799 1.1 bjh21 } 800 1.1 bjh21 } 801 1.12 matt if ( zSig1 ) set_float_exception_inexact_flag(); 802 1.1 bjh21 if ( increment ) { 803 1.1 bjh21 ++zSig0; 804 1.1 bjh21 if ( zSig0 == 0 ) { 805 1.1 bjh21 ++zExp; 806 1.1 bjh21 zSig0 = LIT64( 0x8000000000000000 ); 807 1.1 bjh21 } 808 1.1 bjh21 else { 809 1.1 bjh21 zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven ); 810 1.1 bjh21 } 811 1.1 bjh21 } 812 1.1 bjh21 else { 813 1.1 bjh21 if ( zSig0 == 0 ) zExp = 0; 814 1.1 bjh21 } 815 1.1 bjh21 return packFloatx80( zSign, zExp, zSig0 ); 816 1.1 bjh21 817 1.1 bjh21 } 818 1.1 bjh21 819 1.1 bjh21 /* 820 1.1 bjh21 ------------------------------------------------------------------------------- 821 1.1 bjh21 Takes an abstract floating-point value having sign `zSign', exponent 822 1.1 bjh21 `zExp', and significand formed by the concatenation of `zSig0' and `zSig1', 823 1.1 bjh21 and returns the proper extended double-precision floating-point value 824 1.1 bjh21 corresponding to the abstract input. This routine is just like 825 1.1 bjh21 `roundAndPackFloatx80' except that the input significand does not have to be 826 1.1 bjh21 normalized. 827 1.1 bjh21 ------------------------------------------------------------------------------- 828 1.1 bjh21 */ 829 1.1 bjh21 static floatx80 830 1.1 bjh21 normalizeRoundAndPackFloatx80( 831 1.1 bjh21 int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 832 1.1 bjh21 ) 833 1.1 bjh21 { 834 1.1 bjh21 int8 shiftCount; 835 1.1 bjh21 836 1.1 bjh21 if ( zSig0 == 0 ) { 837 1.1 bjh21 zSig0 = zSig1; 838 1.1 bjh21 zSig1 = 0; 839 1.1 bjh21 zExp -= 64; 840 1.1 bjh21 } 841 1.1 bjh21 shiftCount = countLeadingZeros64( zSig0 ); 842 1.1 bjh21 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); 843 1.1 bjh21 zExp -= shiftCount; 844 1.1 bjh21 return 845 1.1 bjh21 roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 ); 846 1.1 bjh21 847 1.1 bjh21 } 848 1.1 bjh21 849 1.1 bjh21 #endif 850 1.1 bjh21 851 1.1 bjh21 #ifdef FLOAT128 852 1.1 bjh21 853 1.1 bjh21 /* 854 1.1 bjh21 ------------------------------------------------------------------------------- 855 1.1 bjh21 Returns the least-significant 64 fraction bits of the quadruple-precision 856 1.1 bjh21 floating-point value `a'. 857 1.1 bjh21 ------------------------------------------------------------------------------- 858 1.1 bjh21 */ 859 1.1 bjh21 INLINE bits64 extractFloat128Frac1( float128 a ) 860 1.1 bjh21 { 861 1.1 bjh21 862 1.1 bjh21 return a.low; 863 1.1 bjh21 864 1.1 bjh21 } 865 1.1 bjh21 866 1.1 bjh21 /* 867 1.1 bjh21 ------------------------------------------------------------------------------- 868 1.1 bjh21 Returns the most-significant 48 fraction bits of the quadruple-precision 869 1.1 bjh21 floating-point value `a'. 870 1.1 bjh21 ------------------------------------------------------------------------------- 871 1.1 bjh21 */ 872 1.1 bjh21 INLINE bits64 extractFloat128Frac0( float128 a ) 873 1.1 bjh21 { 874 1.1 bjh21 875 1.1 bjh21 return a.high & LIT64( 0x0000FFFFFFFFFFFF ); 876 1.1 bjh21 877 1.1 bjh21 } 878 1.1 bjh21 879 1.1 bjh21 /* 880 1.1 bjh21 ------------------------------------------------------------------------------- 881 1.1 bjh21 Returns the exponent bits of the quadruple-precision floating-point value 882 1.1 bjh21 `a'. 883 1.1 bjh21 ------------------------------------------------------------------------------- 884 1.1 bjh21 */ 885 1.1 bjh21 INLINE int32 extractFloat128Exp( float128 a ) 886 1.1 bjh21 { 887 1.1 bjh21 888 1.10 christos return (int32)((a.high >> 48) & 0x7FFF); 889 1.1 bjh21 890 1.1 bjh21 } 891 1.1 bjh21 892 1.1 bjh21 /* 893 1.1 bjh21 ------------------------------------------------------------------------------- 894 1.1 bjh21 Returns the sign bit of the quadruple-precision floating-point value `a'. 895 1.1 bjh21 ------------------------------------------------------------------------------- 896 1.1 bjh21 */ 897 1.1 bjh21 INLINE flag extractFloat128Sign( float128 a ) 898 1.1 bjh21 { 899 1.1 bjh21 900 1.10 christos return (flag)(a.high >> 63); 901 1.1 bjh21 902 1.1 bjh21 } 903 1.1 bjh21 904 1.1 bjh21 /* 905 1.1 bjh21 ------------------------------------------------------------------------------- 906 1.1 bjh21 Normalizes the subnormal quadruple-precision floating-point value 907 1.1 bjh21 represented by the denormalized significand formed by the concatenation of 908 1.1 bjh21 `aSig0' and `aSig1'. The normalized exponent is stored at the location 909 1.1 bjh21 pointed to by `zExpPtr'. The most significant 49 bits of the normalized 910 1.1 bjh21 significand are stored at the location pointed to by `zSig0Ptr', and the 911 1.1 bjh21 least significant 64 bits of the normalized significand are stored at the 912 1.1 bjh21 location pointed to by `zSig1Ptr'. 913 1.1 bjh21 ------------------------------------------------------------------------------- 914 1.1 bjh21 */ 915 1.1 bjh21 static void 916 1.1 bjh21 normalizeFloat128Subnormal( 917 1.1 bjh21 bits64 aSig0, 918 1.1 bjh21 bits64 aSig1, 919 1.1 bjh21 int32 *zExpPtr, 920 1.1 bjh21 bits64 *zSig0Ptr, 921 1.1 bjh21 bits64 *zSig1Ptr 922 1.1 bjh21 ) 923 1.1 bjh21 { 924 1.1 bjh21 int8 shiftCount; 925 1.1 bjh21 926 1.1 bjh21 if ( aSig0 == 0 ) { 927 1.1 bjh21 shiftCount = countLeadingZeros64( aSig1 ) - 15; 928 1.1 bjh21 if ( shiftCount < 0 ) { 929 1.1 bjh21 *zSig0Ptr = aSig1>>( - shiftCount ); 930 1.1 bjh21 *zSig1Ptr = aSig1<<( shiftCount & 63 ); 931 1.1 bjh21 } 932 1.1 bjh21 else { 933 1.1 bjh21 *zSig0Ptr = aSig1<<shiftCount; 934 1.1 bjh21 *zSig1Ptr = 0; 935 1.1 bjh21 } 936 1.1 bjh21 *zExpPtr = - shiftCount - 63; 937 1.1 bjh21 } 938 1.1 bjh21 else { 939 1.1 bjh21 shiftCount = countLeadingZeros64( aSig0 ) - 15; 940 1.1 bjh21 shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr ); 941 1.1 bjh21 *zExpPtr = 1 - shiftCount; 942 1.1 bjh21 } 943 1.1 bjh21 944 1.1 bjh21 } 945 1.1 bjh21 946 1.1 bjh21 /* 947 1.1 bjh21 ------------------------------------------------------------------------------- 948 1.1 bjh21 Packs the sign `zSign', the exponent `zExp', and the significand formed 949 1.1 bjh21 by the concatenation of `zSig0' and `zSig1' into a quadruple-precision 950 1.1 bjh21 floating-point value, returning the result. After being shifted into the 951 1.1 bjh21 proper positions, the three fields `zSign', `zExp', and `zSig0' are simply 952 1.1 bjh21 added together to form the most significant 32 bits of the result. This 953 1.1 bjh21 means that any integer portion of `zSig0' will be added into the exponent. 954 1.1 bjh21 Since a properly normalized significand will have an integer portion equal 955 1.1 bjh21 to 1, the `zExp' input should be 1 less than the desired result exponent 956 1.1 bjh21 whenever `zSig0' and `zSig1' concatenated form a complete, normalized 957 1.1 bjh21 significand. 958 1.1 bjh21 ------------------------------------------------------------------------------- 959 1.1 bjh21 */ 960 1.1 bjh21 INLINE float128 961 1.1 bjh21 packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 ) 962 1.1 bjh21 { 963 1.1 bjh21 float128 z; 964 1.1 bjh21 965 1.1 bjh21 z.low = zSig1; 966 1.1 bjh21 z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0; 967 1.1 bjh21 return z; 968 1.1 bjh21 969 1.1 bjh21 } 970 1.1 bjh21 971 1.1 bjh21 /* 972 1.1 bjh21 ------------------------------------------------------------------------------- 973 1.1 bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp', 974 1.1 bjh21 and extended significand formed by the concatenation of `zSig0', `zSig1', 975 1.1 bjh21 and `zSig2', and returns the proper quadruple-precision floating-point value 976 1.1 bjh21 corresponding to the abstract input. Ordinarily, the abstract value is 977 1.1 bjh21 simply rounded and packed into the quadruple-precision format, with the 978 1.1 bjh21 inexact exception raised if the abstract input cannot be represented 979 1.1 bjh21 exactly. However, if the abstract value is too large, the overflow and 980 1.1 bjh21 inexact exceptions are raised and an infinity or maximal finite value is 981 1.1 bjh21 returned. If the abstract value is too small, the input value is rounded to 982 1.1 bjh21 a subnormal number, and the underflow and inexact exceptions are raised if 983 1.1 bjh21 the abstract input cannot be represented exactly as a subnormal quadruple- 984 1.1 bjh21 precision floating-point number. 985 1.1 bjh21 The input significand must be normalized or smaller. If the input 986 1.1 bjh21 significand is not normalized, `zExp' must be 0; in that case, the result 987 1.1 bjh21 returned is a subnormal number, and it must not require rounding. In the 988 1.1 bjh21 usual case that the input significand is normalized, `zExp' must be 1 less 989 1.1 bjh21 than the ``true'' floating-point exponent. The handling of underflow and 990 1.1 bjh21 overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 991 1.1 bjh21 ------------------------------------------------------------------------------- 992 1.1 bjh21 */ 993 1.1 bjh21 static float128 994 1.1 bjh21 roundAndPackFloat128( 995 1.1 bjh21 flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 ) 996 1.1 bjh21 { 997 1.1 bjh21 int8 roundingMode; 998 1.1 bjh21 flag roundNearestEven, increment, isTiny; 999 1.1 bjh21 1000 1.1 bjh21 roundingMode = float_rounding_mode; 1001 1.1 bjh21 roundNearestEven = ( roundingMode == float_round_nearest_even ); 1002 1.1 bjh21 increment = ( (sbits64) zSig2 < 0 ); 1003 1.1 bjh21 if ( ! roundNearestEven ) { 1004 1.1 bjh21 if ( roundingMode == float_round_to_zero ) { 1005 1.1 bjh21 increment = 0; 1006 1.1 bjh21 } 1007 1.1 bjh21 else { 1008 1.1 bjh21 if ( zSign ) { 1009 1.1 bjh21 increment = ( roundingMode == float_round_down ) && zSig2; 1010 1.1 bjh21 } 1011 1.1 bjh21 else { 1012 1.1 bjh21 increment = ( roundingMode == float_round_up ) && zSig2; 1013 1.1 bjh21 } 1014 1.1 bjh21 } 1015 1.1 bjh21 } 1016 1.1 bjh21 if ( 0x7FFD <= (bits32) zExp ) { 1017 1.1 bjh21 if ( ( 0x7FFD < zExp ) 1018 1.1 bjh21 || ( ( zExp == 0x7FFD ) 1019 1.1 bjh21 && eq128( 1020 1.1 bjh21 LIT64( 0x0001FFFFFFFFFFFF ), 1021 1.1 bjh21 LIT64( 0xFFFFFFFFFFFFFFFF ), 1022 1.1 bjh21 zSig0, 1023 1.1 bjh21 zSig1 1024 1.1 bjh21 ) 1025 1.1 bjh21 && increment 1026 1.1 bjh21 ) 1027 1.1 bjh21 ) { 1028 1.1 bjh21 float_raise( float_flag_overflow | float_flag_inexact ); 1029 1.1 bjh21 if ( ( roundingMode == float_round_to_zero ) 1030 1.1 bjh21 || ( zSign && ( roundingMode == float_round_up ) ) 1031 1.1 bjh21 || ( ! zSign && ( roundingMode == float_round_down ) ) 1032 1.1 bjh21 ) { 1033 1.1 bjh21 return 1034 1.1 bjh21 packFloat128( 1035 1.1 bjh21 zSign, 1036 1.1 bjh21 0x7FFE, 1037 1.1 bjh21 LIT64( 0x0000FFFFFFFFFFFF ), 1038 1.1 bjh21 LIT64( 0xFFFFFFFFFFFFFFFF ) 1039 1.1 bjh21 ); 1040 1.1 bjh21 } 1041 1.1 bjh21 return packFloat128( zSign, 0x7FFF, 0, 0 ); 1042 1.1 bjh21 } 1043 1.1 bjh21 if ( zExp < 0 ) { 1044 1.1 bjh21 isTiny = 1045 1.1 bjh21 ( float_detect_tininess == float_tininess_before_rounding ) 1046 1.1 bjh21 || ( zExp < -1 ) 1047 1.1 bjh21 || ! increment 1048 1.1 bjh21 || lt128( 1049 1.1 bjh21 zSig0, 1050 1.1 bjh21 zSig1, 1051 1.1 bjh21 LIT64( 0x0001FFFFFFFFFFFF ), 1052 1.1 bjh21 LIT64( 0xFFFFFFFFFFFFFFFF ) 1053 1.1 bjh21 ); 1054 1.1 bjh21 shift128ExtraRightJamming( 1055 1.1 bjh21 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); 1056 1.1 bjh21 zExp = 0; 1057 1.1 bjh21 if ( isTiny && zSig2 ) float_raise( float_flag_underflow ); 1058 1.1 bjh21 if ( roundNearestEven ) { 1059 1.1 bjh21 increment = ( (sbits64) zSig2 < 0 ); 1060 1.1 bjh21 } 1061 1.1 bjh21 else { 1062 1.1 bjh21 if ( zSign ) { 1063 1.1 bjh21 increment = ( roundingMode == float_round_down ) && zSig2; 1064 1.1 bjh21 } 1065 1.1 bjh21 else { 1066 1.1 bjh21 increment = ( roundingMode == float_round_up ) && zSig2; 1067 1.1 bjh21 } 1068 1.1 bjh21 } 1069 1.1 bjh21 } 1070 1.1 bjh21 } 1071 1.12 matt if ( zSig2 ) set_float_exception_inexact_flag(); 1072 1.1 bjh21 if ( increment ) { 1073 1.1 bjh21 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); 1074 1.1 bjh21 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven ); 1075 1.1 bjh21 } 1076 1.1 bjh21 else { 1077 1.1 bjh21 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; 1078 1.1 bjh21 } 1079 1.1 bjh21 return packFloat128( zSign, zExp, zSig0, zSig1 ); 1080 1.1 bjh21 1081 1.1 bjh21 } 1082 1.1 bjh21 1083 1.1 bjh21 /* 1084 1.1 bjh21 ------------------------------------------------------------------------------- 1085 1.1 bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp', 1086 1.1 bjh21 and significand formed by the concatenation of `zSig0' and `zSig1', and 1087 1.1 bjh21 returns the proper quadruple-precision floating-point value corresponding 1088 1.1 bjh21 to the abstract input. This routine is just like `roundAndPackFloat128' 1089 1.1 bjh21 except that the input significand has fewer bits and does not have to be 1090 1.1 bjh21 normalized. In all cases, `zExp' must be 1 less than the ``true'' floating- 1091 1.1 bjh21 point exponent. 1092 1.1 bjh21 ------------------------------------------------------------------------------- 1093 1.1 bjh21 */ 1094 1.1 bjh21 static float128 1095 1.1 bjh21 normalizeRoundAndPackFloat128( 1096 1.1 bjh21 flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 ) 1097 1.1 bjh21 { 1098 1.1 bjh21 int8 shiftCount; 1099 1.1 bjh21 bits64 zSig2; 1100 1.1 bjh21 1101 1.1 bjh21 if ( zSig0 == 0 ) { 1102 1.1 bjh21 zSig0 = zSig1; 1103 1.1 bjh21 zSig1 = 0; 1104 1.1 bjh21 zExp -= 64; 1105 1.1 bjh21 } 1106 1.1 bjh21 shiftCount = countLeadingZeros64( zSig0 ) - 15; 1107 1.1 bjh21 if ( 0 <= shiftCount ) { 1108 1.1 bjh21 zSig2 = 0; 1109 1.1 bjh21 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); 1110 1.1 bjh21 } 1111 1.1 bjh21 else { 1112 1.1 bjh21 shift128ExtraRightJamming( 1113 1.1 bjh21 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 ); 1114 1.1 bjh21 } 1115 1.1 bjh21 zExp -= shiftCount; 1116 1.1 bjh21 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); 1117 1.1 bjh21 1118 1.1 bjh21 } 1119 1.1 bjh21 1120 1.1 bjh21 #endif 1121 1.1 bjh21 1122 1.1 bjh21 /* 1123 1.1 bjh21 ------------------------------------------------------------------------------- 1124 1.1 bjh21 Returns the result of converting the 32-bit two's complement integer `a' 1125 1.1 bjh21 to the single-precision floating-point format. The conversion is performed 1126 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1127 1.1 bjh21 ------------------------------------------------------------------------------- 1128 1.1 bjh21 */ 1129 1.1 bjh21 float32 int32_to_float32( int32 a ) 1130 1.1 bjh21 { 1131 1.1 bjh21 flag zSign; 1132 1.1 bjh21 1133 1.1 bjh21 if ( a == 0 ) return 0; 1134 1.1 bjh21 if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); 1135 1.1 bjh21 zSign = ( a < 0 ); 1136 1.10 christos return normalizeRoundAndPackFloat32(zSign, 0x9C, (uint32)(zSign ? - a : a)); 1137 1.1 bjh21 1138 1.1 bjh21 } 1139 1.1 bjh21 1140 1.6 matt float32 uint32_to_float32( uint32 a ) 1141 1.6 matt { 1142 1.6 matt if ( a == 0 ) return 0; 1143 1.6 matt if ( a & (bits32) 0x80000000 ) 1144 1.6 matt return normalizeRoundAndPackFloat32( 0, 0x9D, a >> 1 ); 1145 1.6 matt return normalizeRoundAndPackFloat32( 0, 0x9C, a ); 1146 1.6 matt } 1147 1.6 matt 1148 1.6 matt 1149 1.1 bjh21 /* 1150 1.1 bjh21 ------------------------------------------------------------------------------- 1151 1.1 bjh21 Returns the result of converting the 32-bit two's complement integer `a' 1152 1.1 bjh21 to the double-precision floating-point format. The conversion is performed 1153 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1154 1.1 bjh21 ------------------------------------------------------------------------------- 1155 1.1 bjh21 */ 1156 1.1 bjh21 float64 int32_to_float64( int32 a ) 1157 1.1 bjh21 { 1158 1.1 bjh21 flag zSign; 1159 1.1 bjh21 uint32 absA; 1160 1.1 bjh21 int8 shiftCount; 1161 1.1 bjh21 bits64 zSig; 1162 1.1 bjh21 1163 1.1 bjh21 if ( a == 0 ) return 0; 1164 1.1 bjh21 zSign = ( a < 0 ); 1165 1.1 bjh21 absA = zSign ? - a : a; 1166 1.1 bjh21 shiftCount = countLeadingZeros32( absA ) + 21; 1167 1.1 bjh21 zSig = absA; 1168 1.1 bjh21 return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount ); 1169 1.1 bjh21 1170 1.1 bjh21 } 1171 1.1 bjh21 1172 1.6 matt float64 uint32_to_float64( uint32 a ) 1173 1.6 matt { 1174 1.6 matt int8 shiftCount; 1175 1.6 matt bits64 zSig = a; 1176 1.6 matt 1177 1.6 matt if ( a == 0 ) return 0; 1178 1.6 matt shiftCount = countLeadingZeros32( a ) + 21; 1179 1.6 matt return packFloat64( 0, 0x432 - shiftCount, zSig<<shiftCount ); 1180 1.6 matt 1181 1.6 matt } 1182 1.6 matt 1183 1.1 bjh21 #ifdef FLOATX80 1184 1.1 bjh21 1185 1.1 bjh21 /* 1186 1.1 bjh21 ------------------------------------------------------------------------------- 1187 1.1 bjh21 Returns the result of converting the 32-bit two's complement integer `a' 1188 1.1 bjh21 to the extended double-precision floating-point format. The conversion 1189 1.1 bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point 1190 1.1 bjh21 Arithmetic. 1191 1.1 bjh21 ------------------------------------------------------------------------------- 1192 1.1 bjh21 */ 1193 1.1 bjh21 floatx80 int32_to_floatx80( int32 a ) 1194 1.1 bjh21 { 1195 1.1 bjh21 flag zSign; 1196 1.1 bjh21 uint32 absA; 1197 1.1 bjh21 int8 shiftCount; 1198 1.1 bjh21 bits64 zSig; 1199 1.1 bjh21 1200 1.1 bjh21 if ( a == 0 ) return packFloatx80( 0, 0, 0 ); 1201 1.1 bjh21 zSign = ( a < 0 ); 1202 1.1 bjh21 absA = zSign ? - a : a; 1203 1.1 bjh21 shiftCount = countLeadingZeros32( absA ) + 32; 1204 1.1 bjh21 zSig = absA; 1205 1.1 bjh21 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount ); 1206 1.1 bjh21 1207 1.1 bjh21 } 1208 1.1 bjh21 1209 1.6 matt floatx80 uint32_to_floatx80( uint32 a ) 1210 1.6 matt { 1211 1.6 matt int8 shiftCount; 1212 1.6 matt bits64 zSig = a; 1213 1.6 matt 1214 1.6 matt if ( a == 0 ) return packFloatx80( 0, 0, 0 ); 1215 1.6 matt shiftCount = countLeadingZeros32( a ) + 32; 1216 1.6 matt return packFloatx80( 0, 0x403E - shiftCount, zSig<<shiftCount ); 1217 1.6 matt 1218 1.6 matt } 1219 1.6 matt 1220 1.1 bjh21 #endif 1221 1.1 bjh21 1222 1.1 bjh21 #ifdef FLOAT128 1223 1.1 bjh21 1224 1.1 bjh21 /* 1225 1.1 bjh21 ------------------------------------------------------------------------------- 1226 1.1 bjh21 Returns the result of converting the 32-bit two's complement integer `a' to 1227 1.1 bjh21 the quadruple-precision floating-point format. The conversion is performed 1228 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1229 1.1 bjh21 ------------------------------------------------------------------------------- 1230 1.1 bjh21 */ 1231 1.1 bjh21 float128 int32_to_float128( int32 a ) 1232 1.1 bjh21 { 1233 1.1 bjh21 flag zSign; 1234 1.1 bjh21 uint32 absA; 1235 1.1 bjh21 int8 shiftCount; 1236 1.1 bjh21 bits64 zSig0; 1237 1.1 bjh21 1238 1.1 bjh21 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 ); 1239 1.1 bjh21 zSign = ( a < 0 ); 1240 1.1 bjh21 absA = zSign ? - a : a; 1241 1.1 bjh21 shiftCount = countLeadingZeros32( absA ) + 17; 1242 1.1 bjh21 zSig0 = absA; 1243 1.1 bjh21 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 ); 1244 1.1 bjh21 1245 1.1 bjh21 } 1246 1.1 bjh21 1247 1.6 matt float128 uint32_to_float128( uint32 a ) 1248 1.6 matt { 1249 1.6 matt int8 shiftCount; 1250 1.6 matt bits64 zSig0 = a; 1251 1.6 matt 1252 1.6 matt if ( a == 0 ) return packFloat128( 0, 0, 0, 0 ); 1253 1.6 matt shiftCount = countLeadingZeros32( a ) + 17; 1254 1.6 matt return packFloat128( 0, 0x402E - shiftCount, zSig0<<shiftCount, 0 ); 1255 1.6 matt 1256 1.6 matt } 1257 1.6 matt 1258 1.1 bjh21 #endif 1259 1.1 bjh21 1260 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */ 1261 1.1 bjh21 /* 1262 1.1 bjh21 ------------------------------------------------------------------------------- 1263 1.1 bjh21 Returns the result of converting the 64-bit two's complement integer `a' 1264 1.1 bjh21 to the single-precision floating-point format. The conversion is performed 1265 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1266 1.1 bjh21 ------------------------------------------------------------------------------- 1267 1.1 bjh21 */ 1268 1.1 bjh21 float32 int64_to_float32( int64 a ) 1269 1.1 bjh21 { 1270 1.1 bjh21 flag zSign; 1271 1.1 bjh21 uint64 absA; 1272 1.1 bjh21 int8 shiftCount; 1273 1.1 bjh21 1274 1.1 bjh21 if ( a == 0 ) return 0; 1275 1.1 bjh21 zSign = ( a < 0 ); 1276 1.1 bjh21 absA = zSign ? - a : a; 1277 1.1 bjh21 shiftCount = countLeadingZeros64( absA ) - 40; 1278 1.1 bjh21 if ( 0 <= shiftCount ) { 1279 1.1 bjh21 return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount ); 1280 1.1 bjh21 } 1281 1.1 bjh21 else { 1282 1.1 bjh21 shiftCount += 7; 1283 1.1 bjh21 if ( shiftCount < 0 ) { 1284 1.1 bjh21 shift64RightJamming( absA, - shiftCount, &absA ); 1285 1.1 bjh21 } 1286 1.1 bjh21 else { 1287 1.1 bjh21 absA <<= shiftCount; 1288 1.1 bjh21 } 1289 1.1 bjh21 return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA ); 1290 1.1 bjh21 } 1291 1.1 bjh21 1292 1.1 bjh21 } 1293 1.1 bjh21 1294 1.1 bjh21 /* 1295 1.1 bjh21 ------------------------------------------------------------------------------- 1296 1.1 bjh21 Returns the result of converting the 64-bit two's complement integer `a' 1297 1.1 bjh21 to the double-precision floating-point format. The conversion is performed 1298 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1299 1.1 bjh21 ------------------------------------------------------------------------------- 1300 1.1 bjh21 */ 1301 1.1 bjh21 float64 int64_to_float64( int64 a ) 1302 1.1 bjh21 { 1303 1.1 bjh21 flag zSign; 1304 1.1 bjh21 1305 1.1 bjh21 if ( a == 0 ) return 0; 1306 1.1 bjh21 if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) { 1307 1.1 bjh21 return packFloat64( 1, 0x43E, 0 ); 1308 1.1 bjh21 } 1309 1.1 bjh21 zSign = ( a < 0 ); 1310 1.1 bjh21 return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a ); 1311 1.1 bjh21 1312 1.1 bjh21 } 1313 1.1 bjh21 1314 1.1 bjh21 #ifdef FLOATX80 1315 1.1 bjh21 1316 1.1 bjh21 /* 1317 1.1 bjh21 ------------------------------------------------------------------------------- 1318 1.1 bjh21 Returns the result of converting the 64-bit two's complement integer `a' 1319 1.1 bjh21 to the extended double-precision floating-point format. The conversion 1320 1.1 bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point 1321 1.1 bjh21 Arithmetic. 1322 1.1 bjh21 ------------------------------------------------------------------------------- 1323 1.1 bjh21 */ 1324 1.1 bjh21 floatx80 int64_to_floatx80( int64 a ) 1325 1.1 bjh21 { 1326 1.1 bjh21 flag zSign; 1327 1.1 bjh21 uint64 absA; 1328 1.1 bjh21 int8 shiftCount; 1329 1.1 bjh21 1330 1.1 bjh21 if ( a == 0 ) return packFloatx80( 0, 0, 0 ); 1331 1.1 bjh21 zSign = ( a < 0 ); 1332 1.1 bjh21 absA = zSign ? - a : a; 1333 1.1 bjh21 shiftCount = countLeadingZeros64( absA ); 1334 1.1 bjh21 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount ); 1335 1.1 bjh21 1336 1.1 bjh21 } 1337 1.1 bjh21 1338 1.1 bjh21 #endif 1339 1.1 bjh21 1340 1.1 bjh21 #endif /* !SOFTFLOAT_FOR_GCC */ 1341 1.1 bjh21 1342 1.1 bjh21 #ifdef FLOAT128 1343 1.1 bjh21 1344 1.1 bjh21 /* 1345 1.1 bjh21 ------------------------------------------------------------------------------- 1346 1.1 bjh21 Returns the result of converting the 64-bit two's complement integer `a' to 1347 1.1 bjh21 the quadruple-precision floating-point format. The conversion is performed 1348 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1349 1.1 bjh21 ------------------------------------------------------------------------------- 1350 1.1 bjh21 */ 1351 1.1 bjh21 float128 int64_to_float128( int64 a ) 1352 1.1 bjh21 { 1353 1.1 bjh21 flag zSign; 1354 1.1 bjh21 uint64 absA; 1355 1.1 bjh21 int8 shiftCount; 1356 1.1 bjh21 int32 zExp; 1357 1.1 bjh21 bits64 zSig0, zSig1; 1358 1.1 bjh21 1359 1.1 bjh21 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 ); 1360 1.1 bjh21 zSign = ( a < 0 ); 1361 1.1 bjh21 absA = zSign ? - a : a; 1362 1.1 bjh21 shiftCount = countLeadingZeros64( absA ) + 49; 1363 1.1 bjh21 zExp = 0x406E - shiftCount; 1364 1.1 bjh21 if ( 64 <= shiftCount ) { 1365 1.1 bjh21 zSig1 = 0; 1366 1.1 bjh21 zSig0 = absA; 1367 1.1 bjh21 shiftCount -= 64; 1368 1.1 bjh21 } 1369 1.1 bjh21 else { 1370 1.1 bjh21 zSig1 = absA; 1371 1.1 bjh21 zSig0 = 0; 1372 1.1 bjh21 } 1373 1.1 bjh21 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); 1374 1.1 bjh21 return packFloat128( zSign, zExp, zSig0, zSig1 ); 1375 1.1 bjh21 1376 1.1 bjh21 } 1377 1.1 bjh21 1378 1.1 bjh21 #endif 1379 1.1 bjh21 1380 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 1381 1.1 bjh21 /* 1382 1.1 bjh21 ------------------------------------------------------------------------------- 1383 1.1 bjh21 Returns the result of converting the single-precision floating-point value 1384 1.1 bjh21 `a' to the 32-bit two's complement integer format. The conversion is 1385 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 1386 1.1 bjh21 Arithmetic---which means in particular that the conversion is rounded 1387 1.1 bjh21 according to the current rounding mode. If `a' is a NaN, the largest 1388 1.1 bjh21 positive integer is returned. Otherwise, if the conversion overflows, the 1389 1.1 bjh21 largest integer with the same sign as `a' is returned. 1390 1.1 bjh21 ------------------------------------------------------------------------------- 1391 1.1 bjh21 */ 1392 1.1 bjh21 int32 float32_to_int32( float32 a ) 1393 1.1 bjh21 { 1394 1.1 bjh21 flag aSign; 1395 1.1 bjh21 int16 aExp, shiftCount; 1396 1.1 bjh21 bits32 aSig; 1397 1.1 bjh21 bits64 aSig64; 1398 1.1 bjh21 1399 1.1 bjh21 aSig = extractFloat32Frac( a ); 1400 1.1 bjh21 aExp = extractFloat32Exp( a ); 1401 1.1 bjh21 aSign = extractFloat32Sign( a ); 1402 1.1 bjh21 if ( ( aExp == 0xFF ) && aSig ) aSign = 0; 1403 1.1 bjh21 if ( aExp ) aSig |= 0x00800000; 1404 1.1 bjh21 shiftCount = 0xAF - aExp; 1405 1.1 bjh21 aSig64 = aSig; 1406 1.1 bjh21 aSig64 <<= 32; 1407 1.1 bjh21 if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 ); 1408 1.1 bjh21 return roundAndPackInt32( aSign, aSig64 ); 1409 1.1 bjh21 1410 1.1 bjh21 } 1411 1.1 bjh21 #endif /* !SOFTFLOAT_FOR_GCC */ 1412 1.1 bjh21 1413 1.1 bjh21 /* 1414 1.1 bjh21 ------------------------------------------------------------------------------- 1415 1.1 bjh21 Returns the result of converting the single-precision floating-point value 1416 1.1 bjh21 `a' to the 32-bit two's complement integer format. The conversion is 1417 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 1418 1.1 bjh21 Arithmetic, except that the conversion is always rounded toward zero. 1419 1.1 bjh21 If `a' is a NaN, the largest positive integer is returned. Otherwise, if 1420 1.1 bjh21 the conversion overflows, the largest integer with the same sign as `a' is 1421 1.1 bjh21 returned. 1422 1.1 bjh21 ------------------------------------------------------------------------------- 1423 1.1 bjh21 */ 1424 1.1 bjh21 int32 float32_to_int32_round_to_zero( float32 a ) 1425 1.1 bjh21 { 1426 1.1 bjh21 flag aSign; 1427 1.1 bjh21 int16 aExp, shiftCount; 1428 1.1 bjh21 bits32 aSig; 1429 1.1 bjh21 int32 z; 1430 1.1 bjh21 1431 1.1 bjh21 aSig = extractFloat32Frac( a ); 1432 1.1 bjh21 aExp = extractFloat32Exp( a ); 1433 1.1 bjh21 aSign = extractFloat32Sign( a ); 1434 1.1 bjh21 shiftCount = aExp - 0x9E; 1435 1.1 bjh21 if ( 0 <= shiftCount ) { 1436 1.1 bjh21 if ( a != 0xCF000000 ) { 1437 1.1 bjh21 float_raise( float_flag_invalid ); 1438 1.1 bjh21 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; 1439 1.1 bjh21 } 1440 1.1 bjh21 return (sbits32) 0x80000000; 1441 1.1 bjh21 } 1442 1.1 bjh21 else if ( aExp <= 0x7E ) { 1443 1.12 matt if ( aExp | aSig ) set_float_exception_inexact_flag(); 1444 1.1 bjh21 return 0; 1445 1.1 bjh21 } 1446 1.1 bjh21 aSig = ( aSig | 0x00800000 )<<8; 1447 1.1 bjh21 z = aSig>>( - shiftCount ); 1448 1.1 bjh21 if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { 1449 1.12 matt set_float_exception_inexact_flag(); 1450 1.1 bjh21 } 1451 1.1 bjh21 if ( aSign ) z = - z; 1452 1.1 bjh21 return z; 1453 1.1 bjh21 1454 1.1 bjh21 } 1455 1.1 bjh21 1456 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */ 1457 1.1 bjh21 /* 1458 1.1 bjh21 ------------------------------------------------------------------------------- 1459 1.1 bjh21 Returns the result of converting the single-precision floating-point value 1460 1.1 bjh21 `a' to the 64-bit two's complement integer format. The conversion is 1461 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 1462 1.1 bjh21 Arithmetic---which means in particular that the conversion is rounded 1463 1.1 bjh21 according to the current rounding mode. If `a' is a NaN, the largest 1464 1.1 bjh21 positive integer is returned. Otherwise, if the conversion overflows, the 1465 1.1 bjh21 largest integer with the same sign as `a' is returned. 1466 1.1 bjh21 ------------------------------------------------------------------------------- 1467 1.1 bjh21 */ 1468 1.1 bjh21 int64 float32_to_int64( float32 a ) 1469 1.1 bjh21 { 1470 1.1 bjh21 flag aSign; 1471 1.1 bjh21 int16 aExp, shiftCount; 1472 1.1 bjh21 bits32 aSig; 1473 1.1 bjh21 bits64 aSig64, aSigExtra; 1474 1.1 bjh21 1475 1.1 bjh21 aSig = extractFloat32Frac( a ); 1476 1.1 bjh21 aExp = extractFloat32Exp( a ); 1477 1.1 bjh21 aSign = extractFloat32Sign( a ); 1478 1.1 bjh21 shiftCount = 0xBE - aExp; 1479 1.1 bjh21 if ( shiftCount < 0 ) { 1480 1.1 bjh21 float_raise( float_flag_invalid ); 1481 1.1 bjh21 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { 1482 1.1 bjh21 return LIT64( 0x7FFFFFFFFFFFFFFF ); 1483 1.1 bjh21 } 1484 1.1 bjh21 return (sbits64) LIT64( 0x8000000000000000 ); 1485 1.1 bjh21 } 1486 1.1 bjh21 if ( aExp ) aSig |= 0x00800000; 1487 1.1 bjh21 aSig64 = aSig; 1488 1.1 bjh21 aSig64 <<= 40; 1489 1.1 bjh21 shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra ); 1490 1.1 bjh21 return roundAndPackInt64( aSign, aSig64, aSigExtra ); 1491 1.1 bjh21 1492 1.1 bjh21 } 1493 1.1 bjh21 1494 1.1 bjh21 /* 1495 1.1 bjh21 ------------------------------------------------------------------------------- 1496 1.1 bjh21 Returns the result of converting the single-precision floating-point value 1497 1.1 bjh21 `a' to the 64-bit two's complement integer format. The conversion is 1498 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 1499 1.1 bjh21 Arithmetic, except that the conversion is always rounded toward zero. If 1500 1.1 bjh21 `a' is a NaN, the largest positive integer is returned. Otherwise, if the 1501 1.1 bjh21 conversion overflows, the largest integer with the same sign as `a' is 1502 1.1 bjh21 returned. 1503 1.1 bjh21 ------------------------------------------------------------------------------- 1504 1.1 bjh21 */ 1505 1.1 bjh21 int64 float32_to_int64_round_to_zero( float32 a ) 1506 1.1 bjh21 { 1507 1.1 bjh21 flag aSign; 1508 1.1 bjh21 int16 aExp, shiftCount; 1509 1.1 bjh21 bits32 aSig; 1510 1.1 bjh21 bits64 aSig64; 1511 1.1 bjh21 int64 z; 1512 1.1 bjh21 1513 1.1 bjh21 aSig = extractFloat32Frac( a ); 1514 1.1 bjh21 aExp = extractFloat32Exp( a ); 1515 1.1 bjh21 aSign = extractFloat32Sign( a ); 1516 1.1 bjh21 shiftCount = aExp - 0xBE; 1517 1.1 bjh21 if ( 0 <= shiftCount ) { 1518 1.1 bjh21 if ( a != 0xDF000000 ) { 1519 1.1 bjh21 float_raise( float_flag_invalid ); 1520 1.1 bjh21 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { 1521 1.1 bjh21 return LIT64( 0x7FFFFFFFFFFFFFFF ); 1522 1.1 bjh21 } 1523 1.1 bjh21 } 1524 1.1 bjh21 return (sbits64) LIT64( 0x8000000000000000 ); 1525 1.1 bjh21 } 1526 1.1 bjh21 else if ( aExp <= 0x7E ) { 1527 1.12 matt if ( aExp | aSig ) set_float_exception_inexact_flag(); 1528 1.1 bjh21 return 0; 1529 1.1 bjh21 } 1530 1.1 bjh21 aSig64 = aSig | 0x00800000; 1531 1.1 bjh21 aSig64 <<= 40; 1532 1.1 bjh21 z = aSig64>>( - shiftCount ); 1533 1.1 bjh21 if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) { 1534 1.12 matt set_float_exception_inexact_flag(); 1535 1.1 bjh21 } 1536 1.1 bjh21 if ( aSign ) z = - z; 1537 1.1 bjh21 return z; 1538 1.1 bjh21 1539 1.1 bjh21 } 1540 1.1 bjh21 #endif /* !SOFTFLOAT_FOR_GCC */ 1541 1.1 bjh21 1542 1.1 bjh21 /* 1543 1.1 bjh21 ------------------------------------------------------------------------------- 1544 1.1 bjh21 Returns the result of converting the single-precision floating-point value 1545 1.1 bjh21 `a' to the double-precision floating-point format. The conversion is 1546 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 1547 1.1 bjh21 Arithmetic. 1548 1.1 bjh21 ------------------------------------------------------------------------------- 1549 1.1 bjh21 */ 1550 1.1 bjh21 float64 float32_to_float64( float32 a ) 1551 1.1 bjh21 { 1552 1.1 bjh21 flag aSign; 1553 1.1 bjh21 int16 aExp; 1554 1.1 bjh21 bits32 aSig; 1555 1.1 bjh21 1556 1.1 bjh21 aSig = extractFloat32Frac( a ); 1557 1.1 bjh21 aExp = extractFloat32Exp( a ); 1558 1.1 bjh21 aSign = extractFloat32Sign( a ); 1559 1.1 bjh21 if ( aExp == 0xFF ) { 1560 1.1 bjh21 if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) ); 1561 1.1 bjh21 return packFloat64( aSign, 0x7FF, 0 ); 1562 1.1 bjh21 } 1563 1.1 bjh21 if ( aExp == 0 ) { 1564 1.1 bjh21 if ( aSig == 0 ) return packFloat64( aSign, 0, 0 ); 1565 1.1 bjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1566 1.1 bjh21 --aExp; 1567 1.1 bjh21 } 1568 1.1 bjh21 return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 ); 1569 1.1 bjh21 1570 1.1 bjh21 } 1571 1.1 bjh21 1572 1.1 bjh21 #ifdef FLOATX80 1573 1.1 bjh21 1574 1.1 bjh21 /* 1575 1.1 bjh21 ------------------------------------------------------------------------------- 1576 1.1 bjh21 Returns the result of converting the single-precision floating-point value 1577 1.1 bjh21 `a' to the extended double-precision floating-point format. The conversion 1578 1.1 bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point 1579 1.1 bjh21 Arithmetic. 1580 1.1 bjh21 ------------------------------------------------------------------------------- 1581 1.1 bjh21 */ 1582 1.1 bjh21 floatx80 float32_to_floatx80( float32 a ) 1583 1.1 bjh21 { 1584 1.1 bjh21 flag aSign; 1585 1.1 bjh21 int16 aExp; 1586 1.1 bjh21 bits32 aSig; 1587 1.1 bjh21 1588 1.1 bjh21 aSig = extractFloat32Frac( a ); 1589 1.1 bjh21 aExp = extractFloat32Exp( a ); 1590 1.1 bjh21 aSign = extractFloat32Sign( a ); 1591 1.1 bjh21 if ( aExp == 0xFF ) { 1592 1.1 bjh21 if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) ); 1593 1.1 bjh21 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 1594 1.1 bjh21 } 1595 1.1 bjh21 if ( aExp == 0 ) { 1596 1.1 bjh21 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); 1597 1.1 bjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1598 1.1 bjh21 } 1599 1.1 bjh21 aSig |= 0x00800000; 1600 1.1 bjh21 return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 ); 1601 1.1 bjh21 1602 1.1 bjh21 } 1603 1.1 bjh21 1604 1.1 bjh21 #endif 1605 1.1 bjh21 1606 1.1 bjh21 #ifdef FLOAT128 1607 1.1 bjh21 1608 1.1 bjh21 /* 1609 1.1 bjh21 ------------------------------------------------------------------------------- 1610 1.1 bjh21 Returns the result of converting the single-precision floating-point value 1611 1.1 bjh21 `a' to the double-precision floating-point format. The conversion is 1612 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 1613 1.1 bjh21 Arithmetic. 1614 1.1 bjh21 ------------------------------------------------------------------------------- 1615 1.1 bjh21 */ 1616 1.1 bjh21 float128 float32_to_float128( float32 a ) 1617 1.1 bjh21 { 1618 1.1 bjh21 flag aSign; 1619 1.1 bjh21 int16 aExp; 1620 1.1 bjh21 bits32 aSig; 1621 1.1 bjh21 1622 1.1 bjh21 aSig = extractFloat32Frac( a ); 1623 1.1 bjh21 aExp = extractFloat32Exp( a ); 1624 1.1 bjh21 aSign = extractFloat32Sign( a ); 1625 1.1 bjh21 if ( aExp == 0xFF ) { 1626 1.1 bjh21 if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) ); 1627 1.1 bjh21 return packFloat128( aSign, 0x7FFF, 0, 0 ); 1628 1.1 bjh21 } 1629 1.1 bjh21 if ( aExp == 0 ) { 1630 1.1 bjh21 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); 1631 1.1 bjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1632 1.1 bjh21 --aExp; 1633 1.1 bjh21 } 1634 1.1 bjh21 return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 ); 1635 1.1 bjh21 1636 1.1 bjh21 } 1637 1.1 bjh21 1638 1.1 bjh21 #endif 1639 1.1 bjh21 1640 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 1641 1.1 bjh21 /* 1642 1.1 bjh21 ------------------------------------------------------------------------------- 1643 1.1 bjh21 Rounds the single-precision floating-point value `a' to an integer, and 1644 1.1 bjh21 returns the result as a single-precision floating-point value. The 1645 1.1 bjh21 operation is performed according to the IEC/IEEE Standard for Binary 1646 1.1 bjh21 Floating-Point Arithmetic. 1647 1.1 bjh21 ------------------------------------------------------------------------------- 1648 1.1 bjh21 */ 1649 1.1 bjh21 float32 float32_round_to_int( float32 a ) 1650 1.1 bjh21 { 1651 1.1 bjh21 flag aSign; 1652 1.1 bjh21 int16 aExp; 1653 1.1 bjh21 bits32 lastBitMask, roundBitsMask; 1654 1.1 bjh21 int8 roundingMode; 1655 1.1 bjh21 float32 z; 1656 1.1 bjh21 1657 1.1 bjh21 aExp = extractFloat32Exp( a ); 1658 1.1 bjh21 if ( 0x96 <= aExp ) { 1659 1.1 bjh21 if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) { 1660 1.1 bjh21 return propagateFloat32NaN( a, a ); 1661 1.1 bjh21 } 1662 1.1 bjh21 return a; 1663 1.1 bjh21 } 1664 1.1 bjh21 if ( aExp <= 0x7E ) { 1665 1.1 bjh21 if ( (bits32) ( a<<1 ) == 0 ) return a; 1666 1.12 matt set_float_exception_inexact_flag(); 1667 1.1 bjh21 aSign = extractFloat32Sign( a ); 1668 1.1 bjh21 switch ( float_rounding_mode ) { 1669 1.1 bjh21 case float_round_nearest_even: 1670 1.1 bjh21 if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { 1671 1.1 bjh21 return packFloat32( aSign, 0x7F, 0 ); 1672 1.1 bjh21 } 1673 1.1 bjh21 break; 1674 1.1 bjh21 case float_round_to_zero: 1675 1.1 bjh21 break; 1676 1.1 bjh21 case float_round_down: 1677 1.1 bjh21 return aSign ? 0xBF800000 : 0; 1678 1.1 bjh21 case float_round_up: 1679 1.1 bjh21 return aSign ? 0x80000000 : 0x3F800000; 1680 1.1 bjh21 } 1681 1.1 bjh21 return packFloat32( aSign, 0, 0 ); 1682 1.1 bjh21 } 1683 1.1 bjh21 lastBitMask = 1; 1684 1.1 bjh21 lastBitMask <<= 0x96 - aExp; 1685 1.1 bjh21 roundBitsMask = lastBitMask - 1; 1686 1.1 bjh21 z = a; 1687 1.1 bjh21 roundingMode = float_rounding_mode; 1688 1.1 bjh21 if ( roundingMode == float_round_nearest_even ) { 1689 1.1 bjh21 z += lastBitMask>>1; 1690 1.1 bjh21 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; 1691 1.1 bjh21 } 1692 1.1 bjh21 else if ( roundingMode != float_round_to_zero ) { 1693 1.1 bjh21 if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) { 1694 1.1 bjh21 z += roundBitsMask; 1695 1.1 bjh21 } 1696 1.1 bjh21 } 1697 1.1 bjh21 z &= ~ roundBitsMask; 1698 1.12 matt if ( z != a ) set_float_exception_inexact_flag(); 1699 1.1 bjh21 return z; 1700 1.1 bjh21 1701 1.1 bjh21 } 1702 1.1 bjh21 #endif /* !SOFTFLOAT_FOR_GCC */ 1703 1.1 bjh21 1704 1.1 bjh21 /* 1705 1.1 bjh21 ------------------------------------------------------------------------------- 1706 1.1 bjh21 Returns the result of adding the absolute values of the single-precision 1707 1.1 bjh21 floating-point values `a' and `b'. If `zSign' is 1, the sum is negated 1708 1.1 bjh21 before being returned. `zSign' is ignored if the result is a NaN. 1709 1.1 bjh21 The addition is performed according to the IEC/IEEE Standard for Binary 1710 1.1 bjh21 Floating-Point Arithmetic. 1711 1.1 bjh21 ------------------------------------------------------------------------------- 1712 1.1 bjh21 */ 1713 1.1 bjh21 static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ) 1714 1.1 bjh21 { 1715 1.1 bjh21 int16 aExp, bExp, zExp; 1716 1.1 bjh21 bits32 aSig, bSig, zSig; 1717 1.1 bjh21 int16 expDiff; 1718 1.1 bjh21 1719 1.1 bjh21 aSig = extractFloat32Frac( a ); 1720 1.1 bjh21 aExp = extractFloat32Exp( a ); 1721 1.1 bjh21 bSig = extractFloat32Frac( b ); 1722 1.1 bjh21 bExp = extractFloat32Exp( b ); 1723 1.1 bjh21 expDiff = aExp - bExp; 1724 1.1 bjh21 aSig <<= 6; 1725 1.1 bjh21 bSig <<= 6; 1726 1.1 bjh21 if ( 0 < expDiff ) { 1727 1.1 bjh21 if ( aExp == 0xFF ) { 1728 1.1 bjh21 if ( aSig ) return propagateFloat32NaN( a, b ); 1729 1.1 bjh21 return a; 1730 1.1 bjh21 } 1731 1.1 bjh21 if ( bExp == 0 ) { 1732 1.1 bjh21 --expDiff; 1733 1.1 bjh21 } 1734 1.1 bjh21 else { 1735 1.1 bjh21 bSig |= 0x20000000; 1736 1.1 bjh21 } 1737 1.1 bjh21 shift32RightJamming( bSig, expDiff, &bSig ); 1738 1.1 bjh21 zExp = aExp; 1739 1.1 bjh21 } 1740 1.1 bjh21 else if ( expDiff < 0 ) { 1741 1.1 bjh21 if ( bExp == 0xFF ) { 1742 1.1 bjh21 if ( bSig ) return propagateFloat32NaN( a, b ); 1743 1.1 bjh21 return packFloat32( zSign, 0xFF, 0 ); 1744 1.1 bjh21 } 1745 1.1 bjh21 if ( aExp == 0 ) { 1746 1.1 bjh21 ++expDiff; 1747 1.1 bjh21 } 1748 1.1 bjh21 else { 1749 1.1 bjh21 aSig |= 0x20000000; 1750 1.1 bjh21 } 1751 1.1 bjh21 shift32RightJamming( aSig, - expDiff, &aSig ); 1752 1.1 bjh21 zExp = bExp; 1753 1.1 bjh21 } 1754 1.1 bjh21 else { 1755 1.1 bjh21 if ( aExp == 0xFF ) { 1756 1.1 bjh21 if ( aSig | bSig ) return propagateFloat32NaN( a, b ); 1757 1.1 bjh21 return a; 1758 1.1 bjh21 } 1759 1.1 bjh21 if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 ); 1760 1.1 bjh21 zSig = 0x40000000 + aSig + bSig; 1761 1.1 bjh21 zExp = aExp; 1762 1.1 bjh21 goto roundAndPack; 1763 1.1 bjh21 } 1764 1.1 bjh21 aSig |= 0x20000000; 1765 1.1 bjh21 zSig = ( aSig + bSig )<<1; 1766 1.1 bjh21 --zExp; 1767 1.1 bjh21 if ( (sbits32) zSig < 0 ) { 1768 1.1 bjh21 zSig = aSig + bSig; 1769 1.1 bjh21 ++zExp; 1770 1.1 bjh21 } 1771 1.1 bjh21 roundAndPack: 1772 1.1 bjh21 return roundAndPackFloat32( zSign, zExp, zSig ); 1773 1.1 bjh21 1774 1.1 bjh21 } 1775 1.1 bjh21 1776 1.1 bjh21 /* 1777 1.1 bjh21 ------------------------------------------------------------------------------- 1778 1.1 bjh21 Returns the result of subtracting the absolute values of the single- 1779 1.1 bjh21 precision floating-point values `a' and `b'. If `zSign' is 1, the 1780 1.1 bjh21 difference is negated before being returned. `zSign' is ignored if the 1781 1.1 bjh21 result is a NaN. The subtraction is performed according to the IEC/IEEE 1782 1.1 bjh21 Standard for Binary Floating-Point Arithmetic. 1783 1.1 bjh21 ------------------------------------------------------------------------------- 1784 1.1 bjh21 */ 1785 1.1 bjh21 static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) 1786 1.1 bjh21 { 1787 1.1 bjh21 int16 aExp, bExp, zExp; 1788 1.1 bjh21 bits32 aSig, bSig, zSig; 1789 1.1 bjh21 int16 expDiff; 1790 1.1 bjh21 1791 1.1 bjh21 aSig = extractFloat32Frac( a ); 1792 1.1 bjh21 aExp = extractFloat32Exp( a ); 1793 1.1 bjh21 bSig = extractFloat32Frac( b ); 1794 1.1 bjh21 bExp = extractFloat32Exp( b ); 1795 1.1 bjh21 expDiff = aExp - bExp; 1796 1.1 bjh21 aSig <<= 7; 1797 1.1 bjh21 bSig <<= 7; 1798 1.1 bjh21 if ( 0 < expDiff ) goto aExpBigger; 1799 1.1 bjh21 if ( expDiff < 0 ) goto bExpBigger; 1800 1.1 bjh21 if ( aExp == 0xFF ) { 1801 1.1 bjh21 if ( aSig | bSig ) return propagateFloat32NaN( a, b ); 1802 1.1 bjh21 float_raise( float_flag_invalid ); 1803 1.1 bjh21 return float32_default_nan; 1804 1.1 bjh21 } 1805 1.1 bjh21 if ( aExp == 0 ) { 1806 1.1 bjh21 aExp = 1; 1807 1.1 bjh21 bExp = 1; 1808 1.1 bjh21 } 1809 1.1 bjh21 if ( bSig < aSig ) goto aBigger; 1810 1.1 bjh21 if ( aSig < bSig ) goto bBigger; 1811 1.1 bjh21 return packFloat32( float_rounding_mode == float_round_down, 0, 0 ); 1812 1.1 bjh21 bExpBigger: 1813 1.1 bjh21 if ( bExp == 0xFF ) { 1814 1.1 bjh21 if ( bSig ) return propagateFloat32NaN( a, b ); 1815 1.1 bjh21 return packFloat32( zSign ^ 1, 0xFF, 0 ); 1816 1.1 bjh21 } 1817 1.1 bjh21 if ( aExp == 0 ) { 1818 1.1 bjh21 ++expDiff; 1819 1.1 bjh21 } 1820 1.1 bjh21 else { 1821 1.1 bjh21 aSig |= 0x40000000; 1822 1.1 bjh21 } 1823 1.1 bjh21 shift32RightJamming( aSig, - expDiff, &aSig ); 1824 1.1 bjh21 bSig |= 0x40000000; 1825 1.1 bjh21 bBigger: 1826 1.1 bjh21 zSig = bSig - aSig; 1827 1.1 bjh21 zExp = bExp; 1828 1.1 bjh21 zSign ^= 1; 1829 1.1 bjh21 goto normalizeRoundAndPack; 1830 1.1 bjh21 aExpBigger: 1831 1.1 bjh21 if ( aExp == 0xFF ) { 1832 1.1 bjh21 if ( aSig ) return propagateFloat32NaN( a, b ); 1833 1.1 bjh21 return a; 1834 1.1 bjh21 } 1835 1.1 bjh21 if ( bExp == 0 ) { 1836 1.1 bjh21 --expDiff; 1837 1.1 bjh21 } 1838 1.1 bjh21 else { 1839 1.1 bjh21 bSig |= 0x40000000; 1840 1.1 bjh21 } 1841 1.1 bjh21 shift32RightJamming( bSig, expDiff, &bSig ); 1842 1.1 bjh21 aSig |= 0x40000000; 1843 1.1 bjh21 aBigger: 1844 1.1 bjh21 zSig = aSig - bSig; 1845 1.1 bjh21 zExp = aExp; 1846 1.1 bjh21 normalizeRoundAndPack: 1847 1.1 bjh21 --zExp; 1848 1.1 bjh21 return normalizeRoundAndPackFloat32( zSign, zExp, zSig ); 1849 1.1 bjh21 1850 1.1 bjh21 } 1851 1.1 bjh21 1852 1.1 bjh21 /* 1853 1.1 bjh21 ------------------------------------------------------------------------------- 1854 1.1 bjh21 Returns the result of adding the single-precision floating-point values `a' 1855 1.1 bjh21 and `b'. The operation is performed according to the IEC/IEEE Standard for 1856 1.1 bjh21 Binary Floating-Point Arithmetic. 1857 1.1 bjh21 ------------------------------------------------------------------------------- 1858 1.1 bjh21 */ 1859 1.1 bjh21 float32 float32_add( float32 a, float32 b ) 1860 1.1 bjh21 { 1861 1.1 bjh21 flag aSign, bSign; 1862 1.1 bjh21 1863 1.1 bjh21 aSign = extractFloat32Sign( a ); 1864 1.1 bjh21 bSign = extractFloat32Sign( b ); 1865 1.1 bjh21 if ( aSign == bSign ) { 1866 1.1 bjh21 return addFloat32Sigs( a, b, aSign ); 1867 1.1 bjh21 } 1868 1.1 bjh21 else { 1869 1.1 bjh21 return subFloat32Sigs( a, b, aSign ); 1870 1.1 bjh21 } 1871 1.1 bjh21 1872 1.1 bjh21 } 1873 1.1 bjh21 1874 1.1 bjh21 /* 1875 1.1 bjh21 ------------------------------------------------------------------------------- 1876 1.1 bjh21 Returns the result of subtracting the single-precision floating-point values 1877 1.1 bjh21 `a' and `b'. The operation is performed according to the IEC/IEEE Standard 1878 1.1 bjh21 for Binary Floating-Point Arithmetic. 1879 1.1 bjh21 ------------------------------------------------------------------------------- 1880 1.1 bjh21 */ 1881 1.1 bjh21 float32 float32_sub( float32 a, float32 b ) 1882 1.1 bjh21 { 1883 1.1 bjh21 flag aSign, bSign; 1884 1.1 bjh21 1885 1.1 bjh21 aSign = extractFloat32Sign( a ); 1886 1.1 bjh21 bSign = extractFloat32Sign( b ); 1887 1.1 bjh21 if ( aSign == bSign ) { 1888 1.1 bjh21 return subFloat32Sigs( a, b, aSign ); 1889 1.1 bjh21 } 1890 1.1 bjh21 else { 1891 1.1 bjh21 return addFloat32Sigs( a, b, aSign ); 1892 1.1 bjh21 } 1893 1.1 bjh21 1894 1.1 bjh21 } 1895 1.1 bjh21 1896 1.1 bjh21 /* 1897 1.1 bjh21 ------------------------------------------------------------------------------- 1898 1.1 bjh21 Returns the result of multiplying the single-precision floating-point values 1899 1.1 bjh21 `a' and `b'. The operation is performed according to the IEC/IEEE Standard 1900 1.1 bjh21 for Binary Floating-Point Arithmetic. 1901 1.1 bjh21 ------------------------------------------------------------------------------- 1902 1.1 bjh21 */ 1903 1.1 bjh21 float32 float32_mul( float32 a, float32 b ) 1904 1.1 bjh21 { 1905 1.1 bjh21 flag aSign, bSign, zSign; 1906 1.1 bjh21 int16 aExp, bExp, zExp; 1907 1.1 bjh21 bits32 aSig, bSig; 1908 1.1 bjh21 bits64 zSig64; 1909 1.1 bjh21 bits32 zSig; 1910 1.1 bjh21 1911 1.1 bjh21 aSig = extractFloat32Frac( a ); 1912 1.1 bjh21 aExp = extractFloat32Exp( a ); 1913 1.1 bjh21 aSign = extractFloat32Sign( a ); 1914 1.1 bjh21 bSig = extractFloat32Frac( b ); 1915 1.1 bjh21 bExp = extractFloat32Exp( b ); 1916 1.1 bjh21 bSign = extractFloat32Sign( b ); 1917 1.1 bjh21 zSign = aSign ^ bSign; 1918 1.1 bjh21 if ( aExp == 0xFF ) { 1919 1.1 bjh21 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { 1920 1.1 bjh21 return propagateFloat32NaN( a, b ); 1921 1.1 bjh21 } 1922 1.1 bjh21 if ( ( bExp | bSig ) == 0 ) { 1923 1.1 bjh21 float_raise( float_flag_invalid ); 1924 1.1 bjh21 return float32_default_nan; 1925 1.1 bjh21 } 1926 1.1 bjh21 return packFloat32( zSign, 0xFF, 0 ); 1927 1.1 bjh21 } 1928 1.1 bjh21 if ( bExp == 0xFF ) { 1929 1.1 bjh21 if ( bSig ) return propagateFloat32NaN( a, b ); 1930 1.1 bjh21 if ( ( aExp | aSig ) == 0 ) { 1931 1.1 bjh21 float_raise( float_flag_invalid ); 1932 1.1 bjh21 return float32_default_nan; 1933 1.1 bjh21 } 1934 1.1 bjh21 return packFloat32( zSign, 0xFF, 0 ); 1935 1.1 bjh21 } 1936 1.1 bjh21 if ( aExp == 0 ) { 1937 1.1 bjh21 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); 1938 1.1 bjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1939 1.1 bjh21 } 1940 1.1 bjh21 if ( bExp == 0 ) { 1941 1.1 bjh21 if ( bSig == 0 ) return packFloat32( zSign, 0, 0 ); 1942 1.1 bjh21 normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 1943 1.1 bjh21 } 1944 1.1 bjh21 zExp = aExp + bExp - 0x7F; 1945 1.1 bjh21 aSig = ( aSig | 0x00800000 )<<7; 1946 1.1 bjh21 bSig = ( bSig | 0x00800000 )<<8; 1947 1.1 bjh21 shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 ); 1948 1.10 christos zSig = (bits32)zSig64; 1949 1.1 bjh21 if ( 0 <= (sbits32) ( zSig<<1 ) ) { 1950 1.1 bjh21 zSig <<= 1; 1951 1.1 bjh21 --zExp; 1952 1.1 bjh21 } 1953 1.1 bjh21 return roundAndPackFloat32( zSign, zExp, zSig ); 1954 1.1 bjh21 1955 1.1 bjh21 } 1956 1.1 bjh21 1957 1.1 bjh21 /* 1958 1.1 bjh21 ------------------------------------------------------------------------------- 1959 1.1 bjh21 Returns the result of dividing the single-precision floating-point value `a' 1960 1.1 bjh21 by the corresponding value `b'. The operation is performed according to the 1961 1.1 bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1962 1.1 bjh21 ------------------------------------------------------------------------------- 1963 1.1 bjh21 */ 1964 1.1 bjh21 float32 float32_div( float32 a, float32 b ) 1965 1.1 bjh21 { 1966 1.1 bjh21 flag aSign, bSign, zSign; 1967 1.1 bjh21 int16 aExp, bExp, zExp; 1968 1.1 bjh21 bits32 aSig, bSig, zSig; 1969 1.1 bjh21 1970 1.1 bjh21 aSig = extractFloat32Frac( a ); 1971 1.1 bjh21 aExp = extractFloat32Exp( a ); 1972 1.1 bjh21 aSign = extractFloat32Sign( a ); 1973 1.1 bjh21 bSig = extractFloat32Frac( b ); 1974 1.1 bjh21 bExp = extractFloat32Exp( b ); 1975 1.1 bjh21 bSign = extractFloat32Sign( b ); 1976 1.1 bjh21 zSign = aSign ^ bSign; 1977 1.1 bjh21 if ( aExp == 0xFF ) { 1978 1.1 bjh21 if ( aSig ) return propagateFloat32NaN( a, b ); 1979 1.1 bjh21 if ( bExp == 0xFF ) { 1980 1.1 bjh21 if ( bSig ) return propagateFloat32NaN( a, b ); 1981 1.1 bjh21 float_raise( float_flag_invalid ); 1982 1.1 bjh21 return float32_default_nan; 1983 1.1 bjh21 } 1984 1.1 bjh21 return packFloat32( zSign, 0xFF, 0 ); 1985 1.1 bjh21 } 1986 1.1 bjh21 if ( bExp == 0xFF ) { 1987 1.1 bjh21 if ( bSig ) return propagateFloat32NaN( a, b ); 1988 1.1 bjh21 return packFloat32( zSign, 0, 0 ); 1989 1.1 bjh21 } 1990 1.1 bjh21 if ( bExp == 0 ) { 1991 1.1 bjh21 if ( bSig == 0 ) { 1992 1.1 bjh21 if ( ( aExp | aSig ) == 0 ) { 1993 1.1 bjh21 float_raise( float_flag_invalid ); 1994 1.1 bjh21 return float32_default_nan; 1995 1.1 bjh21 } 1996 1.1 bjh21 float_raise( float_flag_divbyzero ); 1997 1.1 bjh21 return packFloat32( zSign, 0xFF, 0 ); 1998 1.1 bjh21 } 1999 1.1 bjh21 normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 2000 1.1 bjh21 } 2001 1.1 bjh21 if ( aExp == 0 ) { 2002 1.1 bjh21 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); 2003 1.1 bjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 2004 1.1 bjh21 } 2005 1.1 bjh21 zExp = aExp - bExp + 0x7D; 2006 1.1 bjh21 aSig = ( aSig | 0x00800000 )<<7; 2007 1.1 bjh21 bSig = ( bSig | 0x00800000 )<<8; 2008 1.1 bjh21 if ( bSig <= ( aSig + aSig ) ) { 2009 1.1 bjh21 aSig >>= 1; 2010 1.1 bjh21 ++zExp; 2011 1.1 bjh21 } 2012 1.11 matt zSig = (bits32)((((bits64) aSig) << 32) / bSig); 2013 1.1 bjh21 if ( ( zSig & 0x3F ) == 0 ) { 2014 1.1 bjh21 zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 ); 2015 1.1 bjh21 } 2016 1.1 bjh21 return roundAndPackFloat32( zSign, zExp, zSig ); 2017 1.1 bjh21 2018 1.1 bjh21 } 2019 1.1 bjh21 2020 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 2021 1.1 bjh21 /* 2022 1.1 bjh21 ------------------------------------------------------------------------------- 2023 1.1 bjh21 Returns the remainder of the single-precision floating-point value `a' 2024 1.1 bjh21 with respect to the corresponding value `b'. The operation is performed 2025 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2026 1.1 bjh21 ------------------------------------------------------------------------------- 2027 1.1 bjh21 */ 2028 1.1 bjh21 float32 float32_rem( float32 a, float32 b ) 2029 1.1 bjh21 { 2030 1.1 bjh21 flag aSign, bSign, zSign; 2031 1.1 bjh21 int16 aExp, bExp, expDiff; 2032 1.1 bjh21 bits32 aSig, bSig; 2033 1.1 bjh21 bits32 q; 2034 1.1 bjh21 bits64 aSig64, bSig64, q64; 2035 1.1 bjh21 bits32 alternateASig; 2036 1.1 bjh21 sbits32 sigMean; 2037 1.1 bjh21 2038 1.1 bjh21 aSig = extractFloat32Frac( a ); 2039 1.1 bjh21 aExp = extractFloat32Exp( a ); 2040 1.1 bjh21 aSign = extractFloat32Sign( a ); 2041 1.1 bjh21 bSig = extractFloat32Frac( b ); 2042 1.1 bjh21 bExp = extractFloat32Exp( b ); 2043 1.1 bjh21 bSign = extractFloat32Sign( b ); 2044 1.1 bjh21 if ( aExp == 0xFF ) { 2045 1.1 bjh21 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { 2046 1.1 bjh21 return propagateFloat32NaN( a, b ); 2047 1.1 bjh21 } 2048 1.1 bjh21 float_raise( float_flag_invalid ); 2049 1.1 bjh21 return float32_default_nan; 2050 1.1 bjh21 } 2051 1.1 bjh21 if ( bExp == 0xFF ) { 2052 1.1 bjh21 if ( bSig ) return propagateFloat32NaN( a, b ); 2053 1.1 bjh21 return a; 2054 1.1 bjh21 } 2055 1.1 bjh21 if ( bExp == 0 ) { 2056 1.1 bjh21 if ( bSig == 0 ) { 2057 1.1 bjh21 float_raise( float_flag_invalid ); 2058 1.1 bjh21 return float32_default_nan; 2059 1.1 bjh21 } 2060 1.1 bjh21 normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 2061 1.1 bjh21 } 2062 1.1 bjh21 if ( aExp == 0 ) { 2063 1.1 bjh21 if ( aSig == 0 ) return a; 2064 1.1 bjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 2065 1.1 bjh21 } 2066 1.1 bjh21 expDiff = aExp - bExp; 2067 1.1 bjh21 aSig |= 0x00800000; 2068 1.1 bjh21 bSig |= 0x00800000; 2069 1.1 bjh21 if ( expDiff < 32 ) { 2070 1.1 bjh21 aSig <<= 8; 2071 1.1 bjh21 bSig <<= 8; 2072 1.1 bjh21 if ( expDiff < 0 ) { 2073 1.1 bjh21 if ( expDiff < -1 ) return a; 2074 1.1 bjh21 aSig >>= 1; 2075 1.1 bjh21 } 2076 1.1 bjh21 q = ( bSig <= aSig ); 2077 1.1 bjh21 if ( q ) aSig -= bSig; 2078 1.1 bjh21 if ( 0 < expDiff ) { 2079 1.1 bjh21 q = ( ( (bits64) aSig )<<32 ) / bSig; 2080 1.1 bjh21 q >>= 32 - expDiff; 2081 1.1 bjh21 bSig >>= 2; 2082 1.1 bjh21 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; 2083 1.1 bjh21 } 2084 1.1 bjh21 else { 2085 1.1 bjh21 aSig >>= 2; 2086 1.1 bjh21 bSig >>= 2; 2087 1.1 bjh21 } 2088 1.1 bjh21 } 2089 1.1 bjh21 else { 2090 1.1 bjh21 if ( bSig <= aSig ) aSig -= bSig; 2091 1.1 bjh21 aSig64 = ( (bits64) aSig )<<40; 2092 1.1 bjh21 bSig64 = ( (bits64) bSig )<<40; 2093 1.1 bjh21 expDiff -= 64; 2094 1.1 bjh21 while ( 0 < expDiff ) { 2095 1.1 bjh21 q64 = estimateDiv128To64( aSig64, 0, bSig64 ); 2096 1.1 bjh21 q64 = ( 2 < q64 ) ? q64 - 2 : 0; 2097 1.1 bjh21 aSig64 = - ( ( bSig * q64 )<<38 ); 2098 1.1 bjh21 expDiff -= 62; 2099 1.1 bjh21 } 2100 1.1 bjh21 expDiff += 64; 2101 1.1 bjh21 q64 = estimateDiv128To64( aSig64, 0, bSig64 ); 2102 1.1 bjh21 q64 = ( 2 < q64 ) ? q64 - 2 : 0; 2103 1.1 bjh21 q = q64>>( 64 - expDiff ); 2104 1.1 bjh21 bSig <<= 6; 2105 1.1 bjh21 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q; 2106 1.1 bjh21 } 2107 1.1 bjh21 do { 2108 1.1 bjh21 alternateASig = aSig; 2109 1.1 bjh21 ++q; 2110 1.1 bjh21 aSig -= bSig; 2111 1.1 bjh21 } while ( 0 <= (sbits32) aSig ); 2112 1.1 bjh21 sigMean = aSig + alternateASig; 2113 1.1 bjh21 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { 2114 1.1 bjh21 aSig = alternateASig; 2115 1.1 bjh21 } 2116 1.1 bjh21 zSign = ( (sbits32) aSig < 0 ); 2117 1.1 bjh21 if ( zSign ) aSig = - aSig; 2118 1.1 bjh21 return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig ); 2119 1.1 bjh21 2120 1.1 bjh21 } 2121 1.1 bjh21 #endif /* !SOFTFLOAT_FOR_GCC */ 2122 1.1 bjh21 2123 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 2124 1.1 bjh21 /* 2125 1.1 bjh21 ------------------------------------------------------------------------------- 2126 1.1 bjh21 Returns the square root of the single-precision floating-point value `a'. 2127 1.1 bjh21 The operation is performed according to the IEC/IEEE Standard for Binary 2128 1.1 bjh21 Floating-Point Arithmetic. 2129 1.1 bjh21 ------------------------------------------------------------------------------- 2130 1.1 bjh21 */ 2131 1.1 bjh21 float32 float32_sqrt( float32 a ) 2132 1.1 bjh21 { 2133 1.1 bjh21 flag aSign; 2134 1.1 bjh21 int16 aExp, zExp; 2135 1.1 bjh21 bits32 aSig, zSig; 2136 1.1 bjh21 bits64 rem, term; 2137 1.1 bjh21 2138 1.1 bjh21 aSig = extractFloat32Frac( a ); 2139 1.1 bjh21 aExp = extractFloat32Exp( a ); 2140 1.1 bjh21 aSign = extractFloat32Sign( a ); 2141 1.1 bjh21 if ( aExp == 0xFF ) { 2142 1.1 bjh21 if ( aSig ) return propagateFloat32NaN( a, 0 ); 2143 1.1 bjh21 if ( ! aSign ) return a; 2144 1.1 bjh21 float_raise( float_flag_invalid ); 2145 1.1 bjh21 return float32_default_nan; 2146 1.1 bjh21 } 2147 1.1 bjh21 if ( aSign ) { 2148 1.1 bjh21 if ( ( aExp | aSig ) == 0 ) return a; 2149 1.1 bjh21 float_raise( float_flag_invalid ); 2150 1.1 bjh21 return float32_default_nan; 2151 1.1 bjh21 } 2152 1.1 bjh21 if ( aExp == 0 ) { 2153 1.1 bjh21 if ( aSig == 0 ) return 0; 2154 1.1 bjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 2155 1.1 bjh21 } 2156 1.1 bjh21 zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E; 2157 1.1 bjh21 aSig = ( aSig | 0x00800000 )<<8; 2158 1.1 bjh21 zSig = estimateSqrt32( aExp, aSig ) + 2; 2159 1.1 bjh21 if ( ( zSig & 0x7F ) <= 5 ) { 2160 1.1 bjh21 if ( zSig < 2 ) { 2161 1.1 bjh21 zSig = 0x7FFFFFFF; 2162 1.1 bjh21 goto roundAndPack; 2163 1.1 bjh21 } 2164 1.1 bjh21 aSig >>= aExp & 1; 2165 1.1 bjh21 term = ( (bits64) zSig ) * zSig; 2166 1.1 bjh21 rem = ( ( (bits64) aSig )<<32 ) - term; 2167 1.1 bjh21 while ( (sbits64) rem < 0 ) { 2168 1.1 bjh21 --zSig; 2169 1.1 bjh21 rem += ( ( (bits64) zSig )<<1 ) | 1; 2170 1.1 bjh21 } 2171 1.1 bjh21 zSig |= ( rem != 0 ); 2172 1.1 bjh21 } 2173 1.1 bjh21 shift32RightJamming( zSig, 1, &zSig ); 2174 1.1 bjh21 roundAndPack: 2175 1.1 bjh21 return roundAndPackFloat32( 0, zExp, zSig ); 2176 1.1 bjh21 2177 1.1 bjh21 } 2178 1.1 bjh21 #endif /* !SOFTFLOAT_FOR_GCC */ 2179 1.1 bjh21 2180 1.1 bjh21 /* 2181 1.1 bjh21 ------------------------------------------------------------------------------- 2182 1.1 bjh21 Returns 1 if the single-precision floating-point value `a' is equal to 2183 1.1 bjh21 the corresponding value `b', and 0 otherwise. The comparison is performed 2184 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2185 1.1 bjh21 ------------------------------------------------------------------------------- 2186 1.1 bjh21 */ 2187 1.1 bjh21 flag float32_eq( float32 a, float32 b ) 2188 1.1 bjh21 { 2189 1.1 bjh21 2190 1.1 bjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 2191 1.1 bjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 2192 1.1 bjh21 ) { 2193 1.1 bjh21 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { 2194 1.1 bjh21 float_raise( float_flag_invalid ); 2195 1.1 bjh21 } 2196 1.1 bjh21 return 0; 2197 1.1 bjh21 } 2198 1.1 bjh21 return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); 2199 1.1 bjh21 2200 1.1 bjh21 } 2201 1.1 bjh21 2202 1.1 bjh21 /* 2203 1.1 bjh21 ------------------------------------------------------------------------------- 2204 1.1 bjh21 Returns 1 if the single-precision floating-point value `a' is less than 2205 1.1 bjh21 or equal to the corresponding value `b', and 0 otherwise. The comparison 2206 1.1 bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point 2207 1.1 bjh21 Arithmetic. 2208 1.1 bjh21 ------------------------------------------------------------------------------- 2209 1.1 bjh21 */ 2210 1.1 bjh21 flag float32_le( float32 a, float32 b ) 2211 1.1 bjh21 { 2212 1.1 bjh21 flag aSign, bSign; 2213 1.1 bjh21 2214 1.1 bjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 2215 1.1 bjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 2216 1.1 bjh21 ) { 2217 1.1 bjh21 float_raise( float_flag_invalid ); 2218 1.1 bjh21 return 0; 2219 1.1 bjh21 } 2220 1.1 bjh21 aSign = extractFloat32Sign( a ); 2221 1.1 bjh21 bSign = extractFloat32Sign( b ); 2222 1.1 bjh21 if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); 2223 1.1 bjh21 return ( a == b ) || ( aSign ^ ( a < b ) ); 2224 1.1 bjh21 2225 1.1 bjh21 } 2226 1.1 bjh21 2227 1.1 bjh21 /* 2228 1.1 bjh21 ------------------------------------------------------------------------------- 2229 1.1 bjh21 Returns 1 if the single-precision floating-point value `a' is less than 2230 1.1 bjh21 the corresponding value `b', and 0 otherwise. The comparison is performed 2231 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2232 1.1 bjh21 ------------------------------------------------------------------------------- 2233 1.1 bjh21 */ 2234 1.1 bjh21 flag float32_lt( float32 a, float32 b ) 2235 1.1 bjh21 { 2236 1.1 bjh21 flag aSign, bSign; 2237 1.1 bjh21 2238 1.1 bjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 2239 1.1 bjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 2240 1.1 bjh21 ) { 2241 1.1 bjh21 float_raise( float_flag_invalid ); 2242 1.1 bjh21 return 0; 2243 1.1 bjh21 } 2244 1.1 bjh21 aSign = extractFloat32Sign( a ); 2245 1.1 bjh21 bSign = extractFloat32Sign( b ); 2246 1.1 bjh21 if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); 2247 1.1 bjh21 return ( a != b ) && ( aSign ^ ( a < b ) ); 2248 1.1 bjh21 2249 1.1 bjh21 } 2250 1.1 bjh21 2251 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 2252 1.1 bjh21 /* 2253 1.1 bjh21 ------------------------------------------------------------------------------- 2254 1.1 bjh21 Returns 1 if the single-precision floating-point value `a' is equal to 2255 1.1 bjh21 the corresponding value `b', and 0 otherwise. The invalid exception is 2256 1.1 bjh21 raised if either operand is a NaN. Otherwise, the comparison is performed 2257 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2258 1.1 bjh21 ------------------------------------------------------------------------------- 2259 1.1 bjh21 */ 2260 1.1 bjh21 flag float32_eq_signaling( float32 a, float32 b ) 2261 1.1 bjh21 { 2262 1.1 bjh21 2263 1.1 bjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 2264 1.1 bjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 2265 1.1 bjh21 ) { 2266 1.1 bjh21 float_raise( float_flag_invalid ); 2267 1.1 bjh21 return 0; 2268 1.1 bjh21 } 2269 1.1 bjh21 return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); 2270 1.1 bjh21 2271 1.1 bjh21 } 2272 1.1 bjh21 2273 1.1 bjh21 /* 2274 1.1 bjh21 ------------------------------------------------------------------------------- 2275 1.1 bjh21 Returns 1 if the single-precision floating-point value `a' is less than or 2276 1.1 bjh21 equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not 2277 1.1 bjh21 cause an exception. Otherwise, the comparison is performed according to the 2278 1.1 bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2279 1.1 bjh21 ------------------------------------------------------------------------------- 2280 1.1 bjh21 */ 2281 1.1 bjh21 flag float32_le_quiet( float32 a, float32 b ) 2282 1.1 bjh21 { 2283 1.1 bjh21 flag aSign, bSign; 2284 1.1 bjh21 2285 1.1 bjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 2286 1.1 bjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 2287 1.1 bjh21 ) { 2288 1.1 bjh21 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { 2289 1.1 bjh21 float_raise( float_flag_invalid ); 2290 1.1 bjh21 } 2291 1.1 bjh21 return 0; 2292 1.1 bjh21 } 2293 1.1 bjh21 aSign = extractFloat32Sign( a ); 2294 1.1 bjh21 bSign = extractFloat32Sign( b ); 2295 1.1 bjh21 if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); 2296 1.1 bjh21 return ( a == b ) || ( aSign ^ ( a < b ) ); 2297 1.1 bjh21 2298 1.1 bjh21 } 2299 1.1 bjh21 2300 1.1 bjh21 /* 2301 1.1 bjh21 ------------------------------------------------------------------------------- 2302 1.1 bjh21 Returns 1 if the single-precision floating-point value `a' is less than 2303 1.1 bjh21 the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an 2304 1.1 bjh21 exception. Otherwise, the comparison is performed according to the IEC/IEEE 2305 1.1 bjh21 Standard for Binary Floating-Point Arithmetic. 2306 1.1 bjh21 ------------------------------------------------------------------------------- 2307 1.1 bjh21 */ 2308 1.1 bjh21 flag float32_lt_quiet( float32 a, float32 b ) 2309 1.1 bjh21 { 2310 1.1 bjh21 flag aSign, bSign; 2311 1.1 bjh21 2312 1.1 bjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 2313 1.1 bjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 2314 1.1 bjh21 ) { 2315 1.1 bjh21 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { 2316 1.1 bjh21 float_raise( float_flag_invalid ); 2317 1.1 bjh21 } 2318 1.1 bjh21 return 0; 2319 1.1 bjh21 } 2320 1.1 bjh21 aSign = extractFloat32Sign( a ); 2321 1.1 bjh21 bSign = extractFloat32Sign( b ); 2322 1.1 bjh21 if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); 2323 1.1 bjh21 return ( a != b ) && ( aSign ^ ( a < b ) ); 2324 1.1 bjh21 2325 1.1 bjh21 } 2326 1.1 bjh21 #endif /* !SOFTFLOAT_FOR_GCC */ 2327 1.1 bjh21 2328 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 2329 1.1 bjh21 /* 2330 1.1 bjh21 ------------------------------------------------------------------------------- 2331 1.1 bjh21 Returns the result of converting the double-precision floating-point value 2332 1.1 bjh21 `a' to the 32-bit two's complement integer format. The conversion is 2333 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 2334 1.1 bjh21 Arithmetic---which means in particular that the conversion is rounded 2335 1.1 bjh21 according to the current rounding mode. If `a' is a NaN, the largest 2336 1.1 bjh21 positive integer is returned. Otherwise, if the conversion overflows, the 2337 1.1 bjh21 largest integer with the same sign as `a' is returned. 2338 1.1 bjh21 ------------------------------------------------------------------------------- 2339 1.1 bjh21 */ 2340 1.1 bjh21 int32 float64_to_int32( float64 a ) 2341 1.1 bjh21 { 2342 1.1 bjh21 flag aSign; 2343 1.1 bjh21 int16 aExp, shiftCount; 2344 1.1 bjh21 bits64 aSig; 2345 1.1 bjh21 2346 1.1 bjh21 aSig = extractFloat64Frac( a ); 2347 1.1 bjh21 aExp = extractFloat64Exp( a ); 2348 1.1 bjh21 aSign = extractFloat64Sign( a ); 2349 1.1 bjh21 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; 2350 1.1 bjh21 if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); 2351 1.1 bjh21 shiftCount = 0x42C - aExp; 2352 1.1 bjh21 if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); 2353 1.1 bjh21 return roundAndPackInt32( aSign, aSig ); 2354 1.1 bjh21 2355 1.1 bjh21 } 2356 1.1 bjh21 #endif /* !SOFTFLOAT_FOR_GCC */ 2357 1.1 bjh21 2358 1.1 bjh21 /* 2359 1.1 bjh21 ------------------------------------------------------------------------------- 2360 1.1 bjh21 Returns the result of converting the double-precision floating-point value 2361 1.1 bjh21 `a' to the 32-bit two's complement integer format. The conversion is 2362 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 2363 1.1 bjh21 Arithmetic, except that the conversion is always rounded toward zero. 2364 1.1 bjh21 If `a' is a NaN, the largest positive integer is returned. Otherwise, if 2365 1.1 bjh21 the conversion overflows, the largest integer with the same sign as `a' is 2366 1.1 bjh21 returned. 2367 1.1 bjh21 ------------------------------------------------------------------------------- 2368 1.1 bjh21 */ 2369 1.1 bjh21 int32 float64_to_int32_round_to_zero( float64 a ) 2370 1.1 bjh21 { 2371 1.1 bjh21 flag aSign; 2372 1.1 bjh21 int16 aExp, shiftCount; 2373 1.1 bjh21 bits64 aSig, savedASig; 2374 1.1 bjh21 int32 z; 2375 1.1 bjh21 2376 1.1 bjh21 aSig = extractFloat64Frac( a ); 2377 1.1 bjh21 aExp = extractFloat64Exp( a ); 2378 1.1 bjh21 aSign = extractFloat64Sign( a ); 2379 1.1 bjh21 if ( 0x41E < aExp ) { 2380 1.1 bjh21 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; 2381 1.1 bjh21 goto invalid; 2382 1.1 bjh21 } 2383 1.1 bjh21 else if ( aExp < 0x3FF ) { 2384 1.12 matt if ( aExp || aSig ) set_float_exception_inexact_flag(); 2385 1.1 bjh21 return 0; 2386 1.1 bjh21 } 2387 1.1 bjh21 aSig |= LIT64( 0x0010000000000000 ); 2388 1.1 bjh21 shiftCount = 0x433 - aExp; 2389 1.1 bjh21 savedASig = aSig; 2390 1.1 bjh21 aSig >>= shiftCount; 2391 1.10 christos z = (int32)aSig; 2392 1.1 bjh21 if ( aSign ) z = - z; 2393 1.1 bjh21 if ( ( z < 0 ) ^ aSign ) { 2394 1.1 bjh21 invalid: 2395 1.1 bjh21 float_raise( float_flag_invalid ); 2396 1.1 bjh21 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 2397 1.1 bjh21 } 2398 1.1 bjh21 if ( ( aSig<<shiftCount ) != savedASig ) { 2399 1.12 matt set_float_exception_inexact_flag(); 2400 1.1 bjh21 } 2401 1.1 bjh21 return z; 2402 1.1 bjh21 2403 1.1 bjh21 } 2404 1.1 bjh21 2405 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 2406 1.1 bjh21 /* 2407 1.1 bjh21 ------------------------------------------------------------------------------- 2408 1.1 bjh21 Returns the result of converting the double-precision floating-point value 2409 1.1 bjh21 `a' to the 64-bit two's complement integer format. The conversion is 2410 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 2411 1.1 bjh21 Arithmetic---which means in particular that the conversion is rounded 2412 1.1 bjh21 according to the current rounding mode. If `a' is a NaN, the largest 2413 1.1 bjh21 positive integer is returned. Otherwise, if the conversion overflows, the 2414 1.1 bjh21 largest integer with the same sign as `a' is returned. 2415 1.1 bjh21 ------------------------------------------------------------------------------- 2416 1.1 bjh21 */ 2417 1.1 bjh21 int64 float64_to_int64( float64 a ) 2418 1.1 bjh21 { 2419 1.1 bjh21 flag aSign; 2420 1.1 bjh21 int16 aExp, shiftCount; 2421 1.1 bjh21 bits64 aSig, aSigExtra; 2422 1.1 bjh21 2423 1.1 bjh21 aSig = extractFloat64Frac( a ); 2424 1.1 bjh21 aExp = extractFloat64Exp( a ); 2425 1.1 bjh21 aSign = extractFloat64Sign( a ); 2426 1.1 bjh21 if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); 2427 1.1 bjh21 shiftCount = 0x433 - aExp; 2428 1.1 bjh21 if ( shiftCount <= 0 ) { 2429 1.1 bjh21 if ( 0x43E < aExp ) { 2430 1.1 bjh21 float_raise( float_flag_invalid ); 2431 1.1 bjh21 if ( ! aSign 2432 1.1 bjh21 || ( ( aExp == 0x7FF ) 2433 1.1 bjh21 && ( aSig != LIT64( 0x0010000000000000 ) ) ) 2434 1.1 bjh21 ) { 2435 1.1 bjh21 return LIT64( 0x7FFFFFFFFFFFFFFF ); 2436 1.1 bjh21 } 2437 1.1 bjh21 return (sbits64) LIT64( 0x8000000000000000 ); 2438 1.1 bjh21 } 2439 1.1 bjh21 aSigExtra = 0; 2440 1.1 bjh21 aSig <<= - shiftCount; 2441 1.1 bjh21 } 2442 1.1 bjh21 else { 2443 1.1 bjh21 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra ); 2444 1.1 bjh21 } 2445 1.1 bjh21 return roundAndPackInt64( aSign, aSig, aSigExtra ); 2446 1.1 bjh21 2447 1.1 bjh21 } 2448 1.1 bjh21 2449 1.1 bjh21 /* 2450 1.1 bjh21 ------------------------------------------------------------------------------- 2451 1.1 bjh21 Returns the result of converting the double-precision floating-point value 2452 1.1 bjh21 `a' to the 64-bit two's complement integer format. The conversion is 2453 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 2454 1.1 bjh21 Arithmetic, except that the conversion is always rounded toward zero. 2455 1.1 bjh21 If `a' is a NaN, the largest positive integer is returned. Otherwise, if 2456 1.1 bjh21 the conversion overflows, the largest integer with the same sign as `a' is 2457 1.1 bjh21 returned. 2458 1.1 bjh21 ------------------------------------------------------------------------------- 2459 1.1 bjh21 */ 2460 1.1 bjh21 int64 float64_to_int64_round_to_zero( float64 a ) 2461 1.1 bjh21 { 2462 1.1 bjh21 flag aSign; 2463 1.1 bjh21 int16 aExp, shiftCount; 2464 1.1 bjh21 bits64 aSig; 2465 1.1 bjh21 int64 z; 2466 1.1 bjh21 2467 1.1 bjh21 aSig = extractFloat64Frac( a ); 2468 1.1 bjh21 aExp = extractFloat64Exp( a ); 2469 1.1 bjh21 aSign = extractFloat64Sign( a ); 2470 1.1 bjh21 if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); 2471 1.1 bjh21 shiftCount = aExp - 0x433; 2472 1.1 bjh21 if ( 0 <= shiftCount ) { 2473 1.1 bjh21 if ( 0x43E <= aExp ) { 2474 1.1 bjh21 if ( a != LIT64( 0xC3E0000000000000 ) ) { 2475 1.1 bjh21 float_raise( float_flag_invalid ); 2476 1.1 bjh21 if ( ! aSign 2477 1.1 bjh21 || ( ( aExp == 0x7FF ) 2478 1.1 bjh21 && ( aSig != LIT64( 0x0010000000000000 ) ) ) 2479 1.1 bjh21 ) { 2480 1.1 bjh21 return LIT64( 0x7FFFFFFFFFFFFFFF ); 2481 1.1 bjh21 } 2482 1.1 bjh21 } 2483 1.1 bjh21 return (sbits64) LIT64( 0x8000000000000000 ); 2484 1.1 bjh21 } 2485 1.1 bjh21 z = aSig<<shiftCount; 2486 1.1 bjh21 } 2487 1.1 bjh21 else { 2488 1.1 bjh21 if ( aExp < 0x3FE ) { 2489 1.12 matt if ( aExp | aSig ) set_float_exception_inexact_flag(); 2490 1.1 bjh21 return 0; 2491 1.1 bjh21 } 2492 1.1 bjh21 z = aSig>>( - shiftCount ); 2493 1.1 bjh21 if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) { 2494 1.12 matt set_float_exception_inexact_flag(); 2495 1.1 bjh21 } 2496 1.1 bjh21 } 2497 1.1 bjh21 if ( aSign ) z = - z; 2498 1.1 bjh21 return z; 2499 1.1 bjh21 2500 1.1 bjh21 } 2501 1.1 bjh21 #endif /* !SOFTFLOAT_FOR_GCC */ 2502 1.1 bjh21 2503 1.1 bjh21 /* 2504 1.1 bjh21 ------------------------------------------------------------------------------- 2505 1.1 bjh21 Returns the result of converting the double-precision floating-point value 2506 1.1 bjh21 `a' to the single-precision floating-point format. The conversion is 2507 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 2508 1.1 bjh21 Arithmetic. 2509 1.1 bjh21 ------------------------------------------------------------------------------- 2510 1.1 bjh21 */ 2511 1.1 bjh21 float32 float64_to_float32( float64 a ) 2512 1.1 bjh21 { 2513 1.1 bjh21 flag aSign; 2514 1.1 bjh21 int16 aExp; 2515 1.1 bjh21 bits64 aSig; 2516 1.1 bjh21 bits32 zSig; 2517 1.1 bjh21 2518 1.1 bjh21 aSig = extractFloat64Frac( a ); 2519 1.1 bjh21 aExp = extractFloat64Exp( a ); 2520 1.1 bjh21 aSign = extractFloat64Sign( a ); 2521 1.1 bjh21 if ( aExp == 0x7FF ) { 2522 1.1 bjh21 if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) ); 2523 1.1 bjh21 return packFloat32( aSign, 0xFF, 0 ); 2524 1.1 bjh21 } 2525 1.1 bjh21 shift64RightJamming( aSig, 22, &aSig ); 2526 1.10 christos zSig = (bits32)aSig; 2527 1.1 bjh21 if ( aExp || zSig ) { 2528 1.1 bjh21 zSig |= 0x40000000; 2529 1.1 bjh21 aExp -= 0x381; 2530 1.1 bjh21 } 2531 1.1 bjh21 return roundAndPackFloat32( aSign, aExp, zSig ); 2532 1.1 bjh21 2533 1.1 bjh21 } 2534 1.1 bjh21 2535 1.1 bjh21 #ifdef FLOATX80 2536 1.1 bjh21 2537 1.1 bjh21 /* 2538 1.1 bjh21 ------------------------------------------------------------------------------- 2539 1.1 bjh21 Returns the result of converting the double-precision floating-point value 2540 1.1 bjh21 `a' to the extended double-precision floating-point format. The conversion 2541 1.1 bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point 2542 1.1 bjh21 Arithmetic. 2543 1.1 bjh21 ------------------------------------------------------------------------------- 2544 1.1 bjh21 */ 2545 1.1 bjh21 floatx80 float64_to_floatx80( float64 a ) 2546 1.1 bjh21 { 2547 1.1 bjh21 flag aSign; 2548 1.1 bjh21 int16 aExp; 2549 1.1 bjh21 bits64 aSig; 2550 1.1 bjh21 2551 1.1 bjh21 aSig = extractFloat64Frac( a ); 2552 1.1 bjh21 aExp = extractFloat64Exp( a ); 2553 1.1 bjh21 aSign = extractFloat64Sign( a ); 2554 1.1 bjh21 if ( aExp == 0x7FF ) { 2555 1.1 bjh21 if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) ); 2556 1.1 bjh21 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 2557 1.1 bjh21 } 2558 1.1 bjh21 if ( aExp == 0 ) { 2559 1.1 bjh21 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); 2560 1.1 bjh21 normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 2561 1.1 bjh21 } 2562 1.1 bjh21 return 2563 1.1 bjh21 packFloatx80( 2564 1.1 bjh21 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 ); 2565 1.1 bjh21 2566 1.1 bjh21 } 2567 1.1 bjh21 2568 1.1 bjh21 #endif 2569 1.1 bjh21 2570 1.1 bjh21 #ifdef FLOAT128 2571 1.1 bjh21 2572 1.1 bjh21 /* 2573 1.1 bjh21 ------------------------------------------------------------------------------- 2574 1.1 bjh21 Returns the result of converting the double-precision floating-point value 2575 1.1 bjh21 `a' to the quadruple-precision floating-point format. The conversion is 2576 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 2577 1.1 bjh21 Arithmetic. 2578 1.1 bjh21 ------------------------------------------------------------------------------- 2579 1.1 bjh21 */ 2580 1.1 bjh21 float128 float64_to_float128( float64 a ) 2581 1.1 bjh21 { 2582 1.1 bjh21 flag aSign; 2583 1.1 bjh21 int16 aExp; 2584 1.1 bjh21 bits64 aSig, zSig0, zSig1; 2585 1.1 bjh21 2586 1.1 bjh21 aSig = extractFloat64Frac( a ); 2587 1.1 bjh21 aExp = extractFloat64Exp( a ); 2588 1.1 bjh21 aSign = extractFloat64Sign( a ); 2589 1.1 bjh21 if ( aExp == 0x7FF ) { 2590 1.1 bjh21 if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) ); 2591 1.1 bjh21 return packFloat128( aSign, 0x7FFF, 0, 0 ); 2592 1.1 bjh21 } 2593 1.1 bjh21 if ( aExp == 0 ) { 2594 1.1 bjh21 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); 2595 1.1 bjh21 normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 2596 1.1 bjh21 --aExp; 2597 1.1 bjh21 } 2598 1.1 bjh21 shift128Right( aSig, 0, 4, &zSig0, &zSig1 ); 2599 1.1 bjh21 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 ); 2600 1.1 bjh21 2601 1.1 bjh21 } 2602 1.1 bjh21 2603 1.1 bjh21 #endif 2604 1.1 bjh21 2605 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC 2606 1.1 bjh21 /* 2607 1.1 bjh21 ------------------------------------------------------------------------------- 2608 1.1 bjh21 Rounds the double-precision floating-point value `a' to an integer, and 2609 1.1 bjh21 returns the result as a double-precision floating-point value. The 2610 1.1 bjh21 operation is performed according to the IEC/IEEE Standard for Binary 2611 1.1 bjh21 Floating-Point Arithmetic. 2612 1.1 bjh21 ------------------------------------------------------------------------------- 2613 1.1 bjh21 */ 2614 1.1 bjh21 float64 float64_round_to_int( float64 a ) 2615 1.1 bjh21 { 2616 1.1 bjh21 flag aSign; 2617 1.1 bjh21 int16 aExp; 2618 1.1 bjh21 bits64 lastBitMask, roundBitsMask; 2619 1.1 bjh21 int8 roundingMode; 2620 1.1 bjh21 float64 z; 2621 1.1 bjh21 2622 1.1 bjh21 aExp = extractFloat64Exp( a ); 2623 1.1 bjh21 if ( 0x433 <= aExp ) { 2624 1.1 bjh21 if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) { 2625 1.1 bjh21 return propagateFloat64NaN( a, a ); 2626 1.1 bjh21 } 2627 1.1 bjh21 return a; 2628 1.1 bjh21 } 2629 1.1 bjh21 if ( aExp < 0x3FF ) { 2630 1.1 bjh21 if ( (bits64) ( a<<1 ) == 0 ) return a; 2631 1.12 matt set_float_exception_inexact_flag(); 2632 1.1 bjh21 aSign = extractFloat64Sign( a ); 2633 1.1 bjh21 switch ( float_rounding_mode ) { 2634 1.1 bjh21 case float_round_nearest_even: 2635 1.1 bjh21 if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) { 2636 1.1 bjh21 return packFloat64( aSign, 0x3FF, 0 ); 2637 1.1 bjh21 } 2638 1.1 bjh21 break; 2639 1.1 bjh21 case float_round_to_zero: 2640 1.1 bjh21 break; 2641 1.1 bjh21 case float_round_down: 2642 1.1 bjh21 return aSign ? LIT64( 0xBFF0000000000000 ) : 0; 2643 1.1 bjh21 case float_round_up: 2644 1.1 bjh21 return 2645 1.1 bjh21 aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ); 2646 1.1 bjh21 } 2647 1.1 bjh21 return packFloat64( aSign, 0, 0 ); 2648 1.1 bjh21 } 2649 1.1 bjh21 lastBitMask = 1; 2650 1.1 bjh21 lastBitMask <<= 0x433 - aExp; 2651 1.1 bjh21 roundBitsMask = lastBitMask - 1; 2652 1.1 bjh21 z = a; 2653 1.1 bjh21 roundingMode = float_rounding_mode; 2654 1.1 bjh21 if ( roundingMode == float_round_nearest_even ) { 2655 1.1 bjh21 z += lastBitMask>>1; 2656 1.1 bjh21 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; 2657 1.1 bjh21 } 2658 1.1 bjh21 else if ( roundingMode != float_round_to_zero ) { 2659 1.1 bjh21 if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) { 2660 1.1 bjh21 z += roundBitsMask; 2661 1.1 bjh21 } 2662 1.1 bjh21 } 2663 1.1 bjh21 z &= ~ roundBitsMask; 2664 1.12 matt if ( z != a ) set_float_exception_inexact_flag(); 2665 1.1 bjh21 return z; 2666 1.1 bjh21 2667 1.1 bjh21 } 2668 1.1 bjh21 #endif 2669 1.1 bjh21 2670 1.1 bjh21 /* 2671 1.1 bjh21 ------------------------------------------------------------------------------- 2672 1.1 bjh21 Returns the result of adding the absolute values of the double-precision 2673 1.1 bjh21 floating-point values `a' and `b'. If `zSign' is 1, the sum is negated 2674 1.1 bjh21 before being returned. `zSign' is ignored if the result is a NaN. 2675 1.1 bjh21 The addition is performed according to the IEC/IEEE Standard for Binary 2676 1.1 bjh21 Floating-Point Arithmetic. 2677 1.1 bjh21 ------------------------------------------------------------------------------- 2678 1.1 bjh21 */ 2679 1.1 bjh21 static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) 2680 1.1 bjh21 { 2681 1.1 bjh21 int16 aExp, bExp, zExp; 2682 1.1 bjh21 bits64 aSig, bSig, zSig; 2683 1.1 bjh21 int16 expDiff; 2684 1.1 bjh21 2685 1.1 bjh21 aSig = extractFloat64Frac( a ); 2686 1.1 bjh21 aExp = extractFloat64Exp( a ); 2687 1.1 bjh21 bSig = extractFloat64Frac( b ); 2688 1.1 bjh21 bExp = extractFloat64Exp( b ); 2689 1.1 bjh21 expDiff = aExp - bExp; 2690 1.1 bjh21 aSig <<= 9; 2691 1.1 bjh21 bSig <<= 9; 2692 1.1 bjh21 if ( 0 < expDiff ) { 2693 1.1 bjh21 if ( aExp == 0x7FF ) { 2694 1.1 bjh21 if ( aSig ) return propagateFloat64NaN( a, b ); 2695 1.1 bjh21 return a; 2696 1.1 bjh21 } 2697 1.1 bjh21 if ( bExp == 0 ) { 2698 1.1 bjh21 --expDiff; 2699 1.1 bjh21 } 2700 1.1 bjh21 else { 2701 1.1 bjh21 bSig |= LIT64( 0x2000000000000000 ); 2702 1.1 bjh21 } 2703 1.1 bjh21 shift64RightJamming( bSig, expDiff, &bSig ); 2704 1.1 bjh21 zExp = aExp; 2705 1.1 bjh21 } 2706 1.1 bjh21 else if ( expDiff < 0 ) { 2707 1.1 bjh21 if ( bExp == 0x7FF ) { 2708 1.1 bjh21 if ( bSig ) return propagateFloat64NaN( a, b ); 2709 1.1 bjh21 return packFloat64( zSign, 0x7FF, 0 ); 2710 1.1 bjh21 } 2711 1.1 bjh21 if ( aExp == 0 ) { 2712 1.1 bjh21 ++expDiff; 2713 1.1 bjh21 } 2714 1.1 bjh21 else { 2715 1.1 bjh21 aSig |= LIT64( 0x2000000000000000 ); 2716 1.1 bjh21 } 2717 1.1 bjh21 shift64RightJamming( aSig, - expDiff, &aSig ); 2718 1.1 bjh21 zExp = bExp; 2719 1.1 bjh21 } 2720 1.1 bjh21 else { 2721 1.1 bjh21 if ( aExp == 0x7FF ) { 2722 1.1 bjh21 if ( aSig | bSig ) return propagateFloat64NaN( a, b ); 2723 1.1 bjh21 return a; 2724 1.1 bjh21 } 2725 1.1 bjh21 if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 ); 2726 1.1 bjh21 zSig = LIT64( 0x4000000000000000 ) + aSig + bSig; 2727 1.1 bjh21 zExp = aExp; 2728 1.1 bjh21 goto roundAndPack; 2729 1.1 bjh21 } 2730 1.1 bjh21 aSig |= LIT64( 0x2000000000000000 ); 2731 1.1 bjh21 zSig = ( aSig + bSig )<<1; 2732 1.1 bjh21 --zExp; 2733 1.1 bjh21 if ( (sbits64) zSig < 0 ) { 2734 1.1 bjh21 zSig = aSig + bSig; 2735 1.1 bjh21 ++zExp; 2736 1.1 bjh21 } 2737 1.1 bjh21 roundAndPack: 2738 1.1 bjh21 return roundAndPackFloat64( zSign, zExp, zSig ); 2739 1.1 bjh21 2740 1.1 bjh21 } 2741 1.1 bjh21 2742 1.1 bjh21 /* 2743 1.1 bjh21 ------------------------------------------------------------------------------- 2744 1.1 bjh21 Returns the result of subtracting the absolute values of the double- 2745 1.1 bjh21 precision floating-point values `a' and `b'. If `zSign' is 1, the 2746 1.1 bjh21 difference is negated before being returned. `zSign' is ignored if the 2747 1.1 bjh21 result is a NaN. The subtraction is performed according to the IEC/IEEE 2748 1.1 bjh21 Standard for Binary Floating-Point Arithmetic. 2749 1.1 bjh21 ------------------------------------------------------------------------------- 2750 1.1 bjh21 */ 2751 1.1 bjh21 static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) 2752 1.1 bjh21 { 2753 1.1 bjh21 int16 aExp, bExp, zExp; 2754 1.1 bjh21 bits64 aSig, bSig, zSig; 2755 1.1 bjh21 int16 expDiff; 2756 1.1 bjh21 2757 1.1 bjh21 aSig = extractFloat64Frac( a ); 2758 1.1 bjh21 aExp = extractFloat64Exp( a ); 2759 1.1 bjh21 bSig = extractFloat64Frac( b ); 2760 1.1 bjh21 bExp = extractFloat64Exp( b ); 2761 1.1 bjh21 expDiff = aExp - bExp; 2762 1.1 bjh21 aSig <<= 10; 2763 1.1 bjh21 bSig <<= 10; 2764 1.1 bjh21 if ( 0 < expDiff ) goto aExpBigger; 2765 1.1 bjh21 if ( expDiff < 0 ) goto bExpBigger; 2766 1.1 bjh21 if ( aExp == 0x7FF ) { 2767 1.1 bjh21 if ( aSig | bSig ) return propagateFloat64NaN( a, b ); 2768 1.1 bjh21 float_raise( float_flag_invalid ); 2769 1.1 bjh21 return float64_default_nan; 2770 1.1 bjh21 } 2771 1.1 bjh21 if ( aExp == 0 ) { 2772 1.1 bjh21 aExp = 1; 2773 1.1 bjh21 bExp = 1; 2774 1.1 bjh21 } 2775 1.1 bjh21 if ( bSig < aSig ) goto aBigger; 2776 1.1 bjh21 if ( aSig < bSig ) goto bBigger; 2777 1.1 bjh21 return packFloat64( float_rounding_mode == float_round_down, 0, 0 ); 2778 1.1 bjh21 bExpBigger: 2779 1.1 bjh21 if ( bExp == 0x7FF ) { 2780 1.1 bjh21 if ( bSig ) return propagateFloat64NaN( a, b ); 2781 1.1 bjh21 return packFloat64( zSign ^ 1, 0x7FF, 0 ); 2782 1.1 bjh21 } 2783 1.1 bjh21 if ( aExp == 0 ) { 2784 1.1 bjh21 ++expDiff; 2785 1.1 bjh21 } 2786 1.1 bjh21 else { 2787 1.1 bjh21 aSig |= LIT64( 0x4000000000000000 ); 2788 1.1 bjh21 } 2789 1.1 bjh21 shift64RightJamming( aSig, - expDiff, &aSig ); 2790 1.1 bjh21 bSig |= LIT64( 0x4000000000000000 ); 2791 1.1 bjh21 bBigger: 2792 1.1 bjh21 zSig = bSig - aSig; 2793 1.1 bjh21 zExp = bExp; 2794 1.1 bjh21 zSign ^= 1; 2795 1.1 bjh21 goto normalizeRoundAndPack; 2796 1.1 bjh21 aExpBigger: 2797 1.1 bjh21 if ( aExp == 0x7FF ) { 2798 1.1 bjh21 if ( aSig ) return propagateFloat64NaN( a, b ); 2799 1.1 bjh21 return a; 2800 1.1 bjh21 } 2801 1.1 bjh21 if ( bExp == 0 ) { 2802 1.1 bjh21 --expDiff; 2803 1.1 bjh21 } 2804 1.1 bjh21 else { 2805 1.1 bjh21 bSig |= LIT64( 0x4000000000000000 ); 2806 1.1 bjh21 } 2807 1.1 bjh21 shift64RightJamming( bSig, expDiff, &bSig ); 2808 1.1 bjh21 aSig |= LIT64( 0x4000000000000000 ); 2809 1.1 bjh21 aBigger: 2810 1.1 bjh21 zSig = aSig - bSig; 2811 1.1 bjh21 zExp = aExp; 2812 1.1 bjh21 normalizeRoundAndPack: 2813 1.1 bjh21 --zExp; 2814 1.1 bjh21 return normalizeRoundAndPackFloat64( zSign, zExp, zSig ); 2815 1.1 bjh21 2816 1.1 bjh21 } 2817 1.1 bjh21 2818 1.1 bjh21 /* 2819 1.1 bjh21 ------------------------------------------------------------------------------- 2820 1.1 bjh21 Returns the result of adding the double-precision floating-point values `a' 2821 1.1 bjh21 and `b'. The operation is performed according to the IEC/IEEE Standard for 2822 1.1 bjh21 Binary Floating-Point Arithmetic. 2823 1.1 bjh21 ------------------------------------------------------------------------------- 2824 1.1 bjh21 */ 2825 1.1 bjh21 float64 float64_add( float64 a, float64 b ) 2826 1.1 bjh21 { 2827 1.1 bjh21 flag aSign, bSign; 2828 1.1 bjh21 2829 1.1 bjh21 aSign = extractFloat64Sign( a ); 2830 1.1 bjh21 bSign = extractFloat64Sign( b ); 2831 1.1 bjh21 if ( aSign == bSign ) { 2832 1.1 bjh21 return addFloat64Sigs( a, b, aSign ); 2833 1.1 bjh21 } 2834 1.1 bjh21 else { 2835 1.1 bjh21 return subFloat64Sigs( a, b, aSign ); 2836 1.1 bjh21 } 2837 1.1 bjh21 2838 1.1 bjh21 } 2839 1.1 bjh21 2840 1.1 bjh21 /* 2841 1.1 bjh21 ------------------------------------------------------------------------------- 2842 1.1 bjh21 Returns the result of subtracting the double-precision floating-point values 2843 1.1 bjh21 `a' and `b'. The operation is performed according to the IEC/IEEE Standard 2844 1.1 bjh21 for Binary Floating-Point Arithmetic. 2845 1.1 bjh21 ------------------------------------------------------------------------------- 2846 1.1 bjh21 */ 2847 1.1 bjh21 float64 float64_sub( float64 a, float64 b ) 2848 1.1 bjh21 { 2849 1.1 bjh21 flag aSign, bSign; 2850 1.1 bjh21 2851 1.1 bjh21 aSign = extractFloat64Sign( a ); 2852 1.1 bjh21 bSign = extractFloat64Sign( b ); 2853 1.1 bjh21 if ( aSign == bSign ) { 2854 1.1 bjh21 return subFloat64Sigs( a, b, aSign ); 2855 1.1 bjh21 } 2856 1.1 bjh21 else { 2857 1.1 bjh21 return addFloat64Sigs( a, b, aSign ); 2858 1.1 bjh21 } 2859 1.1 bjh21 2860 1.1 bjh21 } 2861 1.1 bjh21 2862 1.1 bjh21 /* 2863 1.1 bjh21 ------------------------------------------------------------------------------- 2864 1.1 bjh21 Returns the result of multiplying the double-precision floating-point values 2865 1.1 bjh21 `a' and `b'. The operation is performed according to the IEC/IEEE Standard 2866 1.1 bjh21 for Binary Floating-Point Arithmetic. 2867 1.1 bjh21 ------------------------------------------------------------------------------- 2868 1.1 bjh21 */ 2869 1.1 bjh21 float64 float64_mul( float64 a, float64 b ) 2870 1.1 bjh21 { 2871 1.1 bjh21 flag aSign, bSign, zSign; 2872 1.1 bjh21 int16 aExp, bExp, zExp; 2873 1.1 bjh21 bits64 aSig, bSig, zSig0, zSig1; 2874 1.1 bjh21 2875 1.1 bjh21 aSig = extractFloat64Frac( a ); 2876 1.1 bjh21 aExp = extractFloat64Exp( a ); 2877 1.1 bjh21 aSign = extractFloat64Sign( a ); 2878 1.1 bjh21 bSig = extractFloat64Frac( b ); 2879 1.1 bjh21 bExp = extractFloat64Exp( b ); 2880 1.1 bjh21 bSign = extractFloat64Sign( b ); 2881 1.1 bjh21 zSign = aSign ^ bSign; 2882 1.1 bjh21 if ( aExp == 0x7FF ) { 2883 1.1 bjh21 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { 2884 1.1 bjh21 return propagateFloat64NaN( a, b ); 2885 1.1 bjh21 } 2886 1.1 bjh21 if ( ( bExp | bSig ) == 0 ) { 2887 1.1 bjh21 float_raise( float_flag_invalid ); 2888 1.1 bjh21 return float64_default_nan; 2889 1.1 bjh21 } 2890 1.1 bjh21 return packFloat64( zSign, 0x7FF, 0 ); 2891 1.1 bjh21 } 2892 1.1 bjh21 if ( bExp == 0x7FF ) { 2893 1.1 bjh21 if ( bSig ) return propagateFloat64NaN( a, b ); 2894 1.1 bjh21 if ( ( aExp | aSig ) == 0 ) { 2895 1.1 bjh21 float_raise( float_flag_invalid ); 2896 1.1 bjh21 return float64_default_nan; 2897 1.1 bjh21 } 2898 1.1 bjh21 return packFloat64( zSign, 0x7FF, 0 ); 2899 1.1 bjh21 } 2900 1.1 bjh21 if ( aExp == 0 ) { 2901 1.1 bjh21 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); 2902 1.1 bjh21 normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 2903 1.1 bjh21 } 2904 1.1 bjh21 if ( bExp == 0 ) { 2905 1.1 bjh21 if ( bSig == 0 ) return packFloat64( zSign, 0, 0 ); 2906 1.1 bjh21 normalizeFloat64Subnormal( bSig, &bExp, &bSig ); 2907 1.1 bjh21 } 2908 1.1 bjh21 zExp = aExp + bExp - 0x3FF; 2909 1.1 bjh21 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; 2910 1.1 bjh21 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; 2911 1.1 bjh21 mul64To128( aSig, bSig, &zSig0, &zSig1 ); 2912 1.1 bjh21 zSig0 |= ( zSig1 != 0 ); 2913 1.1 bjh21 if ( 0 <= (sbits64) ( zSig0<<1 ) ) { 2914 1.1 bjh21 zSig0 <<= 1; 2915 1.1 bjh21 --zExp; 2916 1.1 bjh21 } 2917 1.1 bjh21 return roundAndPackFloat64( zSign, zExp, zSig0 ); 2918 1.1 bjh21 2919 1.1 bjh21 } 2920 1.1 bjh21 2921 1.1 bjh21 /* 2922 1.1 bjh21 ------------------------------------------------------------------------------- 2923 1.1 bjh21 Returns the result of dividing the double-precision floating-point value `a' 2924 1.1 bjh21 by the corresponding value `b'. The operation is performed according to 2925 1.1 bjh21 the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2926 1.1 bjh21 ------------------------------------------------------------------------------- 2927 1.1 bjh21 */ 2928 1.1 bjh21 float64 float64_div( float64 a, float64 b ) 2929 1.1 bjh21 { 2930 1.1 bjh21 flag aSign, bSign, zSign; 2931 1.1 bjh21 int16 aExp, bExp, zExp; 2932 1.1 bjh21 bits64 aSig, bSig, zSig; 2933 1.1 bjh21 bits64 rem0, rem1; 2934 1.1 bjh21 bits64 term0, term1; 2935 1.1 bjh21 2936 1.1 bjh21 aSig = extractFloat64Frac( a ); 2937 1.1 bjh21 aExp = extractFloat64Exp( a ); 2938 1.1 bjh21 aSign = extractFloat64Sign( a ); 2939 1.1 bjh21 bSig = extractFloat64Frac( b ); 2940 1.1 bjh21 bExp = extractFloat64Exp( b ); 2941 1.1 bjh21 bSign = extractFloat64Sign( b ); 2942 1.1 bjh21 zSign = aSign ^ bSign; 2943 1.1 bjh21 if ( aExp == 0x7FF ) { 2944 1.1 bjh21 if ( aSig ) return propagateFloat64NaN( a, b ); 2945 1.1 bjh21 if ( bExp == 0x7FF ) { 2946 1.1 bjh21 if ( bSig ) return propagateFloat64NaN( a, b ); 2947 1.1 bjh21 float_raise( float_flag_invalid ); 2948 1.1 bjh21 return float64_default_nan; 2949 1.1 bjh21 } 2950 1.1 bjh21 return packFloat64( zSign, 0x7FF, 0 ); 2951 1.1 bjh21 } 2952 1.1 bjh21 if ( bExp == 0x7FF ) { 2953 1.1 bjh21 if ( bSig ) return propagateFloat64NaN( a, b ); 2954 1.1 bjh21 return packFloat64( zSign, 0, 0 ); 2955 1.1 bjh21 } 2956 1.1 bjh21 if ( bExp == 0 ) { 2957 1.1 bjh21 if ( bSig == 0 ) { 2958 1.1 bjh21 if ( ( aExp | aSig ) == 0 ) { 2959 1.1 bjh21 float_raise( float_flag_invalid ); 2960 1.1 bjh21 return float64_default_nan; 2961 1.1 bjh21 } 2962 1.1 bjh21 float_raise( float_flag_divbyzero ); 2963 1.1 bjh21 return packFloat64( zSign, 0x7FF, 0 ); 2964 1.1 bjh21 } 2965 1.1 bjh21 normalizeFloat64Subnormal( bSig, &bExp, &bSig ); 2966 1.1 bjh21 } 2967 1.1 bjh21 if ( aExp == 0 ) { 2968 1.1 bjh21 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); 2969 1.1 bjh21 normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 2970 1.1 bjh21 } 2971 1.1 bjh21 zExp = aExp - bExp + 0x3FD; 2972 1.1 bjh21 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; 2973 1.1 bjh21 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; 2974 1.1 bjh21 if ( bSig <= ( aSig + aSig ) ) { 2975 1.1 bjh21 aSig >>= 1; 2976 1.1 bjh21 ++zExp; 2977 1.1 bjh21 } 2978 1.1 bjh21 zSig = estimateDiv128To64( aSig, 0, bSig ); 2979 1.1 bjh21 if ( ( zSig & 0x1FF ) <= 2 ) { 2980 1.1 bjh21 mul64To128( bSig, zSig, &term0, &term1 ); 2981 1.1 bjh21 sub128( aSig, 0, term0, term1, &rem0, &rem1 ); 2982 1.1 bjh21 while ( (sbits64) rem0 < 0 ) { 2983 1.1 bjh21 --zSig; 2984 1.1 bjh21 add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); 2985 1.1 bjh21 } 2986 1.1 bjh21 zSig |= ( rem1 != 0 ); 2987 1.1 bjh21 } 2988 1.1 bjh21 return roundAndPackFloat64( zSign, zExp, zSig ); 2989 1.1 bjh21 2990 1.1 bjh21 } 2991 1.1 bjh21 2992 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC 2993 1.1 bjh21 /* 2994 1.1 bjh21 ------------------------------------------------------------------------------- 2995 1.1 bjh21 Returns the remainder of the double-precision floating-point value `a' 2996 1.1 bjh21 with respect to the corresponding value `b'. The operation is performed 2997 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2998 1.1 bjh21 ------------------------------------------------------------------------------- 2999 1.1 bjh21 */ 3000 1.1 bjh21 float64 float64_rem( float64 a, float64 b ) 3001 1.1 bjh21 { 3002 1.1 bjh21 flag aSign, bSign, zSign; 3003 1.1 bjh21 int16 aExp, bExp, expDiff; 3004 1.1 bjh21 bits64 aSig, bSig; 3005 1.1 bjh21 bits64 q, alternateASig; 3006 1.1 bjh21 sbits64 sigMean; 3007 1.1 bjh21 3008 1.1 bjh21 aSig = extractFloat64Frac( a ); 3009 1.1 bjh21 aExp = extractFloat64Exp( a ); 3010 1.1 bjh21 aSign = extractFloat64Sign( a ); 3011 1.1 bjh21 bSig = extractFloat64Frac( b ); 3012 1.1 bjh21 bExp = extractFloat64Exp( b ); 3013 1.1 bjh21 bSign = extractFloat64Sign( b ); 3014 1.1 bjh21 if ( aExp == 0x7FF ) { 3015 1.1 bjh21 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { 3016 1.1 bjh21 return propagateFloat64NaN( a, b ); 3017 1.1 bjh21 } 3018 1.1 bjh21 float_raise( float_flag_invalid ); 3019 1.1 bjh21 return float64_default_nan; 3020 1.1 bjh21 } 3021 1.1 bjh21 if ( bExp == 0x7FF ) { 3022 1.1 bjh21 if ( bSig ) return propagateFloat64NaN( a, b ); 3023 1.1 bjh21 return a; 3024 1.1 bjh21 } 3025 1.1 bjh21 if ( bExp == 0 ) { 3026 1.1 bjh21 if ( bSig == 0 ) { 3027 1.1 bjh21 float_raise( float_flag_invalid ); 3028 1.1 bjh21 return float64_default_nan; 3029 1.1 bjh21 } 3030 1.1 bjh21 normalizeFloat64Subnormal( bSig, &bExp, &bSig ); 3031 1.1 bjh21 } 3032 1.1 bjh21 if ( aExp == 0 ) { 3033 1.1 bjh21 if ( aSig == 0 ) return a; 3034 1.1 bjh21 normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 3035 1.1 bjh21 } 3036 1.1 bjh21 expDiff = aExp - bExp; 3037 1.1 bjh21 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11; 3038 1.1 bjh21 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; 3039 1.1 bjh21 if ( expDiff < 0 ) { 3040 1.1 bjh21 if ( expDiff < -1 ) return a; 3041 1.1 bjh21 aSig >>= 1; 3042 1.1 bjh21 } 3043 1.1 bjh21 q = ( bSig <= aSig ); 3044 1.1 bjh21 if ( q ) aSig -= bSig; 3045 1.1 bjh21 expDiff -= 64; 3046 1.1 bjh21 while ( 0 < expDiff ) { 3047 1.1 bjh21 q = estimateDiv128To64( aSig, 0, bSig ); 3048 1.1 bjh21 q = ( 2 < q ) ? q - 2 : 0; 3049 1.1 bjh21 aSig = - ( ( bSig>>2 ) * q ); 3050 1.1 bjh21 expDiff -= 62; 3051 1.1 bjh21 } 3052 1.1 bjh21 expDiff += 64; 3053 1.1 bjh21 if ( 0 < expDiff ) { 3054 1.1 bjh21 q = estimateDiv128To64( aSig, 0, bSig ); 3055 1.1 bjh21 q = ( 2 < q ) ? q - 2 : 0; 3056 1.1 bjh21 q >>= 64 - expDiff; 3057 1.1 bjh21 bSig >>= 2; 3058 1.1 bjh21 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; 3059 1.1 bjh21 } 3060 1.1 bjh21 else { 3061 1.1 bjh21 aSig >>= 2; 3062 1.1 bjh21 bSig >>= 2; 3063 1.1 bjh21 } 3064 1.1 bjh21 do { 3065 1.1 bjh21 alternateASig = aSig; 3066 1.1 bjh21 ++q; 3067 1.1 bjh21 aSig -= bSig; 3068 1.1 bjh21 } while ( 0 <= (sbits64) aSig ); 3069 1.1 bjh21 sigMean = aSig + alternateASig; 3070 1.1 bjh21 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { 3071 1.1 bjh21 aSig = alternateASig; 3072 1.1 bjh21 } 3073 1.1 bjh21 zSign = ( (sbits64) aSig < 0 ); 3074 1.1 bjh21 if ( zSign ) aSig = - aSig; 3075 1.1 bjh21 return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig ); 3076 1.1 bjh21 3077 1.1 bjh21 } 3078 1.1 bjh21 3079 1.1 bjh21 /* 3080 1.1 bjh21 ------------------------------------------------------------------------------- 3081 1.1 bjh21 Returns the square root of the double-precision floating-point value `a'. 3082 1.1 bjh21 The operation is performed according to the IEC/IEEE Standard for Binary 3083 1.1 bjh21 Floating-Point Arithmetic. 3084 1.1 bjh21 ------------------------------------------------------------------------------- 3085 1.1 bjh21 */ 3086 1.1 bjh21 float64 float64_sqrt( float64 a ) 3087 1.1 bjh21 { 3088 1.1 bjh21 flag aSign; 3089 1.1 bjh21 int16 aExp, zExp; 3090 1.1 bjh21 bits64 aSig, zSig, doubleZSig; 3091 1.1 bjh21 bits64 rem0, rem1, term0, term1; 3092 1.1 bjh21 3093 1.1 bjh21 aSig = extractFloat64Frac( a ); 3094 1.1 bjh21 aExp = extractFloat64Exp( a ); 3095 1.1 bjh21 aSign = extractFloat64Sign( a ); 3096 1.1 bjh21 if ( aExp == 0x7FF ) { 3097 1.1 bjh21 if ( aSig ) return propagateFloat64NaN( a, a ); 3098 1.1 bjh21 if ( ! aSign ) return a; 3099 1.1 bjh21 float_raise( float_flag_invalid ); 3100 1.1 bjh21 return float64_default_nan; 3101 1.1 bjh21 } 3102 1.1 bjh21 if ( aSign ) { 3103 1.1 bjh21 if ( ( aExp | aSig ) == 0 ) return a; 3104 1.1 bjh21 float_raise( float_flag_invalid ); 3105 1.1 bjh21 return float64_default_nan; 3106 1.1 bjh21 } 3107 1.1 bjh21 if ( aExp == 0 ) { 3108 1.1 bjh21 if ( aSig == 0 ) return 0; 3109 1.1 bjh21 normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 3110 1.1 bjh21 } 3111 1.1 bjh21 zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE; 3112 1.1 bjh21 aSig |= LIT64( 0x0010000000000000 ); 3113 1.1 bjh21 zSig = estimateSqrt32( aExp, aSig>>21 ); 3114 1.1 bjh21 aSig <<= 9 - ( aExp & 1 ); 3115 1.1 bjh21 zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 ); 3116 1.1 bjh21 if ( ( zSig & 0x1FF ) <= 5 ) { 3117 1.1 bjh21 doubleZSig = zSig<<1; 3118 1.1 bjh21 mul64To128( zSig, zSig, &term0, &term1 ); 3119 1.1 bjh21 sub128( aSig, 0, term0, term1, &rem0, &rem1 ); 3120 1.1 bjh21 while ( (sbits64) rem0 < 0 ) { 3121 1.1 bjh21 --zSig; 3122 1.1 bjh21 doubleZSig -= 2; 3123 1.1 bjh21 add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 ); 3124 1.1 bjh21 } 3125 1.1 bjh21 zSig |= ( ( rem0 | rem1 ) != 0 ); 3126 1.1 bjh21 } 3127 1.1 bjh21 return roundAndPackFloat64( 0, zExp, zSig ); 3128 1.1 bjh21 3129 1.1 bjh21 } 3130 1.1 bjh21 #endif 3131 1.1 bjh21 3132 1.1 bjh21 /* 3133 1.1 bjh21 ------------------------------------------------------------------------------- 3134 1.1 bjh21 Returns 1 if the double-precision floating-point value `a' is equal to the 3135 1.1 bjh21 corresponding value `b', and 0 otherwise. The comparison is performed 3136 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3137 1.1 bjh21 ------------------------------------------------------------------------------- 3138 1.1 bjh21 */ 3139 1.1 bjh21 flag float64_eq( float64 a, float64 b ) 3140 1.1 bjh21 { 3141 1.1 bjh21 3142 1.1 bjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 3143 1.1 bjh21 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 3144 1.1 bjh21 ) { 3145 1.1 bjh21 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 3146 1.1 bjh21 float_raise( float_flag_invalid ); 3147 1.1 bjh21 } 3148 1.1 bjh21 return 0; 3149 1.1 bjh21 } 3150 1.1 bjh21 return ( a == b ) || 3151 1.1 bjh21 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 ); 3152 1.1 bjh21 3153 1.1 bjh21 } 3154 1.1 bjh21 3155 1.1 bjh21 /* 3156 1.1 bjh21 ------------------------------------------------------------------------------- 3157 1.1 bjh21 Returns 1 if the double-precision floating-point value `a' is less than or 3158 1.1 bjh21 equal to the corresponding value `b', and 0 otherwise. The comparison is 3159 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 3160 1.1 bjh21 Arithmetic. 3161 1.1 bjh21 ------------------------------------------------------------------------------- 3162 1.1 bjh21 */ 3163 1.1 bjh21 flag float64_le( float64 a, float64 b ) 3164 1.1 bjh21 { 3165 1.1 bjh21 flag aSign, bSign; 3166 1.1 bjh21 3167 1.1 bjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 3168 1.1 bjh21 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 3169 1.1 bjh21 ) { 3170 1.1 bjh21 float_raise( float_flag_invalid ); 3171 1.1 bjh21 return 0; 3172 1.1 bjh21 } 3173 1.1 bjh21 aSign = extractFloat64Sign( a ); 3174 1.1 bjh21 bSign = extractFloat64Sign( b ); 3175 1.1 bjh21 if ( aSign != bSign ) 3176 1.1 bjh21 return aSign || 3177 1.1 bjh21 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 3178 1.1 bjh21 0 ); 3179 1.1 bjh21 return ( a == b ) || 3180 1.1 bjh21 ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); 3181 1.1 bjh21 3182 1.1 bjh21 } 3183 1.1 bjh21 3184 1.1 bjh21 /* 3185 1.1 bjh21 ------------------------------------------------------------------------------- 3186 1.1 bjh21 Returns 1 if the double-precision floating-point value `a' is less than 3187 1.1 bjh21 the corresponding value `b', and 0 otherwise. The comparison is performed 3188 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3189 1.1 bjh21 ------------------------------------------------------------------------------- 3190 1.1 bjh21 */ 3191 1.1 bjh21 flag float64_lt( float64 a, float64 b ) 3192 1.1 bjh21 { 3193 1.1 bjh21 flag aSign, bSign; 3194 1.1 bjh21 3195 1.1 bjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 3196 1.1 bjh21 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 3197 1.1 bjh21 ) { 3198 1.1 bjh21 float_raise( float_flag_invalid ); 3199 1.1 bjh21 return 0; 3200 1.1 bjh21 } 3201 1.1 bjh21 aSign = extractFloat64Sign( a ); 3202 1.1 bjh21 bSign = extractFloat64Sign( b ); 3203 1.1 bjh21 if ( aSign != bSign ) 3204 1.1 bjh21 return aSign && 3205 1.1 bjh21 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) != 3206 1.1 bjh21 0 ); 3207 1.1 bjh21 return ( a != b ) && 3208 1.1 bjh21 ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); 3209 1.1 bjh21 3210 1.1 bjh21 } 3211 1.1 bjh21 3212 1.1 bjh21 #ifndef SOFTFLOAT_FOR_GCC 3213 1.1 bjh21 /* 3214 1.1 bjh21 ------------------------------------------------------------------------------- 3215 1.1 bjh21 Returns 1 if the double-precision floating-point value `a' is equal to the 3216 1.1 bjh21 corresponding value `b', and 0 otherwise. The invalid exception is raised 3217 1.1 bjh21 if either operand is a NaN. Otherwise, the comparison is performed 3218 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3219 1.1 bjh21 ------------------------------------------------------------------------------- 3220 1.1 bjh21 */ 3221 1.1 bjh21 flag float64_eq_signaling( float64 a, float64 b ) 3222 1.1 bjh21 { 3223 1.1 bjh21 3224 1.1 bjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 3225 1.1 bjh21 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 3226 1.1 bjh21 ) { 3227 1.1 bjh21 float_raise( float_flag_invalid ); 3228 1.1 bjh21 return 0; 3229 1.1 bjh21 } 3230 1.1 bjh21 return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); 3231 1.1 bjh21 3232 1.1 bjh21 } 3233 1.1 bjh21 3234 1.1 bjh21 /* 3235 1.1 bjh21 ------------------------------------------------------------------------------- 3236 1.1 bjh21 Returns 1 if the double-precision floating-point value `a' is less than or 3237 1.1 bjh21 equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not 3238 1.1 bjh21 cause an exception. Otherwise, the comparison is performed according to the 3239 1.1 bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3240 1.1 bjh21 ------------------------------------------------------------------------------- 3241 1.1 bjh21 */ 3242 1.1 bjh21 flag float64_le_quiet( float64 a, float64 b ) 3243 1.1 bjh21 { 3244 1.1 bjh21 flag aSign, bSign; 3245 1.1 bjh21 3246 1.1 bjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 3247 1.1 bjh21 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 3248 1.1 bjh21 ) { 3249 1.1 bjh21 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 3250 1.1 bjh21 float_raise( float_flag_invalid ); 3251 1.1 bjh21 } 3252 1.1 bjh21 return 0; 3253 1.1 bjh21 } 3254 1.1 bjh21 aSign = extractFloat64Sign( a ); 3255 1.1 bjh21 bSign = extractFloat64Sign( b ); 3256 1.1 bjh21 if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); 3257 1.1 bjh21 return ( a == b ) || ( aSign ^ ( a < b ) ); 3258 1.1 bjh21 3259 1.1 bjh21 } 3260 1.1 bjh21 3261 1.1 bjh21 /* 3262 1.1 bjh21 ------------------------------------------------------------------------------- 3263 1.1 bjh21 Returns 1 if the double-precision floating-point value `a' is less than 3264 1.1 bjh21 the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an 3265 1.1 bjh21 exception. Otherwise, the comparison is performed according to the IEC/IEEE 3266 1.1 bjh21 Standard for Binary Floating-Point Arithmetic. 3267 1.1 bjh21 ------------------------------------------------------------------------------- 3268 1.1 bjh21 */ 3269 1.1 bjh21 flag float64_lt_quiet( float64 a, float64 b ) 3270 1.1 bjh21 { 3271 1.1 bjh21 flag aSign, bSign; 3272 1.1 bjh21 3273 1.1 bjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 3274 1.1 bjh21 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 3275 1.1 bjh21 ) { 3276 1.1 bjh21 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 3277 1.1 bjh21 float_raise( float_flag_invalid ); 3278 1.1 bjh21 } 3279 1.1 bjh21 return 0; 3280 1.1 bjh21 } 3281 1.1 bjh21 aSign = extractFloat64Sign( a ); 3282 1.1 bjh21 bSign = extractFloat64Sign( b ); 3283 1.1 bjh21 if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); 3284 1.1 bjh21 return ( a != b ) && ( aSign ^ ( a < b ) ); 3285 1.1 bjh21 3286 1.1 bjh21 } 3287 1.1 bjh21 #endif 3288 1.1 bjh21 3289 1.1 bjh21 #ifdef FLOATX80 3290 1.1 bjh21 3291 1.1 bjh21 /* 3292 1.1 bjh21 ------------------------------------------------------------------------------- 3293 1.1 bjh21 Returns the result of converting the extended double-precision floating- 3294 1.1 bjh21 point value `a' to the 32-bit two's complement integer format. The 3295 1.1 bjh21 conversion is performed according to the IEC/IEEE Standard for Binary 3296 1.1 bjh21 Floating-Point Arithmetic---which means in particular that the conversion 3297 1.1 bjh21 is rounded according to the current rounding mode. If `a' is a NaN, the 3298 1.1 bjh21 largest positive integer is returned. Otherwise, if the conversion 3299 1.1 bjh21 overflows, the largest integer with the same sign as `a' is returned. 3300 1.1 bjh21 ------------------------------------------------------------------------------- 3301 1.1 bjh21 */ 3302 1.1 bjh21 int32 floatx80_to_int32( floatx80 a ) 3303 1.1 bjh21 { 3304 1.1 bjh21 flag aSign; 3305 1.1 bjh21 int32 aExp, shiftCount; 3306 1.1 bjh21 bits64 aSig; 3307 1.1 bjh21 3308 1.1 bjh21 aSig = extractFloatx80Frac( a ); 3309 1.1 bjh21 aExp = extractFloatx80Exp( a ); 3310 1.1 bjh21 aSign = extractFloatx80Sign( a ); 3311 1.1 bjh21 if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; 3312 1.1 bjh21 shiftCount = 0x4037 - aExp; 3313 1.1 bjh21 if ( shiftCount <= 0 ) shiftCount = 1; 3314 1.1 bjh21 shift64RightJamming( aSig, shiftCount, &aSig ); 3315 1.1 bjh21 return roundAndPackInt32( aSign, aSig ); 3316 1.1 bjh21 3317 1.1 bjh21 } 3318 1.1 bjh21 3319 1.1 bjh21 /* 3320 1.1 bjh21 ------------------------------------------------------------------------------- 3321 1.1 bjh21 Returns the result of converting the extended double-precision floating- 3322 1.1 bjh21 point value `a' to the 32-bit two's complement integer format. The 3323 1.1 bjh21 conversion is performed according to the IEC/IEEE Standard for Binary 3324 1.1 bjh21 Floating-Point Arithmetic, except that the conversion is always rounded 3325 1.1 bjh21 toward zero. If `a' is a NaN, the largest positive integer is returned. 3326 1.1 bjh21 Otherwise, if the conversion overflows, the largest integer with the same 3327 1.1 bjh21 sign as `a' is returned. 3328 1.1 bjh21 ------------------------------------------------------------------------------- 3329 1.1 bjh21 */ 3330 1.1 bjh21 int32 floatx80_to_int32_round_to_zero( floatx80 a ) 3331 1.1 bjh21 { 3332 1.1 bjh21 flag aSign; 3333 1.1 bjh21 int32 aExp, shiftCount; 3334 1.1 bjh21 bits64 aSig, savedASig; 3335 1.1 bjh21 int32 z; 3336 1.1 bjh21 3337 1.1 bjh21 aSig = extractFloatx80Frac( a ); 3338 1.1 bjh21 aExp = extractFloatx80Exp( a ); 3339 1.1 bjh21 aSign = extractFloatx80Sign( a ); 3340 1.1 bjh21 if ( 0x401E < aExp ) { 3341 1.1 bjh21 if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; 3342 1.1 bjh21 goto invalid; 3343 1.1 bjh21 } 3344 1.1 bjh21 else if ( aExp < 0x3FFF ) { 3345 1.12 matt if ( aExp || aSig ) set_float_exception_inexact_flag(); 3346 1.1 bjh21 return 0; 3347 1.1 bjh21 } 3348 1.1 bjh21 shiftCount = 0x403E - aExp; 3349 1.1 bjh21 savedASig = aSig; 3350 1.1 bjh21 aSig >>= shiftCount; 3351 1.1 bjh21 z = aSig; 3352 1.1 bjh21 if ( aSign ) z = - z; 3353 1.1 bjh21 if ( ( z < 0 ) ^ aSign ) { 3354 1.1 bjh21 invalid: 3355 1.1 bjh21 float_raise( float_flag_invalid ); 3356 1.1 bjh21 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 3357 1.1 bjh21 } 3358 1.1 bjh21 if ( ( aSig<<shiftCount ) != savedASig ) { 3359 1.12 matt set_float_exception_inexact_flag(); 3360 1.1 bjh21 } 3361 1.1 bjh21 return z; 3362 1.1 bjh21 3363 1.1 bjh21 } 3364 1.1 bjh21 3365 1.1 bjh21 /* 3366 1.1 bjh21 ------------------------------------------------------------------------------- 3367 1.1 bjh21 Returns the result of converting the extended double-precision floating- 3368 1.1 bjh21 point value `a' to the 64-bit two's complement integer format. The 3369 1.1 bjh21 conversion is performed according to the IEC/IEEE Standard for Binary 3370 1.1 bjh21 Floating-Point Arithmetic---which means in particular that the conversion 3371 1.1 bjh21 is rounded according to the current rounding mode. If `a' is a NaN, 3372 1.1 bjh21 the largest positive integer is returned. Otherwise, if the conversion 3373 1.1 bjh21 overflows, the largest integer with the same sign as `a' is returned. 3374 1.1 bjh21 ------------------------------------------------------------------------------- 3375 1.1 bjh21 */ 3376 1.1 bjh21 int64 floatx80_to_int64( floatx80 a ) 3377 1.1 bjh21 { 3378 1.1 bjh21 flag aSign; 3379 1.1 bjh21 int32 aExp, shiftCount; 3380 1.1 bjh21 bits64 aSig, aSigExtra; 3381 1.1 bjh21 3382 1.1 bjh21 aSig = extractFloatx80Frac( a ); 3383 1.1 bjh21 aExp = extractFloatx80Exp( a ); 3384 1.1 bjh21 aSign = extractFloatx80Sign( a ); 3385 1.1 bjh21 shiftCount = 0x403E - aExp; 3386 1.1 bjh21 if ( shiftCount <= 0 ) { 3387 1.1 bjh21 if ( shiftCount ) { 3388 1.1 bjh21 float_raise( float_flag_invalid ); 3389 1.1 bjh21 if ( ! aSign 3390 1.1 bjh21 || ( ( aExp == 0x7FFF ) 3391 1.1 bjh21 && ( aSig != LIT64( 0x8000000000000000 ) ) ) 3392 1.1 bjh21 ) { 3393 1.1 bjh21 return LIT64( 0x7FFFFFFFFFFFFFFF ); 3394 1.1 bjh21 } 3395 1.1 bjh21 return (sbits64) LIT64( 0x8000000000000000 ); 3396 1.1 bjh21 } 3397 1.1 bjh21 aSigExtra = 0; 3398 1.1 bjh21 } 3399 1.1 bjh21 else { 3400 1.1 bjh21 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra ); 3401 1.1 bjh21 } 3402 1.1 bjh21 return roundAndPackInt64( aSign, aSig, aSigExtra ); 3403 1.1 bjh21 3404 1.1 bjh21 } 3405 1.1 bjh21 3406 1.1 bjh21 /* 3407 1.1 bjh21 ------------------------------------------------------------------------------- 3408 1.1 bjh21 Returns the result of converting the extended double-precision floating- 3409 1.1 bjh21 point value `a' to the 64-bit two's complement integer format. The 3410 1.1 bjh21 conversion is performed according to the IEC/IEEE Standard for Binary 3411 1.1 bjh21 Floating-Point Arithmetic, except that the conversion is always rounded 3412 1.1 bjh21 toward zero. If `a' is a NaN, the largest positive integer is returned. 3413 1.1 bjh21 Otherwise, if the conversion overflows, the largest integer with the same 3414 1.1 bjh21 sign as `a' is returned. 3415 1.1 bjh21 ------------------------------------------------------------------------------- 3416 1.1 bjh21 */ 3417 1.1 bjh21 int64 floatx80_to_int64_round_to_zero( floatx80 a ) 3418 1.1 bjh21 { 3419 1.1 bjh21 flag aSign; 3420 1.1 bjh21 int32 aExp, shiftCount; 3421 1.1 bjh21 bits64 aSig; 3422 1.1 bjh21 int64 z; 3423 1.1 bjh21 3424 1.1 bjh21 aSig = extractFloatx80Frac( a ); 3425 1.1 bjh21 aExp = extractFloatx80Exp( a ); 3426 1.1 bjh21 aSign = extractFloatx80Sign( a ); 3427 1.1 bjh21 shiftCount = aExp - 0x403E; 3428 1.1 bjh21 if ( 0 <= shiftCount ) { 3429 1.1 bjh21 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF ); 3430 1.16 nat if ( ( (a.high>>X80SHIFT) != 0xC03E ) || aSig ) { 3431 1.1 bjh21 float_raise( float_flag_invalid ); 3432 1.1 bjh21 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) { 3433 1.1 bjh21 return LIT64( 0x7FFFFFFFFFFFFFFF ); 3434 1.1 bjh21 } 3435 1.1 bjh21 } 3436 1.1 bjh21 return (sbits64) LIT64( 0x8000000000000000 ); 3437 1.1 bjh21 } 3438 1.1 bjh21 else if ( aExp < 0x3FFF ) { 3439 1.12 matt if ( aExp | aSig ) set_float_exception_inexact_flag(); 3440 1.1 bjh21 return 0; 3441 1.1 bjh21 } 3442 1.1 bjh21 z = aSig>>( - shiftCount ); 3443 1.1 bjh21 if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) { 3444 1.12 matt set_float_exception_inexact_flag(); 3445 1.1 bjh21 } 3446 1.1 bjh21 if ( aSign ) z = - z; 3447 1.1 bjh21 return z; 3448 1.1 bjh21 3449 1.1 bjh21 } 3450 1.1 bjh21 3451 1.1 bjh21 /* 3452 1.1 bjh21 ------------------------------------------------------------------------------- 3453 1.1 bjh21 Returns the result of converting the extended double-precision floating- 3454 1.1 bjh21 point value `a' to the single-precision floating-point format. The 3455 1.1 bjh21 conversion is performed according to the IEC/IEEE Standard for Binary 3456 1.1 bjh21 Floating-Point Arithmetic. 3457 1.1 bjh21 ------------------------------------------------------------------------------- 3458 1.1 bjh21 */ 3459 1.1 bjh21 float32 floatx80_to_float32( floatx80 a ) 3460 1.1 bjh21 { 3461 1.1 bjh21 flag aSign; 3462 1.1 bjh21 int32 aExp; 3463 1.1 bjh21 bits64 aSig; 3464 1.1 bjh21 3465 1.1 bjh21 aSig = extractFloatx80Frac( a ); 3466 1.1 bjh21 aExp = extractFloatx80Exp( a ); 3467 1.1 bjh21 aSign = extractFloatx80Sign( a ); 3468 1.1 bjh21 if ( aExp == 0x7FFF ) { 3469 1.1 bjh21 if ( (bits64) ( aSig<<1 ) ) { 3470 1.1 bjh21 return commonNaNToFloat32( floatx80ToCommonNaN( a ) ); 3471 1.1 bjh21 } 3472 1.1 bjh21 return packFloat32( aSign, 0xFF, 0 ); 3473 1.1 bjh21 } 3474 1.1 bjh21 shift64RightJamming( aSig, 33, &aSig ); 3475 1.1 bjh21 if ( aExp || aSig ) aExp -= 0x3F81; 3476 1.1 bjh21 return roundAndPackFloat32( aSign, aExp, aSig ); 3477 1.1 bjh21 3478 1.1 bjh21 } 3479 1.1 bjh21 3480 1.1 bjh21 /* 3481 1.1 bjh21 ------------------------------------------------------------------------------- 3482 1.1 bjh21 Returns the result of converting the extended double-precision floating- 3483 1.1 bjh21 point value `a' to the double-precision floating-point format. The 3484 1.1 bjh21 conversion is performed according to the IEC/IEEE Standard for Binary 3485 1.1 bjh21 Floating-Point Arithmetic. 3486 1.1 bjh21 ------------------------------------------------------------------------------- 3487 1.1 bjh21 */ 3488 1.1 bjh21 float64 floatx80_to_float64( floatx80 a ) 3489 1.1 bjh21 { 3490 1.1 bjh21 flag aSign; 3491 1.1 bjh21 int32 aExp; 3492 1.1 bjh21 bits64 aSig, zSig; 3493 1.1 bjh21 3494 1.1 bjh21 aSig = extractFloatx80Frac( a ); 3495 1.1 bjh21 aExp = extractFloatx80Exp( a ); 3496 1.1 bjh21 aSign = extractFloatx80Sign( a ); 3497 1.1 bjh21 if ( aExp == 0x7FFF ) { 3498 1.1 bjh21 if ( (bits64) ( aSig<<1 ) ) { 3499 1.1 bjh21 return commonNaNToFloat64( floatx80ToCommonNaN( a ) ); 3500 1.1 bjh21 } 3501 1.1 bjh21 return packFloat64( aSign, 0x7FF, 0 ); 3502 1.1 bjh21 } 3503 1.1 bjh21 shift64RightJamming( aSig, 1, &zSig ); 3504 1.1 bjh21 if ( aExp || aSig ) aExp -= 0x3C01; 3505 1.1 bjh21 return roundAndPackFloat64( aSign, aExp, zSig ); 3506 1.1 bjh21 3507 1.1 bjh21 } 3508 1.1 bjh21 3509 1.1 bjh21 #ifdef FLOAT128 3510 1.1 bjh21 3511 1.1 bjh21 /* 3512 1.1 bjh21 ------------------------------------------------------------------------------- 3513 1.1 bjh21 Returns the result of converting the extended double-precision floating- 3514 1.1 bjh21 point value `a' to the quadruple-precision floating-point format. The 3515 1.1 bjh21 conversion is performed according to the IEC/IEEE Standard for Binary 3516 1.1 bjh21 Floating-Point Arithmetic. 3517 1.1 bjh21 ------------------------------------------------------------------------------- 3518 1.1 bjh21 */ 3519 1.1 bjh21 float128 floatx80_to_float128( floatx80 a ) 3520 1.1 bjh21 { 3521 1.1 bjh21 flag aSign; 3522 1.1 bjh21 int16 aExp; 3523 1.1 bjh21 bits64 aSig, zSig0, zSig1; 3524 1.1 bjh21 3525 1.1 bjh21 aSig = extractFloatx80Frac( a ); 3526 1.1 bjh21 aExp = extractFloatx80Exp( a ); 3527 1.1 bjh21 aSign = extractFloatx80Sign( a ); 3528 1.1 bjh21 if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) { 3529 1.1 bjh21 return commonNaNToFloat128( floatx80ToCommonNaN( a ) ); 3530 1.1 bjh21 } 3531 1.1 bjh21 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 ); 3532 1.1 bjh21 return packFloat128( aSign, aExp, zSig0, zSig1 ); 3533 1.1 bjh21 3534 1.1 bjh21 } 3535 1.1 bjh21 3536 1.1 bjh21 #endif 3537 1.1 bjh21 3538 1.1 bjh21 /* 3539 1.1 bjh21 ------------------------------------------------------------------------------- 3540 1.1 bjh21 Rounds the extended double-precision floating-point value `a' to an integer, 3541 1.1 bjh21 and returns the result as an extended quadruple-precision floating-point 3542 1.1 bjh21 value. The operation is performed according to the IEC/IEEE Standard for 3543 1.1 bjh21 Binary Floating-Point Arithmetic. 3544 1.1 bjh21 ------------------------------------------------------------------------------- 3545 1.1 bjh21 */ 3546 1.1 bjh21 floatx80 floatx80_round_to_int( floatx80 a ) 3547 1.1 bjh21 { 3548 1.1 bjh21 flag aSign; 3549 1.1 bjh21 int32 aExp; 3550 1.1 bjh21 bits64 lastBitMask, roundBitsMask; 3551 1.1 bjh21 int8 roundingMode; 3552 1.1 bjh21 floatx80 z; 3553 1.1 bjh21 3554 1.1 bjh21 aExp = extractFloatx80Exp( a ); 3555 1.1 bjh21 if ( 0x403E <= aExp ) { 3556 1.1 bjh21 if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) { 3557 1.1 bjh21 return propagateFloatx80NaN( a, a ); 3558 1.1 bjh21 } 3559 1.1 bjh21 return a; 3560 1.1 bjh21 } 3561 1.1 bjh21 if ( aExp < 0x3FFF ) { 3562 1.1 bjh21 if ( ( aExp == 0 ) 3563 1.1 bjh21 && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { 3564 1.1 bjh21 return a; 3565 1.1 bjh21 } 3566 1.12 matt set_float_exception_inexact_flag(); 3567 1.1 bjh21 aSign = extractFloatx80Sign( a ); 3568 1.1 bjh21 switch ( float_rounding_mode ) { 3569 1.1 bjh21 case float_round_nearest_even: 3570 1.1 bjh21 if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 ) 3571 1.1 bjh21 ) { 3572 1.1 bjh21 return 3573 1.1 bjh21 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); 3574 1.1 bjh21 } 3575 1.1 bjh21 break; 3576 1.1 bjh21 case float_round_to_zero: 3577 1.1 bjh21 break; 3578 1.1 bjh21 case float_round_down: 3579 1.1 bjh21 return 3580 1.1 bjh21 aSign ? 3581 1.1 bjh21 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) ) 3582 1.1 bjh21 : packFloatx80( 0, 0, 0 ); 3583 1.1 bjh21 case float_round_up: 3584 1.1 bjh21 return 3585 1.1 bjh21 aSign ? packFloatx80( 1, 0, 0 ) 3586 1.1 bjh21 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) ); 3587 1.1 bjh21 } 3588 1.1 bjh21 return packFloatx80( aSign, 0, 0 ); 3589 1.1 bjh21 } 3590 1.1 bjh21 lastBitMask = 1; 3591 1.1 bjh21 lastBitMask <<= 0x403E - aExp; 3592 1.1 bjh21 roundBitsMask = lastBitMask - 1; 3593 1.1 bjh21 z = a; 3594 1.1 bjh21 roundingMode = float_rounding_mode; 3595 1.1 bjh21 if ( roundingMode == float_round_nearest_even ) { 3596 1.1 bjh21 z.low += lastBitMask>>1; 3597 1.1 bjh21 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; 3598 1.1 bjh21 } 3599 1.1 bjh21 else if ( roundingMode != float_round_to_zero ) { 3600 1.1 bjh21 if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) { 3601 1.1 bjh21 z.low += roundBitsMask; 3602 1.1 bjh21 } 3603 1.1 bjh21 } 3604 1.1 bjh21 z.low &= ~ roundBitsMask; 3605 1.1 bjh21 if ( z.low == 0 ) { 3606 1.1 bjh21 ++z.high; 3607 1.1 bjh21 z.low = LIT64( 0x8000000000000000 ); 3608 1.1 bjh21 } 3609 1.16 nat z.high <<= X80SHIFT; 3610 1.12 matt if ( z.low != a.low ) set_float_exception_inexact_flag(); 3611 1.1 bjh21 return z; 3612 1.1 bjh21 3613 1.1 bjh21 } 3614 1.1 bjh21 3615 1.1 bjh21 /* 3616 1.1 bjh21 ------------------------------------------------------------------------------- 3617 1.1 bjh21 Returns the result of adding the absolute values of the extended double- 3618 1.1 bjh21 precision floating-point values `a' and `b'. If `zSign' is 1, the sum is 3619 1.1 bjh21 negated before being returned. `zSign' is ignored if the result is a NaN. 3620 1.1 bjh21 The addition is performed according to the IEC/IEEE Standard for Binary 3621 1.1 bjh21 Floating-Point Arithmetic. 3622 1.1 bjh21 ------------------------------------------------------------------------------- 3623 1.1 bjh21 */ 3624 1.1 bjh21 static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) 3625 1.1 bjh21 { 3626 1.1 bjh21 int32 aExp, bExp, zExp; 3627 1.1 bjh21 bits64 aSig, bSig, zSig0, zSig1; 3628 1.1 bjh21 int32 expDiff; 3629 1.1 bjh21 3630 1.1 bjh21 aSig = extractFloatx80Frac( a ); 3631 1.1 bjh21 aExp = extractFloatx80Exp( a ); 3632 1.1 bjh21 bSig = extractFloatx80Frac( b ); 3633 1.1 bjh21 bExp = extractFloatx80Exp( b ); 3634 1.1 bjh21 expDiff = aExp - bExp; 3635 1.1 bjh21 if ( 0 < expDiff ) { 3636 1.1 bjh21 if ( aExp == 0x7FFF ) { 3637 1.1 bjh21 if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3638 1.1 bjh21 return a; 3639 1.1 bjh21 } 3640 1.1 bjh21 if ( bExp == 0 ) --expDiff; 3641 1.1 bjh21 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); 3642 1.1 bjh21 zExp = aExp; 3643 1.1 bjh21 } 3644 1.1 bjh21 else if ( expDiff < 0 ) { 3645 1.1 bjh21 if ( bExp == 0x7FFF ) { 3646 1.1 bjh21 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3647 1.1 bjh21 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3648 1.1 bjh21 } 3649 1.1 bjh21 if ( aExp == 0 ) ++expDiff; 3650 1.1 bjh21 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); 3651 1.1 bjh21 zExp = bExp; 3652 1.1 bjh21 } 3653 1.1 bjh21 else { 3654 1.1 bjh21 if ( aExp == 0x7FFF ) { 3655 1.1 bjh21 if ( (bits64) ( ( aSig | bSig )<<1 ) ) { 3656 1.1 bjh21 return propagateFloatx80NaN( a, b ); 3657 1.1 bjh21 } 3658 1.1 bjh21 return a; 3659 1.1 bjh21 } 3660 1.1 bjh21 zSig1 = 0; 3661 1.1 bjh21 zSig0 = aSig + bSig; 3662 1.1 bjh21 if ( aExp == 0 ) { 3663 1.1 bjh21 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 ); 3664 1.1 bjh21 goto roundAndPack; 3665 1.1 bjh21 } 3666 1.1 bjh21 zExp = aExp; 3667 1.1 bjh21 goto shiftRight1; 3668 1.1 bjh21 } 3669 1.1 bjh21 zSig0 = aSig + bSig; 3670 1.1 bjh21 if ( (sbits64) zSig0 < 0 ) goto roundAndPack; 3671 1.1 bjh21 shiftRight1: 3672 1.1 bjh21 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 ); 3673 1.1 bjh21 zSig0 |= LIT64( 0x8000000000000000 ); 3674 1.1 bjh21 ++zExp; 3675 1.1 bjh21 roundAndPack: 3676 1.1 bjh21 return 3677 1.1 bjh21 roundAndPackFloatx80( 3678 1.1 bjh21 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); 3679 1.1 bjh21 3680 1.1 bjh21 } 3681 1.1 bjh21 3682 1.1 bjh21 /* 3683 1.1 bjh21 ------------------------------------------------------------------------------- 3684 1.1 bjh21 Returns the result of subtracting the absolute values of the extended 3685 1.1 bjh21 double-precision floating-point values `a' and `b'. If `zSign' is 1, the 3686 1.1 bjh21 difference is negated before being returned. `zSign' is ignored if the 3687 1.1 bjh21 result is a NaN. The subtraction is performed according to the IEC/IEEE 3688 1.1 bjh21 Standard for Binary Floating-Point Arithmetic. 3689 1.1 bjh21 ------------------------------------------------------------------------------- 3690 1.1 bjh21 */ 3691 1.1 bjh21 static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) 3692 1.1 bjh21 { 3693 1.1 bjh21 int32 aExp, bExp, zExp; 3694 1.1 bjh21 bits64 aSig, bSig, zSig0, zSig1; 3695 1.1 bjh21 int32 expDiff; 3696 1.1 bjh21 floatx80 z; 3697 1.1 bjh21 3698 1.1 bjh21 aSig = extractFloatx80Frac( a ); 3699 1.1 bjh21 aExp = extractFloatx80Exp( a ); 3700 1.1 bjh21 bSig = extractFloatx80Frac( b ); 3701 1.1 bjh21 bExp = extractFloatx80Exp( b ); 3702 1.1 bjh21 expDiff = aExp - bExp; 3703 1.1 bjh21 if ( 0 < expDiff ) goto aExpBigger; 3704 1.1 bjh21 if ( expDiff < 0 ) goto bExpBigger; 3705 1.1 bjh21 if ( aExp == 0x7FFF ) { 3706 1.1 bjh21 if ( (bits64) ( ( aSig | bSig )<<1 ) ) { 3707 1.1 bjh21 return propagateFloatx80NaN( a, b ); 3708 1.1 bjh21 } 3709 1.1 bjh21 float_raise( float_flag_invalid ); 3710 1.1 bjh21 z.low = floatx80_default_nan_low; 3711 1.16 nat z.high = floatx80_default_nan_high<<X80SHIFT; 3712 1.1 bjh21 return z; 3713 1.1 bjh21 } 3714 1.1 bjh21 if ( aExp == 0 ) { 3715 1.1 bjh21 aExp = 1; 3716 1.1 bjh21 bExp = 1; 3717 1.1 bjh21 } 3718 1.1 bjh21 zSig1 = 0; 3719 1.1 bjh21 if ( bSig < aSig ) goto aBigger; 3720 1.1 bjh21 if ( aSig < bSig ) goto bBigger; 3721 1.1 bjh21 return packFloatx80( float_rounding_mode == float_round_down, 0, 0 ); 3722 1.1 bjh21 bExpBigger: 3723 1.1 bjh21 if ( bExp == 0x7FFF ) { 3724 1.1 bjh21 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3725 1.1 bjh21 return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3726 1.1 bjh21 } 3727 1.1 bjh21 if ( aExp == 0 ) ++expDiff; 3728 1.1 bjh21 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); 3729 1.1 bjh21 bBigger: 3730 1.1 bjh21 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 ); 3731 1.1 bjh21 zExp = bExp; 3732 1.1 bjh21 zSign ^= 1; 3733 1.1 bjh21 goto normalizeRoundAndPack; 3734 1.1 bjh21 aExpBigger: 3735 1.1 bjh21 if ( aExp == 0x7FFF ) { 3736 1.1 bjh21 if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3737 1.1 bjh21 return a; 3738 1.1 bjh21 } 3739 1.1 bjh21 if ( bExp == 0 ) --expDiff; 3740 1.1 bjh21 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); 3741 1.1 bjh21 aBigger: 3742 1.1 bjh21 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 ); 3743 1.1 bjh21 zExp = aExp; 3744 1.1 bjh21 normalizeRoundAndPack: 3745 1.1 bjh21 return 3746 1.1 bjh21 normalizeRoundAndPackFloatx80( 3747 1.1 bjh21 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); 3748 1.1 bjh21 3749 1.1 bjh21 } 3750 1.1 bjh21 3751 1.1 bjh21 /* 3752 1.1 bjh21 ------------------------------------------------------------------------------- 3753 1.1 bjh21 Returns the result of adding the extended double-precision floating-point 3754 1.1 bjh21 values `a' and `b'. The operation is performed according to the IEC/IEEE 3755 1.1 bjh21 Standard for Binary Floating-Point Arithmetic. 3756 1.1 bjh21 ------------------------------------------------------------------------------- 3757 1.1 bjh21 */ 3758 1.1 bjh21 floatx80 floatx80_add( floatx80 a, floatx80 b ) 3759 1.1 bjh21 { 3760 1.1 bjh21 flag aSign, bSign; 3761 1.1 bjh21 3762 1.1 bjh21 aSign = extractFloatx80Sign( a ); 3763 1.1 bjh21 bSign = extractFloatx80Sign( b ); 3764 1.1 bjh21 if ( aSign == bSign ) { 3765 1.1 bjh21 return addFloatx80Sigs( a, b, aSign ); 3766 1.1 bjh21 } 3767 1.1 bjh21 else { 3768 1.1 bjh21 return subFloatx80Sigs( a, b, aSign ); 3769 1.1 bjh21 } 3770 1.1 bjh21 3771 1.1 bjh21 } 3772 1.1 bjh21 3773 1.1 bjh21 /* 3774 1.1 bjh21 ------------------------------------------------------------------------------- 3775 1.1 bjh21 Returns the result of subtracting the extended double-precision floating- 3776 1.1 bjh21 point values `a' and `b'. The operation is performed according to the 3777 1.1 bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3778 1.1 bjh21 ------------------------------------------------------------------------------- 3779 1.1 bjh21 */ 3780 1.1 bjh21 floatx80 floatx80_sub( floatx80 a, floatx80 b ) 3781 1.1 bjh21 { 3782 1.1 bjh21 flag aSign, bSign; 3783 1.1 bjh21 3784 1.1 bjh21 aSign = extractFloatx80Sign( a ); 3785 1.1 bjh21 bSign = extractFloatx80Sign( b ); 3786 1.1 bjh21 if ( aSign == bSign ) { 3787 1.1 bjh21 return subFloatx80Sigs( a, b, aSign ); 3788 1.1 bjh21 } 3789 1.1 bjh21 else { 3790 1.1 bjh21 return addFloatx80Sigs( a, b, aSign ); 3791 1.1 bjh21 } 3792 1.1 bjh21 3793 1.1 bjh21 } 3794 1.1 bjh21 3795 1.1 bjh21 /* 3796 1.1 bjh21 ------------------------------------------------------------------------------- 3797 1.1 bjh21 Returns the result of multiplying the extended double-precision floating- 3798 1.1 bjh21 point values `a' and `b'. The operation is performed according to the 3799 1.1 bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3800 1.1 bjh21 ------------------------------------------------------------------------------- 3801 1.1 bjh21 */ 3802 1.1 bjh21 floatx80 floatx80_mul( floatx80 a, floatx80 b ) 3803 1.1 bjh21 { 3804 1.1 bjh21 flag aSign, bSign, zSign; 3805 1.1 bjh21 int32 aExp, bExp, zExp; 3806 1.1 bjh21 bits64 aSig, bSig, zSig0, zSig1; 3807 1.1 bjh21 floatx80 z; 3808 1.1 bjh21 3809 1.1 bjh21 aSig = extractFloatx80Frac( a ); 3810 1.1 bjh21 aExp = extractFloatx80Exp( a ); 3811 1.1 bjh21 aSign = extractFloatx80Sign( a ); 3812 1.1 bjh21 bSig = extractFloatx80Frac( b ); 3813 1.1 bjh21 bExp = extractFloatx80Exp( b ); 3814 1.1 bjh21 bSign = extractFloatx80Sign( b ); 3815 1.1 bjh21 zSign = aSign ^ bSign; 3816 1.1 bjh21 if ( aExp == 0x7FFF ) { 3817 1.1 bjh21 if ( (bits64) ( aSig<<1 ) 3818 1.1 bjh21 || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { 3819 1.1 bjh21 return propagateFloatx80NaN( a, b ); 3820 1.1 bjh21 } 3821 1.1 bjh21 if ( ( bExp | bSig ) == 0 ) goto invalid; 3822 1.1 bjh21 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3823 1.1 bjh21 } 3824 1.1 bjh21 if ( bExp == 0x7FFF ) { 3825 1.1 bjh21 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3826 1.1 bjh21 if ( ( aExp | aSig ) == 0 ) { 3827 1.1 bjh21 invalid: 3828 1.1 bjh21 float_raise( float_flag_invalid ); 3829 1.1 bjh21 z.low = floatx80_default_nan_low; 3830 1.16 nat z.high = floatx80_default_nan_high<<X80SHIFT; 3831 1.1 bjh21 return z; 3832 1.1 bjh21 } 3833 1.1 bjh21 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3834 1.1 bjh21 } 3835 1.1 bjh21 if ( aExp == 0 ) { 3836 1.1 bjh21 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); 3837 1.1 bjh21 normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); 3838 1.1 bjh21 } 3839 1.1 bjh21 if ( bExp == 0 ) { 3840 1.1 bjh21 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); 3841 1.1 bjh21 normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); 3842 1.1 bjh21 } 3843 1.1 bjh21 zExp = aExp + bExp - 0x3FFE; 3844 1.1 bjh21 mul64To128( aSig, bSig, &zSig0, &zSig1 ); 3845 1.1 bjh21 if ( 0 < (sbits64) zSig0 ) { 3846 1.1 bjh21 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); 3847 1.1 bjh21 --zExp; 3848 1.1 bjh21 } 3849 1.1 bjh21 return 3850 1.1 bjh21 roundAndPackFloatx80( 3851 1.1 bjh21 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); 3852 1.1 bjh21 3853 1.1 bjh21 } 3854 1.1 bjh21 3855 1.1 bjh21 /* 3856 1.1 bjh21 ------------------------------------------------------------------------------- 3857 1.1 bjh21 Returns the result of dividing the extended double-precision floating-point 3858 1.1 bjh21 value `a' by the corresponding value `b'. The operation is performed 3859 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3860 1.1 bjh21 ------------------------------------------------------------------------------- 3861 1.1 bjh21 */ 3862 1.1 bjh21 floatx80 floatx80_div( floatx80 a, floatx80 b ) 3863 1.1 bjh21 { 3864 1.1 bjh21 flag aSign, bSign, zSign; 3865 1.1 bjh21 int32 aExp, bExp, zExp; 3866 1.1 bjh21 bits64 aSig, bSig, zSig0, zSig1; 3867 1.1 bjh21 bits64 rem0, rem1, rem2, term0, term1, term2; 3868 1.1 bjh21 floatx80 z; 3869 1.1 bjh21 3870 1.1 bjh21 aSig = extractFloatx80Frac( a ); 3871 1.1 bjh21 aExp = extractFloatx80Exp( a ); 3872 1.1 bjh21 aSign = extractFloatx80Sign( a ); 3873 1.1 bjh21 bSig = extractFloatx80Frac( b ); 3874 1.1 bjh21 bExp = extractFloatx80Exp( b ); 3875 1.1 bjh21 bSign = extractFloatx80Sign( b ); 3876 1.1 bjh21 zSign = aSign ^ bSign; 3877 1.1 bjh21 if ( aExp == 0x7FFF ) { 3878 1.1 bjh21 if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3879 1.1 bjh21 if ( bExp == 0x7FFF ) { 3880 1.1 bjh21 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3881 1.1 bjh21 goto invalid; 3882 1.1 bjh21 } 3883 1.1 bjh21 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3884 1.1 bjh21 } 3885 1.1 bjh21 if ( bExp == 0x7FFF ) { 3886 1.1 bjh21 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3887 1.1 bjh21 return packFloatx80( zSign, 0, 0 ); 3888 1.1 bjh21 } 3889 1.1 bjh21 if ( bExp == 0 ) { 3890 1.1 bjh21 if ( bSig == 0 ) { 3891 1.1 bjh21 if ( ( aExp | aSig ) == 0 ) { 3892 1.1 bjh21 invalid: 3893 1.1 bjh21 float_raise( float_flag_invalid ); 3894 1.1 bjh21 z.low = floatx80_default_nan_low; 3895 1.16 nat z.high = floatx80_default_nan_high<<X80SHIFT; 3896 1.1 bjh21 return z; 3897 1.1 bjh21 } 3898 1.1 bjh21 float_raise( float_flag_divbyzero ); 3899 1.1 bjh21 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3900 1.1 bjh21 } 3901 1.1 bjh21 normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); 3902 1.1 bjh21 } 3903 1.1 bjh21 if ( aExp == 0 ) { 3904 1.1 bjh21 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); 3905 1.1 bjh21 normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); 3906 1.1 bjh21 } 3907 1.1 bjh21 zExp = aExp - bExp + 0x3FFE; 3908 1.1 bjh21 rem1 = 0; 3909 1.1 bjh21 if ( bSig <= aSig ) { 3910 1.1 bjh21 shift128Right( aSig, 0, 1, &aSig, &rem1 ); 3911 1.1 bjh21 ++zExp; 3912 1.1 bjh21 } 3913 1.1 bjh21 zSig0 = estimateDiv128To64( aSig, rem1, bSig ); 3914 1.1 bjh21 mul64To128( bSig, zSig0, &term0, &term1 ); 3915 1.1 bjh21 sub128( aSig, rem1, term0, term1, &rem0, &rem1 ); 3916 1.1 bjh21 while ( (sbits64) rem0 < 0 ) { 3917 1.1 bjh21 --zSig0; 3918 1.1 bjh21 add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); 3919 1.1 bjh21 } 3920 1.1 bjh21 zSig1 = estimateDiv128To64( rem1, 0, bSig ); 3921 1.1 bjh21 if ( (bits64) ( zSig1<<1 ) <= 8 ) { 3922 1.1 bjh21 mul64To128( bSig, zSig1, &term1, &term2 ); 3923 1.1 bjh21 sub128( rem1, 0, term1, term2, &rem1, &rem2 ); 3924 1.1 bjh21 while ( (sbits64) rem1 < 0 ) { 3925 1.1 bjh21 --zSig1; 3926 1.1 bjh21 add128( rem1, rem2, 0, bSig, &rem1, &rem2 ); 3927 1.1 bjh21 } 3928 1.1 bjh21 zSig1 |= ( ( rem1 | rem2 ) != 0 ); 3929 1.1 bjh21 } 3930 1.1 bjh21 return 3931 1.1 bjh21 roundAndPackFloatx80( 3932 1.1 bjh21 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); 3933 1.1 bjh21 3934 1.1 bjh21 } 3935 1.1 bjh21 3936 1.1 bjh21 /* 3937 1.1 bjh21 ------------------------------------------------------------------------------- 3938 1.1 bjh21 Returns the remainder of the extended double-precision floating-point value 3939 1.1 bjh21 `a' with respect to the corresponding value `b'. The operation is performed 3940 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3941 1.1 bjh21 ------------------------------------------------------------------------------- 3942 1.1 bjh21 */ 3943 1.1 bjh21 floatx80 floatx80_rem( floatx80 a, floatx80 b ) 3944 1.1 bjh21 { 3945 1.14 martin flag aSign, zSign; 3946 1.1 bjh21 int32 aExp, bExp, expDiff; 3947 1.1 bjh21 bits64 aSig0, aSig1, bSig; 3948 1.1 bjh21 bits64 q, term0, term1, alternateASig0, alternateASig1; 3949 1.1 bjh21 floatx80 z; 3950 1.1 bjh21 3951 1.1 bjh21 aSig0 = extractFloatx80Frac( a ); 3952 1.1 bjh21 aExp = extractFloatx80Exp( a ); 3953 1.1 bjh21 aSign = extractFloatx80Sign( a ); 3954 1.1 bjh21 bSig = extractFloatx80Frac( b ); 3955 1.1 bjh21 bExp = extractFloatx80Exp( b ); 3956 1.14 martin 3957 1.1 bjh21 if ( aExp == 0x7FFF ) { 3958 1.1 bjh21 if ( (bits64) ( aSig0<<1 ) 3959 1.1 bjh21 || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { 3960 1.1 bjh21 return propagateFloatx80NaN( a, b ); 3961 1.1 bjh21 } 3962 1.1 bjh21 goto invalid; 3963 1.1 bjh21 } 3964 1.1 bjh21 if ( bExp == 0x7FFF ) { 3965 1.1 bjh21 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3966 1.1 bjh21 return a; 3967 1.1 bjh21 } 3968 1.1 bjh21 if ( bExp == 0 ) { 3969 1.1 bjh21 if ( bSig == 0 ) { 3970 1.1 bjh21 invalid: 3971 1.1 bjh21 float_raise( float_flag_invalid ); 3972 1.1 bjh21 z.low = floatx80_default_nan_low; 3973 1.16 nat z.high = floatx80_default_nan_high<<X80SHIFT; 3974 1.1 bjh21 return z; 3975 1.1 bjh21 } 3976 1.1 bjh21 normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); 3977 1.1 bjh21 } 3978 1.1 bjh21 if ( aExp == 0 ) { 3979 1.1 bjh21 if ( (bits64) ( aSig0<<1 ) == 0 ) return a; 3980 1.1 bjh21 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); 3981 1.1 bjh21 } 3982 1.1 bjh21 bSig |= LIT64( 0x8000000000000000 ); 3983 1.1 bjh21 zSign = aSign; 3984 1.1 bjh21 expDiff = aExp - bExp; 3985 1.1 bjh21 aSig1 = 0; 3986 1.1 bjh21 if ( expDiff < 0 ) { 3987 1.1 bjh21 if ( expDiff < -1 ) return a; 3988 1.1 bjh21 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 ); 3989 1.1 bjh21 expDiff = 0; 3990 1.1 bjh21 } 3991 1.1 bjh21 q = ( bSig <= aSig0 ); 3992 1.1 bjh21 if ( q ) aSig0 -= bSig; 3993 1.1 bjh21 expDiff -= 64; 3994 1.1 bjh21 while ( 0 < expDiff ) { 3995 1.1 bjh21 q = estimateDiv128To64( aSig0, aSig1, bSig ); 3996 1.1 bjh21 q = ( 2 < q ) ? q - 2 : 0; 3997 1.1 bjh21 mul64To128( bSig, q, &term0, &term1 ); 3998 1.1 bjh21 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); 3999 1.1 bjh21 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 ); 4000 1.1 bjh21 expDiff -= 62; 4001 1.1 bjh21 } 4002 1.1 bjh21 expDiff += 64; 4003 1.1 bjh21 if ( 0 < expDiff ) { 4004 1.1 bjh21 q = estimateDiv128To64( aSig0, aSig1, bSig ); 4005 1.1 bjh21 q = ( 2 < q ) ? q - 2 : 0; 4006 1.1 bjh21 q >>= 64 - expDiff; 4007 1.1 bjh21 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 ); 4008 1.1 bjh21 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); 4009 1.1 bjh21 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); 4010 1.1 bjh21 while ( le128( term0, term1, aSig0, aSig1 ) ) { 4011 1.1 bjh21 ++q; 4012 1.1 bjh21 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); 4013 1.1 bjh21 } 4014 1.1 bjh21 } 4015 1.1 bjh21 else { 4016 1.1 bjh21 term1 = 0; 4017 1.1 bjh21 term0 = bSig; 4018 1.1 bjh21 } 4019 1.1 bjh21 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); 4020 1.1 bjh21 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) 4021 1.1 bjh21 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) 4022 1.1 bjh21 && ( q & 1 ) ) 4023 1.1 bjh21 ) { 4024 1.1 bjh21 aSig0 = alternateASig0; 4025 1.1 bjh21 aSig1 = alternateASig1; 4026 1.1 bjh21 zSign = ! zSign; 4027 1.1 bjh21 } 4028 1.1 bjh21 return 4029 1.1 bjh21 normalizeRoundAndPackFloatx80( 4030 1.1 bjh21 80, zSign, bExp + expDiff, aSig0, aSig1 ); 4031 1.1 bjh21 4032 1.1 bjh21 } 4033 1.1 bjh21 4034 1.1 bjh21 /* 4035 1.1 bjh21 ------------------------------------------------------------------------------- 4036 1.1 bjh21 Returns the square root of the extended double-precision floating-point 4037 1.1 bjh21 value `a'. The operation is performed according to the IEC/IEEE Standard 4038 1.1 bjh21 for Binary Floating-Point Arithmetic. 4039 1.1 bjh21 ------------------------------------------------------------------------------- 4040 1.1 bjh21 */ 4041 1.1 bjh21 floatx80 floatx80_sqrt( floatx80 a ) 4042 1.1 bjh21 { 4043 1.1 bjh21 flag aSign; 4044 1.1 bjh21 int32 aExp, zExp; 4045 1.1 bjh21 bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0; 4046 1.1 bjh21 bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; 4047 1.1 bjh21 floatx80 z; 4048 1.1 bjh21 4049 1.1 bjh21 aSig0 = extractFloatx80Frac( a ); 4050 1.1 bjh21 aExp = extractFloatx80Exp( a ); 4051 1.1 bjh21 aSign = extractFloatx80Sign( a ); 4052 1.1 bjh21 if ( aExp == 0x7FFF ) { 4053 1.1 bjh21 if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a ); 4054 1.1 bjh21 if ( ! aSign ) return a; 4055 1.1 bjh21 goto invalid; 4056 1.1 bjh21 } 4057 1.1 bjh21 if ( aSign ) { 4058 1.1 bjh21 if ( ( aExp | aSig0 ) == 0 ) return a; 4059 1.1 bjh21 invalid: 4060 1.1 bjh21 float_raise( float_flag_invalid ); 4061 1.1 bjh21 z.low = floatx80_default_nan_low; 4062 1.16 nat z.high = floatx80_default_nan_high<<X80SHIFT; 4063 1.1 bjh21 return z; 4064 1.1 bjh21 } 4065 1.1 bjh21 if ( aExp == 0 ) { 4066 1.1 bjh21 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 ); 4067 1.1 bjh21 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); 4068 1.1 bjh21 } 4069 1.1 bjh21 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF; 4070 1.1 bjh21 zSig0 = estimateSqrt32( aExp, aSig0>>32 ); 4071 1.1 bjh21 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 ); 4072 1.1 bjh21 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 ); 4073 1.1 bjh21 doubleZSig0 = zSig0<<1; 4074 1.1 bjh21 mul64To128( zSig0, zSig0, &term0, &term1 ); 4075 1.1 bjh21 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); 4076 1.1 bjh21 while ( (sbits64) rem0 < 0 ) { 4077 1.1 bjh21 --zSig0; 4078 1.1 bjh21 doubleZSig0 -= 2; 4079 1.1 bjh21 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); 4080 1.1 bjh21 } 4081 1.1 bjh21 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); 4082 1.1 bjh21 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) { 4083 1.1 bjh21 if ( zSig1 == 0 ) zSig1 = 1; 4084 1.1 bjh21 mul64To128( doubleZSig0, zSig1, &term1, &term2 ); 4085 1.1 bjh21 sub128( rem1, 0, term1, term2, &rem1, &rem2 ); 4086 1.1 bjh21 mul64To128( zSig1, zSig1, &term2, &term3 ); 4087 1.1 bjh21 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); 4088 1.1 bjh21 while ( (sbits64) rem1 < 0 ) { 4089 1.1 bjh21 --zSig1; 4090 1.1 bjh21 shortShift128Left( 0, zSig1, 1, &term2, &term3 ); 4091 1.1 bjh21 term3 |= 1; 4092 1.1 bjh21 term2 |= doubleZSig0; 4093 1.1 bjh21 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); 4094 1.1 bjh21 } 4095 1.1 bjh21 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); 4096 1.1 bjh21 } 4097 1.1 bjh21 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 ); 4098 1.1 bjh21 zSig0 |= doubleZSig0; 4099 1.1 bjh21 return 4100 1.1 bjh21 roundAndPackFloatx80( 4101 1.1 bjh21 floatx80_rounding_precision, 0, zExp, zSig0, zSig1 ); 4102 1.1 bjh21 4103 1.1 bjh21 } 4104 1.1 bjh21 4105 1.1 bjh21 /* 4106 1.1 bjh21 ------------------------------------------------------------------------------- 4107 1.1 bjh21 Returns 1 if the extended double-precision floating-point value `a' is 4108 1.1 bjh21 equal to the corresponding value `b', and 0 otherwise. The comparison is 4109 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 4110 1.1 bjh21 Arithmetic. 4111 1.1 bjh21 ------------------------------------------------------------------------------- 4112 1.1 bjh21 */ 4113 1.1 bjh21 flag floatx80_eq( floatx80 a, floatx80 b ) 4114 1.1 bjh21 { 4115 1.1 bjh21 4116 1.1 bjh21 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 4117 1.1 bjh21 && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 4118 1.1 bjh21 || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 4119 1.1 bjh21 && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 4120 1.1 bjh21 ) { 4121 1.1 bjh21 if ( floatx80_is_signaling_nan( a ) 4122 1.1 bjh21 || floatx80_is_signaling_nan( b ) ) { 4123 1.1 bjh21 float_raise( float_flag_invalid ); 4124 1.1 bjh21 } 4125 1.1 bjh21 return 0; 4126 1.1 bjh21 } 4127 1.1 bjh21 return 4128 1.1 bjh21 ( a.low == b.low ) 4129 1.1 bjh21 && ( ( a.high == b.high ) 4130 1.1 bjh21 || ( ( a.low == 0 ) 4131 1.16 nat && ( (bits16) ( ((bits32)( a.high | b.high )>>X80SHIFT)<<1 ) 4132 1.16 nat == 0 ) ) ); 4133 1.1 bjh21 4134 1.1 bjh21 } 4135 1.1 bjh21 4136 1.1 bjh21 /* 4137 1.1 bjh21 ------------------------------------------------------------------------------- 4138 1.1 bjh21 Returns 1 if the extended double-precision floating-point value `a' is 4139 1.1 bjh21 less than or equal to the corresponding value `b', and 0 otherwise. The 4140 1.1 bjh21 comparison is performed according to the IEC/IEEE Standard for Binary 4141 1.1 bjh21 Floating-Point Arithmetic. 4142 1.1 bjh21 ------------------------------------------------------------------------------- 4143 1.1 bjh21 */ 4144 1.1 bjh21 flag floatx80_le( floatx80 a, floatx80 b ) 4145 1.1 bjh21 { 4146 1.1 bjh21 flag aSign, bSign; 4147 1.1 bjh21 4148 1.1 bjh21 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 4149 1.1 bjh21 && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 4150 1.1 bjh21 || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 4151 1.1 bjh21 && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 4152 1.1 bjh21 ) { 4153 1.1 bjh21 float_raise( float_flag_invalid ); 4154 1.1 bjh21 return 0; 4155 1.1 bjh21 } 4156 1.1 bjh21 aSign = extractFloatx80Sign( a ); 4157 1.1 bjh21 bSign = extractFloatx80Sign( b ); 4158 1.1 bjh21 if ( aSign != bSign ) { 4159 1.1 bjh21 return 4160 1.1 bjh21 aSign 4161 1.16 nat || ( ( ( (bits16) ( ((bits32)( a.high | b.high )>>X80SHIFT) 4162 1.16 nat <<1 ) ) | a.low | b.low ) == 0 ); 4163 1.1 bjh21 } 4164 1.1 bjh21 return 4165 1.16 nat aSign ? le128( b.high>>X80SHIFT, b.low, a.high>>X80SHIFT, a.low ) 4166 1.16 nat : le128( a.high>>X80SHIFT, a.low, b.high>>X80SHIFT, b.low ); 4167 1.1 bjh21 4168 1.1 bjh21 } 4169 1.1 bjh21 4170 1.1 bjh21 /* 4171 1.1 bjh21 ------------------------------------------------------------------------------- 4172 1.1 bjh21 Returns 1 if the extended double-precision floating-point value `a' is 4173 1.1 bjh21 less than the corresponding value `b', and 0 otherwise. The comparison 4174 1.1 bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point 4175 1.1 bjh21 Arithmetic. 4176 1.1 bjh21 ------------------------------------------------------------------------------- 4177 1.1 bjh21 */ 4178 1.1 bjh21 flag floatx80_lt( floatx80 a, floatx80 b ) 4179 1.1 bjh21 { 4180 1.1 bjh21 flag aSign, bSign; 4181 1.1 bjh21 4182 1.1 bjh21 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 4183 1.1 bjh21 && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 4184 1.1 bjh21 || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 4185 1.1 bjh21 && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 4186 1.1 bjh21 ) { 4187 1.1 bjh21 float_raise( float_flag_invalid ); 4188 1.1 bjh21 return 0; 4189 1.1 bjh21 } 4190 1.1 bjh21 aSign = extractFloatx80Sign( a ); 4191 1.1 bjh21 bSign = extractFloatx80Sign( b ); 4192 1.1 bjh21 if ( aSign != bSign ) { 4193 1.1 bjh21 return 4194 1.1 bjh21 aSign 4195 1.16 nat && ( ( ( (bits16) ( ((bits32)( a.high | b.high )>>X80SHIFT) 4196 1.16 nat <<1 ) ) | a.low | b.low ) != 0 ); 4197 1.1 bjh21 } 4198 1.1 bjh21 return 4199 1.16 nat aSign ? lt128( b.high>>X80SHIFT, b.low, a.high>>X80SHIFT, a.low ) 4200 1.16 nat : lt128( a.high>>X80SHIFT, a.low, b.high>>X80SHIFT, b.low ); 4201 1.1 bjh21 4202 1.1 bjh21 } 4203 1.1 bjh21 4204 1.1 bjh21 /* 4205 1.1 bjh21 ------------------------------------------------------------------------------- 4206 1.1 bjh21 Returns 1 if the extended double-precision floating-point value `a' is equal 4207 1.1 bjh21 to the corresponding value `b', and 0 otherwise. The invalid exception is 4208 1.1 bjh21 raised if either operand is a NaN. Otherwise, the comparison is performed 4209 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 4210 1.1 bjh21 ------------------------------------------------------------------------------- 4211 1.1 bjh21 */ 4212 1.1 bjh21 flag floatx80_eq_signaling( floatx80 a, floatx80 b ) 4213 1.1 bjh21 { 4214 1.1 bjh21 4215 1.1 bjh21 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 4216 1.1 bjh21 && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 4217 1.1 bjh21 || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 4218 1.1 bjh21 && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 4219 1.1 bjh21 ) { 4220 1.1 bjh21 float_raise( float_flag_invalid ); 4221 1.1 bjh21 return 0; 4222 1.1 bjh21 } 4223 1.1 bjh21 return 4224 1.1 bjh21 ( a.low == b.low ) 4225 1.16 nat && ( ( (a.high>>X80SHIFT) == (b.high>>X80SHIFT) ) 4226 1.1 bjh21 || ( ( a.low == 0 ) 4227 1.16 nat && ( (bits16) ( ((bits32)( a.high | b.high )>>X80SHIFT)<<1 ) 4228 1.16 nat == 0 ) ) ); 4229 1.1 bjh21 4230 1.1 bjh21 } 4231 1.1 bjh21 4232 1.1 bjh21 /* 4233 1.1 bjh21 ------------------------------------------------------------------------------- 4234 1.1 bjh21 Returns 1 if the extended double-precision floating-point value `a' is less 4235 1.1 bjh21 than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs 4236 1.1 bjh21 do not cause an exception. Otherwise, the comparison is performed according 4237 1.1 bjh21 to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 4238 1.1 bjh21 ------------------------------------------------------------------------------- 4239 1.1 bjh21 */ 4240 1.1 bjh21 flag floatx80_le_quiet( floatx80 a, floatx80 b ) 4241 1.1 bjh21 { 4242 1.1 bjh21 flag aSign, bSign; 4243 1.1 bjh21 4244 1.1 bjh21 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 4245 1.1 bjh21 && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 4246 1.1 bjh21 || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 4247 1.1 bjh21 && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 4248 1.1 bjh21 ) { 4249 1.1 bjh21 if ( floatx80_is_signaling_nan( a ) 4250 1.1 bjh21 || floatx80_is_signaling_nan( b ) ) { 4251 1.1 bjh21 float_raise( float_flag_invalid ); 4252 1.1 bjh21 } 4253 1.1 bjh21 return 0; 4254 1.1 bjh21 } 4255 1.1 bjh21 aSign = extractFloatx80Sign( a ); 4256 1.1 bjh21 bSign = extractFloatx80Sign( b ); 4257 1.1 bjh21 if ( aSign != bSign ) { 4258 1.1 bjh21 return 4259 1.1 bjh21 aSign 4260 1.16 nat || ( ( ( (bits16) ( ((bits32)( a.high | b.high )>>X80SHIFT)<<1 4261 1.16 nat ) ) | a.low | b.low ) == 0 ); 4262 1.1 bjh21 } 4263 1.1 bjh21 return 4264 1.16 nat aSign ? le128( b.high>>X80SHIFT, b.low, a.high>>X80SHIFT, a.low ) 4265 1.16 nat : le128( a.high>>X80SHIFT, a.low, b.high>>X80SHIFT, b.low ); 4266 1.1 bjh21 4267 1.1 bjh21 } 4268 1.1 bjh21 4269 1.1 bjh21 /* 4270 1.1 bjh21 ------------------------------------------------------------------------------- 4271 1.1 bjh21 Returns 1 if the extended double-precision floating-point value `a' is less 4272 1.1 bjh21 than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause 4273 1.1 bjh21 an exception. Otherwise, the comparison is performed according to the 4274 1.1 bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic. 4275 1.1 bjh21 ------------------------------------------------------------------------------- 4276 1.1 bjh21 */ 4277 1.1 bjh21 flag floatx80_lt_quiet( floatx80 a, floatx80 b ) 4278 1.1 bjh21 { 4279 1.1 bjh21 flag aSign, bSign; 4280 1.1 bjh21 4281 1.1 bjh21 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 4282 1.1 bjh21 && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 4283 1.1 bjh21 || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 4284 1.1 bjh21 && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 4285 1.1 bjh21 ) { 4286 1.1 bjh21 if ( floatx80_is_signaling_nan( a ) 4287 1.1 bjh21 || floatx80_is_signaling_nan( b ) ) { 4288 1.1 bjh21 float_raise( float_flag_invalid ); 4289 1.1 bjh21 } 4290 1.1 bjh21 return 0; 4291 1.1 bjh21 } 4292 1.1 bjh21 aSign = extractFloatx80Sign( a ); 4293 1.1 bjh21 bSign = extractFloatx80Sign( b ); 4294 1.1 bjh21 if ( aSign != bSign ) { 4295 1.1 bjh21 return 4296 1.1 bjh21 aSign 4297 1.16 nat && ( ( ( (bits16) ( ((bits32)( a.high | b.high )>>X80SHIFT)<<1 ) 4298 1.16 nat ) | a.low | b.low ) != 0 ); 4299 1.1 bjh21 } 4300 1.1 bjh21 return 4301 1.16 nat aSign ? lt128( b.high>>X80SHIFT, b.low, a.high>>X80SHIFT, a.low ) 4302 1.16 nat : lt128( a.high>>X80SHIFT, a.low, b.high>>X80SHIFT, b.low ); 4303 1.1 bjh21 4304 1.1 bjh21 } 4305 1.1 bjh21 4306 1.1 bjh21 #endif 4307 1.1 bjh21 4308 1.1 bjh21 #ifdef FLOAT128 4309 1.1 bjh21 4310 1.1 bjh21 /* 4311 1.1 bjh21 ------------------------------------------------------------------------------- 4312 1.1 bjh21 Returns the result of converting the quadruple-precision floating-point 4313 1.1 bjh21 value `a' to the 32-bit two's complement integer format. The conversion 4314 1.1 bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point 4315 1.1 bjh21 Arithmetic---which means in particular that the conversion is rounded 4316 1.1 bjh21 according to the current rounding mode. If `a' is a NaN, the largest 4317 1.1 bjh21 positive integer is returned. Otherwise, if the conversion overflows, the 4318 1.1 bjh21 largest integer with the same sign as `a' is returned. 4319 1.1 bjh21 ------------------------------------------------------------------------------- 4320 1.1 bjh21 */ 4321 1.1 bjh21 int32 float128_to_int32( float128 a ) 4322 1.1 bjh21 { 4323 1.1 bjh21 flag aSign; 4324 1.1 bjh21 int32 aExp, shiftCount; 4325 1.1 bjh21 bits64 aSig0, aSig1; 4326 1.1 bjh21 4327 1.1 bjh21 aSig1 = extractFloat128Frac1( a ); 4328 1.1 bjh21 aSig0 = extractFloat128Frac0( a ); 4329 1.1 bjh21 aExp = extractFloat128Exp( a ); 4330 1.1 bjh21 aSign = extractFloat128Sign( a ); 4331 1.1 bjh21 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0; 4332 1.1 bjh21 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); 4333 1.1 bjh21 aSig0 |= ( aSig1 != 0 ); 4334 1.1 bjh21 shiftCount = 0x4028 - aExp; 4335 1.1 bjh21 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 ); 4336 1.1 bjh21 return roundAndPackInt32( aSign, aSig0 ); 4337 1.1 bjh21 4338 1.1 bjh21 } 4339 1.1 bjh21 4340 1.1 bjh21 /* 4341 1.1 bjh21 ------------------------------------------------------------------------------- 4342 1.1 bjh21 Returns the result of converting the quadruple-precision floating-point 4343 1.1 bjh21 value `a' to the 32-bit two's complement integer format. The conversion 4344 1.1 bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point 4345 1.1 bjh21 Arithmetic, except that the conversion is always rounded toward zero. If 4346 1.1 bjh21 `a' is a NaN, the largest positive integer is returned. Otherwise, if the 4347 1.1 bjh21 conversion overflows, the largest integer with the same sign as `a' is 4348 1.1 bjh21 returned. 4349 1.1 bjh21 ------------------------------------------------------------------------------- 4350 1.1 bjh21 */ 4351 1.1 bjh21 int32 float128_to_int32_round_to_zero( float128 a ) 4352 1.1 bjh21 { 4353 1.1 bjh21 flag aSign; 4354 1.1 bjh21 int32 aExp, shiftCount; 4355 1.1 bjh21 bits64 aSig0, aSig1, savedASig; 4356 1.1 bjh21 int32 z; 4357 1.1 bjh21 4358 1.1 bjh21 aSig1 = extractFloat128Frac1( a ); 4359 1.1 bjh21 aSig0 = extractFloat128Frac0( a ); 4360 1.1 bjh21 aExp = extractFloat128Exp( a ); 4361 1.1 bjh21 aSign = extractFloat128Sign( a ); 4362 1.1 bjh21 aSig0 |= ( aSig1 != 0 ); 4363 1.1 bjh21 if ( 0x401E < aExp ) { 4364 1.1 bjh21 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0; 4365 1.1 bjh21 goto invalid; 4366 1.1 bjh21 } 4367 1.1 bjh21 else if ( aExp < 0x3FFF ) { 4368 1.12 matt if ( aExp || aSig0 ) set_float_exception_inexact_flag(); 4369 1.1 bjh21 return 0; 4370 1.1 bjh21 } 4371 1.1 bjh21 aSig0 |= LIT64( 0x0001000000000000 ); 4372 1.1 bjh21 shiftCount = 0x402F - aExp; 4373 1.1 bjh21 savedASig = aSig0; 4374 1.1 bjh21 aSig0 >>= shiftCount; 4375 1.10 christos z = (int32)aSig0; 4376 1.1 bjh21 if ( aSign ) z = - z; 4377 1.1 bjh21 if ( ( z < 0 ) ^ aSign ) { 4378 1.1 bjh21 invalid: 4379 1.1 bjh21 float_raise( float_flag_invalid ); 4380 1.1 bjh21 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 4381 1.1 bjh21 } 4382 1.1 bjh21 if ( ( aSig0<<shiftCount ) != savedASig ) { 4383 1.12 matt set_float_exception_inexact_flag(); 4384 1.1 bjh21 } 4385 1.1 bjh21 return z; 4386 1.1 bjh21 4387 1.1 bjh21 } 4388 1.1 bjh21 4389 1.1 bjh21 /* 4390 1.1 bjh21 ------------------------------------------------------------------------------- 4391 1.1 bjh21 Returns the result of converting the quadruple-precision floating-point 4392 1.1 bjh21 value `a' to the 64-bit two's complement integer format. The conversion 4393 1.1 bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point 4394 1.1 bjh21 Arithmetic---which means in particular that the conversion is rounded 4395 1.1 bjh21 according to the current rounding mode. If `a' is a NaN, the largest 4396 1.1 bjh21 positive integer is returned. Otherwise, if the conversion overflows, the 4397 1.1 bjh21 largest integer with the same sign as `a' is returned. 4398 1.1 bjh21 ------------------------------------------------------------------------------- 4399 1.1 bjh21 */ 4400 1.1 bjh21 int64 float128_to_int64( float128 a ) 4401 1.1 bjh21 { 4402 1.1 bjh21 flag aSign; 4403 1.1 bjh21 int32 aExp, shiftCount; 4404 1.1 bjh21 bits64 aSig0, aSig1; 4405 1.1 bjh21 4406 1.1 bjh21 aSig1 = extractFloat128Frac1( a ); 4407 1.1 bjh21 aSig0 = extractFloat128Frac0( a ); 4408 1.1 bjh21 aExp = extractFloat128Exp( a ); 4409 1.1 bjh21 aSign = extractFloat128Sign( a ); 4410 1.1 bjh21 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); 4411 1.1 bjh21 shiftCount = 0x402F - aExp; 4412 1.1 bjh21 if ( shiftCount <= 0 ) { 4413 1.1 bjh21 if ( 0x403E < aExp ) { 4414 1.1 bjh21 float_raise( float_flag_invalid ); 4415 1.1 bjh21 if ( ! aSign 4416 1.1 bjh21 || ( ( aExp == 0x7FFF ) 4417 1.1 bjh21 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) ) 4418 1.1 bjh21 ) 4419 1.1 bjh21 ) { 4420 1.1 bjh21 return LIT64( 0x7FFFFFFFFFFFFFFF ); 4421 1.1 bjh21 } 4422 1.1 bjh21 return (sbits64) LIT64( 0x8000000000000000 ); 4423 1.1 bjh21 } 4424 1.1 bjh21 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 ); 4425 1.1 bjh21 } 4426 1.1 bjh21 else { 4427 1.1 bjh21 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 ); 4428 1.1 bjh21 } 4429 1.1 bjh21 return roundAndPackInt64( aSign, aSig0, aSig1 ); 4430 1.1 bjh21 4431 1.1 bjh21 } 4432 1.1 bjh21 4433 1.1 bjh21 /* 4434 1.1 bjh21 ------------------------------------------------------------------------------- 4435 1.1 bjh21 Returns the result of converting the quadruple-precision floating-point 4436 1.1 bjh21 value `a' to the 64-bit two's complement integer format. The conversion 4437 1.1 bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point 4438 1.1 bjh21 Arithmetic, except that the conversion is always rounded toward zero. 4439 1.1 bjh21 If `a' is a NaN, the largest positive integer is returned. Otherwise, if 4440 1.1 bjh21 the conversion overflows, the largest integer with the same sign as `a' is 4441 1.1 bjh21 returned. 4442 1.1 bjh21 ------------------------------------------------------------------------------- 4443 1.1 bjh21 */ 4444 1.1 bjh21 int64 float128_to_int64_round_to_zero( float128 a ) 4445 1.1 bjh21 { 4446 1.1 bjh21 flag aSign; 4447 1.1 bjh21 int32 aExp, shiftCount; 4448 1.1 bjh21 bits64 aSig0, aSig1; 4449 1.1 bjh21 int64 z; 4450 1.1 bjh21 4451 1.1 bjh21 aSig1 = extractFloat128Frac1( a ); 4452 1.1 bjh21 aSig0 = extractFloat128Frac0( a ); 4453 1.1 bjh21 aExp = extractFloat128Exp( a ); 4454 1.1 bjh21 aSign = extractFloat128Sign( a ); 4455 1.1 bjh21 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); 4456 1.1 bjh21 shiftCount = aExp - 0x402F; 4457 1.1 bjh21 if ( 0 < shiftCount ) { 4458 1.1 bjh21 if ( 0x403E <= aExp ) { 4459 1.1 bjh21 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF ); 4460 1.1 bjh21 if ( ( a.high == LIT64( 0xC03E000000000000 ) ) 4461 1.1 bjh21 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) { 4462 1.12 matt if ( aSig1 ) set_float_exception_inexact_flag(); 4463 1.1 bjh21 } 4464 1.1 bjh21 else { 4465 1.1 bjh21 float_raise( float_flag_invalid ); 4466 1.1 bjh21 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) { 4467 1.1 bjh21 return LIT64( 0x7FFFFFFFFFFFFFFF ); 4468 1.1 bjh21 } 4469 1.1 bjh21 } 4470 1.1 bjh21 return (sbits64) LIT64( 0x8000000000000000 ); 4471 1.1 bjh21 } 4472 1.1 bjh21 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) ); 4473 1.1 bjh21 if ( (bits64) ( aSig1<<shiftCount ) ) { 4474 1.12 matt set_float_exception_inexact_flag(); 4475 1.1 bjh21 } 4476 1.1 bjh21 } 4477 1.1 bjh21 else { 4478 1.1 bjh21 if ( aExp < 0x3FFF ) { 4479 1.1 bjh21 if ( aExp | aSig0 | aSig1 ) { 4480 1.12 matt set_float_exception_inexact_flag(); 4481 1.1 bjh21 } 4482 1.1 bjh21 return 0; 4483 1.1 bjh21 } 4484 1.1 bjh21 z = aSig0>>( - shiftCount ); 4485 1.1 bjh21 if ( aSig1 4486 1.1 bjh21 || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) { 4487 1.12 matt set_float_exception_inexact_flag(); 4488 1.1 bjh21 } 4489 1.1 bjh21 } 4490 1.1 bjh21 if ( aSign ) z = - z; 4491 1.1 bjh21 return z; 4492 1.1 bjh21 4493 1.1 bjh21 } 4494 1.1 bjh21 4495 1.8 matt #if (defined(SOFTFLOATSPARC64_FOR_GCC) || defined(SOFTFLOAT_FOR_GCC)) \ 4496 1.8 matt && defined(SOFTFLOAT_NEED_FIXUNS) 4497 1.1 bjh21 /* 4498 1.4 martin * just like above - but do not care for overflow of signed results 4499 1.4 martin */ 4500 1.4 martin uint64 float128_to_uint64_round_to_zero( float128 a ) 4501 1.4 martin { 4502 1.4 martin flag aSign; 4503 1.4 martin int32 aExp, shiftCount; 4504 1.4 martin bits64 aSig0, aSig1; 4505 1.4 martin uint64 z; 4506 1.4 martin 4507 1.4 martin aSig1 = extractFloat128Frac1( a ); 4508 1.4 martin aSig0 = extractFloat128Frac0( a ); 4509 1.4 martin aExp = extractFloat128Exp( a ); 4510 1.4 martin aSign = extractFloat128Sign( a ); 4511 1.4 martin if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); 4512 1.4 martin shiftCount = aExp - 0x402F; 4513 1.4 martin if ( 0 < shiftCount ) { 4514 1.4 martin if ( 0x403F <= aExp ) { 4515 1.4 martin aSig0 &= LIT64( 0x0000FFFFFFFFFFFF ); 4516 1.4 martin if ( ( a.high == LIT64( 0xC03E000000000000 ) ) 4517 1.4 martin && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) { 4518 1.12 matt if ( aSig1 ) set_float_exception_inexact_flag(); 4519 1.4 martin } 4520 1.4 martin else { 4521 1.4 martin float_raise( float_flag_invalid ); 4522 1.4 martin } 4523 1.5 martin return LIT64( 0xFFFFFFFFFFFFFFFF ); 4524 1.4 martin } 4525 1.4 martin z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) ); 4526 1.4 martin if ( (bits64) ( aSig1<<shiftCount ) ) { 4527 1.12 matt set_float_exception_inexact_flag(); 4528 1.4 martin } 4529 1.4 martin } 4530 1.4 martin else { 4531 1.4 martin if ( aExp < 0x3FFF ) { 4532 1.4 martin if ( aExp | aSig0 | aSig1 ) { 4533 1.12 matt set_float_exception_inexact_flag(); 4534 1.4 martin } 4535 1.4 martin return 0; 4536 1.4 martin } 4537 1.4 martin z = aSig0>>( - shiftCount ); 4538 1.4 martin if (aSig1 || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) { 4539 1.12 matt set_float_exception_inexact_flag(); 4540 1.4 martin } 4541 1.4 martin } 4542 1.4 martin if ( aSign ) z = - z; 4543 1.4 martin return z; 4544 1.4 martin 4545 1.4 martin } 4546 1.8 matt #endif /* (SOFTFLOATSPARC64_FOR_GCC || SOFTFLOAT_FOR_GCC) && SOFTFLOAT_NEED_FIXUNS */ 4547 1.4 martin 4548 1.4 martin /* 4549 1.1 bjh21 ------------------------------------------------------------------------------- 4550 1.1 bjh21 Returns the result of converting the quadruple-precision floating-point 4551 1.1 bjh21 value `a' to the single-precision floating-point format. The conversion 4552 1.1 bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point 4553 1.1 bjh21 Arithmetic. 4554 1.1 bjh21 ------------------------------------------------------------------------------- 4555 1.1 bjh21 */ 4556 1.1 bjh21 float32 float128_to_float32( float128 a ) 4557 1.1 bjh21 { 4558 1.1 bjh21 flag aSign; 4559 1.1 bjh21 int32 aExp; 4560 1.1 bjh21 bits64 aSig0, aSig1; 4561 1.1 bjh21 bits32 zSig; 4562 1.1 bjh21 4563 1.1 bjh21 aSig1 = extractFloat128Frac1( a ); 4564 1.1 bjh21 aSig0 = extractFloat128Frac0( a ); 4565 1.1 bjh21 aExp = extractFloat128Exp( a ); 4566 1.1 bjh21 aSign = extractFloat128Sign( a ); 4567 1.1 bjh21 if ( aExp == 0x7FFF ) { 4568 1.1 bjh21 if ( aSig0 | aSig1 ) { 4569 1.1 bjh21 return commonNaNToFloat32( float128ToCommonNaN( a ) ); 4570 1.1 bjh21 } 4571 1.1 bjh21 return packFloat32( aSign, 0xFF, 0 ); 4572 1.1 bjh21 } 4573 1.1 bjh21 aSig0 |= ( aSig1 != 0 ); 4574 1.1 bjh21 shift64RightJamming( aSig0, 18, &aSig0 ); 4575 1.10 christos zSig = (bits32)aSig0; 4576 1.1 bjh21 if ( aExp || zSig ) { 4577 1.1 bjh21 zSig |= 0x40000000; 4578 1.1 bjh21 aExp -= 0x3F81; 4579 1.1 bjh21 } 4580 1.1 bjh21 return roundAndPackFloat32( aSign, aExp, zSig ); 4581 1.1 bjh21 4582 1.1 bjh21 } 4583 1.1 bjh21 4584 1.1 bjh21 /* 4585 1.1 bjh21 ------------------------------------------------------------------------------- 4586 1.1 bjh21 Returns the result of converting the quadruple-precision floating-point 4587 1.1 bjh21 value `a' to the double-precision floating-point format. The conversion 4588 1.1 bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point 4589 1.1 bjh21 Arithmetic. 4590 1.1 bjh21 ------------------------------------------------------------------------------- 4591 1.1 bjh21 */ 4592 1.1 bjh21 float64 float128_to_float64( float128 a ) 4593 1.1 bjh21 { 4594 1.1 bjh21 flag aSign; 4595 1.1 bjh21 int32 aExp; 4596 1.1 bjh21 bits64 aSig0, aSig1; 4597 1.1 bjh21 4598 1.1 bjh21 aSig1 = extractFloat128Frac1( a ); 4599 1.1 bjh21 aSig0 = extractFloat128Frac0( a ); 4600 1.1 bjh21 aExp = extractFloat128Exp( a ); 4601 1.1 bjh21 aSign = extractFloat128Sign( a ); 4602 1.1 bjh21 if ( aExp == 0x7FFF ) { 4603 1.1 bjh21 if ( aSig0 | aSig1 ) { 4604 1.1 bjh21 return commonNaNToFloat64( float128ToCommonNaN( a ) ); 4605 1.1 bjh21 } 4606 1.1 bjh21 return packFloat64( aSign, 0x7FF, 0 ); 4607 1.1 bjh21 } 4608 1.1 bjh21 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); 4609 1.1 bjh21 aSig0 |= ( aSig1 != 0 ); 4610 1.1 bjh21 if ( aExp || aSig0 ) { 4611 1.1 bjh21 aSig0 |= LIT64( 0x4000000000000000 ); 4612 1.1 bjh21 aExp -= 0x3C01; 4613 1.1 bjh21 } 4614 1.1 bjh21 return roundAndPackFloat64( aSign, aExp, aSig0 ); 4615 1.1 bjh21 4616 1.1 bjh21 } 4617 1.1 bjh21 4618 1.1 bjh21 #ifdef FLOATX80 4619 1.1 bjh21 4620 1.1 bjh21 /* 4621 1.1 bjh21 ------------------------------------------------------------------------------- 4622 1.1 bjh21 Returns the result of converting the quadruple-precision floating-point 4623 1.1 bjh21 value `a' to the extended double-precision floating-point format. The 4624 1.1 bjh21 conversion is performed according to the IEC/IEEE Standard for Binary 4625 1.1 bjh21 Floating-Point Arithmetic. 4626 1.1 bjh21 ------------------------------------------------------------------------------- 4627 1.1 bjh21 */ 4628 1.1 bjh21 floatx80 float128_to_floatx80( float128 a ) 4629 1.1 bjh21 { 4630 1.1 bjh21 flag aSign; 4631 1.1 bjh21 int32 aExp; 4632 1.1 bjh21 bits64 aSig0, aSig1; 4633 1.1 bjh21 4634 1.1 bjh21 aSig1 = extractFloat128Frac1( a ); 4635 1.1 bjh21 aSig0 = extractFloat128Frac0( a ); 4636 1.1 bjh21 aExp = extractFloat128Exp( a ); 4637 1.1 bjh21 aSign = extractFloat128Sign( a ); 4638 1.1 bjh21 if ( aExp == 0x7FFF ) { 4639 1.1 bjh21 if ( aSig0 | aSig1 ) { 4640 1.1 bjh21 return commonNaNToFloatx80( float128ToCommonNaN( a ) ); 4641 1.1 bjh21 } 4642 1.1 bjh21 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 4643 1.1 bjh21 } 4644 1.1 bjh21 if ( aExp == 0 ) { 4645 1.1 bjh21 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 ); 4646 1.1 bjh21 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 4647 1.1 bjh21 } 4648 1.1 bjh21 else { 4649 1.1 bjh21 aSig0 |= LIT64( 0x0001000000000000 ); 4650 1.1 bjh21 } 4651 1.1 bjh21 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 ); 4652 1.1 bjh21 return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 ); 4653 1.1 bjh21 4654 1.1 bjh21 } 4655 1.1 bjh21 4656 1.1 bjh21 #endif 4657 1.1 bjh21 4658 1.1 bjh21 /* 4659 1.1 bjh21 ------------------------------------------------------------------------------- 4660 1.1 bjh21 Rounds the quadruple-precision floating-point value `a' to an integer, and 4661 1.1 bjh21 returns the result as a quadruple-precision floating-point value. The 4662 1.1 bjh21 operation is performed according to the IEC/IEEE Standard for Binary 4663 1.1 bjh21 Floating-Point Arithmetic. 4664 1.1 bjh21 ------------------------------------------------------------------------------- 4665 1.1 bjh21 */ 4666 1.1 bjh21 float128 float128_round_to_int( float128 a ) 4667 1.1 bjh21 { 4668 1.1 bjh21 flag aSign; 4669 1.1 bjh21 int32 aExp; 4670 1.1 bjh21 bits64 lastBitMask, roundBitsMask; 4671 1.1 bjh21 int8 roundingMode; 4672 1.1 bjh21 float128 z; 4673 1.1 bjh21 4674 1.1 bjh21 aExp = extractFloat128Exp( a ); 4675 1.1 bjh21 if ( 0x402F <= aExp ) { 4676 1.1 bjh21 if ( 0x406F <= aExp ) { 4677 1.1 bjh21 if ( ( aExp == 0x7FFF ) 4678 1.1 bjh21 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) 4679 1.1 bjh21 ) { 4680 1.1 bjh21 return propagateFloat128NaN( a, a ); 4681 1.1 bjh21 } 4682 1.1 bjh21 return a; 4683 1.1 bjh21 } 4684 1.1 bjh21 lastBitMask = 1; 4685 1.1 bjh21 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1; 4686 1.1 bjh21 roundBitsMask = lastBitMask - 1; 4687 1.1 bjh21 z = a; 4688 1.1 bjh21 roundingMode = float_rounding_mode; 4689 1.1 bjh21 if ( roundingMode == float_round_nearest_even ) { 4690 1.1 bjh21 if ( lastBitMask ) { 4691 1.1 bjh21 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); 4692 1.1 bjh21 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; 4693 1.1 bjh21 } 4694 1.1 bjh21 else { 4695 1.1 bjh21 if ( (sbits64) z.low < 0 ) { 4696 1.1 bjh21 ++z.high; 4697 1.1 bjh21 if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1; 4698 1.1 bjh21 } 4699 1.1 bjh21 } 4700 1.1 bjh21 } 4701 1.1 bjh21 else if ( roundingMode != float_round_to_zero ) { 4702 1.1 bjh21 if ( extractFloat128Sign( z ) 4703 1.1 bjh21 ^ ( roundingMode == float_round_up ) ) { 4704 1.1 bjh21 add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low ); 4705 1.1 bjh21 } 4706 1.1 bjh21 } 4707 1.1 bjh21 z.low &= ~ roundBitsMask; 4708 1.1 bjh21 } 4709 1.1 bjh21 else { 4710 1.1 bjh21 if ( aExp < 0x3FFF ) { 4711 1.1 bjh21 if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a; 4712 1.12 matt set_float_exception_inexact_flag(); 4713 1.1 bjh21 aSign = extractFloat128Sign( a ); 4714 1.1 bjh21 switch ( float_rounding_mode ) { 4715 1.1 bjh21 case float_round_nearest_even: 4716 1.1 bjh21 if ( ( aExp == 0x3FFE ) 4717 1.1 bjh21 && ( extractFloat128Frac0( a ) 4718 1.1 bjh21 | extractFloat128Frac1( a ) ) 4719 1.1 bjh21 ) { 4720 1.1 bjh21 return packFloat128( aSign, 0x3FFF, 0, 0 ); 4721 1.1 bjh21 } 4722 1.1 bjh21 break; 4723 1.1 bjh21 case float_round_to_zero: 4724 1.1 bjh21 break; 4725 1.1 bjh21 case float_round_down: 4726 1.1 bjh21 return 4727 1.1 bjh21 aSign ? packFloat128( 1, 0x3FFF, 0, 0 ) 4728 1.1 bjh21 : packFloat128( 0, 0, 0, 0 ); 4729 1.1 bjh21 case float_round_up: 4730 1.1 bjh21 return 4731 1.1 bjh21 aSign ? packFloat128( 1, 0, 0, 0 ) 4732 1.1 bjh21 : packFloat128( 0, 0x3FFF, 0, 0 ); 4733 1.1 bjh21 } 4734 1.1 bjh21 return packFloat128( aSign, 0, 0, 0 ); 4735 1.1 bjh21 } 4736 1.1 bjh21 lastBitMask = 1; 4737 1.1 bjh21 lastBitMask <<= 0x402F - aExp; 4738 1.1 bjh21 roundBitsMask = lastBitMask - 1; 4739 1.1 bjh21 z.low = 0; 4740 1.1 bjh21 z.high = a.high; 4741 1.1 bjh21 roundingMode = float_rounding_mode; 4742 1.1 bjh21 if ( roundingMode == float_round_nearest_even ) { 4743 1.1 bjh21 z.high += lastBitMask>>1; 4744 1.1 bjh21 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { 4745 1.1 bjh21 z.high &= ~ lastBitMask; 4746 1.1 bjh21 } 4747 1.1 bjh21 } 4748 1.1 bjh21 else if ( roundingMode != float_round_to_zero ) { 4749 1.1 bjh21 if ( extractFloat128Sign( z ) 4750 1.1 bjh21 ^ ( roundingMode == float_round_up ) ) { 4751 1.1 bjh21 z.high |= ( a.low != 0 ); 4752 1.1 bjh21 z.high += roundBitsMask; 4753 1.1 bjh21 } 4754 1.1 bjh21 } 4755 1.1 bjh21 z.high &= ~ roundBitsMask; 4756 1.1 bjh21 } 4757 1.1 bjh21 if ( ( z.low != a.low ) || ( z.high != a.high ) ) { 4758 1.12 matt set_float_exception_inexact_flag(); 4759 1.1 bjh21 } 4760 1.1 bjh21 return z; 4761 1.1 bjh21 4762 1.1 bjh21 } 4763 1.1 bjh21 4764 1.1 bjh21 /* 4765 1.1 bjh21 ------------------------------------------------------------------------------- 4766 1.1 bjh21 Returns the result of adding the absolute values of the quadruple-precision 4767 1.1 bjh21 floating-point values `a' and `b'. If `zSign' is 1, the sum is negated 4768 1.1 bjh21 before being returned. `zSign' is ignored if the result is a NaN. 4769 1.1 bjh21 The addition is performed according to the IEC/IEEE Standard for Binary 4770 1.1 bjh21 Floating-Point Arithmetic. 4771 1.1 bjh21 ------------------------------------------------------------------------------- 4772 1.1 bjh21 */ 4773 1.1 bjh21 static float128 addFloat128Sigs( float128 a, float128 b, flag zSign ) 4774 1.1 bjh21 { 4775 1.1 bjh21 int32 aExp, bExp, zExp; 4776 1.1 bjh21 bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; 4777 1.1 bjh21 int32 expDiff; 4778 1.1 bjh21 4779 1.1 bjh21 aSig1 = extractFloat128Frac1( a ); 4780 1.1 bjh21 aSig0 = extractFloat128Frac0( a ); 4781 1.1 bjh21 aExp = extractFloat128Exp( a ); 4782 1.1 bjh21 bSig1 = extractFloat128Frac1( b ); 4783 1.1 bjh21 bSig0 = extractFloat128Frac0( b ); 4784 1.1 bjh21 bExp = extractFloat128Exp( b ); 4785 1.1 bjh21 expDiff = aExp - bExp; 4786 1.1 bjh21 if ( 0 < expDiff ) { 4787 1.1 bjh21 if ( aExp == 0x7FFF ) { 4788 1.1 bjh21 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); 4789 1.1 bjh21 return a; 4790 1.1 bjh21 } 4791 1.1 bjh21 if ( bExp == 0 ) { 4792 1.1 bjh21 --expDiff; 4793 1.1 bjh21 } 4794 1.1 bjh21 else { 4795 1.1 bjh21 bSig0 |= LIT64( 0x0001000000000000 ); 4796 1.1 bjh21 } 4797 1.1 bjh21 shift128ExtraRightJamming( 4798 1.1 bjh21 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 ); 4799 1.1 bjh21 zExp = aExp; 4800 1.1 bjh21 } 4801 1.1 bjh21 else if ( expDiff < 0 ) { 4802 1.1 bjh21 if ( bExp == 0x7FFF ) { 4803 1.1 bjh21 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); 4804 1.1 bjh21 return packFloat128( zSign, 0x7FFF, 0, 0 ); 4805 1.1 bjh21 } 4806 1.1 bjh21 if ( aExp == 0 ) { 4807 1.1 bjh21 ++expDiff; 4808 1.1 bjh21 } 4809 1.1 bjh21 else { 4810 1.1 bjh21 aSig0 |= LIT64( 0x0001000000000000 ); 4811 1.1 bjh21 } 4812 1.1 bjh21 shift128ExtraRightJamming( 4813 1.1 bjh21 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 ); 4814 1.1 bjh21 zExp = bExp; 4815 1.1 bjh21 } 4816 1.1 bjh21 else { 4817 1.1 bjh21 if ( aExp == 0x7FFF ) { 4818 1.1 bjh21 if ( aSig0 | aSig1 | bSig0 | bSig1 ) { 4819 1.1 bjh21 return propagateFloat128NaN( a, b ); 4820 1.1 bjh21 } 4821 1.1 bjh21 return a; 4822 1.1 bjh21 } 4823 1.1 bjh21 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 4824 1.1 bjh21 if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 ); 4825 1.1 bjh21 zSig2 = 0; 4826 1.1 bjh21 zSig0 |= LIT64( 0x0002000000000000 ); 4827 1.1 bjh21 zExp = aExp; 4828 1.1 bjh21 goto shiftRight1; 4829 1.1 bjh21 } 4830 1.1 bjh21 aSig0 |= LIT64( 0x0001000000000000 ); 4831 1.1 bjh21 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 4832 1.1 bjh21 --zExp; 4833 1.1 bjh21 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack; 4834 1.1 bjh21 ++zExp; 4835 1.1 bjh21 shiftRight1: 4836 1.1 bjh21 shift128ExtraRightJamming( 4837 1.1 bjh21 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); 4838 1.1 bjh21 roundAndPack: 4839 1.1 bjh21 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); 4840 1.1 bjh21 4841 1.1 bjh21 } 4842 1.1 bjh21 4843 1.1 bjh21 /* 4844 1.1 bjh21 ------------------------------------------------------------------------------- 4845 1.1 bjh21 Returns the result of subtracting the absolute values of the quadruple- 4846 1.1 bjh21 precision floating-point values `a' and `b'. If `zSign' is 1, the 4847 1.1 bjh21 difference is negated before being returned. `zSign' is ignored if the 4848 1.1 bjh21 result is a NaN. The subtraction is performed according to the IEC/IEEE 4849 1.1 bjh21 Standard for Binary Floating-Point Arithmetic. 4850 1.1 bjh21 ------------------------------------------------------------------------------- 4851 1.1 bjh21 */ 4852 1.1 bjh21 static float128 subFloat128Sigs( float128 a, float128 b, flag zSign ) 4853 1.1 bjh21 { 4854 1.1 bjh21 int32 aExp, bExp, zExp; 4855 1.1 bjh21 bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; 4856 1.1 bjh21 int32 expDiff; 4857 1.1 bjh21 float128 z; 4858 1.1 bjh21 4859 1.1 bjh21 aSig1 = extractFloat128Frac1( a ); 4860 1.1 bjh21 aSig0 = extractFloat128Frac0( a ); 4861 1.1 bjh21 aExp = extractFloat128Exp( a ); 4862 1.1 bjh21 bSig1 = extractFloat128Frac1( b ); 4863 1.1 bjh21 bSig0 = extractFloat128Frac0( b ); 4864 1.1 bjh21 bExp = extractFloat128Exp( b ); 4865 1.1 bjh21 expDiff = aExp - bExp; 4866 1.1 bjh21 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); 4867 1.1 bjh21 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 ); 4868 1.1 bjh21 if ( 0 < expDiff ) goto aExpBigger; 4869 1.1 bjh21 if ( expDiff < 0 ) goto bExpBigger; 4870 1.1 bjh21 if ( aExp == 0x7FFF ) { 4871 1.1 bjh21 if ( aSig0 | aSig1 | bSig0 | bSig1 ) { 4872 1.1 bjh21 return propagateFloat128NaN( a, b ); 4873 1.1 bjh21 } 4874 1.1 bjh21 float_raise( float_flag_invalid ); 4875 1.1 bjh21 z.low = float128_default_nan_low; 4876 1.1 bjh21 z.high = float128_default_nan_high; 4877 1.1 bjh21 return z; 4878 1.1 bjh21 } 4879 1.1 bjh21 if ( aExp == 0 ) { 4880 1.1 bjh21 aExp = 1; 4881 1.1 bjh21 bExp = 1; 4882 1.1 bjh21 } 4883 1.1 bjh21 if ( bSig0 < aSig0 ) goto aBigger; 4884 1.1 bjh21 if ( aSig0 < bSig0 ) goto bBigger; 4885 1.1 bjh21 if ( bSig1 < aSig1 ) goto aBigger; 4886 1.1 bjh21 if ( aSig1 < bSig1 ) goto bBigger; 4887 1.1 bjh21 return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 ); 4888 1.1 bjh21 bExpBigger: 4889 1.1 bjh21 if ( bExp == 0x7FFF ) { 4890 1.1 bjh21 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); 4891 1.1 bjh21 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 ); 4892 1.1 bjh21 } 4893 1.1 bjh21 if ( aExp == 0 ) { 4894 1.1 bjh21 ++expDiff; 4895 1.1 bjh21 } 4896 1.1 bjh21 else { 4897 1.1 bjh21 aSig0 |= LIT64( 0x4000000000000000 ); 4898 1.1 bjh21 } 4899 1.1 bjh21 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); 4900 1.1 bjh21 bSig0 |= LIT64( 0x4000000000000000 ); 4901 1.1 bjh21 bBigger: 4902 1.1 bjh21 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 ); 4903 1.1 bjh21 zExp = bExp; 4904 1.1 bjh21 zSign ^= 1; 4905 1.1 bjh21 goto normalizeRoundAndPack; 4906 1.1 bjh21 aExpBigger: 4907 1.1 bjh21 if ( aExp == 0x7FFF ) { 4908 1.1 bjh21 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); 4909 1.1 bjh21 return a; 4910 1.1 bjh21 } 4911 1.1 bjh21 if ( bExp == 0 ) { 4912 1.1 bjh21 --expDiff; 4913 1.1 bjh21 } 4914 1.1 bjh21 else { 4915 1.1 bjh21 bSig0 |= LIT64( 0x4000000000000000 ); 4916 1.1 bjh21 } 4917 1.1 bjh21 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 ); 4918 1.1 bjh21 aSig0 |= LIT64( 0x4000000000000000 ); 4919 1.1 bjh21 aBigger: 4920 1.1 bjh21 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 4921 1.1 bjh21 zExp = aExp; 4922 1.1 bjh21 normalizeRoundAndPack: 4923 1.1 bjh21 --zExp; 4924 1.1 bjh21 return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 ); 4925 1.1 bjh21 4926 1.1 bjh21 } 4927 1.1 bjh21 4928 1.1 bjh21 /* 4929 1.1 bjh21 ------------------------------------------------------------------------------- 4930 1.1 bjh21 Returns the result of adding the quadruple-precision floating-point values 4931 1.1 bjh21 `a' and `b'. The operation is performed according to the IEC/IEEE Standard 4932 1.1 bjh21 for Binary Floating-Point Arithmetic. 4933 1.1 bjh21 ------------------------------------------------------------------------------- 4934 1.1 bjh21 */ 4935 1.1 bjh21 float128 float128_add( float128 a, float128 b ) 4936 1.1 bjh21 { 4937 1.1 bjh21 flag aSign, bSign; 4938 1.1 bjh21 4939 1.1 bjh21 aSign = extractFloat128Sign( a ); 4940 1.1 bjh21 bSign = extractFloat128Sign( b ); 4941 1.1 bjh21 if ( aSign == bSign ) { 4942 1.1 bjh21 return addFloat128Sigs( a, b, aSign ); 4943 1.1 bjh21 } 4944 1.1 bjh21 else { 4945 1.1 bjh21 return subFloat128Sigs( a, b, aSign ); 4946 1.1 bjh21 } 4947 1.1 bjh21 4948 1.1 bjh21 } 4949 1.1 bjh21 4950 1.1 bjh21 /* 4951 1.1 bjh21 ------------------------------------------------------------------------------- 4952 1.1 bjh21 Returns the result of subtracting the quadruple-precision floating-point 4953 1.1 bjh21 values `a' and `b'. The operation is performed according to the IEC/IEEE 4954 1.1 bjh21 Standard for Binary Floating-Point Arithmetic. 4955 1.1 bjh21 ------------------------------------------------------------------------------- 4956 1.1 bjh21 */ 4957 1.1 bjh21 float128 float128_sub( float128 a, float128 b ) 4958 1.1 bjh21 { 4959 1.1 bjh21 flag aSign, bSign; 4960 1.1 bjh21 4961 1.1 bjh21 aSign = extractFloat128Sign( a ); 4962 1.1 bjh21 bSign = extractFloat128Sign( b ); 4963 1.1 bjh21 if ( aSign == bSign ) { 4964 1.1 bjh21 return subFloat128Sigs( a, b, aSign ); 4965 1.1 bjh21 } 4966 1.1 bjh21 else { 4967 1.1 bjh21 return addFloat128Sigs( a, b, aSign ); 4968 1.1 bjh21 } 4969 1.1 bjh21 4970 1.1 bjh21 } 4971 1.1 bjh21 4972 1.1 bjh21 /* 4973 1.1 bjh21 ------------------------------------------------------------------------------- 4974 1.1 bjh21 Returns the result of multiplying the quadruple-precision floating-point 4975 1.1 bjh21 values `a' and `b'. The operation is performed according to the IEC/IEEE 4976 1.1 bjh21 Standard for Binary Floating-Point Arithmetic. 4977 1.1 bjh21 ------------------------------------------------------------------------------- 4978 1.1 bjh21 */ 4979 1.1 bjh21 float128 float128_mul( float128 a, float128 b ) 4980 1.1 bjh21 { 4981 1.1 bjh21 flag aSign, bSign, zSign; 4982 1.1 bjh21 int32 aExp, bExp, zExp; 4983 1.1 bjh21 bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; 4984 1.1 bjh21 float128 z; 4985 1.1 bjh21 4986 1.1 bjh21 aSig1 = extractFloat128Frac1( a ); 4987 1.1 bjh21 aSig0 = extractFloat128Frac0( a ); 4988 1.1 bjh21 aExp = extractFloat128Exp( a ); 4989 1.1 bjh21 aSign = extractFloat128Sign( a ); 4990 1.1 bjh21 bSig1 = extractFloat128Frac1( b ); 4991 1.1 bjh21 bSig0 = extractFloat128Frac0( b ); 4992 1.1 bjh21 bExp = extractFloat128Exp( b ); 4993 1.1 bjh21 bSign = extractFloat128Sign( b ); 4994 1.1 bjh21 zSign = aSign ^ bSign; 4995 1.1 bjh21 if ( aExp == 0x7FFF ) { 4996 1.1 bjh21 if ( ( aSig0 | aSig1 ) 4997 1.1 bjh21 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { 4998 1.1 bjh21 return propagateFloat128NaN( a, b ); 4999 1.1 bjh21 } 5000 1.1 bjh21 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid; 5001 1.1 bjh21 return packFloat128( zSign, 0x7FFF, 0, 0 ); 5002 1.1 bjh21 } 5003 1.1 bjh21 if ( bExp == 0x7FFF ) { 5004 1.1 bjh21 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); 5005 1.1 bjh21 if ( ( aExp | aSig0 | aSig1 ) == 0 ) { 5006 1.1 bjh21 invalid: 5007 1.1 bjh21 float_raise( float_flag_invalid ); 5008 1.1 bjh21 z.low = float128_default_nan_low; 5009 1.1 bjh21 z.high = float128_default_nan_high; 5010 1.1 bjh21 return z; 5011 1.1 bjh21 } 5012 1.1 bjh21 return packFloat128( zSign, 0x7FFF, 0, 0 ); 5013 1.1 bjh21 } 5014 1.1 bjh21 if ( aExp == 0 ) { 5015 1.1 bjh21 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); 5016 1.1 bjh21 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 5017 1.1 bjh21 } 5018 1.1 bjh21 if ( bExp == 0 ) { 5019 1.1 bjh21 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); 5020 1.1 bjh21 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 5021 1.1 bjh21 } 5022 1.1 bjh21 zExp = aExp + bExp - 0x4000; 5023 1.1 bjh21 aSig0 |= LIT64( 0x0001000000000000 ); 5024 1.1 bjh21 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 ); 5025 1.1 bjh21 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 ); 5026 1.1 bjh21 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 ); 5027 1.1 bjh21 zSig2 |= ( zSig3 != 0 ); 5028 1.1 bjh21 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) { 5029 1.1 bjh21 shift128ExtraRightJamming( 5030 1.1 bjh21 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); 5031 1.1 bjh21 ++zExp; 5032 1.1 bjh21 } 5033 1.1 bjh21 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); 5034 1.1 bjh21 5035 1.1 bjh21 } 5036 1.1 bjh21 5037 1.1 bjh21 /* 5038 1.1 bjh21 ------------------------------------------------------------------------------- 5039 1.1 bjh21 Returns the result of dividing the quadruple-precision floating-point value 5040 1.1 bjh21 `a' by the corresponding value `b'. The operation is performed according to 5041 1.1 bjh21 the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 5042 1.1 bjh21 ------------------------------------------------------------------------------- 5043 1.1 bjh21 */ 5044 1.1 bjh21 float128 float128_div( float128 a, float128 b ) 5045 1.1 bjh21 { 5046 1.1 bjh21 flag aSign, bSign, zSign; 5047 1.1 bjh21 int32 aExp, bExp, zExp; 5048 1.1 bjh21 bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; 5049 1.1 bjh21 bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; 5050 1.1 bjh21 float128 z; 5051 1.1 bjh21 5052 1.1 bjh21 aSig1 = extractFloat128Frac1( a ); 5053 1.1 bjh21 aSig0 = extractFloat128Frac0( a ); 5054 1.1 bjh21 aExp = extractFloat128Exp( a ); 5055 1.1 bjh21 aSign = extractFloat128Sign( a ); 5056 1.1 bjh21 bSig1 = extractFloat128Frac1( b ); 5057 1.1 bjh21 bSig0 = extractFloat128Frac0( b ); 5058 1.1 bjh21 bExp = extractFloat128Exp( b ); 5059 1.1 bjh21 bSign = extractFloat128Sign( b ); 5060 1.1 bjh21 zSign = aSign ^ bSign; 5061 1.1 bjh21 if ( aExp == 0x7FFF ) { 5062 1.1 bjh21 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); 5063 1.1 bjh21 if ( bExp == 0x7FFF ) { 5064 1.1 bjh21 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); 5065 1.1 bjh21 goto invalid; 5066 1.1 bjh21 } 5067 1.1 bjh21 return packFloat128( zSign, 0x7FFF, 0, 0 ); 5068 1.1 bjh21 } 5069 1.1 bjh21 if ( bExp == 0x7FFF ) { 5070 1.1 bjh21 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); 5071 1.1 bjh21 return packFloat128( zSign, 0, 0, 0 ); 5072 1.1 bjh21 } 5073 1.1 bjh21 if ( bExp == 0 ) { 5074 1.1 bjh21 if ( ( bSig0 | bSig1 ) == 0 ) { 5075 1.1 bjh21 if ( ( aExp | aSig0 | aSig1 ) == 0 ) { 5076 1.1 bjh21 invalid: 5077 1.1 bjh21 float_raise( float_flag_invalid ); 5078 1.1 bjh21 z.low = float128_default_nan_low; 5079 1.1 bjh21 z.high = float128_default_nan_high; 5080 1.1 bjh21 return z; 5081 1.1 bjh21 } 5082 1.1 bjh21 float_raise( float_flag_divbyzero ); 5083 1.1 bjh21 return packFloat128( zSign, 0x7FFF, 0, 0 ); 5084 1.1 bjh21 } 5085 1.1 bjh21 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 5086 1.1 bjh21 } 5087 1.1 bjh21 if ( aExp == 0 ) { 5088 1.1 bjh21 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); 5089 1.1 bjh21 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 5090 1.1 bjh21 } 5091 1.1 bjh21 zExp = aExp - bExp + 0x3FFD; 5092 1.1 bjh21 shortShift128Left( 5093 1.1 bjh21 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 ); 5094 1.1 bjh21 shortShift128Left( 5095 1.1 bjh21 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 ); 5096 1.1 bjh21 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) { 5097 1.1 bjh21 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 ); 5098 1.1 bjh21 ++zExp; 5099 1.1 bjh21 } 5100 1.1 bjh21 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 ); 5101 1.1 bjh21 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 ); 5102 1.1 bjh21 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 ); 5103 1.1 bjh21 while ( (sbits64) rem0 < 0 ) { 5104 1.1 bjh21 --zSig0; 5105 1.1 bjh21 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 ); 5106 1.1 bjh21 } 5107 1.1 bjh21 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 ); 5108 1.1 bjh21 if ( ( zSig1 & 0x3FFF ) <= 4 ) { 5109 1.1 bjh21 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 ); 5110 1.1 bjh21 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 ); 5111 1.1 bjh21 while ( (sbits64) rem1 < 0 ) { 5112 1.1 bjh21 --zSig1; 5113 1.1 bjh21 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 ); 5114 1.1 bjh21 } 5115 1.1 bjh21 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); 5116 1.1 bjh21 } 5117 1.1 bjh21 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 ); 5118 1.1 bjh21 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); 5119 1.1 bjh21 5120 1.1 bjh21 } 5121 1.1 bjh21 5122 1.1 bjh21 /* 5123 1.1 bjh21 ------------------------------------------------------------------------------- 5124 1.1 bjh21 Returns the remainder of the quadruple-precision floating-point value `a' 5125 1.1 bjh21 with respect to the corresponding value `b'. The operation is performed 5126 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 5127 1.1 bjh21 ------------------------------------------------------------------------------- 5128 1.1 bjh21 */ 5129 1.1 bjh21 float128 float128_rem( float128 a, float128 b ) 5130 1.1 bjh21 { 5131 1.9 martin flag aSign, zSign; 5132 1.1 bjh21 int32 aExp, bExp, expDiff; 5133 1.1 bjh21 bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2; 5134 1.1 bjh21 bits64 allZero, alternateASig0, alternateASig1, sigMean1; 5135 1.1 bjh21 sbits64 sigMean0; 5136 1.1 bjh21 float128 z; 5137 1.1 bjh21 5138 1.1 bjh21 aSig1 = extractFloat128Frac1( a ); 5139 1.1 bjh21 aSig0 = extractFloat128Frac0( a ); 5140 1.1 bjh21 aExp = extractFloat128Exp( a ); 5141 1.1 bjh21 aSign = extractFloat128Sign( a ); 5142 1.1 bjh21 bSig1 = extractFloat128Frac1( b ); 5143 1.1 bjh21 bSig0 = extractFloat128Frac0( b ); 5144 1.1 bjh21 bExp = extractFloat128Exp( b ); 5145 1.1 bjh21 if ( aExp == 0x7FFF ) { 5146 1.1 bjh21 if ( ( aSig0 | aSig1 ) 5147 1.1 bjh21 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { 5148 1.1 bjh21 return propagateFloat128NaN( a, b ); 5149 1.1 bjh21 } 5150 1.1 bjh21 goto invalid; 5151 1.1 bjh21 } 5152 1.1 bjh21 if ( bExp == 0x7FFF ) { 5153 1.1 bjh21 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); 5154 1.1 bjh21 return a; 5155 1.1 bjh21 } 5156 1.1 bjh21 if ( bExp == 0 ) { 5157 1.1 bjh21 if ( ( bSig0 | bSig1 ) == 0 ) { 5158 1.1 bjh21 invalid: 5159 1.1 bjh21 float_raise( float_flag_invalid ); 5160 1.1 bjh21 z.low = float128_default_nan_low; 5161 1.1 bjh21 z.high = float128_default_nan_high; 5162 1.1 bjh21 return z; 5163 1.1 bjh21 } 5164 1.1 bjh21 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 5165 1.1 bjh21 } 5166 1.1 bjh21 if ( aExp == 0 ) { 5167 1.1 bjh21 if ( ( aSig0 | aSig1 ) == 0 ) return a; 5168 1.1 bjh21 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 5169 1.1 bjh21 } 5170 1.1 bjh21 expDiff = aExp - bExp; 5171 1.1 bjh21 if ( expDiff < -1 ) return a; 5172 1.1 bjh21 shortShift128Left( 5173 1.1 bjh21 aSig0 | LIT64( 0x0001000000000000 ), 5174 1.1 bjh21 aSig1, 5175 1.1 bjh21 15 - ( expDiff < 0 ), 5176 1.1 bjh21 &aSig0, 5177 1.1 bjh21 &aSig1 5178 1.1 bjh21 ); 5179 1.1 bjh21 shortShift128Left( 5180 1.1 bjh21 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 ); 5181 1.1 bjh21 q = le128( bSig0, bSig1, aSig0, aSig1 ); 5182 1.1 bjh21 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); 5183 1.1 bjh21 expDiff -= 64; 5184 1.1 bjh21 while ( 0 < expDiff ) { 5185 1.1 bjh21 q = estimateDiv128To64( aSig0, aSig1, bSig0 ); 5186 1.1 bjh21 q = ( 4 < q ) ? q - 4 : 0; 5187 1.1 bjh21 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 ); 5188 1.1 bjh21 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero ); 5189 1.1 bjh21 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero ); 5190 1.1 bjh21 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 ); 5191 1.1 bjh21 expDiff -= 61; 5192 1.1 bjh21 } 5193 1.1 bjh21 if ( -64 < expDiff ) { 5194 1.1 bjh21 q = estimateDiv128To64( aSig0, aSig1, bSig0 ); 5195 1.1 bjh21 q = ( 4 < q ) ? q - 4 : 0; 5196 1.1 bjh21 q >>= - expDiff; 5197 1.1 bjh21 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); 5198 1.1 bjh21 expDiff += 52; 5199 1.1 bjh21 if ( expDiff < 0 ) { 5200 1.1 bjh21 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); 5201 1.1 bjh21 } 5202 1.1 bjh21 else { 5203 1.1 bjh21 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 ); 5204 1.1 bjh21 } 5205 1.1 bjh21 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 ); 5206 1.1 bjh21 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 ); 5207 1.1 bjh21 } 5208 1.1 bjh21 else { 5209 1.1 bjh21 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 ); 5210 1.1 bjh21 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); 5211 1.1 bjh21 } 5212 1.1 bjh21 do { 5213 1.1 bjh21 alternateASig0 = aSig0; 5214 1.1 bjh21 alternateASig1 = aSig1; 5215 1.1 bjh21 ++q; 5216 1.1 bjh21 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); 5217 1.1 bjh21 } while ( 0 <= (sbits64) aSig0 ); 5218 1.1 bjh21 add128( 5219 1.3 mrg aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 ); 5220 1.1 bjh21 if ( ( sigMean0 < 0 ) 5221 1.1 bjh21 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) { 5222 1.1 bjh21 aSig0 = alternateASig0; 5223 1.1 bjh21 aSig1 = alternateASig1; 5224 1.1 bjh21 } 5225 1.1 bjh21 zSign = ( (sbits64) aSig0 < 0 ); 5226 1.1 bjh21 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 ); 5227 1.1 bjh21 return 5228 1.1 bjh21 normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 ); 5229 1.1 bjh21 5230 1.1 bjh21 } 5231 1.1 bjh21 5232 1.1 bjh21 /* 5233 1.1 bjh21 ------------------------------------------------------------------------------- 5234 1.1 bjh21 Returns the square root of the quadruple-precision floating-point value `a'. 5235 1.1 bjh21 The operation is performed according to the IEC/IEEE Standard for Binary 5236 1.1 bjh21 Floating-Point Arithmetic. 5237 1.1 bjh21 ------------------------------------------------------------------------------- 5238 1.1 bjh21 */ 5239 1.1 bjh21 float128 float128_sqrt( float128 a ) 5240 1.1 bjh21 { 5241 1.1 bjh21 flag aSign; 5242 1.1 bjh21 int32 aExp, zExp; 5243 1.1 bjh21 bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0; 5244 1.1 bjh21 bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; 5245 1.1 bjh21 float128 z; 5246 1.1 bjh21 5247 1.1 bjh21 aSig1 = extractFloat128Frac1( a ); 5248 1.1 bjh21 aSig0 = extractFloat128Frac0( a ); 5249 1.1 bjh21 aExp = extractFloat128Exp( a ); 5250 1.1 bjh21 aSign = extractFloat128Sign( a ); 5251 1.1 bjh21 if ( aExp == 0x7FFF ) { 5252 1.1 bjh21 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a ); 5253 1.1 bjh21 if ( ! aSign ) return a; 5254 1.1 bjh21 goto invalid; 5255 1.1 bjh21 } 5256 1.1 bjh21 if ( aSign ) { 5257 1.1 bjh21 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a; 5258 1.1 bjh21 invalid: 5259 1.1 bjh21 float_raise( float_flag_invalid ); 5260 1.1 bjh21 z.low = float128_default_nan_low; 5261 1.1 bjh21 z.high = float128_default_nan_high; 5262 1.1 bjh21 return z; 5263 1.1 bjh21 } 5264 1.1 bjh21 if ( aExp == 0 ) { 5265 1.1 bjh21 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 ); 5266 1.1 bjh21 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 5267 1.1 bjh21 } 5268 1.15 christos zExp = (int32) ( (bits32)(aExp - 0x3FFF) >> 1) + 0x3FFE; 5269 1.1 bjh21 aSig0 |= LIT64( 0x0001000000000000 ); 5270 1.10 christos zSig0 = estimateSqrt32((int16)aExp, (bits32)(aSig0>>17)); 5271 1.1 bjh21 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 ); 5272 1.1 bjh21 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 ); 5273 1.1 bjh21 doubleZSig0 = zSig0<<1; 5274 1.1 bjh21 mul64To128( zSig0, zSig0, &term0, &term1 ); 5275 1.1 bjh21 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); 5276 1.1 bjh21 while ( (sbits64) rem0 < 0 ) { 5277 1.1 bjh21 --zSig0; 5278 1.1 bjh21 doubleZSig0 -= 2; 5279 1.1 bjh21 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); 5280 1.1 bjh21 } 5281 1.1 bjh21 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); 5282 1.1 bjh21 if ( ( zSig1 & 0x1FFF ) <= 5 ) { 5283 1.1 bjh21 if ( zSig1 == 0 ) zSig1 = 1; 5284 1.1 bjh21 mul64To128( doubleZSig0, zSig1, &term1, &term2 ); 5285 1.1 bjh21 sub128( rem1, 0, term1, term2, &rem1, &rem2 ); 5286 1.1 bjh21 mul64To128( zSig1, zSig1, &term2, &term3 ); 5287 1.1 bjh21 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); 5288 1.1 bjh21 while ( (sbits64) rem1 < 0 ) { 5289 1.1 bjh21 --zSig1; 5290 1.1 bjh21 shortShift128Left( 0, zSig1, 1, &term2, &term3 ); 5291 1.1 bjh21 term3 |= 1; 5292 1.1 bjh21 term2 |= doubleZSig0; 5293 1.1 bjh21 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); 5294 1.1 bjh21 } 5295 1.1 bjh21 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); 5296 1.1 bjh21 } 5297 1.1 bjh21 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 ); 5298 1.1 bjh21 return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 ); 5299 1.1 bjh21 5300 1.1 bjh21 } 5301 1.1 bjh21 5302 1.1 bjh21 /* 5303 1.1 bjh21 ------------------------------------------------------------------------------- 5304 1.1 bjh21 Returns 1 if the quadruple-precision floating-point value `a' is equal to 5305 1.1 bjh21 the corresponding value `b', and 0 otherwise. The comparison is performed 5306 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 5307 1.1 bjh21 ------------------------------------------------------------------------------- 5308 1.1 bjh21 */ 5309 1.1 bjh21 flag float128_eq( float128 a, float128 b ) 5310 1.1 bjh21 { 5311 1.1 bjh21 5312 1.1 bjh21 if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) 5313 1.1 bjh21 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) 5314 1.1 bjh21 || ( ( extractFloat128Exp( b ) == 0x7FFF ) 5315 1.1 bjh21 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) 5316 1.1 bjh21 ) { 5317 1.1 bjh21 if ( float128_is_signaling_nan( a ) 5318 1.1 bjh21 || float128_is_signaling_nan( b ) ) { 5319 1.1 bjh21 float_raise( float_flag_invalid ); 5320 1.1 bjh21 } 5321 1.1 bjh21 return 0; 5322 1.1 bjh21 } 5323 1.1 bjh21 return 5324 1.1 bjh21 ( a.low == b.low ) 5325 1.1 bjh21 && ( ( a.high == b.high ) 5326 1.1 bjh21 || ( ( a.low == 0 ) 5327 1.1 bjh21 && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) ) 5328 1.1 bjh21 ); 5329 1.1 bjh21 5330 1.1 bjh21 } 5331 1.1 bjh21 5332 1.1 bjh21 /* 5333 1.1 bjh21 ------------------------------------------------------------------------------- 5334 1.1 bjh21 Returns 1 if the quadruple-precision floating-point value `a' is less than 5335 1.1 bjh21 or equal to the corresponding value `b', and 0 otherwise. The comparison 5336 1.1 bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point 5337 1.1 bjh21 Arithmetic. 5338 1.1 bjh21 ------------------------------------------------------------------------------- 5339 1.1 bjh21 */ 5340 1.1 bjh21 flag float128_le( float128 a, float128 b ) 5341 1.1 bjh21 { 5342 1.1 bjh21 flag aSign, bSign; 5343 1.1 bjh21 5344 1.1 bjh21 if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) 5345 1.1 bjh21 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) 5346 1.1 bjh21 || ( ( extractFloat128Exp( b ) == 0x7FFF ) 5347 1.1 bjh21 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) 5348 1.1 bjh21 ) { 5349 1.1 bjh21 float_raise( float_flag_invalid ); 5350 1.1 bjh21 return 0; 5351 1.1 bjh21 } 5352 1.1 bjh21 aSign = extractFloat128Sign( a ); 5353 1.1 bjh21 bSign = extractFloat128Sign( b ); 5354 1.1 bjh21 if ( aSign != bSign ) { 5355 1.1 bjh21 return 5356 1.1 bjh21 aSign 5357 1.1 bjh21 || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 5358 1.1 bjh21 == 0 ); 5359 1.1 bjh21 } 5360 1.1 bjh21 return 5361 1.1 bjh21 aSign ? le128( b.high, b.low, a.high, a.low ) 5362 1.1 bjh21 : le128( a.high, a.low, b.high, b.low ); 5363 1.1 bjh21 5364 1.1 bjh21 } 5365 1.1 bjh21 5366 1.1 bjh21 /* 5367 1.1 bjh21 ------------------------------------------------------------------------------- 5368 1.1 bjh21 Returns 1 if the quadruple-precision floating-point value `a' is less than 5369 1.1 bjh21 the corresponding value `b', and 0 otherwise. The comparison is performed 5370 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 5371 1.1 bjh21 ------------------------------------------------------------------------------- 5372 1.1 bjh21 */ 5373 1.1 bjh21 flag float128_lt( float128 a, float128 b ) 5374 1.1 bjh21 { 5375 1.1 bjh21 flag aSign, bSign; 5376 1.1 bjh21 5377 1.1 bjh21 if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) 5378 1.1 bjh21 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) 5379 1.1 bjh21 || ( ( extractFloat128Exp( b ) == 0x7FFF ) 5380 1.1 bjh21 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) 5381 1.1 bjh21 ) { 5382 1.1 bjh21 float_raise( float_flag_invalid ); 5383 1.1 bjh21 return 0; 5384 1.1 bjh21 } 5385 1.1 bjh21 aSign = extractFloat128Sign( a ); 5386 1.1 bjh21 bSign = extractFloat128Sign( b ); 5387 1.1 bjh21 if ( aSign != bSign ) { 5388 1.1 bjh21 return 5389 1.1 bjh21 aSign 5390 1.1 bjh21 && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 5391 1.1 bjh21 != 0 ); 5392 1.1 bjh21 } 5393 1.1 bjh21 return 5394 1.1 bjh21 aSign ? lt128( b.high, b.low, a.high, a.low ) 5395 1.1 bjh21 : lt128( a.high, a.low, b.high, b.low ); 5396 1.1 bjh21 5397 1.1 bjh21 } 5398 1.1 bjh21 5399 1.1 bjh21 /* 5400 1.1 bjh21 ------------------------------------------------------------------------------- 5401 1.1 bjh21 Returns 1 if the quadruple-precision floating-point value `a' is equal to 5402 1.1 bjh21 the corresponding value `b', and 0 otherwise. The invalid exception is 5403 1.1 bjh21 raised if either operand is a NaN. Otherwise, the comparison is performed 5404 1.1 bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 5405 1.1 bjh21 ------------------------------------------------------------------------------- 5406 1.1 bjh21 */ 5407 1.1 bjh21 flag float128_eq_signaling( float128 a, float128 b ) 5408 1.1 bjh21 { 5409 1.1 bjh21 5410 1.1 bjh21 if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) 5411 1.1 bjh21 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) 5412 1.1 bjh21 || ( ( extractFloat128Exp( b ) == 0x7FFF ) 5413 1.1 bjh21 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) 5414 1.1 bjh21 ) { 5415 1.1 bjh21 float_raise( float_flag_invalid ); 5416 1.1 bjh21 return 0; 5417 1.1 bjh21 } 5418 1.1 bjh21 return 5419 1.1 bjh21 ( a.low == b.low ) 5420 1.1 bjh21 && ( ( a.high == b.high ) 5421 1.1 bjh21 || ( ( a.low == 0 ) 5422 1.1 bjh21 && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) ) 5423 1.1 bjh21 ); 5424 1.1 bjh21 5425 1.1 bjh21 } 5426 1.1 bjh21 5427 1.1 bjh21 /* 5428 1.1 bjh21 ------------------------------------------------------------------------------- 5429 1.1 bjh21 Returns 1 if the quadruple-precision floating-point value `a' is less than 5430 1.1 bjh21 or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not 5431 1.1 bjh21 cause an exception. Otherwise, the comparison is performed according to the 5432 1.1 bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic. 5433 1.1 bjh21 ------------------------------------------------------------------------------- 5434 1.1 bjh21 */ 5435 1.1 bjh21 flag float128_le_quiet( float128 a, float128 b ) 5436 1.1 bjh21 { 5437 1.1 bjh21 flag aSign, bSign; 5438 1.1 bjh21 5439 1.1 bjh21 if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) 5440 1.1 bjh21 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) 5441 1.1 bjh21 || ( ( extractFloat128Exp( b ) == 0x7FFF ) 5442 1.1 bjh21 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) 5443 1.1 bjh21 ) { 5444 1.1 bjh21 if ( float128_is_signaling_nan( a ) 5445 1.1 bjh21 || float128_is_signaling_nan( b ) ) { 5446 1.1 bjh21 float_raise( float_flag_invalid ); 5447 1.1 bjh21 } 5448 1.1 bjh21 return 0; 5449 1.1 bjh21 } 5450 1.1 bjh21 aSign = extractFloat128Sign( a ); 5451 1.1 bjh21 bSign = extractFloat128Sign( b ); 5452 1.1 bjh21 if ( aSign != bSign ) { 5453 1.1 bjh21 return 5454 1.1 bjh21 aSign 5455 1.1 bjh21 || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 5456 1.1 bjh21 == 0 ); 5457 1.1 bjh21 } 5458 1.1 bjh21 return 5459 1.1 bjh21 aSign ? le128( b.high, b.low, a.high, a.low ) 5460 1.1 bjh21 : le128( a.high, a.low, b.high, b.low ); 5461 1.1 bjh21 5462 1.1 bjh21 } 5463 1.1 bjh21 5464 1.1 bjh21 /* 5465 1.1 bjh21 ------------------------------------------------------------------------------- 5466 1.1 bjh21 Returns 1 if the quadruple-precision floating-point value `a' is less than 5467 1.1 bjh21 the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an 5468 1.1 bjh21 exception. Otherwise, the comparison is performed according to the IEC/IEEE 5469 1.1 bjh21 Standard for Binary Floating-Point Arithmetic. 5470 1.1 bjh21 ------------------------------------------------------------------------------- 5471 1.1 bjh21 */ 5472 1.1 bjh21 flag float128_lt_quiet( float128 a, float128 b ) 5473 1.1 bjh21 { 5474 1.1 bjh21 flag aSign, bSign; 5475 1.1 bjh21 5476 1.1 bjh21 if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) 5477 1.1 bjh21 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) 5478 1.1 bjh21 || ( ( extractFloat128Exp( b ) == 0x7FFF ) 5479 1.1 bjh21 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) 5480 1.1 bjh21 ) { 5481 1.1 bjh21 if ( float128_is_signaling_nan( a ) 5482 1.1 bjh21 || float128_is_signaling_nan( b ) ) { 5483 1.1 bjh21 float_raise( float_flag_invalid ); 5484 1.1 bjh21 } 5485 1.1 bjh21 return 0; 5486 1.1 bjh21 } 5487 1.1 bjh21 aSign = extractFloat128Sign( a ); 5488 1.1 bjh21 bSign = extractFloat128Sign( b ); 5489 1.1 bjh21 if ( aSign != bSign ) { 5490 1.1 bjh21 return 5491 1.1 bjh21 aSign 5492 1.1 bjh21 && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 5493 1.1 bjh21 != 0 ); 5494 1.1 bjh21 } 5495 1.1 bjh21 return 5496 1.1 bjh21 aSign ? lt128( b.high, b.low, a.high, a.low ) 5497 1.1 bjh21 : lt128( a.high, a.low, b.high, b.low ); 5498 1.1 bjh21 5499 1.1 bjh21 } 5500 1.1 bjh21 5501 1.1 bjh21 #endif 5502 1.1 bjh21 5503 1.1 bjh21 5504 1.1 bjh21 #if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS) 5505 1.1 bjh21 5506 1.1 bjh21 /* 5507 1.1 bjh21 * These two routines are not part of the original softfloat distribution. 5508 1.1 bjh21 * 5509 1.1 bjh21 * They are based on the corresponding conversions to integer but return 5510 1.1 bjh21 * unsigned numbers instead since these functions are required by GCC. 5511 1.1 bjh21 * 5512 1.2 salo * Added by Mark Brinicombe <mark (at) NetBSD.org> 27/09/97 5513 1.1 bjh21 * 5514 1.1 bjh21 * float64 version overhauled for SoftFloat 2a [bjh21 2000-07-15] 5515 1.1 bjh21 */ 5516 1.1 bjh21 5517 1.1 bjh21 /* 5518 1.1 bjh21 ------------------------------------------------------------------------------- 5519 1.1 bjh21 Returns the result of converting the double-precision floating-point value 5520 1.1 bjh21 `a' to the 32-bit unsigned integer format. The conversion is 5521 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-point 5522 1.1 bjh21 Arithmetic, except that the conversion is always rounded toward zero. If 5523 1.1 bjh21 `a' is a NaN, the largest positive integer is returned. If the conversion 5524 1.1 bjh21 overflows, the largest integer positive is returned. 5525 1.1 bjh21 ------------------------------------------------------------------------------- 5526 1.1 bjh21 */ 5527 1.1 bjh21 uint32 float64_to_uint32_round_to_zero( float64 a ) 5528 1.1 bjh21 { 5529 1.1 bjh21 flag aSign; 5530 1.1 bjh21 int16 aExp, shiftCount; 5531 1.1 bjh21 bits64 aSig, savedASig; 5532 1.1 bjh21 uint32 z; 5533 1.1 bjh21 5534 1.1 bjh21 aSig = extractFloat64Frac( a ); 5535 1.1 bjh21 aExp = extractFloat64Exp( a ); 5536 1.1 bjh21 aSign = extractFloat64Sign( a ); 5537 1.1 bjh21 5538 1.1 bjh21 if (aSign) { 5539 1.1 bjh21 float_raise( float_flag_invalid ); 5540 1.1 bjh21 return(0); 5541 1.1 bjh21 } 5542 1.1 bjh21 5543 1.1 bjh21 if ( 0x41E < aExp ) { 5544 1.1 bjh21 float_raise( float_flag_invalid ); 5545 1.1 bjh21 return 0xffffffff; 5546 1.1 bjh21 } 5547 1.1 bjh21 else if ( aExp < 0x3FF ) { 5548 1.12 matt if ( aExp || aSig ) set_float_exception_inexact_flag(); 5549 1.1 bjh21 return 0; 5550 1.1 bjh21 } 5551 1.1 bjh21 aSig |= LIT64( 0x0010000000000000 ); 5552 1.1 bjh21 shiftCount = 0x433 - aExp; 5553 1.1 bjh21 savedASig = aSig; 5554 1.1 bjh21 aSig >>= shiftCount; 5555 1.10 christos z = (uint32)aSig; 5556 1.1 bjh21 if ( ( aSig<<shiftCount ) != savedASig ) { 5557 1.12 matt set_float_exception_inexact_flag(); 5558 1.1 bjh21 } 5559 1.1 bjh21 return z; 5560 1.1 bjh21 5561 1.1 bjh21 } 5562 1.1 bjh21 5563 1.1 bjh21 /* 5564 1.1 bjh21 ------------------------------------------------------------------------------- 5565 1.1 bjh21 Returns the result of converting the single-precision floating-point value 5566 1.1 bjh21 `a' to the 32-bit unsigned integer format. The conversion is 5567 1.1 bjh21 performed according to the IEC/IEEE Standard for Binary Floating-point 5568 1.1 bjh21 Arithmetic, except that the conversion is always rounded toward zero. If 5569 1.1 bjh21 `a' is a NaN, the largest positive integer is returned. If the conversion 5570 1.1 bjh21 overflows, the largest positive integer is returned. 5571 1.1 bjh21 ------------------------------------------------------------------------------- 5572 1.1 bjh21 */ 5573 1.1 bjh21 uint32 float32_to_uint32_round_to_zero( float32 a ) 5574 1.1 bjh21 { 5575 1.1 bjh21 flag aSign; 5576 1.1 bjh21 int16 aExp, shiftCount; 5577 1.1 bjh21 bits32 aSig; 5578 1.1 bjh21 uint32 z; 5579 1.1 bjh21 5580 1.1 bjh21 aSig = extractFloat32Frac( a ); 5581 1.1 bjh21 aExp = extractFloat32Exp( a ); 5582 1.1 bjh21 aSign = extractFloat32Sign( a ); 5583 1.1 bjh21 shiftCount = aExp - 0x9E; 5584 1.1 bjh21 5585 1.1 bjh21 if (aSign) { 5586 1.1 bjh21 float_raise( float_flag_invalid ); 5587 1.1 bjh21 return(0); 5588 1.1 bjh21 } 5589 1.1 bjh21 if ( 0 < shiftCount ) { 5590 1.1 bjh21 float_raise( float_flag_invalid ); 5591 1.1 bjh21 return 0xFFFFFFFF; 5592 1.1 bjh21 } 5593 1.1 bjh21 else if ( aExp <= 0x7E ) { 5594 1.12 matt if ( aExp | aSig ) set_float_exception_inexact_flag(); 5595 1.1 bjh21 return 0; 5596 1.1 bjh21 } 5597 1.1 bjh21 aSig = ( aSig | 0x800000 )<<8; 5598 1.1 bjh21 z = aSig>>( - shiftCount ); 5599 1.1 bjh21 if ( aSig<<( shiftCount & 31 ) ) { 5600 1.12 matt set_float_exception_inexact_flag(); 5601 1.1 bjh21 } 5602 1.1 bjh21 return z; 5603 1.1 bjh21 5604 1.1 bjh21 } 5605 1.1 bjh21 5606 1.1 bjh21 #endif 5607