Home | History | Annotate | Line # | Download | only in bits64
softfloat.c revision 1.8.2.1
      1 /* $NetBSD: softfloat.c,v 1.8.2.1 2012/04/17 00:05:23 yamt Exp $ */
      2 
      3 /*
      4  * This version hacked for use with gcc -msoft-float by bjh21.
      5  * (Mostly a case of #ifdefing out things GCC doesn't need or provides
      6  *  itself).
      7  */
      8 
      9 /*
     10  * Things you may want to define:
     11  *
     12  * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
     13  *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
     14  *   properly renamed.
     15  */
     16 
     17 /*
     18 ===============================================================================
     19 
     20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
     21 Arithmetic Package, Release 2a.
     22 
     23 Written by John R. Hauser.  This work was made possible in part by the
     24 International Computer Science Institute, located at Suite 600, 1947 Center
     25 Street, Berkeley, California 94704.  Funding was partially provided by the
     26 National Science Foundation under grant MIP-9311980.  The original version
     27 of this code was written as part of a project to build a fixed-point vector
     28 processor in collaboration with the University of California at Berkeley,
     29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
     30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
     31 arithmetic/SoftFloat.html'.
     32 
     33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
     34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
     35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
     36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
     37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
     38 
     39 Derivative works are acceptable, even for commercial purposes, so long as
     40 (1) they include prominent notice that the work is derivative, and (2) they
     41 include prominent notice akin to these four paragraphs for those parts of
     42 this code that are retained.
     43 
     44 ===============================================================================
     45 */
     46 
     47 #include <sys/cdefs.h>
     48 #if defined(LIBC_SCCS) && !defined(lint)
     49 __RCSID("$NetBSD: softfloat.c,v 1.8.2.1 2012/04/17 00:05:23 yamt Exp $");
     50 #endif /* LIBC_SCCS and not lint */
     51 
     52 #ifdef SOFTFLOAT_FOR_GCC
     53 #include "softfloat-for-gcc.h"
     54 #endif
     55 
     56 #include "milieu.h"
     57 #include "softfloat.h"
     58 
     59 /*
     60  * Conversions between floats as stored in memory and floats as
     61  * SoftFloat uses them
     62  */
     63 #ifndef FLOAT64_DEMANGLE
     64 #define FLOAT64_DEMANGLE(a)	(a)
     65 #endif
     66 #ifndef FLOAT64_MANGLE
     67 #define FLOAT64_MANGLE(a)	(a)
     68 #endif
     69 
     70 /*
     71 -------------------------------------------------------------------------------
     72 Floating-point rounding mode, extended double-precision rounding precision,
     73 and exception flags.
     74 -------------------------------------------------------------------------------
     75 */
     76 fp_rnd float_rounding_mode = float_round_nearest_even;
     77 fp_except float_exception_flags = 0;
     78 #ifdef FLOATX80
     79 int8 floatx80_rounding_precision = 80;
     80 #endif
     81 
     82 /*
     83 -------------------------------------------------------------------------------
     84 Primitive arithmetic functions, including multi-word arithmetic, and
     85 division and square root approximations.  (Can be specialized to target if
     86 desired.)
     87 -------------------------------------------------------------------------------
     88 */
     89 #include "softfloat-macros"
     90 
     91 /*
     92 -------------------------------------------------------------------------------
     93 Functions and definitions to determine:  (1) whether tininess for underflow
     94 is detected before or after rounding by default, (2) what (if anything)
     95 happens when exceptions are raised, (3) how signaling NaNs are distinguished
     96 from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
     97 are propagated from function inputs to output.  These details are target-
     98 specific.
     99 -------------------------------------------------------------------------------
    100 */
    101 #include "softfloat-specialize"
    102 
    103 #if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128)
    104 /*
    105 -------------------------------------------------------------------------------
    106 Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
    107 and 7, and returns the properly rounded 32-bit integer corresponding to the
    108 input.  If `zSign' is 1, the input is negated before being converted to an
    109 integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
    110 is simply rounded to an integer, with the inexact exception raised if the
    111 input cannot be represented exactly as an integer.  However, if the fixed-
    112 point input is too large, the invalid exception is raised and the largest
    113 positive or negative integer is returned.
    114 -------------------------------------------------------------------------------
    115 */
    116 static int32 roundAndPackInt32( flag zSign, bits64 absZ )
    117 {
    118     int8 roundingMode;
    119     flag roundNearestEven;
    120     int8 roundIncrement, roundBits;
    121     int32 z;
    122 
    123     roundingMode = float_rounding_mode;
    124     roundNearestEven = ( roundingMode == float_round_nearest_even );
    125     roundIncrement = 0x40;
    126     if ( ! roundNearestEven ) {
    127         if ( roundingMode == float_round_to_zero ) {
    128             roundIncrement = 0;
    129         }
    130         else {
    131             roundIncrement = 0x7F;
    132             if ( zSign ) {
    133                 if ( roundingMode == float_round_up ) roundIncrement = 0;
    134             }
    135             else {
    136                 if ( roundingMode == float_round_down ) roundIncrement = 0;
    137             }
    138         }
    139     }
    140     roundBits = (int8)(absZ & 0x7F);
    141     absZ = ( absZ + roundIncrement )>>7;
    142     absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
    143     z = (int32)absZ;
    144     if ( zSign ) z = - z;
    145     if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
    146         float_raise( float_flag_invalid );
    147         return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
    148     }
    149     if ( roundBits ) float_exception_flags |= float_flag_inexact;
    150     return z;
    151 
    152 }
    153 
    154 /*
    155 -------------------------------------------------------------------------------
    156 Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
    157 `absZ1', with binary point between bits 63 and 64 (between the input words),
    158 and returns the properly rounded 64-bit integer corresponding to the input.
    159 If `zSign' is 1, the input is negated before being converted to an integer.
    160 Ordinarily, the fixed-point input is simply rounded to an integer, with
    161 the inexact exception raised if the input cannot be represented exactly as
    162 an integer.  However, if the fixed-point input is too large, the invalid
    163 exception is raised and the largest positive or negative integer is
    164 returned.
    165 -------------------------------------------------------------------------------
    166 */
    167 static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 )
    168 {
    169     int8 roundingMode;
    170     flag roundNearestEven, increment;
    171     int64 z;
    172 
    173     roundingMode = float_rounding_mode;
    174     roundNearestEven = ( roundingMode == float_round_nearest_even );
    175     increment = ( (sbits64) absZ1 < 0 );
    176     if ( ! roundNearestEven ) {
    177         if ( roundingMode == float_round_to_zero ) {
    178             increment = 0;
    179         }
    180         else {
    181             if ( zSign ) {
    182                 increment = ( roundingMode == float_round_down ) && absZ1;
    183             }
    184             else {
    185                 increment = ( roundingMode == float_round_up ) && absZ1;
    186             }
    187         }
    188     }
    189     if ( increment ) {
    190         ++absZ0;
    191         if ( absZ0 == 0 ) goto overflow;
    192         absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
    193     }
    194     z = absZ0;
    195     if ( zSign ) z = - z;
    196     if ( z && ( ( z < 0 ) ^ zSign ) ) {
    197  overflow:
    198         float_raise( float_flag_invalid );
    199         return
    200               zSign ? (sbits64) LIT64( 0x8000000000000000 )
    201             : LIT64( 0x7FFFFFFFFFFFFFFF );
    202     }
    203     if ( absZ1 ) float_exception_flags |= float_flag_inexact;
    204     return z;
    205 
    206 }
    207 #endif
    208 
    209 /*
    210 -------------------------------------------------------------------------------
    211 Returns the fraction bits of the single-precision floating-point value `a'.
    212 -------------------------------------------------------------------------------
    213 */
    214 INLINE bits32 extractFloat32Frac( float32 a )
    215 {
    216 
    217     return a & 0x007FFFFF;
    218 
    219 }
    220 
    221 /*
    222 -------------------------------------------------------------------------------
    223 Returns the exponent bits of the single-precision floating-point value `a'.
    224 -------------------------------------------------------------------------------
    225 */
    226 INLINE int16 extractFloat32Exp( float32 a )
    227 {
    228 
    229     return ( a>>23 ) & 0xFF;
    230 
    231 }
    232 
    233 /*
    234 -------------------------------------------------------------------------------
    235 Returns the sign bit of the single-precision floating-point value `a'.
    236 -------------------------------------------------------------------------------
    237 */
    238 INLINE flag extractFloat32Sign( float32 a )
    239 {
    240 
    241     return a>>31;
    242 
    243 }
    244 
    245 /*
    246 -------------------------------------------------------------------------------
    247 Normalizes the subnormal single-precision floating-point value represented
    248 by the denormalized significand `aSig'.  The normalized exponent and
    249 significand are stored at the locations pointed to by `zExpPtr' and
    250 `zSigPtr', respectively.
    251 -------------------------------------------------------------------------------
    252 */
    253 static void
    254  normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
    255 {
    256     int8 shiftCount;
    257 
    258     shiftCount = countLeadingZeros32( aSig ) - 8;
    259     *zSigPtr = aSig<<shiftCount;
    260     *zExpPtr = 1 - shiftCount;
    261 
    262 }
    263 
    264 /*
    265 -------------------------------------------------------------------------------
    266 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
    267 single-precision floating-point value, returning the result.  After being
    268 shifted into the proper positions, the three fields are simply added
    269 together to form the result.  This means that any integer portion of `zSig'
    270 will be added into the exponent.  Since a properly normalized significand
    271 will have an integer portion equal to 1, the `zExp' input should be 1 less
    272 than the desired result exponent whenever `zSig' is a complete, normalized
    273 significand.
    274 -------------------------------------------------------------------------------
    275 */
    276 INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
    277 {
    278 
    279     return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
    280 
    281 }
    282 
    283 /*
    284 -------------------------------------------------------------------------------
    285 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    286 and significand `zSig', and returns the proper single-precision floating-
    287 point value corresponding to the abstract input.  Ordinarily, the abstract
    288 value is simply rounded and packed into the single-precision format, with
    289 the inexact exception raised if the abstract input cannot be represented
    290 exactly.  However, if the abstract value is too large, the overflow and
    291 inexact exceptions are raised and an infinity or maximal finite value is
    292 returned.  If the abstract value is too small, the input value is rounded to
    293 a subnormal number, and the underflow and inexact exceptions are raised if
    294 the abstract input cannot be represented exactly as a subnormal single-
    295 precision floating-point number.
    296     The input significand `zSig' has its binary point between bits 30
    297 and 29, which is 7 bits to the left of the usual location.  This shifted
    298 significand must be normalized or smaller.  If `zSig' is not normalized,
    299 `zExp' must be 0; in that case, the result returned is a subnormal number,
    300 and it must not require rounding.  In the usual case that `zSig' is
    301 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
    302 The handling of underflow and overflow follows the IEC/IEEE Standard for
    303 Binary Floating-Point Arithmetic.
    304 -------------------------------------------------------------------------------
    305 */
    306 static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
    307 {
    308     int8 roundingMode;
    309     flag roundNearestEven;
    310     int8 roundIncrement, roundBits;
    311     flag isTiny;
    312 
    313     roundingMode = float_rounding_mode;
    314     roundNearestEven = ( roundingMode == float_round_nearest_even );
    315     roundIncrement = 0x40;
    316     if ( ! roundNearestEven ) {
    317         if ( roundingMode == float_round_to_zero ) {
    318             roundIncrement = 0;
    319         }
    320         else {
    321             roundIncrement = 0x7F;
    322             if ( zSign ) {
    323                 if ( roundingMode == float_round_up ) roundIncrement = 0;
    324             }
    325             else {
    326                 if ( roundingMode == float_round_down ) roundIncrement = 0;
    327             }
    328         }
    329     }
    330     roundBits = zSig & 0x7F;
    331     if ( 0xFD <= (bits16) zExp ) {
    332         if (    ( 0xFD < zExp )
    333              || (    ( zExp == 0xFD )
    334                   && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
    335            ) {
    336             float_raise( float_flag_overflow | float_flag_inexact );
    337             return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
    338         }
    339         if ( zExp < 0 ) {
    340             isTiny =
    341                    ( float_detect_tininess == float_tininess_before_rounding )
    342                 || ( zExp < -1 )
    343                 || ( zSig + roundIncrement < 0x80000000U );
    344             shift32RightJamming( zSig, - zExp, &zSig );
    345             zExp = 0;
    346             roundBits = zSig & 0x7F;
    347             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
    348         }
    349     }
    350     if ( roundBits ) float_exception_flags |= float_flag_inexact;
    351     zSig = ( zSig + roundIncrement )>>7;
    352     zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
    353     if ( zSig == 0 ) zExp = 0;
    354     return packFloat32( zSign, zExp, zSig );
    355 
    356 }
    357 
    358 /*
    359 -------------------------------------------------------------------------------
    360 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    361 and significand `zSig', and returns the proper single-precision floating-
    362 point value corresponding to the abstract input.  This routine is just like
    363 `roundAndPackFloat32' except that `zSig' does not have to be normalized.
    364 Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
    365 floating-point exponent.
    366 -------------------------------------------------------------------------------
    367 */
    368 static float32
    369  normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
    370 {
    371     int8 shiftCount;
    372 
    373     shiftCount = countLeadingZeros32( zSig ) - 1;
    374     return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
    375 
    376 }
    377 
    378 /*
    379 -------------------------------------------------------------------------------
    380 Returns the fraction bits of the double-precision floating-point value `a'.
    381 -------------------------------------------------------------------------------
    382 */
    383 INLINE bits64 extractFloat64Frac( float64 a )
    384 {
    385 
    386     return FLOAT64_DEMANGLE(a) & LIT64( 0x000FFFFFFFFFFFFF );
    387 
    388 }
    389 
    390 /*
    391 -------------------------------------------------------------------------------
    392 Returns the exponent bits of the double-precision floating-point value `a'.
    393 -------------------------------------------------------------------------------
    394 */
    395 INLINE int16 extractFloat64Exp( float64 a )
    396 {
    397 
    398     return (int16)((FLOAT64_DEMANGLE(a) >> 52) & 0x7FF);
    399 
    400 }
    401 
    402 /*
    403 -------------------------------------------------------------------------------
    404 Returns the sign bit of the double-precision floating-point value `a'.
    405 -------------------------------------------------------------------------------
    406 */
    407 INLINE flag extractFloat64Sign( float64 a )
    408 {
    409 
    410     return (flag)(FLOAT64_DEMANGLE(a) >> 63);
    411 
    412 }
    413 
    414 /*
    415 -------------------------------------------------------------------------------
    416 Normalizes the subnormal double-precision floating-point value represented
    417 by the denormalized significand `aSig'.  The normalized exponent and
    418 significand are stored at the locations pointed to by `zExpPtr' and
    419 `zSigPtr', respectively.
    420 -------------------------------------------------------------------------------
    421 */
    422 static void
    423  normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
    424 {
    425     int8 shiftCount;
    426 
    427     shiftCount = countLeadingZeros64( aSig ) - 11;
    428     *zSigPtr = aSig<<shiftCount;
    429     *zExpPtr = 1 - shiftCount;
    430 
    431 }
    432 
    433 /*
    434 -------------------------------------------------------------------------------
    435 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
    436 double-precision floating-point value, returning the result.  After being
    437 shifted into the proper positions, the three fields are simply added
    438 together to form the result.  This means that any integer portion of `zSig'
    439 will be added into the exponent.  Since a properly normalized significand
    440 will have an integer portion equal to 1, the `zExp' input should be 1 less
    441 than the desired result exponent whenever `zSig' is a complete, normalized
    442 significand.
    443 -------------------------------------------------------------------------------
    444 */
    445 INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
    446 {
    447 
    448     return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
    449 			   ( ( (bits64) zExp )<<52 ) + zSig );
    450 
    451 }
    452 
    453 /*
    454 -------------------------------------------------------------------------------
    455 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    456 and significand `zSig', and returns the proper double-precision floating-
    457 point value corresponding to the abstract input.  Ordinarily, the abstract
    458 value is simply rounded and packed into the double-precision format, with
    459 the inexact exception raised if the abstract input cannot be represented
    460 exactly.  However, if the abstract value is too large, the overflow and
    461 inexact exceptions are raised and an infinity or maximal finite value is
    462 returned.  If the abstract value is too small, the input value is rounded to
    463 a subnormal number, and the underflow and inexact exceptions are raised if
    464 the abstract input cannot be represented exactly as a subnormal double-
    465 precision floating-point number.
    466     The input significand `zSig' has its binary point between bits 62
    467 and 61, which is 10 bits to the left of the usual location.  This shifted
    468 significand must be normalized or smaller.  If `zSig' is not normalized,
    469 `zExp' must be 0; in that case, the result returned is a subnormal number,
    470 and it must not require rounding.  In the usual case that `zSig' is
    471 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
    472 The handling of underflow and overflow follows the IEC/IEEE Standard for
    473 Binary Floating-Point Arithmetic.
    474 -------------------------------------------------------------------------------
    475 */
    476 static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
    477 {
    478     int8 roundingMode;
    479     flag roundNearestEven;
    480     int16 roundIncrement, roundBits;
    481     flag isTiny;
    482 
    483     roundingMode = float_rounding_mode;
    484     roundNearestEven = ( roundingMode == float_round_nearest_even );
    485     roundIncrement = 0x200;
    486     if ( ! roundNearestEven ) {
    487         if ( roundingMode == float_round_to_zero ) {
    488             roundIncrement = 0;
    489         }
    490         else {
    491             roundIncrement = 0x3FF;
    492             if ( zSign ) {
    493                 if ( roundingMode == float_round_up ) roundIncrement = 0;
    494             }
    495             else {
    496                 if ( roundingMode == float_round_down ) roundIncrement = 0;
    497             }
    498         }
    499     }
    500     roundBits = (int16)(zSig & 0x3FF);
    501     if ( 0x7FD <= (bits16) zExp ) {
    502         if (    ( 0x7FD < zExp )
    503              || (    ( zExp == 0x7FD )
    504                   && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
    505            ) {
    506             float_raise( float_flag_overflow | float_flag_inexact );
    507             return FLOAT64_MANGLE(
    508 		FLOAT64_DEMANGLE(packFloat64( zSign, 0x7FF, 0 )) -
    509 		( roundIncrement == 0 ));
    510         }
    511         if ( zExp < 0 ) {
    512             isTiny =
    513                    ( float_detect_tininess == float_tininess_before_rounding )
    514                 || ( zExp < -1 )
    515                 || ( zSig + roundIncrement < (bits64)LIT64( 0x8000000000000000 ) );
    516             shift64RightJamming( zSig, - zExp, &zSig );
    517             zExp = 0;
    518             roundBits = (int16)(zSig & 0x3FF);
    519             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
    520         }
    521     }
    522     if ( roundBits ) float_exception_flags |= float_flag_inexact;
    523     zSig = ( zSig + roundIncrement )>>10;
    524     zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
    525     if ( zSig == 0 ) zExp = 0;
    526     return packFloat64( zSign, zExp, zSig );
    527 
    528 }
    529 
    530 /*
    531 -------------------------------------------------------------------------------
    532 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    533 and significand `zSig', and returns the proper double-precision floating-
    534 point value corresponding to the abstract input.  This routine is just like
    535 `roundAndPackFloat64' except that `zSig' does not have to be normalized.
    536 Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
    537 floating-point exponent.
    538 -------------------------------------------------------------------------------
    539 */
    540 static float64
    541  normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
    542 {
    543     int8 shiftCount;
    544 
    545     shiftCount = countLeadingZeros64( zSig ) - 1;
    546     return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
    547 
    548 }
    549 
    550 #ifdef FLOATX80
    551 
    552 /*
    553 -------------------------------------------------------------------------------
    554 Returns the fraction bits of the extended double-precision floating-point
    555 value `a'.
    556 -------------------------------------------------------------------------------
    557 */
    558 INLINE bits64 extractFloatx80Frac( floatx80 a )
    559 {
    560 
    561     return a.low;
    562 
    563 }
    564 
    565 /*
    566 -------------------------------------------------------------------------------
    567 Returns the exponent bits of the extended double-precision floating-point
    568 value `a'.
    569 -------------------------------------------------------------------------------
    570 */
    571 INLINE int32 extractFloatx80Exp( floatx80 a )
    572 {
    573 
    574     return a.high & 0x7FFF;
    575 
    576 }
    577 
    578 /*
    579 -------------------------------------------------------------------------------
    580 Returns the sign bit of the extended double-precision floating-point value
    581 `a'.
    582 -------------------------------------------------------------------------------
    583 */
    584 INLINE flag extractFloatx80Sign( floatx80 a )
    585 {
    586 
    587     return a.high>>15;
    588 
    589 }
    590 
    591 /*
    592 -------------------------------------------------------------------------------
    593 Normalizes the subnormal extended double-precision floating-point value
    594 represented by the denormalized significand `aSig'.  The normalized exponent
    595 and significand are stored at the locations pointed to by `zExpPtr' and
    596 `zSigPtr', respectively.
    597 -------------------------------------------------------------------------------
    598 */
    599 static void
    600  normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
    601 {
    602     int8 shiftCount;
    603 
    604     shiftCount = countLeadingZeros64( aSig );
    605     *zSigPtr = aSig<<shiftCount;
    606     *zExpPtr = 1 - shiftCount;
    607 
    608 }
    609 
    610 /*
    611 -------------------------------------------------------------------------------
    612 Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
    613 extended double-precision floating-point value, returning the result.
    614 -------------------------------------------------------------------------------
    615 */
    616 INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
    617 {
    618     floatx80 z;
    619 
    620     z.low = zSig;
    621     z.high = ( ( (bits16) zSign )<<15 ) + zExp;
    622     return z;
    623 
    624 }
    625 
    626 /*
    627 -------------------------------------------------------------------------------
    628 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    629 and extended significand formed by the concatenation of `zSig0' and `zSig1',
    630 and returns the proper extended double-precision floating-point value
    631 corresponding to the abstract input.  Ordinarily, the abstract value is
    632 rounded and packed into the extended double-precision format, with the
    633 inexact exception raised if the abstract input cannot be represented
    634 exactly.  However, if the abstract value is too large, the overflow and
    635 inexact exceptions are raised and an infinity or maximal finite value is
    636 returned.  If the abstract value is too small, the input value is rounded to
    637 a subnormal number, and the underflow and inexact exceptions are raised if
    638 the abstract input cannot be represented exactly as a subnormal extended
    639 double-precision floating-point number.
    640     If `roundingPrecision' is 32 or 64, the result is rounded to the same
    641 number of bits as single or double precision, respectively.  Otherwise, the
    642 result is rounded to the full precision of the extended double-precision
    643 format.
    644     The input significand must be normalized or smaller.  If the input
    645 significand is not normalized, `zExp' must be 0; in that case, the result
    646 returned is a subnormal number, and it must not require rounding.  The
    647 handling of underflow and overflow follows the IEC/IEEE Standard for Binary
    648 Floating-Point Arithmetic.
    649 -------------------------------------------------------------------------------
    650 */
    651 static floatx80
    652  roundAndPackFloatx80(
    653      int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
    654  )
    655 {
    656     int8 roundingMode;
    657     flag roundNearestEven, increment, isTiny;
    658     int64 roundIncrement, roundMask, roundBits;
    659 
    660     roundingMode = float_rounding_mode;
    661     roundNearestEven = ( roundingMode == float_round_nearest_even );
    662     if ( roundingPrecision == 80 ) goto precision80;
    663     if ( roundingPrecision == 64 ) {
    664         roundIncrement = LIT64( 0x0000000000000400 );
    665         roundMask = LIT64( 0x00000000000007FF );
    666     }
    667     else if ( roundingPrecision == 32 ) {
    668         roundIncrement = LIT64( 0x0000008000000000 );
    669         roundMask = LIT64( 0x000000FFFFFFFFFF );
    670     }
    671     else {
    672         goto precision80;
    673     }
    674     zSig0 |= ( zSig1 != 0 );
    675     if ( ! roundNearestEven ) {
    676         if ( roundingMode == float_round_to_zero ) {
    677             roundIncrement = 0;
    678         }
    679         else {
    680             roundIncrement = roundMask;
    681             if ( zSign ) {
    682                 if ( roundingMode == float_round_up ) roundIncrement = 0;
    683             }
    684             else {
    685                 if ( roundingMode == float_round_down ) roundIncrement = 0;
    686             }
    687         }
    688     }
    689     roundBits = zSig0 & roundMask;
    690     if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
    691         if (    ( 0x7FFE < zExp )
    692              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
    693            ) {
    694             goto overflow;
    695         }
    696         if ( zExp <= 0 ) {
    697             isTiny =
    698                    ( float_detect_tininess == float_tininess_before_rounding )
    699                 || ( zExp < 0 )
    700                 || ( zSig0 <= zSig0 + roundIncrement );
    701             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
    702             zExp = 0;
    703             roundBits = zSig0 & roundMask;
    704             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
    705             if ( roundBits ) float_exception_flags |= float_flag_inexact;
    706             zSig0 += roundIncrement;
    707             if ( (sbits64) zSig0 < 0 ) zExp = 1;
    708             roundIncrement = roundMask + 1;
    709             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
    710                 roundMask |= roundIncrement;
    711             }
    712             zSig0 &= ~ roundMask;
    713             return packFloatx80( zSign, zExp, zSig0 );
    714         }
    715     }
    716     if ( roundBits ) float_exception_flags |= float_flag_inexact;
    717     zSig0 += roundIncrement;
    718     if ( zSig0 < roundIncrement ) {
    719         ++zExp;
    720         zSig0 = LIT64( 0x8000000000000000 );
    721     }
    722     roundIncrement = roundMask + 1;
    723     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
    724         roundMask |= roundIncrement;
    725     }
    726     zSig0 &= ~ roundMask;
    727     if ( zSig0 == 0 ) zExp = 0;
    728     return packFloatx80( zSign, zExp, zSig0 );
    729  precision80:
    730     increment = ( (sbits64) zSig1 < 0 );
    731     if ( ! roundNearestEven ) {
    732         if ( roundingMode == float_round_to_zero ) {
    733             increment = 0;
    734         }
    735         else {
    736             if ( zSign ) {
    737                 increment = ( roundingMode == float_round_down ) && zSig1;
    738             }
    739             else {
    740                 increment = ( roundingMode == float_round_up ) && zSig1;
    741             }
    742         }
    743     }
    744     if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
    745         if (    ( 0x7FFE < zExp )
    746              || (    ( zExp == 0x7FFE )
    747                   && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
    748                   && increment
    749                 )
    750            ) {
    751             roundMask = 0;
    752  overflow:
    753             float_raise( float_flag_overflow | float_flag_inexact );
    754             if (    ( roundingMode == float_round_to_zero )
    755                  || ( zSign && ( roundingMode == float_round_up ) )
    756                  || ( ! zSign && ( roundingMode == float_round_down ) )
    757                ) {
    758                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
    759             }
    760             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
    761         }
    762         if ( zExp <= 0 ) {
    763             isTiny =
    764                    ( float_detect_tininess == float_tininess_before_rounding )
    765                 || ( zExp < 0 )
    766                 || ! increment
    767                 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
    768             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
    769             zExp = 0;
    770             if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
    771             if ( zSig1 ) float_exception_flags |= float_flag_inexact;
    772             if ( roundNearestEven ) {
    773                 increment = ( (sbits64) zSig1 < 0 );
    774             }
    775             else {
    776                 if ( zSign ) {
    777                     increment = ( roundingMode == float_round_down ) && zSig1;
    778                 }
    779                 else {
    780                     increment = ( roundingMode == float_round_up ) && zSig1;
    781                 }
    782             }
    783             if ( increment ) {
    784                 ++zSig0;
    785                 zSig0 &=
    786                     ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
    787                 if ( (sbits64) zSig0 < 0 ) zExp = 1;
    788             }
    789             return packFloatx80( zSign, zExp, zSig0 );
    790         }
    791     }
    792     if ( zSig1 ) float_exception_flags |= float_flag_inexact;
    793     if ( increment ) {
    794         ++zSig0;
    795         if ( zSig0 == 0 ) {
    796             ++zExp;
    797             zSig0 = LIT64( 0x8000000000000000 );
    798         }
    799         else {
    800             zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
    801         }
    802     }
    803     else {
    804         if ( zSig0 == 0 ) zExp = 0;
    805     }
    806     return packFloatx80( zSign, zExp, zSig0 );
    807 
    808 }
    809 
    810 /*
    811 -------------------------------------------------------------------------------
    812 Takes an abstract floating-point value having sign `zSign', exponent
    813 `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
    814 and returns the proper extended double-precision floating-point value
    815 corresponding to the abstract input.  This routine is just like
    816 `roundAndPackFloatx80' except that the input significand does not have to be
    817 normalized.
    818 -------------------------------------------------------------------------------
    819 */
    820 static floatx80
    821  normalizeRoundAndPackFloatx80(
    822      int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
    823  )
    824 {
    825     int8 shiftCount;
    826 
    827     if ( zSig0 == 0 ) {
    828         zSig0 = zSig1;
    829         zSig1 = 0;
    830         zExp -= 64;
    831     }
    832     shiftCount = countLeadingZeros64( zSig0 );
    833     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    834     zExp -= shiftCount;
    835     return
    836         roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
    837 
    838 }
    839 
    840 #endif
    841 
    842 #ifdef FLOAT128
    843 
    844 /*
    845 -------------------------------------------------------------------------------
    846 Returns the least-significant 64 fraction bits of the quadruple-precision
    847 floating-point value `a'.
    848 -------------------------------------------------------------------------------
    849 */
    850 INLINE bits64 extractFloat128Frac1( float128 a )
    851 {
    852 
    853     return a.low;
    854 
    855 }
    856 
    857 /*
    858 -------------------------------------------------------------------------------
    859 Returns the most-significant 48 fraction bits of the quadruple-precision
    860 floating-point value `a'.
    861 -------------------------------------------------------------------------------
    862 */
    863 INLINE bits64 extractFloat128Frac0( float128 a )
    864 {
    865 
    866     return a.high & LIT64( 0x0000FFFFFFFFFFFF );
    867 
    868 }
    869 
    870 /*
    871 -------------------------------------------------------------------------------
    872 Returns the exponent bits of the quadruple-precision floating-point value
    873 `a'.
    874 -------------------------------------------------------------------------------
    875 */
    876 INLINE int32 extractFloat128Exp( float128 a )
    877 {
    878 
    879     return (int32)((a.high >> 48) & 0x7FFF);
    880 
    881 }
    882 
    883 /*
    884 -------------------------------------------------------------------------------
    885 Returns the sign bit of the quadruple-precision floating-point value `a'.
    886 -------------------------------------------------------------------------------
    887 */
    888 INLINE flag extractFloat128Sign( float128 a )
    889 {
    890 
    891     return (flag)(a.high >> 63);
    892 
    893 }
    894 
    895 /*
    896 -------------------------------------------------------------------------------
    897 Normalizes the subnormal quadruple-precision floating-point value
    898 represented by the denormalized significand formed by the concatenation of
    899 `aSig0' and `aSig1'.  The normalized exponent is stored at the location
    900 pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
    901 significand are stored at the location pointed to by `zSig0Ptr', and the
    902 least significant 64 bits of the normalized significand are stored at the
    903 location pointed to by `zSig1Ptr'.
    904 -------------------------------------------------------------------------------
    905 */
    906 static void
    907  normalizeFloat128Subnormal(
    908      bits64 aSig0,
    909      bits64 aSig1,
    910      int32 *zExpPtr,
    911      bits64 *zSig0Ptr,
    912      bits64 *zSig1Ptr
    913  )
    914 {
    915     int8 shiftCount;
    916 
    917     if ( aSig0 == 0 ) {
    918         shiftCount = countLeadingZeros64( aSig1 ) - 15;
    919         if ( shiftCount < 0 ) {
    920             *zSig0Ptr = aSig1>>( - shiftCount );
    921             *zSig1Ptr = aSig1<<( shiftCount & 63 );
    922         }
    923         else {
    924             *zSig0Ptr = aSig1<<shiftCount;
    925             *zSig1Ptr = 0;
    926         }
    927         *zExpPtr = - shiftCount - 63;
    928     }
    929     else {
    930         shiftCount = countLeadingZeros64( aSig0 ) - 15;
    931         shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
    932         *zExpPtr = 1 - shiftCount;
    933     }
    934 
    935 }
    936 
    937 /*
    938 -------------------------------------------------------------------------------
    939 Packs the sign `zSign', the exponent `zExp', and the significand formed
    940 by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
    941 floating-point value, returning the result.  After being shifted into the
    942 proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
    943 added together to form the most significant 32 bits of the result.  This
    944 means that any integer portion of `zSig0' will be added into the exponent.
    945 Since a properly normalized significand will have an integer portion equal
    946 to 1, the `zExp' input should be 1 less than the desired result exponent
    947 whenever `zSig0' and `zSig1' concatenated form a complete, normalized
    948 significand.
    949 -------------------------------------------------------------------------------
    950 */
    951 INLINE float128
    952  packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
    953 {
    954     float128 z;
    955 
    956     z.low = zSig1;
    957     z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
    958     return z;
    959 
    960 }
    961 
    962 /*
    963 -------------------------------------------------------------------------------
    964 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    965 and extended significand formed by the concatenation of `zSig0', `zSig1',
    966 and `zSig2', and returns the proper quadruple-precision floating-point value
    967 corresponding to the abstract input.  Ordinarily, the abstract value is
    968 simply rounded and packed into the quadruple-precision format, with the
    969 inexact exception raised if the abstract input cannot be represented
    970 exactly.  However, if the abstract value is too large, the overflow and
    971 inexact exceptions are raised and an infinity or maximal finite value is
    972 returned.  If the abstract value is too small, the input value is rounded to
    973 a subnormal number, and the underflow and inexact exceptions are raised if
    974 the abstract input cannot be represented exactly as a subnormal quadruple-
    975 precision floating-point number.
    976     The input significand must be normalized or smaller.  If the input
    977 significand is not normalized, `zExp' must be 0; in that case, the result
    978 returned is a subnormal number, and it must not require rounding.  In the
    979 usual case that the input significand is normalized, `zExp' must be 1 less
    980 than the ``true'' floating-point exponent.  The handling of underflow and
    981 overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
    982 -------------------------------------------------------------------------------
    983 */
    984 static float128
    985  roundAndPackFloat128(
    986      flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
    987 {
    988     int8 roundingMode;
    989     flag roundNearestEven, increment, isTiny;
    990 
    991     roundingMode = float_rounding_mode;
    992     roundNearestEven = ( roundingMode == float_round_nearest_even );
    993     increment = ( (sbits64) zSig2 < 0 );
    994     if ( ! roundNearestEven ) {
    995         if ( roundingMode == float_round_to_zero ) {
    996             increment = 0;
    997         }
    998         else {
    999             if ( zSign ) {
   1000                 increment = ( roundingMode == float_round_down ) && zSig2;
   1001             }
   1002             else {
   1003                 increment = ( roundingMode == float_round_up ) && zSig2;
   1004             }
   1005         }
   1006     }
   1007     if ( 0x7FFD <= (bits32) zExp ) {
   1008         if (    ( 0x7FFD < zExp )
   1009              || (    ( zExp == 0x7FFD )
   1010                   && eq128(
   1011                          LIT64( 0x0001FFFFFFFFFFFF ),
   1012                          LIT64( 0xFFFFFFFFFFFFFFFF ),
   1013                          zSig0,
   1014                          zSig1
   1015                      )
   1016                   && increment
   1017                 )
   1018            ) {
   1019             float_raise( float_flag_overflow | float_flag_inexact );
   1020             if (    ( roundingMode == float_round_to_zero )
   1021                  || ( zSign && ( roundingMode == float_round_up ) )
   1022                  || ( ! zSign && ( roundingMode == float_round_down ) )
   1023                ) {
   1024                 return
   1025                     packFloat128(
   1026                         zSign,
   1027                         0x7FFE,
   1028                         LIT64( 0x0000FFFFFFFFFFFF ),
   1029                         LIT64( 0xFFFFFFFFFFFFFFFF )
   1030                     );
   1031             }
   1032             return packFloat128( zSign, 0x7FFF, 0, 0 );
   1033         }
   1034         if ( zExp < 0 ) {
   1035             isTiny =
   1036                    ( float_detect_tininess == float_tininess_before_rounding )
   1037                 || ( zExp < -1 )
   1038                 || ! increment
   1039                 || lt128(
   1040                        zSig0,
   1041                        zSig1,
   1042                        LIT64( 0x0001FFFFFFFFFFFF ),
   1043                        LIT64( 0xFFFFFFFFFFFFFFFF )
   1044                    );
   1045             shift128ExtraRightJamming(
   1046                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
   1047             zExp = 0;
   1048             if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
   1049             if ( roundNearestEven ) {
   1050                 increment = ( (sbits64) zSig2 < 0 );
   1051             }
   1052             else {
   1053                 if ( zSign ) {
   1054                     increment = ( roundingMode == float_round_down ) && zSig2;
   1055                 }
   1056                 else {
   1057                     increment = ( roundingMode == float_round_up ) && zSig2;
   1058                 }
   1059             }
   1060         }
   1061     }
   1062     if ( zSig2 ) float_exception_flags |= float_flag_inexact;
   1063     if ( increment ) {
   1064         add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
   1065         zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
   1066     }
   1067     else {
   1068         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
   1069     }
   1070     return packFloat128( zSign, zExp, zSig0, zSig1 );
   1071 
   1072 }
   1073 
   1074 /*
   1075 -------------------------------------------------------------------------------
   1076 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
   1077 and significand formed by the concatenation of `zSig0' and `zSig1', and
   1078 returns the proper quadruple-precision floating-point value corresponding
   1079 to the abstract input.  This routine is just like `roundAndPackFloat128'
   1080 except that the input significand has fewer bits and does not have to be
   1081 normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
   1082 point exponent.
   1083 -------------------------------------------------------------------------------
   1084 */
   1085 static float128
   1086  normalizeRoundAndPackFloat128(
   1087      flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
   1088 {
   1089     int8 shiftCount;
   1090     bits64 zSig2;
   1091 
   1092     if ( zSig0 == 0 ) {
   1093         zSig0 = zSig1;
   1094         zSig1 = 0;
   1095         zExp -= 64;
   1096     }
   1097     shiftCount = countLeadingZeros64( zSig0 ) - 15;
   1098     if ( 0 <= shiftCount ) {
   1099         zSig2 = 0;
   1100         shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
   1101     }
   1102     else {
   1103         shift128ExtraRightJamming(
   1104             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
   1105     }
   1106     zExp -= shiftCount;
   1107     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
   1108 
   1109 }
   1110 
   1111 #endif
   1112 
   1113 /*
   1114 -------------------------------------------------------------------------------
   1115 Returns the result of converting the 32-bit two's complement integer `a'
   1116 to the single-precision floating-point format.  The conversion is performed
   1117 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1118 -------------------------------------------------------------------------------
   1119 */
   1120 float32 int32_to_float32( int32 a )
   1121 {
   1122     flag zSign;
   1123 
   1124     if ( a == 0 ) return 0;
   1125     if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
   1126     zSign = ( a < 0 );
   1127     return normalizeRoundAndPackFloat32(zSign, 0x9C, (uint32)(zSign ? - a : a));
   1128 
   1129 }
   1130 
   1131 float32 uint32_to_float32( uint32 a )
   1132 {
   1133     if ( a == 0 ) return 0;
   1134     if ( a & (bits32) 0x80000000 )
   1135 	return normalizeRoundAndPackFloat32( 0, 0x9D, a >> 1 );
   1136     return normalizeRoundAndPackFloat32( 0, 0x9C, a );
   1137 }
   1138 
   1139 
   1140 /*
   1141 -------------------------------------------------------------------------------
   1142 Returns the result of converting the 32-bit two's complement integer `a'
   1143 to the double-precision floating-point format.  The conversion is performed
   1144 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1145 -------------------------------------------------------------------------------
   1146 */
   1147 float64 int32_to_float64( int32 a )
   1148 {
   1149     flag zSign;
   1150     uint32 absA;
   1151     int8 shiftCount;
   1152     bits64 zSig;
   1153 
   1154     if ( a == 0 ) return 0;
   1155     zSign = ( a < 0 );
   1156     absA = zSign ? - a : a;
   1157     shiftCount = countLeadingZeros32( absA ) + 21;
   1158     zSig = absA;
   1159     return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
   1160 
   1161 }
   1162 
   1163 float64 uint32_to_float64( uint32 a )
   1164 {
   1165     int8 shiftCount;
   1166     bits64 zSig = a;
   1167 
   1168     if ( a == 0 ) return 0;
   1169     shiftCount = countLeadingZeros32( a ) + 21;
   1170     return packFloat64( 0, 0x432 - shiftCount, zSig<<shiftCount );
   1171 
   1172 }
   1173 
   1174 #ifdef FLOATX80
   1175 
   1176 /*
   1177 -------------------------------------------------------------------------------
   1178 Returns the result of converting the 32-bit two's complement integer `a'
   1179 to the extended double-precision floating-point format.  The conversion
   1180 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   1181 Arithmetic.
   1182 -------------------------------------------------------------------------------
   1183 */
   1184 floatx80 int32_to_floatx80( int32 a )
   1185 {
   1186     flag zSign;
   1187     uint32 absA;
   1188     int8 shiftCount;
   1189     bits64 zSig;
   1190 
   1191     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
   1192     zSign = ( a < 0 );
   1193     absA = zSign ? - a : a;
   1194     shiftCount = countLeadingZeros32( absA ) + 32;
   1195     zSig = absA;
   1196     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
   1197 
   1198 }
   1199 
   1200 floatx80 uint32_to_floatx80( uint32 a )
   1201 {
   1202     int8 shiftCount;
   1203     bits64 zSig = a;
   1204 
   1205     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
   1206     shiftCount = countLeadingZeros32( a ) + 32;
   1207     return packFloatx80( 0, 0x403E - shiftCount, zSig<<shiftCount );
   1208 
   1209 }
   1210 
   1211 #endif
   1212 
   1213 #ifdef FLOAT128
   1214 
   1215 /*
   1216 -------------------------------------------------------------------------------
   1217 Returns the result of converting the 32-bit two's complement integer `a' to
   1218 the quadruple-precision floating-point format.  The conversion is performed
   1219 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1220 -------------------------------------------------------------------------------
   1221 */
   1222 float128 int32_to_float128( int32 a )
   1223 {
   1224     flag zSign;
   1225     uint32 absA;
   1226     int8 shiftCount;
   1227     bits64 zSig0;
   1228 
   1229     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
   1230     zSign = ( a < 0 );
   1231     absA = zSign ? - a : a;
   1232     shiftCount = countLeadingZeros32( absA ) + 17;
   1233     zSig0 = absA;
   1234     return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
   1235 
   1236 }
   1237 
   1238 float128 uint32_to_float128( uint32 a )
   1239 {
   1240     int8 shiftCount;
   1241     bits64 zSig0 = a;
   1242 
   1243     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
   1244     shiftCount = countLeadingZeros32( a ) + 17;
   1245     return packFloat128( 0, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
   1246 
   1247 }
   1248 
   1249 #endif
   1250 
   1251 #ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */
   1252 /*
   1253 -------------------------------------------------------------------------------
   1254 Returns the result of converting the 64-bit two's complement integer `a'
   1255 to the single-precision floating-point format.  The conversion is performed
   1256 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1257 -------------------------------------------------------------------------------
   1258 */
   1259 float32 int64_to_float32( int64 a )
   1260 {
   1261     flag zSign;
   1262     uint64 absA;
   1263     int8 shiftCount;
   1264 
   1265     if ( a == 0 ) return 0;
   1266     zSign = ( a < 0 );
   1267     absA = zSign ? - a : a;
   1268     shiftCount = countLeadingZeros64( absA ) - 40;
   1269     if ( 0 <= shiftCount ) {
   1270         return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
   1271     }
   1272     else {
   1273         shiftCount += 7;
   1274         if ( shiftCount < 0 ) {
   1275             shift64RightJamming( absA, - shiftCount, &absA );
   1276         }
   1277         else {
   1278             absA <<= shiftCount;
   1279         }
   1280         return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA );
   1281     }
   1282 
   1283 }
   1284 
   1285 /*
   1286 -------------------------------------------------------------------------------
   1287 Returns the result of converting the 64-bit two's complement integer `a'
   1288 to the double-precision floating-point format.  The conversion is performed
   1289 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1290 -------------------------------------------------------------------------------
   1291 */
   1292 float64 int64_to_float64( int64 a )
   1293 {
   1294     flag zSign;
   1295 
   1296     if ( a == 0 ) return 0;
   1297     if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
   1298         return packFloat64( 1, 0x43E, 0 );
   1299     }
   1300     zSign = ( a < 0 );
   1301     return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a );
   1302 
   1303 }
   1304 
   1305 #ifdef FLOATX80
   1306 
   1307 /*
   1308 -------------------------------------------------------------------------------
   1309 Returns the result of converting the 64-bit two's complement integer `a'
   1310 to the extended double-precision floating-point format.  The conversion
   1311 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   1312 Arithmetic.
   1313 -------------------------------------------------------------------------------
   1314 */
   1315 floatx80 int64_to_floatx80( int64 a )
   1316 {
   1317     flag zSign;
   1318     uint64 absA;
   1319     int8 shiftCount;
   1320 
   1321     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
   1322     zSign = ( a < 0 );
   1323     absA = zSign ? - a : a;
   1324     shiftCount = countLeadingZeros64( absA );
   1325     return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
   1326 
   1327 }
   1328 
   1329 #endif
   1330 
   1331 #endif /* !SOFTFLOAT_FOR_GCC */
   1332 
   1333 #ifdef FLOAT128
   1334 
   1335 /*
   1336 -------------------------------------------------------------------------------
   1337 Returns the result of converting the 64-bit two's complement integer `a' to
   1338 the quadruple-precision floating-point format.  The conversion is performed
   1339 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1340 -------------------------------------------------------------------------------
   1341 */
   1342 float128 int64_to_float128( int64 a )
   1343 {
   1344     flag zSign;
   1345     uint64 absA;
   1346     int8 shiftCount;
   1347     int32 zExp;
   1348     bits64 zSig0, zSig1;
   1349 
   1350     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
   1351     zSign = ( a < 0 );
   1352     absA = zSign ? - a : a;
   1353     shiftCount = countLeadingZeros64( absA ) + 49;
   1354     zExp = 0x406E - shiftCount;
   1355     if ( 64 <= shiftCount ) {
   1356         zSig1 = 0;
   1357         zSig0 = absA;
   1358         shiftCount -= 64;
   1359     }
   1360     else {
   1361         zSig1 = absA;
   1362         zSig0 = 0;
   1363     }
   1364     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
   1365     return packFloat128( zSign, zExp, zSig0, zSig1 );
   1366 
   1367 }
   1368 
   1369 #endif
   1370 
   1371 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   1372 /*
   1373 -------------------------------------------------------------------------------
   1374 Returns the result of converting the single-precision floating-point value
   1375 `a' to the 32-bit two's complement integer format.  The conversion is
   1376 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1377 Arithmetic---which means in particular that the conversion is rounded
   1378 according to the current rounding mode.  If `a' is a NaN, the largest
   1379 positive integer is returned.  Otherwise, if the conversion overflows, the
   1380 largest integer with the same sign as `a' is returned.
   1381 -------------------------------------------------------------------------------
   1382 */
   1383 int32 float32_to_int32( float32 a )
   1384 {
   1385     flag aSign;
   1386     int16 aExp, shiftCount;
   1387     bits32 aSig;
   1388     bits64 aSig64;
   1389 
   1390     aSig = extractFloat32Frac( a );
   1391     aExp = extractFloat32Exp( a );
   1392     aSign = extractFloat32Sign( a );
   1393     if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
   1394     if ( aExp ) aSig |= 0x00800000;
   1395     shiftCount = 0xAF - aExp;
   1396     aSig64 = aSig;
   1397     aSig64 <<= 32;
   1398     if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
   1399     return roundAndPackInt32( aSign, aSig64 );
   1400 
   1401 }
   1402 #endif /* !SOFTFLOAT_FOR_GCC */
   1403 
   1404 /*
   1405 -------------------------------------------------------------------------------
   1406 Returns the result of converting the single-precision floating-point value
   1407 `a' to the 32-bit two's complement integer format.  The conversion is
   1408 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1409 Arithmetic, except that the conversion is always rounded toward zero.
   1410 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
   1411 the conversion overflows, the largest integer with the same sign as `a' is
   1412 returned.
   1413 -------------------------------------------------------------------------------
   1414 */
   1415 int32 float32_to_int32_round_to_zero( float32 a )
   1416 {
   1417     flag aSign;
   1418     int16 aExp, shiftCount;
   1419     bits32 aSig;
   1420     int32 z;
   1421 
   1422     aSig = extractFloat32Frac( a );
   1423     aExp = extractFloat32Exp( a );
   1424     aSign = extractFloat32Sign( a );
   1425     shiftCount = aExp - 0x9E;
   1426     if ( 0 <= shiftCount ) {
   1427         if ( a != 0xCF000000 ) {
   1428             float_raise( float_flag_invalid );
   1429             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
   1430         }
   1431         return (sbits32) 0x80000000;
   1432     }
   1433     else if ( aExp <= 0x7E ) {
   1434         if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
   1435         return 0;
   1436     }
   1437     aSig = ( aSig | 0x00800000 )<<8;
   1438     z = aSig>>( - shiftCount );
   1439     if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
   1440         float_exception_flags |= float_flag_inexact;
   1441     }
   1442     if ( aSign ) z = - z;
   1443     return z;
   1444 
   1445 }
   1446 
   1447 #ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */
   1448 /*
   1449 -------------------------------------------------------------------------------
   1450 Returns the result of converting the single-precision floating-point value
   1451 `a' to the 64-bit two's complement integer format.  The conversion is
   1452 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1453 Arithmetic---which means in particular that the conversion is rounded
   1454 according to the current rounding mode.  If `a' is a NaN, the largest
   1455 positive integer is returned.  Otherwise, if the conversion overflows, the
   1456 largest integer with the same sign as `a' is returned.
   1457 -------------------------------------------------------------------------------
   1458 */
   1459 int64 float32_to_int64( float32 a )
   1460 {
   1461     flag aSign;
   1462     int16 aExp, shiftCount;
   1463     bits32 aSig;
   1464     bits64 aSig64, aSigExtra;
   1465 
   1466     aSig = extractFloat32Frac( a );
   1467     aExp = extractFloat32Exp( a );
   1468     aSign = extractFloat32Sign( a );
   1469     shiftCount = 0xBE - aExp;
   1470     if ( shiftCount < 0 ) {
   1471         float_raise( float_flag_invalid );
   1472         if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
   1473             return LIT64( 0x7FFFFFFFFFFFFFFF );
   1474         }
   1475         return (sbits64) LIT64( 0x8000000000000000 );
   1476     }
   1477     if ( aExp ) aSig |= 0x00800000;
   1478     aSig64 = aSig;
   1479     aSig64 <<= 40;
   1480     shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
   1481     return roundAndPackInt64( aSign, aSig64, aSigExtra );
   1482 
   1483 }
   1484 
   1485 /*
   1486 -------------------------------------------------------------------------------
   1487 Returns the result of converting the single-precision floating-point value
   1488 `a' to the 64-bit two's complement integer format.  The conversion is
   1489 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1490 Arithmetic, except that the conversion is always rounded toward zero.  If
   1491 `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
   1492 conversion overflows, the largest integer with the same sign as `a' is
   1493 returned.
   1494 -------------------------------------------------------------------------------
   1495 */
   1496 int64 float32_to_int64_round_to_zero( float32 a )
   1497 {
   1498     flag aSign;
   1499     int16 aExp, shiftCount;
   1500     bits32 aSig;
   1501     bits64 aSig64;
   1502     int64 z;
   1503 
   1504     aSig = extractFloat32Frac( a );
   1505     aExp = extractFloat32Exp( a );
   1506     aSign = extractFloat32Sign( a );
   1507     shiftCount = aExp - 0xBE;
   1508     if ( 0 <= shiftCount ) {
   1509         if ( a != 0xDF000000 ) {
   1510             float_raise( float_flag_invalid );
   1511             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
   1512                 return LIT64( 0x7FFFFFFFFFFFFFFF );
   1513             }
   1514         }
   1515         return (sbits64) LIT64( 0x8000000000000000 );
   1516     }
   1517     else if ( aExp <= 0x7E ) {
   1518         if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
   1519         return 0;
   1520     }
   1521     aSig64 = aSig | 0x00800000;
   1522     aSig64 <<= 40;
   1523     z = aSig64>>( - shiftCount );
   1524     if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
   1525         float_exception_flags |= float_flag_inexact;
   1526     }
   1527     if ( aSign ) z = - z;
   1528     return z;
   1529 
   1530 }
   1531 #endif /* !SOFTFLOAT_FOR_GCC */
   1532 
   1533 /*
   1534 -------------------------------------------------------------------------------
   1535 Returns the result of converting the single-precision floating-point value
   1536 `a' to the double-precision floating-point format.  The conversion is
   1537 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1538 Arithmetic.
   1539 -------------------------------------------------------------------------------
   1540 */
   1541 float64 float32_to_float64( float32 a )
   1542 {
   1543     flag aSign;
   1544     int16 aExp;
   1545     bits32 aSig;
   1546 
   1547     aSig = extractFloat32Frac( a );
   1548     aExp = extractFloat32Exp( a );
   1549     aSign = extractFloat32Sign( a );
   1550     if ( aExp == 0xFF ) {
   1551         if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
   1552         return packFloat64( aSign, 0x7FF, 0 );
   1553     }
   1554     if ( aExp == 0 ) {
   1555         if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
   1556         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1557         --aExp;
   1558     }
   1559     return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
   1560 
   1561 }
   1562 
   1563 #ifdef FLOATX80
   1564 
   1565 /*
   1566 -------------------------------------------------------------------------------
   1567 Returns the result of converting the single-precision floating-point value
   1568 `a' to the extended double-precision floating-point format.  The conversion
   1569 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   1570 Arithmetic.
   1571 -------------------------------------------------------------------------------
   1572 */
   1573 floatx80 float32_to_floatx80( float32 a )
   1574 {
   1575     flag aSign;
   1576     int16 aExp;
   1577     bits32 aSig;
   1578 
   1579     aSig = extractFloat32Frac( a );
   1580     aExp = extractFloat32Exp( a );
   1581     aSign = extractFloat32Sign( a );
   1582     if ( aExp == 0xFF ) {
   1583         if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
   1584         return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   1585     }
   1586     if ( aExp == 0 ) {
   1587         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
   1588         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1589     }
   1590     aSig |= 0x00800000;
   1591     return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
   1592 
   1593 }
   1594 
   1595 #endif
   1596 
   1597 #ifdef FLOAT128
   1598 
   1599 /*
   1600 -------------------------------------------------------------------------------
   1601 Returns the result of converting the single-precision floating-point value
   1602 `a' to the double-precision floating-point format.  The conversion is
   1603 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1604 Arithmetic.
   1605 -------------------------------------------------------------------------------
   1606 */
   1607 float128 float32_to_float128( float32 a )
   1608 {
   1609     flag aSign;
   1610     int16 aExp;
   1611     bits32 aSig;
   1612 
   1613     aSig = extractFloat32Frac( a );
   1614     aExp = extractFloat32Exp( a );
   1615     aSign = extractFloat32Sign( a );
   1616     if ( aExp == 0xFF ) {
   1617         if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) );
   1618         return packFloat128( aSign, 0x7FFF, 0, 0 );
   1619     }
   1620     if ( aExp == 0 ) {
   1621         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
   1622         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1623         --aExp;
   1624     }
   1625     return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
   1626 
   1627 }
   1628 
   1629 #endif
   1630 
   1631 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   1632 /*
   1633 -------------------------------------------------------------------------------
   1634 Rounds the single-precision floating-point value `a' to an integer, and
   1635 returns the result as a single-precision floating-point value.  The
   1636 operation is performed according to the IEC/IEEE Standard for Binary
   1637 Floating-Point Arithmetic.
   1638 -------------------------------------------------------------------------------
   1639 */
   1640 float32 float32_round_to_int( float32 a )
   1641 {
   1642     flag aSign;
   1643     int16 aExp;
   1644     bits32 lastBitMask, roundBitsMask;
   1645     int8 roundingMode;
   1646     float32 z;
   1647 
   1648     aExp = extractFloat32Exp( a );
   1649     if ( 0x96 <= aExp ) {
   1650         if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
   1651             return propagateFloat32NaN( a, a );
   1652         }
   1653         return a;
   1654     }
   1655     if ( aExp <= 0x7E ) {
   1656         if ( (bits32) ( a<<1 ) == 0 ) return a;
   1657         float_exception_flags |= float_flag_inexact;
   1658         aSign = extractFloat32Sign( a );
   1659         switch ( float_rounding_mode ) {
   1660          case float_round_nearest_even:
   1661             if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
   1662                 return packFloat32( aSign, 0x7F, 0 );
   1663             }
   1664             break;
   1665 	 case float_round_to_zero:
   1666 	    break;
   1667          case float_round_down:
   1668             return aSign ? 0xBF800000 : 0;
   1669          case float_round_up:
   1670             return aSign ? 0x80000000 : 0x3F800000;
   1671         }
   1672         return packFloat32( aSign, 0, 0 );
   1673     }
   1674     lastBitMask = 1;
   1675     lastBitMask <<= 0x96 - aExp;
   1676     roundBitsMask = lastBitMask - 1;
   1677     z = a;
   1678     roundingMode = float_rounding_mode;
   1679     if ( roundingMode == float_round_nearest_even ) {
   1680         z += lastBitMask>>1;
   1681         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
   1682     }
   1683     else if ( roundingMode != float_round_to_zero ) {
   1684         if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
   1685             z += roundBitsMask;
   1686         }
   1687     }
   1688     z &= ~ roundBitsMask;
   1689     if ( z != a ) float_exception_flags |= float_flag_inexact;
   1690     return z;
   1691 
   1692 }
   1693 #endif /* !SOFTFLOAT_FOR_GCC */
   1694 
   1695 /*
   1696 -------------------------------------------------------------------------------
   1697 Returns the result of adding the absolute values of the single-precision
   1698 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
   1699 before being returned.  `zSign' is ignored if the result is a NaN.
   1700 The addition is performed according to the IEC/IEEE Standard for Binary
   1701 Floating-Point Arithmetic.
   1702 -------------------------------------------------------------------------------
   1703 */
   1704 static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
   1705 {
   1706     int16 aExp, bExp, zExp;
   1707     bits32 aSig, bSig, zSig;
   1708     int16 expDiff;
   1709 
   1710     aSig = extractFloat32Frac( a );
   1711     aExp = extractFloat32Exp( a );
   1712     bSig = extractFloat32Frac( b );
   1713     bExp = extractFloat32Exp( b );
   1714     expDiff = aExp - bExp;
   1715     aSig <<= 6;
   1716     bSig <<= 6;
   1717     if ( 0 < expDiff ) {
   1718         if ( aExp == 0xFF ) {
   1719             if ( aSig ) return propagateFloat32NaN( a, b );
   1720             return a;
   1721         }
   1722         if ( bExp == 0 ) {
   1723             --expDiff;
   1724         }
   1725         else {
   1726             bSig |= 0x20000000;
   1727         }
   1728         shift32RightJamming( bSig, expDiff, &bSig );
   1729         zExp = aExp;
   1730     }
   1731     else if ( expDiff < 0 ) {
   1732         if ( bExp == 0xFF ) {
   1733             if ( bSig ) return propagateFloat32NaN( a, b );
   1734             return packFloat32( zSign, 0xFF, 0 );
   1735         }
   1736         if ( aExp == 0 ) {
   1737             ++expDiff;
   1738         }
   1739         else {
   1740             aSig |= 0x20000000;
   1741         }
   1742         shift32RightJamming( aSig, - expDiff, &aSig );
   1743         zExp = bExp;
   1744     }
   1745     else {
   1746         if ( aExp == 0xFF ) {
   1747             if ( aSig | bSig ) return propagateFloat32NaN( a, b );
   1748             return a;
   1749         }
   1750         if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
   1751         zSig = 0x40000000 + aSig + bSig;
   1752         zExp = aExp;
   1753         goto roundAndPack;
   1754     }
   1755     aSig |= 0x20000000;
   1756     zSig = ( aSig + bSig )<<1;
   1757     --zExp;
   1758     if ( (sbits32) zSig < 0 ) {
   1759         zSig = aSig + bSig;
   1760         ++zExp;
   1761     }
   1762  roundAndPack:
   1763     return roundAndPackFloat32( zSign, zExp, zSig );
   1764 
   1765 }
   1766 
   1767 /*
   1768 -------------------------------------------------------------------------------
   1769 Returns the result of subtracting the absolute values of the single-
   1770 precision floating-point values `a' and `b'.  If `zSign' is 1, the
   1771 difference is negated before being returned.  `zSign' is ignored if the
   1772 result is a NaN.  The subtraction is performed according to the IEC/IEEE
   1773 Standard for Binary Floating-Point Arithmetic.
   1774 -------------------------------------------------------------------------------
   1775 */
   1776 static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
   1777 {
   1778     int16 aExp, bExp, zExp;
   1779     bits32 aSig, bSig, zSig;
   1780     int16 expDiff;
   1781 
   1782     aSig = extractFloat32Frac( a );
   1783     aExp = extractFloat32Exp( a );
   1784     bSig = extractFloat32Frac( b );
   1785     bExp = extractFloat32Exp( b );
   1786     expDiff = aExp - bExp;
   1787     aSig <<= 7;
   1788     bSig <<= 7;
   1789     if ( 0 < expDiff ) goto aExpBigger;
   1790     if ( expDiff < 0 ) goto bExpBigger;
   1791     if ( aExp == 0xFF ) {
   1792         if ( aSig | bSig ) return propagateFloat32NaN( a, b );
   1793         float_raise( float_flag_invalid );
   1794         return float32_default_nan;
   1795     }
   1796     if ( aExp == 0 ) {
   1797         aExp = 1;
   1798         bExp = 1;
   1799     }
   1800     if ( bSig < aSig ) goto aBigger;
   1801     if ( aSig < bSig ) goto bBigger;
   1802     return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
   1803  bExpBigger:
   1804     if ( bExp == 0xFF ) {
   1805         if ( bSig ) return propagateFloat32NaN( a, b );
   1806         return packFloat32( zSign ^ 1, 0xFF, 0 );
   1807     }
   1808     if ( aExp == 0 ) {
   1809         ++expDiff;
   1810     }
   1811     else {
   1812         aSig |= 0x40000000;
   1813     }
   1814     shift32RightJamming( aSig, - expDiff, &aSig );
   1815     bSig |= 0x40000000;
   1816  bBigger:
   1817     zSig = bSig - aSig;
   1818     zExp = bExp;
   1819     zSign ^= 1;
   1820     goto normalizeRoundAndPack;
   1821  aExpBigger:
   1822     if ( aExp == 0xFF ) {
   1823         if ( aSig ) return propagateFloat32NaN( a, b );
   1824         return a;
   1825     }
   1826     if ( bExp == 0 ) {
   1827         --expDiff;
   1828     }
   1829     else {
   1830         bSig |= 0x40000000;
   1831     }
   1832     shift32RightJamming( bSig, expDiff, &bSig );
   1833     aSig |= 0x40000000;
   1834  aBigger:
   1835     zSig = aSig - bSig;
   1836     zExp = aExp;
   1837  normalizeRoundAndPack:
   1838     --zExp;
   1839     return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
   1840 
   1841 }
   1842 
   1843 /*
   1844 -------------------------------------------------------------------------------
   1845 Returns the result of adding the single-precision floating-point values `a'
   1846 and `b'.  The operation is performed according to the IEC/IEEE Standard for
   1847 Binary Floating-Point Arithmetic.
   1848 -------------------------------------------------------------------------------
   1849 */
   1850 float32 float32_add( float32 a, float32 b )
   1851 {
   1852     flag aSign, bSign;
   1853 
   1854     aSign = extractFloat32Sign( a );
   1855     bSign = extractFloat32Sign( b );
   1856     if ( aSign == bSign ) {
   1857         return addFloat32Sigs( a, b, aSign );
   1858     }
   1859     else {
   1860         return subFloat32Sigs( a, b, aSign );
   1861     }
   1862 
   1863 }
   1864 
   1865 /*
   1866 -------------------------------------------------------------------------------
   1867 Returns the result of subtracting the single-precision floating-point values
   1868 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   1869 for Binary Floating-Point Arithmetic.
   1870 -------------------------------------------------------------------------------
   1871 */
   1872 float32 float32_sub( float32 a, float32 b )
   1873 {
   1874     flag aSign, bSign;
   1875 
   1876     aSign = extractFloat32Sign( a );
   1877     bSign = extractFloat32Sign( b );
   1878     if ( aSign == bSign ) {
   1879         return subFloat32Sigs( a, b, aSign );
   1880     }
   1881     else {
   1882         return addFloat32Sigs( a, b, aSign );
   1883     }
   1884 
   1885 }
   1886 
   1887 /*
   1888 -------------------------------------------------------------------------------
   1889 Returns the result of multiplying the single-precision floating-point values
   1890 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   1891 for Binary Floating-Point Arithmetic.
   1892 -------------------------------------------------------------------------------
   1893 */
   1894 float32 float32_mul( float32 a, float32 b )
   1895 {
   1896     flag aSign, bSign, zSign;
   1897     int16 aExp, bExp, zExp;
   1898     bits32 aSig, bSig;
   1899     bits64 zSig64;
   1900     bits32 zSig;
   1901 
   1902     aSig = extractFloat32Frac( a );
   1903     aExp = extractFloat32Exp( a );
   1904     aSign = extractFloat32Sign( a );
   1905     bSig = extractFloat32Frac( b );
   1906     bExp = extractFloat32Exp( b );
   1907     bSign = extractFloat32Sign( b );
   1908     zSign = aSign ^ bSign;
   1909     if ( aExp == 0xFF ) {
   1910         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
   1911             return propagateFloat32NaN( a, b );
   1912         }
   1913         if ( ( bExp | bSig ) == 0 ) {
   1914             float_raise( float_flag_invalid );
   1915             return float32_default_nan;
   1916         }
   1917         return packFloat32( zSign, 0xFF, 0 );
   1918     }
   1919     if ( bExp == 0xFF ) {
   1920         if ( bSig ) return propagateFloat32NaN( a, b );
   1921         if ( ( aExp | aSig ) == 0 ) {
   1922             float_raise( float_flag_invalid );
   1923             return float32_default_nan;
   1924         }
   1925         return packFloat32( zSign, 0xFF, 0 );
   1926     }
   1927     if ( aExp == 0 ) {
   1928         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
   1929         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1930     }
   1931     if ( bExp == 0 ) {
   1932         if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
   1933         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
   1934     }
   1935     zExp = aExp + bExp - 0x7F;
   1936     aSig = ( aSig | 0x00800000 )<<7;
   1937     bSig = ( bSig | 0x00800000 )<<8;
   1938     shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
   1939     zSig = (bits32)zSig64;
   1940     if ( 0 <= (sbits32) ( zSig<<1 ) ) {
   1941         zSig <<= 1;
   1942         --zExp;
   1943     }
   1944     return roundAndPackFloat32( zSign, zExp, zSig );
   1945 
   1946 }
   1947 
   1948 /*
   1949 -------------------------------------------------------------------------------
   1950 Returns the result of dividing the single-precision floating-point value `a'
   1951 by the corresponding value `b'.  The operation is performed according to the
   1952 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1953 -------------------------------------------------------------------------------
   1954 */
   1955 float32 float32_div( float32 a, float32 b )
   1956 {
   1957     flag aSign, bSign, zSign;
   1958     int16 aExp, bExp, zExp;
   1959     bits32 aSig, bSig, zSig;
   1960 
   1961     aSig = extractFloat32Frac( a );
   1962     aExp = extractFloat32Exp( a );
   1963     aSign = extractFloat32Sign( a );
   1964     bSig = extractFloat32Frac( b );
   1965     bExp = extractFloat32Exp( b );
   1966     bSign = extractFloat32Sign( b );
   1967     zSign = aSign ^ bSign;
   1968     if ( aExp == 0xFF ) {
   1969         if ( aSig ) return propagateFloat32NaN( a, b );
   1970         if ( bExp == 0xFF ) {
   1971             if ( bSig ) return propagateFloat32NaN( a, b );
   1972             float_raise( float_flag_invalid );
   1973             return float32_default_nan;
   1974         }
   1975         return packFloat32( zSign, 0xFF, 0 );
   1976     }
   1977     if ( bExp == 0xFF ) {
   1978         if ( bSig ) return propagateFloat32NaN( a, b );
   1979         return packFloat32( zSign, 0, 0 );
   1980     }
   1981     if ( bExp == 0 ) {
   1982         if ( bSig == 0 ) {
   1983             if ( ( aExp | aSig ) == 0 ) {
   1984                 float_raise( float_flag_invalid );
   1985                 return float32_default_nan;
   1986             }
   1987             float_raise( float_flag_divbyzero );
   1988             return packFloat32( zSign, 0xFF, 0 );
   1989         }
   1990         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
   1991     }
   1992     if ( aExp == 0 ) {
   1993         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
   1994         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1995     }
   1996     zExp = aExp - bExp + 0x7D;
   1997     aSig = ( aSig | 0x00800000 )<<7;
   1998     bSig = ( bSig | 0x00800000 )<<8;
   1999     if ( bSig <= ( aSig + aSig ) ) {
   2000         aSig >>= 1;
   2001         ++zExp;
   2002     }
   2003     zSig = (bits32)((((bits64) aSig) << 32) / bSig);
   2004     if ( ( zSig & 0x3F ) == 0 ) {
   2005         zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
   2006     }
   2007     return roundAndPackFloat32( zSign, zExp, zSig );
   2008 
   2009 }
   2010 
   2011 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   2012 /*
   2013 -------------------------------------------------------------------------------
   2014 Returns the remainder of the single-precision floating-point value `a'
   2015 with respect to the corresponding value `b'.  The operation is performed
   2016 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2017 -------------------------------------------------------------------------------
   2018 */
   2019 float32 float32_rem( float32 a, float32 b )
   2020 {
   2021     flag aSign, bSign, zSign;
   2022     int16 aExp, bExp, expDiff;
   2023     bits32 aSig, bSig;
   2024     bits32 q;
   2025     bits64 aSig64, bSig64, q64;
   2026     bits32 alternateASig;
   2027     sbits32 sigMean;
   2028 
   2029     aSig = extractFloat32Frac( a );
   2030     aExp = extractFloat32Exp( a );
   2031     aSign = extractFloat32Sign( a );
   2032     bSig = extractFloat32Frac( b );
   2033     bExp = extractFloat32Exp( b );
   2034     bSign = extractFloat32Sign( b );
   2035     if ( aExp == 0xFF ) {
   2036         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
   2037             return propagateFloat32NaN( a, b );
   2038         }
   2039         float_raise( float_flag_invalid );
   2040         return float32_default_nan;
   2041     }
   2042     if ( bExp == 0xFF ) {
   2043         if ( bSig ) return propagateFloat32NaN( a, b );
   2044         return a;
   2045     }
   2046     if ( bExp == 0 ) {
   2047         if ( bSig == 0 ) {
   2048             float_raise( float_flag_invalid );
   2049             return float32_default_nan;
   2050         }
   2051         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
   2052     }
   2053     if ( aExp == 0 ) {
   2054         if ( aSig == 0 ) return a;
   2055         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   2056     }
   2057     expDiff = aExp - bExp;
   2058     aSig |= 0x00800000;
   2059     bSig |= 0x00800000;
   2060     if ( expDiff < 32 ) {
   2061         aSig <<= 8;
   2062         bSig <<= 8;
   2063         if ( expDiff < 0 ) {
   2064             if ( expDiff < -1 ) return a;
   2065             aSig >>= 1;
   2066         }
   2067         q = ( bSig <= aSig );
   2068         if ( q ) aSig -= bSig;
   2069         if ( 0 < expDiff ) {
   2070             q = ( ( (bits64) aSig )<<32 ) / bSig;
   2071             q >>= 32 - expDiff;
   2072             bSig >>= 2;
   2073             aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
   2074         }
   2075         else {
   2076             aSig >>= 2;
   2077             bSig >>= 2;
   2078         }
   2079     }
   2080     else {
   2081         if ( bSig <= aSig ) aSig -= bSig;
   2082         aSig64 = ( (bits64) aSig )<<40;
   2083         bSig64 = ( (bits64) bSig )<<40;
   2084         expDiff -= 64;
   2085         while ( 0 < expDiff ) {
   2086             q64 = estimateDiv128To64( aSig64, 0, bSig64 );
   2087             q64 = ( 2 < q64 ) ? q64 - 2 : 0;
   2088             aSig64 = - ( ( bSig * q64 )<<38 );
   2089             expDiff -= 62;
   2090         }
   2091         expDiff += 64;
   2092         q64 = estimateDiv128To64( aSig64, 0, bSig64 );
   2093         q64 = ( 2 < q64 ) ? q64 - 2 : 0;
   2094         q = q64>>( 64 - expDiff );
   2095         bSig <<= 6;
   2096         aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
   2097     }
   2098     do {
   2099         alternateASig = aSig;
   2100         ++q;
   2101         aSig -= bSig;
   2102     } while ( 0 <= (sbits32) aSig );
   2103     sigMean = aSig + alternateASig;
   2104     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
   2105         aSig = alternateASig;
   2106     }
   2107     zSign = ( (sbits32) aSig < 0 );
   2108     if ( zSign ) aSig = - aSig;
   2109     return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
   2110 
   2111 }
   2112 #endif /* !SOFTFLOAT_FOR_GCC */
   2113 
   2114 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   2115 /*
   2116 -------------------------------------------------------------------------------
   2117 Returns the square root of the single-precision floating-point value `a'.
   2118 The operation is performed according to the IEC/IEEE Standard for Binary
   2119 Floating-Point Arithmetic.
   2120 -------------------------------------------------------------------------------
   2121 */
   2122 float32 float32_sqrt( float32 a )
   2123 {
   2124     flag aSign;
   2125     int16 aExp, zExp;
   2126     bits32 aSig, zSig;
   2127     bits64 rem, term;
   2128 
   2129     aSig = extractFloat32Frac( a );
   2130     aExp = extractFloat32Exp( a );
   2131     aSign = extractFloat32Sign( a );
   2132     if ( aExp == 0xFF ) {
   2133         if ( aSig ) return propagateFloat32NaN( a, 0 );
   2134         if ( ! aSign ) return a;
   2135         float_raise( float_flag_invalid );
   2136         return float32_default_nan;
   2137     }
   2138     if ( aSign ) {
   2139         if ( ( aExp | aSig ) == 0 ) return a;
   2140         float_raise( float_flag_invalid );
   2141         return float32_default_nan;
   2142     }
   2143     if ( aExp == 0 ) {
   2144         if ( aSig == 0 ) return 0;
   2145         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   2146     }
   2147     zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
   2148     aSig = ( aSig | 0x00800000 )<<8;
   2149     zSig = estimateSqrt32( aExp, aSig ) + 2;
   2150     if ( ( zSig & 0x7F ) <= 5 ) {
   2151         if ( zSig < 2 ) {
   2152             zSig = 0x7FFFFFFF;
   2153             goto roundAndPack;
   2154         }
   2155         aSig >>= aExp & 1;
   2156         term = ( (bits64) zSig ) * zSig;
   2157         rem = ( ( (bits64) aSig )<<32 ) - term;
   2158         while ( (sbits64) rem < 0 ) {
   2159             --zSig;
   2160             rem += ( ( (bits64) zSig )<<1 ) | 1;
   2161         }
   2162         zSig |= ( rem != 0 );
   2163     }
   2164     shift32RightJamming( zSig, 1, &zSig );
   2165  roundAndPack:
   2166     return roundAndPackFloat32( 0, zExp, zSig );
   2167 
   2168 }
   2169 #endif /* !SOFTFLOAT_FOR_GCC */
   2170 
   2171 /*
   2172 -------------------------------------------------------------------------------
   2173 Returns 1 if the single-precision floating-point value `a' is equal to
   2174 the corresponding value `b', and 0 otherwise.  The comparison is performed
   2175 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2176 -------------------------------------------------------------------------------
   2177 */
   2178 flag float32_eq( float32 a, float32 b )
   2179 {
   2180 
   2181     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2182          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2183        ) {
   2184         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
   2185             float_raise( float_flag_invalid );
   2186         }
   2187         return 0;
   2188     }
   2189     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
   2190 
   2191 }
   2192 
   2193 /*
   2194 -------------------------------------------------------------------------------
   2195 Returns 1 if the single-precision floating-point value `a' is less than
   2196 or equal to the corresponding value `b', and 0 otherwise.  The comparison
   2197 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   2198 Arithmetic.
   2199 -------------------------------------------------------------------------------
   2200 */
   2201 flag float32_le( float32 a, float32 b )
   2202 {
   2203     flag aSign, bSign;
   2204 
   2205     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2206          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2207        ) {
   2208         float_raise( float_flag_invalid );
   2209         return 0;
   2210     }
   2211     aSign = extractFloat32Sign( a );
   2212     bSign = extractFloat32Sign( b );
   2213     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
   2214     return ( a == b ) || ( aSign ^ ( a < b ) );
   2215 
   2216 }
   2217 
   2218 /*
   2219 -------------------------------------------------------------------------------
   2220 Returns 1 if the single-precision floating-point value `a' is less than
   2221 the corresponding value `b', and 0 otherwise.  The comparison is performed
   2222 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2223 -------------------------------------------------------------------------------
   2224 */
   2225 flag float32_lt( float32 a, float32 b )
   2226 {
   2227     flag aSign, bSign;
   2228 
   2229     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2230          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2231        ) {
   2232         float_raise( float_flag_invalid );
   2233         return 0;
   2234     }
   2235     aSign = extractFloat32Sign( a );
   2236     bSign = extractFloat32Sign( b );
   2237     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
   2238     return ( a != b ) && ( aSign ^ ( a < b ) );
   2239 
   2240 }
   2241 
   2242 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   2243 /*
   2244 -------------------------------------------------------------------------------
   2245 Returns 1 if the single-precision floating-point value `a' is equal to
   2246 the corresponding value `b', and 0 otherwise.  The invalid exception is
   2247 raised if either operand is a NaN.  Otherwise, the comparison is performed
   2248 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2249 -------------------------------------------------------------------------------
   2250 */
   2251 flag float32_eq_signaling( float32 a, float32 b )
   2252 {
   2253 
   2254     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2255          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2256        ) {
   2257         float_raise( float_flag_invalid );
   2258         return 0;
   2259     }
   2260     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
   2261 
   2262 }
   2263 
   2264 /*
   2265 -------------------------------------------------------------------------------
   2266 Returns 1 if the single-precision floating-point value `a' is less than or
   2267 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
   2268 cause an exception.  Otherwise, the comparison is performed according to the
   2269 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2270 -------------------------------------------------------------------------------
   2271 */
   2272 flag float32_le_quiet( float32 a, float32 b )
   2273 {
   2274     flag aSign, bSign;
   2275 
   2276     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2277          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2278        ) {
   2279         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
   2280             float_raise( float_flag_invalid );
   2281         }
   2282         return 0;
   2283     }
   2284     aSign = extractFloat32Sign( a );
   2285     bSign = extractFloat32Sign( b );
   2286     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
   2287     return ( a == b ) || ( aSign ^ ( a < b ) );
   2288 
   2289 }
   2290 
   2291 /*
   2292 -------------------------------------------------------------------------------
   2293 Returns 1 if the single-precision floating-point value `a' is less than
   2294 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
   2295 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
   2296 Standard for Binary Floating-Point Arithmetic.
   2297 -------------------------------------------------------------------------------
   2298 */
   2299 flag float32_lt_quiet( float32 a, float32 b )
   2300 {
   2301     flag aSign, bSign;
   2302 
   2303     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2304          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2305        ) {
   2306         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
   2307             float_raise( float_flag_invalid );
   2308         }
   2309         return 0;
   2310     }
   2311     aSign = extractFloat32Sign( a );
   2312     bSign = extractFloat32Sign( b );
   2313     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
   2314     return ( a != b ) && ( aSign ^ ( a < b ) );
   2315 
   2316 }
   2317 #endif /* !SOFTFLOAT_FOR_GCC */
   2318 
   2319 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   2320 /*
   2321 -------------------------------------------------------------------------------
   2322 Returns the result of converting the double-precision floating-point value
   2323 `a' to the 32-bit two's complement integer format.  The conversion is
   2324 performed according to the IEC/IEEE Standard for Binary Floating-Point
   2325 Arithmetic---which means in particular that the conversion is rounded
   2326 according to the current rounding mode.  If `a' is a NaN, the largest
   2327 positive integer is returned.  Otherwise, if the conversion overflows, the
   2328 largest integer with the same sign as `a' is returned.
   2329 -------------------------------------------------------------------------------
   2330 */
   2331 int32 float64_to_int32( float64 a )
   2332 {
   2333     flag aSign;
   2334     int16 aExp, shiftCount;
   2335     bits64 aSig;
   2336 
   2337     aSig = extractFloat64Frac( a );
   2338     aExp = extractFloat64Exp( a );
   2339     aSign = extractFloat64Sign( a );
   2340     if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
   2341     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
   2342     shiftCount = 0x42C - aExp;
   2343     if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
   2344     return roundAndPackInt32( aSign, aSig );
   2345 
   2346 }
   2347 #endif /* !SOFTFLOAT_FOR_GCC */
   2348 
   2349 /*
   2350 -------------------------------------------------------------------------------
   2351 Returns the result of converting the double-precision floating-point value
   2352 `a' to the 32-bit two's complement integer format.  The conversion is
   2353 performed according to the IEC/IEEE Standard for Binary Floating-Point
   2354 Arithmetic, except that the conversion is always rounded toward zero.
   2355 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
   2356 the conversion overflows, the largest integer with the same sign as `a' is
   2357 returned.
   2358 -------------------------------------------------------------------------------
   2359 */
   2360 int32 float64_to_int32_round_to_zero( float64 a )
   2361 {
   2362     flag aSign;
   2363     int16 aExp, shiftCount;
   2364     bits64 aSig, savedASig;
   2365     int32 z;
   2366 
   2367     aSig = extractFloat64Frac( a );
   2368     aExp = extractFloat64Exp( a );
   2369     aSign = extractFloat64Sign( a );
   2370     if ( 0x41E < aExp ) {
   2371         if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
   2372         goto invalid;
   2373     }
   2374     else if ( aExp < 0x3FF ) {
   2375         if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
   2376         return 0;
   2377     }
   2378     aSig |= LIT64( 0x0010000000000000 );
   2379     shiftCount = 0x433 - aExp;
   2380     savedASig = aSig;
   2381     aSig >>= shiftCount;
   2382     z = (int32)aSig;
   2383     if ( aSign ) z = - z;
   2384     if ( ( z < 0 ) ^ aSign ) {
   2385  invalid:
   2386         float_raise( float_flag_invalid );
   2387         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
   2388     }
   2389     if ( ( aSig<<shiftCount ) != savedASig ) {
   2390         float_exception_flags |= float_flag_inexact;
   2391     }
   2392     return z;
   2393 
   2394 }
   2395 
   2396 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   2397 /*
   2398 -------------------------------------------------------------------------------
   2399 Returns the result of converting the double-precision floating-point value
   2400 `a' to the 64-bit two's complement integer format.  The conversion is
   2401 performed according to the IEC/IEEE Standard for Binary Floating-Point
   2402 Arithmetic---which means in particular that the conversion is rounded
   2403 according to the current rounding mode.  If `a' is a NaN, the largest
   2404 positive integer is returned.  Otherwise, if the conversion overflows, the
   2405 largest integer with the same sign as `a' is returned.
   2406 -------------------------------------------------------------------------------
   2407 */
   2408 int64 float64_to_int64( float64 a )
   2409 {
   2410     flag aSign;
   2411     int16 aExp, shiftCount;
   2412     bits64 aSig, aSigExtra;
   2413 
   2414     aSig = extractFloat64Frac( a );
   2415     aExp = extractFloat64Exp( a );
   2416     aSign = extractFloat64Sign( a );
   2417     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
   2418     shiftCount = 0x433 - aExp;
   2419     if ( shiftCount <= 0 ) {
   2420         if ( 0x43E < aExp ) {
   2421             float_raise( float_flag_invalid );
   2422             if (    ! aSign
   2423                  || (    ( aExp == 0x7FF )
   2424                       && ( aSig != LIT64( 0x0010000000000000 ) ) )
   2425                ) {
   2426                 return LIT64( 0x7FFFFFFFFFFFFFFF );
   2427             }
   2428             return (sbits64) LIT64( 0x8000000000000000 );
   2429         }
   2430         aSigExtra = 0;
   2431         aSig <<= - shiftCount;
   2432     }
   2433     else {
   2434         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
   2435     }
   2436     return roundAndPackInt64( aSign, aSig, aSigExtra );
   2437 
   2438 }
   2439 
   2440 /*
   2441 -------------------------------------------------------------------------------
   2442 Returns the result of converting the double-precision floating-point value
   2443 `a' to the 64-bit two's complement integer format.  The conversion is
   2444 performed according to the IEC/IEEE Standard for Binary Floating-Point
   2445 Arithmetic, except that the conversion is always rounded toward zero.
   2446 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
   2447 the conversion overflows, the largest integer with the same sign as `a' is
   2448 returned.
   2449 -------------------------------------------------------------------------------
   2450 */
   2451 int64 float64_to_int64_round_to_zero( float64 a )
   2452 {
   2453     flag aSign;
   2454     int16 aExp, shiftCount;
   2455     bits64 aSig;
   2456     int64 z;
   2457 
   2458     aSig = extractFloat64Frac( a );
   2459     aExp = extractFloat64Exp( a );
   2460     aSign = extractFloat64Sign( a );
   2461     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
   2462     shiftCount = aExp - 0x433;
   2463     if ( 0 <= shiftCount ) {
   2464         if ( 0x43E <= aExp ) {
   2465             if ( a != LIT64( 0xC3E0000000000000 ) ) {
   2466                 float_raise( float_flag_invalid );
   2467                 if (    ! aSign
   2468                      || (    ( aExp == 0x7FF )
   2469                           && ( aSig != LIT64( 0x0010000000000000 ) ) )
   2470                    ) {
   2471                     return LIT64( 0x7FFFFFFFFFFFFFFF );
   2472                 }
   2473             }
   2474             return (sbits64) LIT64( 0x8000000000000000 );
   2475         }
   2476         z = aSig<<shiftCount;
   2477     }
   2478     else {
   2479         if ( aExp < 0x3FE ) {
   2480             if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
   2481             return 0;
   2482         }
   2483         z = aSig>>( - shiftCount );
   2484         if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
   2485             float_exception_flags |= float_flag_inexact;
   2486         }
   2487     }
   2488     if ( aSign ) z = - z;
   2489     return z;
   2490 
   2491 }
   2492 #endif /* !SOFTFLOAT_FOR_GCC */
   2493 
   2494 /*
   2495 -------------------------------------------------------------------------------
   2496 Returns the result of converting the double-precision floating-point value
   2497 `a' to the single-precision floating-point format.  The conversion is
   2498 performed according to the IEC/IEEE Standard for Binary Floating-Point
   2499 Arithmetic.
   2500 -------------------------------------------------------------------------------
   2501 */
   2502 float32 float64_to_float32( float64 a )
   2503 {
   2504     flag aSign;
   2505     int16 aExp;
   2506     bits64 aSig;
   2507     bits32 zSig;
   2508 
   2509     aSig = extractFloat64Frac( a );
   2510     aExp = extractFloat64Exp( a );
   2511     aSign = extractFloat64Sign( a );
   2512     if ( aExp == 0x7FF ) {
   2513         if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
   2514         return packFloat32( aSign, 0xFF, 0 );
   2515     }
   2516     shift64RightJamming( aSig, 22, &aSig );
   2517     zSig = (bits32)aSig;
   2518     if ( aExp || zSig ) {
   2519         zSig |= 0x40000000;
   2520         aExp -= 0x381;
   2521     }
   2522     return roundAndPackFloat32( aSign, aExp, zSig );
   2523 
   2524 }
   2525 
   2526 #ifdef FLOATX80
   2527 
   2528 /*
   2529 -------------------------------------------------------------------------------
   2530 Returns the result of converting the double-precision floating-point value
   2531 `a' to the extended double-precision floating-point format.  The conversion
   2532 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   2533 Arithmetic.
   2534 -------------------------------------------------------------------------------
   2535 */
   2536 floatx80 float64_to_floatx80( float64 a )
   2537 {
   2538     flag aSign;
   2539     int16 aExp;
   2540     bits64 aSig;
   2541 
   2542     aSig = extractFloat64Frac( a );
   2543     aExp = extractFloat64Exp( a );
   2544     aSign = extractFloat64Sign( a );
   2545     if ( aExp == 0x7FF ) {
   2546         if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
   2547         return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   2548     }
   2549     if ( aExp == 0 ) {
   2550         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
   2551         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   2552     }
   2553     return
   2554         packFloatx80(
   2555             aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
   2556 
   2557 }
   2558 
   2559 #endif
   2560 
   2561 #ifdef FLOAT128
   2562 
   2563 /*
   2564 -------------------------------------------------------------------------------
   2565 Returns the result of converting the double-precision floating-point value
   2566 `a' to the quadruple-precision floating-point format.  The conversion is
   2567 performed according to the IEC/IEEE Standard for Binary Floating-Point
   2568 Arithmetic.
   2569 -------------------------------------------------------------------------------
   2570 */
   2571 float128 float64_to_float128( float64 a )
   2572 {
   2573     flag aSign;
   2574     int16 aExp;
   2575     bits64 aSig, zSig0, zSig1;
   2576 
   2577     aSig = extractFloat64Frac( a );
   2578     aExp = extractFloat64Exp( a );
   2579     aSign = extractFloat64Sign( a );
   2580     if ( aExp == 0x7FF ) {
   2581         if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) );
   2582         return packFloat128( aSign, 0x7FFF, 0, 0 );
   2583     }
   2584     if ( aExp == 0 ) {
   2585         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
   2586         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   2587         --aExp;
   2588     }
   2589     shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
   2590     return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
   2591 
   2592 }
   2593 
   2594 #endif
   2595 
   2596 #ifndef SOFTFLOAT_FOR_GCC
   2597 /*
   2598 -------------------------------------------------------------------------------
   2599 Rounds the double-precision floating-point value `a' to an integer, and
   2600 returns the result as a double-precision floating-point value.  The
   2601 operation is performed according to the IEC/IEEE Standard for Binary
   2602 Floating-Point Arithmetic.
   2603 -------------------------------------------------------------------------------
   2604 */
   2605 float64 float64_round_to_int( float64 a )
   2606 {
   2607     flag aSign;
   2608     int16 aExp;
   2609     bits64 lastBitMask, roundBitsMask;
   2610     int8 roundingMode;
   2611     float64 z;
   2612 
   2613     aExp = extractFloat64Exp( a );
   2614     if ( 0x433 <= aExp ) {
   2615         if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
   2616             return propagateFloat64NaN( a, a );
   2617         }
   2618         return a;
   2619     }
   2620     if ( aExp < 0x3FF ) {
   2621         if ( (bits64) ( a<<1 ) == 0 ) return a;
   2622         float_exception_flags |= float_flag_inexact;
   2623         aSign = extractFloat64Sign( a );
   2624         switch ( float_rounding_mode ) {
   2625          case float_round_nearest_even:
   2626             if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
   2627                 return packFloat64( aSign, 0x3FF, 0 );
   2628             }
   2629             break;
   2630 	 case float_round_to_zero:
   2631 	    break;
   2632          case float_round_down:
   2633             return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
   2634          case float_round_up:
   2635             return
   2636             aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
   2637         }
   2638         return packFloat64( aSign, 0, 0 );
   2639     }
   2640     lastBitMask = 1;
   2641     lastBitMask <<= 0x433 - aExp;
   2642     roundBitsMask = lastBitMask - 1;
   2643     z = a;
   2644     roundingMode = float_rounding_mode;
   2645     if ( roundingMode == float_round_nearest_even ) {
   2646         z += lastBitMask>>1;
   2647         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
   2648     }
   2649     else if ( roundingMode != float_round_to_zero ) {
   2650         if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
   2651             z += roundBitsMask;
   2652         }
   2653     }
   2654     z &= ~ roundBitsMask;
   2655     if ( z != a ) float_exception_flags |= float_flag_inexact;
   2656     return z;
   2657 
   2658 }
   2659 #endif
   2660 
   2661 /*
   2662 -------------------------------------------------------------------------------
   2663 Returns the result of adding the absolute values of the double-precision
   2664 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
   2665 before being returned.  `zSign' is ignored if the result is a NaN.
   2666 The addition is performed according to the IEC/IEEE Standard for Binary
   2667 Floating-Point Arithmetic.
   2668 -------------------------------------------------------------------------------
   2669 */
   2670 static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
   2671 {
   2672     int16 aExp, bExp, zExp;
   2673     bits64 aSig, bSig, zSig;
   2674     int16 expDiff;
   2675 
   2676     aSig = extractFloat64Frac( a );
   2677     aExp = extractFloat64Exp( a );
   2678     bSig = extractFloat64Frac( b );
   2679     bExp = extractFloat64Exp( b );
   2680     expDiff = aExp - bExp;
   2681     aSig <<= 9;
   2682     bSig <<= 9;
   2683     if ( 0 < expDiff ) {
   2684         if ( aExp == 0x7FF ) {
   2685             if ( aSig ) return propagateFloat64NaN( a, b );
   2686             return a;
   2687         }
   2688         if ( bExp == 0 ) {
   2689             --expDiff;
   2690         }
   2691         else {
   2692             bSig |= LIT64( 0x2000000000000000 );
   2693         }
   2694         shift64RightJamming( bSig, expDiff, &bSig );
   2695         zExp = aExp;
   2696     }
   2697     else if ( expDiff < 0 ) {
   2698         if ( bExp == 0x7FF ) {
   2699             if ( bSig ) return propagateFloat64NaN( a, b );
   2700             return packFloat64( zSign, 0x7FF, 0 );
   2701         }
   2702         if ( aExp == 0 ) {
   2703             ++expDiff;
   2704         }
   2705         else {
   2706             aSig |= LIT64( 0x2000000000000000 );
   2707         }
   2708         shift64RightJamming( aSig, - expDiff, &aSig );
   2709         zExp = bExp;
   2710     }
   2711     else {
   2712         if ( aExp == 0x7FF ) {
   2713             if ( aSig | bSig ) return propagateFloat64NaN( a, b );
   2714             return a;
   2715         }
   2716         if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
   2717         zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
   2718         zExp = aExp;
   2719         goto roundAndPack;
   2720     }
   2721     aSig |= LIT64( 0x2000000000000000 );
   2722     zSig = ( aSig + bSig )<<1;
   2723     --zExp;
   2724     if ( (sbits64) zSig < 0 ) {
   2725         zSig = aSig + bSig;
   2726         ++zExp;
   2727     }
   2728  roundAndPack:
   2729     return roundAndPackFloat64( zSign, zExp, zSig );
   2730 
   2731 }
   2732 
   2733 /*
   2734 -------------------------------------------------------------------------------
   2735 Returns the result of subtracting the absolute values of the double-
   2736 precision floating-point values `a' and `b'.  If `zSign' is 1, the
   2737 difference is negated before being returned.  `zSign' is ignored if the
   2738 result is a NaN.  The subtraction is performed according to the IEC/IEEE
   2739 Standard for Binary Floating-Point Arithmetic.
   2740 -------------------------------------------------------------------------------
   2741 */
   2742 static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
   2743 {
   2744     int16 aExp, bExp, zExp;
   2745     bits64 aSig, bSig, zSig;
   2746     int16 expDiff;
   2747 
   2748     aSig = extractFloat64Frac( a );
   2749     aExp = extractFloat64Exp( a );
   2750     bSig = extractFloat64Frac( b );
   2751     bExp = extractFloat64Exp( b );
   2752     expDiff = aExp - bExp;
   2753     aSig <<= 10;
   2754     bSig <<= 10;
   2755     if ( 0 < expDiff ) goto aExpBigger;
   2756     if ( expDiff < 0 ) goto bExpBigger;
   2757     if ( aExp == 0x7FF ) {
   2758         if ( aSig | bSig ) return propagateFloat64NaN( a, b );
   2759         float_raise( float_flag_invalid );
   2760         return float64_default_nan;
   2761     }
   2762     if ( aExp == 0 ) {
   2763         aExp = 1;
   2764         bExp = 1;
   2765     }
   2766     if ( bSig < aSig ) goto aBigger;
   2767     if ( aSig < bSig ) goto bBigger;
   2768     return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
   2769  bExpBigger:
   2770     if ( bExp == 0x7FF ) {
   2771         if ( bSig ) return propagateFloat64NaN( a, b );
   2772         return packFloat64( zSign ^ 1, 0x7FF, 0 );
   2773     }
   2774     if ( aExp == 0 ) {
   2775         ++expDiff;
   2776     }
   2777     else {
   2778         aSig |= LIT64( 0x4000000000000000 );
   2779     }
   2780     shift64RightJamming( aSig, - expDiff, &aSig );
   2781     bSig |= LIT64( 0x4000000000000000 );
   2782  bBigger:
   2783     zSig = bSig - aSig;
   2784     zExp = bExp;
   2785     zSign ^= 1;
   2786     goto normalizeRoundAndPack;
   2787  aExpBigger:
   2788     if ( aExp == 0x7FF ) {
   2789         if ( aSig ) return propagateFloat64NaN( a, b );
   2790         return a;
   2791     }
   2792     if ( bExp == 0 ) {
   2793         --expDiff;
   2794     }
   2795     else {
   2796         bSig |= LIT64( 0x4000000000000000 );
   2797     }
   2798     shift64RightJamming( bSig, expDiff, &bSig );
   2799     aSig |= LIT64( 0x4000000000000000 );
   2800  aBigger:
   2801     zSig = aSig - bSig;
   2802     zExp = aExp;
   2803  normalizeRoundAndPack:
   2804     --zExp;
   2805     return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
   2806 
   2807 }
   2808 
   2809 /*
   2810 -------------------------------------------------------------------------------
   2811 Returns the result of adding the double-precision floating-point values `a'
   2812 and `b'.  The operation is performed according to the IEC/IEEE Standard for
   2813 Binary Floating-Point Arithmetic.
   2814 -------------------------------------------------------------------------------
   2815 */
   2816 float64 float64_add( float64 a, float64 b )
   2817 {
   2818     flag aSign, bSign;
   2819 
   2820     aSign = extractFloat64Sign( a );
   2821     bSign = extractFloat64Sign( b );
   2822     if ( aSign == bSign ) {
   2823         return addFloat64Sigs( a, b, aSign );
   2824     }
   2825     else {
   2826         return subFloat64Sigs( a, b, aSign );
   2827     }
   2828 
   2829 }
   2830 
   2831 /*
   2832 -------------------------------------------------------------------------------
   2833 Returns the result of subtracting the double-precision floating-point values
   2834 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   2835 for Binary Floating-Point Arithmetic.
   2836 -------------------------------------------------------------------------------
   2837 */
   2838 float64 float64_sub( float64 a, float64 b )
   2839 {
   2840     flag aSign, bSign;
   2841 
   2842     aSign = extractFloat64Sign( a );
   2843     bSign = extractFloat64Sign( b );
   2844     if ( aSign == bSign ) {
   2845         return subFloat64Sigs( a, b, aSign );
   2846     }
   2847     else {
   2848         return addFloat64Sigs( a, b, aSign );
   2849     }
   2850 
   2851 }
   2852 
   2853 /*
   2854 -------------------------------------------------------------------------------
   2855 Returns the result of multiplying the double-precision floating-point values
   2856 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   2857 for Binary Floating-Point Arithmetic.
   2858 -------------------------------------------------------------------------------
   2859 */
   2860 float64 float64_mul( float64 a, float64 b )
   2861 {
   2862     flag aSign, bSign, zSign;
   2863     int16 aExp, bExp, zExp;
   2864     bits64 aSig, bSig, zSig0, zSig1;
   2865 
   2866     aSig = extractFloat64Frac( a );
   2867     aExp = extractFloat64Exp( a );
   2868     aSign = extractFloat64Sign( a );
   2869     bSig = extractFloat64Frac( b );
   2870     bExp = extractFloat64Exp( b );
   2871     bSign = extractFloat64Sign( b );
   2872     zSign = aSign ^ bSign;
   2873     if ( aExp == 0x7FF ) {
   2874         if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
   2875             return propagateFloat64NaN( a, b );
   2876         }
   2877         if ( ( bExp | bSig ) == 0 ) {
   2878             float_raise( float_flag_invalid );
   2879             return float64_default_nan;
   2880         }
   2881         return packFloat64( zSign, 0x7FF, 0 );
   2882     }
   2883     if ( bExp == 0x7FF ) {
   2884         if ( bSig ) return propagateFloat64NaN( a, b );
   2885         if ( ( aExp | aSig ) == 0 ) {
   2886             float_raise( float_flag_invalid );
   2887             return float64_default_nan;
   2888         }
   2889         return packFloat64( zSign, 0x7FF, 0 );
   2890     }
   2891     if ( aExp == 0 ) {
   2892         if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
   2893         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   2894     }
   2895     if ( bExp == 0 ) {
   2896         if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
   2897         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
   2898     }
   2899     zExp = aExp + bExp - 0x3FF;
   2900     aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
   2901     bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
   2902     mul64To128( aSig, bSig, &zSig0, &zSig1 );
   2903     zSig0 |= ( zSig1 != 0 );
   2904     if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
   2905         zSig0 <<= 1;
   2906         --zExp;
   2907     }
   2908     return roundAndPackFloat64( zSign, zExp, zSig0 );
   2909 
   2910 }
   2911 
   2912 /*
   2913 -------------------------------------------------------------------------------
   2914 Returns the result of dividing the double-precision floating-point value `a'
   2915 by the corresponding value `b'.  The operation is performed according to
   2916 the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2917 -------------------------------------------------------------------------------
   2918 */
   2919 float64 float64_div( float64 a, float64 b )
   2920 {
   2921     flag aSign, bSign, zSign;
   2922     int16 aExp, bExp, zExp;
   2923     bits64 aSig, bSig, zSig;
   2924     bits64 rem0, rem1;
   2925     bits64 term0, term1;
   2926 
   2927     aSig = extractFloat64Frac( a );
   2928     aExp = extractFloat64Exp( a );
   2929     aSign = extractFloat64Sign( a );
   2930     bSig = extractFloat64Frac( b );
   2931     bExp = extractFloat64Exp( b );
   2932     bSign = extractFloat64Sign( b );
   2933     zSign = aSign ^ bSign;
   2934     if ( aExp == 0x7FF ) {
   2935         if ( aSig ) return propagateFloat64NaN( a, b );
   2936         if ( bExp == 0x7FF ) {
   2937             if ( bSig ) return propagateFloat64NaN( a, b );
   2938             float_raise( float_flag_invalid );
   2939             return float64_default_nan;
   2940         }
   2941         return packFloat64( zSign, 0x7FF, 0 );
   2942     }
   2943     if ( bExp == 0x7FF ) {
   2944         if ( bSig ) return propagateFloat64NaN( a, b );
   2945         return packFloat64( zSign, 0, 0 );
   2946     }
   2947     if ( bExp == 0 ) {
   2948         if ( bSig == 0 ) {
   2949             if ( ( aExp | aSig ) == 0 ) {
   2950                 float_raise( float_flag_invalid );
   2951                 return float64_default_nan;
   2952             }
   2953             float_raise( float_flag_divbyzero );
   2954             return packFloat64( zSign, 0x7FF, 0 );
   2955         }
   2956         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
   2957     }
   2958     if ( aExp == 0 ) {
   2959         if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
   2960         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   2961     }
   2962     zExp = aExp - bExp + 0x3FD;
   2963     aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
   2964     bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
   2965     if ( bSig <= ( aSig + aSig ) ) {
   2966         aSig >>= 1;
   2967         ++zExp;
   2968     }
   2969     zSig = estimateDiv128To64( aSig, 0, bSig );
   2970     if ( ( zSig & 0x1FF ) <= 2 ) {
   2971         mul64To128( bSig, zSig, &term0, &term1 );
   2972         sub128( aSig, 0, term0, term1, &rem0, &rem1 );
   2973         while ( (sbits64) rem0 < 0 ) {
   2974             --zSig;
   2975             add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
   2976         }
   2977         zSig |= ( rem1 != 0 );
   2978     }
   2979     return roundAndPackFloat64( zSign, zExp, zSig );
   2980 
   2981 }
   2982 
   2983 #ifndef SOFTFLOAT_FOR_GCC
   2984 /*
   2985 -------------------------------------------------------------------------------
   2986 Returns the remainder of the double-precision floating-point value `a'
   2987 with respect to the corresponding value `b'.  The operation is performed
   2988 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2989 -------------------------------------------------------------------------------
   2990 */
   2991 float64 float64_rem( float64 a, float64 b )
   2992 {
   2993     flag aSign, bSign, zSign;
   2994     int16 aExp, bExp, expDiff;
   2995     bits64 aSig, bSig;
   2996     bits64 q, alternateASig;
   2997     sbits64 sigMean;
   2998 
   2999     aSig = extractFloat64Frac( a );
   3000     aExp = extractFloat64Exp( a );
   3001     aSign = extractFloat64Sign( a );
   3002     bSig = extractFloat64Frac( b );
   3003     bExp = extractFloat64Exp( b );
   3004     bSign = extractFloat64Sign( b );
   3005     if ( aExp == 0x7FF ) {
   3006         if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
   3007             return propagateFloat64NaN( a, b );
   3008         }
   3009         float_raise( float_flag_invalid );
   3010         return float64_default_nan;
   3011     }
   3012     if ( bExp == 0x7FF ) {
   3013         if ( bSig ) return propagateFloat64NaN( a, b );
   3014         return a;
   3015     }
   3016     if ( bExp == 0 ) {
   3017         if ( bSig == 0 ) {
   3018             float_raise( float_flag_invalid );
   3019             return float64_default_nan;
   3020         }
   3021         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
   3022     }
   3023     if ( aExp == 0 ) {
   3024         if ( aSig == 0 ) return a;
   3025         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   3026     }
   3027     expDiff = aExp - bExp;
   3028     aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
   3029     bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
   3030     if ( expDiff < 0 ) {
   3031         if ( expDiff < -1 ) return a;
   3032         aSig >>= 1;
   3033     }
   3034     q = ( bSig <= aSig );
   3035     if ( q ) aSig -= bSig;
   3036     expDiff -= 64;
   3037     while ( 0 < expDiff ) {
   3038         q = estimateDiv128To64( aSig, 0, bSig );
   3039         q = ( 2 < q ) ? q - 2 : 0;
   3040         aSig = - ( ( bSig>>2 ) * q );
   3041         expDiff -= 62;
   3042     }
   3043     expDiff += 64;
   3044     if ( 0 < expDiff ) {
   3045         q = estimateDiv128To64( aSig, 0, bSig );
   3046         q = ( 2 < q ) ? q - 2 : 0;
   3047         q >>= 64 - expDiff;
   3048         bSig >>= 2;
   3049         aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
   3050     }
   3051     else {
   3052         aSig >>= 2;
   3053         bSig >>= 2;
   3054     }
   3055     do {
   3056         alternateASig = aSig;
   3057         ++q;
   3058         aSig -= bSig;
   3059     } while ( 0 <= (sbits64) aSig );
   3060     sigMean = aSig + alternateASig;
   3061     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
   3062         aSig = alternateASig;
   3063     }
   3064     zSign = ( (sbits64) aSig < 0 );
   3065     if ( zSign ) aSig = - aSig;
   3066     return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
   3067 
   3068 }
   3069 
   3070 /*
   3071 -------------------------------------------------------------------------------
   3072 Returns the square root of the double-precision floating-point value `a'.
   3073 The operation is performed according to the IEC/IEEE Standard for Binary
   3074 Floating-Point Arithmetic.
   3075 -------------------------------------------------------------------------------
   3076 */
   3077 float64 float64_sqrt( float64 a )
   3078 {
   3079     flag aSign;
   3080     int16 aExp, zExp;
   3081     bits64 aSig, zSig, doubleZSig;
   3082     bits64 rem0, rem1, term0, term1;
   3083 
   3084     aSig = extractFloat64Frac( a );
   3085     aExp = extractFloat64Exp( a );
   3086     aSign = extractFloat64Sign( a );
   3087     if ( aExp == 0x7FF ) {
   3088         if ( aSig ) return propagateFloat64NaN( a, a );
   3089         if ( ! aSign ) return a;
   3090         float_raise( float_flag_invalid );
   3091         return float64_default_nan;
   3092     }
   3093     if ( aSign ) {
   3094         if ( ( aExp | aSig ) == 0 ) return a;
   3095         float_raise( float_flag_invalid );
   3096         return float64_default_nan;
   3097     }
   3098     if ( aExp == 0 ) {
   3099         if ( aSig == 0 ) return 0;
   3100         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   3101     }
   3102     zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
   3103     aSig |= LIT64( 0x0010000000000000 );
   3104     zSig = estimateSqrt32( aExp, aSig>>21 );
   3105     aSig <<= 9 - ( aExp & 1 );
   3106     zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
   3107     if ( ( zSig & 0x1FF ) <= 5 ) {
   3108         doubleZSig = zSig<<1;
   3109         mul64To128( zSig, zSig, &term0, &term1 );
   3110         sub128( aSig, 0, term0, term1, &rem0, &rem1 );
   3111         while ( (sbits64) rem0 < 0 ) {
   3112             --zSig;
   3113             doubleZSig -= 2;
   3114             add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
   3115         }
   3116         zSig |= ( ( rem0 | rem1 ) != 0 );
   3117     }
   3118     return roundAndPackFloat64( 0, zExp, zSig );
   3119 
   3120 }
   3121 #endif
   3122 
   3123 /*
   3124 -------------------------------------------------------------------------------
   3125 Returns 1 if the double-precision floating-point value `a' is equal to the
   3126 corresponding value `b', and 0 otherwise.  The comparison is performed
   3127 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3128 -------------------------------------------------------------------------------
   3129 */
   3130 flag float64_eq( float64 a, float64 b )
   3131 {
   3132 
   3133     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3134          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3135        ) {
   3136         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
   3137             float_raise( float_flag_invalid );
   3138         }
   3139         return 0;
   3140     }
   3141     return ( a == b ) ||
   3142 	( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
   3143 
   3144 }
   3145 
   3146 /*
   3147 -------------------------------------------------------------------------------
   3148 Returns 1 if the double-precision floating-point value `a' is less than or
   3149 equal to the corresponding value `b', and 0 otherwise.  The comparison is
   3150 performed according to the IEC/IEEE Standard for Binary Floating-Point
   3151 Arithmetic.
   3152 -------------------------------------------------------------------------------
   3153 */
   3154 flag float64_le( float64 a, float64 b )
   3155 {
   3156     flag aSign, bSign;
   3157 
   3158     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3159          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3160        ) {
   3161         float_raise( float_flag_invalid );
   3162         return 0;
   3163     }
   3164     aSign = extractFloat64Sign( a );
   3165     bSign = extractFloat64Sign( b );
   3166     if ( aSign != bSign )
   3167 	return aSign ||
   3168 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
   3169 	      0 );
   3170     return ( a == b ) ||
   3171 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
   3172 
   3173 }
   3174 
   3175 /*
   3176 -------------------------------------------------------------------------------
   3177 Returns 1 if the double-precision floating-point value `a' is less than
   3178 the corresponding value `b', and 0 otherwise.  The comparison is performed
   3179 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3180 -------------------------------------------------------------------------------
   3181 */
   3182 flag float64_lt( float64 a, float64 b )
   3183 {
   3184     flag aSign, bSign;
   3185 
   3186     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3187          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3188        ) {
   3189         float_raise( float_flag_invalid );
   3190         return 0;
   3191     }
   3192     aSign = extractFloat64Sign( a );
   3193     bSign = extractFloat64Sign( b );
   3194     if ( aSign != bSign )
   3195 	return aSign &&
   3196 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
   3197 	      0 );
   3198     return ( a != b ) &&
   3199 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
   3200 
   3201 }
   3202 
   3203 #ifndef SOFTFLOAT_FOR_GCC
   3204 /*
   3205 -------------------------------------------------------------------------------
   3206 Returns 1 if the double-precision floating-point value `a' is equal to the
   3207 corresponding value `b', and 0 otherwise.  The invalid exception is raised
   3208 if either operand is a NaN.  Otherwise, the comparison is performed
   3209 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3210 -------------------------------------------------------------------------------
   3211 */
   3212 flag float64_eq_signaling( float64 a, float64 b )
   3213 {
   3214 
   3215     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3216          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3217        ) {
   3218         float_raise( float_flag_invalid );
   3219         return 0;
   3220     }
   3221     return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
   3222 
   3223 }
   3224 
   3225 /*
   3226 -------------------------------------------------------------------------------
   3227 Returns 1 if the double-precision floating-point value `a' is less than or
   3228 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
   3229 cause an exception.  Otherwise, the comparison is performed according to the
   3230 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3231 -------------------------------------------------------------------------------
   3232 */
   3233 flag float64_le_quiet( float64 a, float64 b )
   3234 {
   3235     flag aSign, bSign;
   3236 
   3237     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3238          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3239        ) {
   3240         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
   3241             float_raise( float_flag_invalid );
   3242         }
   3243         return 0;
   3244     }
   3245     aSign = extractFloat64Sign( a );
   3246     bSign = extractFloat64Sign( b );
   3247     if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
   3248     return ( a == b ) || ( aSign ^ ( a < b ) );
   3249 
   3250 }
   3251 
   3252 /*
   3253 -------------------------------------------------------------------------------
   3254 Returns 1 if the double-precision floating-point value `a' is less than
   3255 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
   3256 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
   3257 Standard for Binary Floating-Point Arithmetic.
   3258 -------------------------------------------------------------------------------
   3259 */
   3260 flag float64_lt_quiet( float64 a, float64 b )
   3261 {
   3262     flag aSign, bSign;
   3263 
   3264     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3265          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3266        ) {
   3267         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
   3268             float_raise( float_flag_invalid );
   3269         }
   3270         return 0;
   3271     }
   3272     aSign = extractFloat64Sign( a );
   3273     bSign = extractFloat64Sign( b );
   3274     if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
   3275     return ( a != b ) && ( aSign ^ ( a < b ) );
   3276 
   3277 }
   3278 #endif
   3279 
   3280 #ifdef FLOATX80
   3281 
   3282 /*
   3283 -------------------------------------------------------------------------------
   3284 Returns the result of converting the extended double-precision floating-
   3285 point value `a' to the 32-bit two's complement integer format.  The
   3286 conversion is performed according to the IEC/IEEE Standard for Binary
   3287 Floating-Point Arithmetic---which means in particular that the conversion
   3288 is rounded according to the current rounding mode.  If `a' is a NaN, the
   3289 largest positive integer is returned.  Otherwise, if the conversion
   3290 overflows, the largest integer with the same sign as `a' is returned.
   3291 -------------------------------------------------------------------------------
   3292 */
   3293 int32 floatx80_to_int32( floatx80 a )
   3294 {
   3295     flag aSign;
   3296     int32 aExp, shiftCount;
   3297     bits64 aSig;
   3298 
   3299     aSig = extractFloatx80Frac( a );
   3300     aExp = extractFloatx80Exp( a );
   3301     aSign = extractFloatx80Sign( a );
   3302     if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
   3303     shiftCount = 0x4037 - aExp;
   3304     if ( shiftCount <= 0 ) shiftCount = 1;
   3305     shift64RightJamming( aSig, shiftCount, &aSig );
   3306     return roundAndPackInt32( aSign, aSig );
   3307 
   3308 }
   3309 
   3310 /*
   3311 -------------------------------------------------------------------------------
   3312 Returns the result of converting the extended double-precision floating-
   3313 point value `a' to the 32-bit two's complement integer format.  The
   3314 conversion is performed according to the IEC/IEEE Standard for Binary
   3315 Floating-Point Arithmetic, except that the conversion is always rounded
   3316 toward zero.  If `a' is a NaN, the largest positive integer is returned.
   3317 Otherwise, if the conversion overflows, the largest integer with the same
   3318 sign as `a' is returned.
   3319 -------------------------------------------------------------------------------
   3320 */
   3321 int32 floatx80_to_int32_round_to_zero( floatx80 a )
   3322 {
   3323     flag aSign;
   3324     int32 aExp, shiftCount;
   3325     bits64 aSig, savedASig;
   3326     int32 z;
   3327 
   3328     aSig = extractFloatx80Frac( a );
   3329     aExp = extractFloatx80Exp( a );
   3330     aSign = extractFloatx80Sign( a );
   3331     if ( 0x401E < aExp ) {
   3332         if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
   3333         goto invalid;
   3334     }
   3335     else if ( aExp < 0x3FFF ) {
   3336         if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
   3337         return 0;
   3338     }
   3339     shiftCount = 0x403E - aExp;
   3340     savedASig = aSig;
   3341     aSig >>= shiftCount;
   3342     z = aSig;
   3343     if ( aSign ) z = - z;
   3344     if ( ( z < 0 ) ^ aSign ) {
   3345  invalid:
   3346         float_raise( float_flag_invalid );
   3347         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
   3348     }
   3349     if ( ( aSig<<shiftCount ) != savedASig ) {
   3350         float_exception_flags |= float_flag_inexact;
   3351     }
   3352     return z;
   3353 
   3354 }
   3355 
   3356 /*
   3357 -------------------------------------------------------------------------------
   3358 Returns the result of converting the extended double-precision floating-
   3359 point value `a' to the 64-bit two's complement integer format.  The
   3360 conversion is performed according to the IEC/IEEE Standard for Binary
   3361 Floating-Point Arithmetic---which means in particular that the conversion
   3362 is rounded according to the current rounding mode.  If `a' is a NaN,
   3363 the largest positive integer is returned.  Otherwise, if the conversion
   3364 overflows, the largest integer with the same sign as `a' is returned.
   3365 -------------------------------------------------------------------------------
   3366 */
   3367 int64 floatx80_to_int64( floatx80 a )
   3368 {
   3369     flag aSign;
   3370     int32 aExp, shiftCount;
   3371     bits64 aSig, aSigExtra;
   3372 
   3373     aSig = extractFloatx80Frac( a );
   3374     aExp = extractFloatx80Exp( a );
   3375     aSign = extractFloatx80Sign( a );
   3376     shiftCount = 0x403E - aExp;
   3377     if ( shiftCount <= 0 ) {
   3378         if ( shiftCount ) {
   3379             float_raise( float_flag_invalid );
   3380             if (    ! aSign
   3381                  || (    ( aExp == 0x7FFF )
   3382                       && ( aSig != LIT64( 0x8000000000000000 ) ) )
   3383                ) {
   3384                 return LIT64( 0x7FFFFFFFFFFFFFFF );
   3385             }
   3386             return (sbits64) LIT64( 0x8000000000000000 );
   3387         }
   3388         aSigExtra = 0;
   3389     }
   3390     else {
   3391         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
   3392     }
   3393     return roundAndPackInt64( aSign, aSig, aSigExtra );
   3394 
   3395 }
   3396 
   3397 /*
   3398 -------------------------------------------------------------------------------
   3399 Returns the result of converting the extended double-precision floating-
   3400 point value `a' to the 64-bit two's complement integer format.  The
   3401 conversion is performed according to the IEC/IEEE Standard for Binary
   3402 Floating-Point Arithmetic, except that the conversion is always rounded
   3403 toward zero.  If `a' is a NaN, the largest positive integer is returned.
   3404 Otherwise, if the conversion overflows, the largest integer with the same
   3405 sign as `a' is returned.
   3406 -------------------------------------------------------------------------------
   3407 */
   3408 int64 floatx80_to_int64_round_to_zero( floatx80 a )
   3409 {
   3410     flag aSign;
   3411     int32 aExp, shiftCount;
   3412     bits64 aSig;
   3413     int64 z;
   3414 
   3415     aSig = extractFloatx80Frac( a );
   3416     aExp = extractFloatx80Exp( a );
   3417     aSign = extractFloatx80Sign( a );
   3418     shiftCount = aExp - 0x403E;
   3419     if ( 0 <= shiftCount ) {
   3420         aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
   3421         if ( ( a.high != 0xC03E ) || aSig ) {
   3422             float_raise( float_flag_invalid );
   3423             if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
   3424                 return LIT64( 0x7FFFFFFFFFFFFFFF );
   3425             }
   3426         }
   3427         return (sbits64) LIT64( 0x8000000000000000 );
   3428     }
   3429     else if ( aExp < 0x3FFF ) {
   3430         if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
   3431         return 0;
   3432     }
   3433     z = aSig>>( - shiftCount );
   3434     if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
   3435         float_exception_flags |= float_flag_inexact;
   3436     }
   3437     if ( aSign ) z = - z;
   3438     return z;
   3439 
   3440 }
   3441 
   3442 /*
   3443 -------------------------------------------------------------------------------
   3444 Returns the result of converting the extended double-precision floating-
   3445 point value `a' to the single-precision floating-point format.  The
   3446 conversion is performed according to the IEC/IEEE Standard for Binary
   3447 Floating-Point Arithmetic.
   3448 -------------------------------------------------------------------------------
   3449 */
   3450 float32 floatx80_to_float32( floatx80 a )
   3451 {
   3452     flag aSign;
   3453     int32 aExp;
   3454     bits64 aSig;
   3455 
   3456     aSig = extractFloatx80Frac( a );
   3457     aExp = extractFloatx80Exp( a );
   3458     aSign = extractFloatx80Sign( a );
   3459     if ( aExp == 0x7FFF ) {
   3460         if ( (bits64) ( aSig<<1 ) ) {
   3461             return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
   3462         }
   3463         return packFloat32( aSign, 0xFF, 0 );
   3464     }
   3465     shift64RightJamming( aSig, 33, &aSig );
   3466     if ( aExp || aSig ) aExp -= 0x3F81;
   3467     return roundAndPackFloat32( aSign, aExp, aSig );
   3468 
   3469 }
   3470 
   3471 /*
   3472 -------------------------------------------------------------------------------
   3473 Returns the result of converting the extended double-precision floating-
   3474 point value `a' to the double-precision floating-point format.  The
   3475 conversion is performed according to the IEC/IEEE Standard for Binary
   3476 Floating-Point Arithmetic.
   3477 -------------------------------------------------------------------------------
   3478 */
   3479 float64 floatx80_to_float64( floatx80 a )
   3480 {
   3481     flag aSign;
   3482     int32 aExp;
   3483     bits64 aSig, zSig;
   3484 
   3485     aSig = extractFloatx80Frac( a );
   3486     aExp = extractFloatx80Exp( a );
   3487     aSign = extractFloatx80Sign( a );
   3488     if ( aExp == 0x7FFF ) {
   3489         if ( (bits64) ( aSig<<1 ) ) {
   3490             return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
   3491         }
   3492         return packFloat64( aSign, 0x7FF, 0 );
   3493     }
   3494     shift64RightJamming( aSig, 1, &zSig );
   3495     if ( aExp || aSig ) aExp -= 0x3C01;
   3496     return roundAndPackFloat64( aSign, aExp, zSig );
   3497 
   3498 }
   3499 
   3500 #ifdef FLOAT128
   3501 
   3502 /*
   3503 -------------------------------------------------------------------------------
   3504 Returns the result of converting the extended double-precision floating-
   3505 point value `a' to the quadruple-precision floating-point format.  The
   3506 conversion is performed according to the IEC/IEEE Standard for Binary
   3507 Floating-Point Arithmetic.
   3508 -------------------------------------------------------------------------------
   3509 */
   3510 float128 floatx80_to_float128( floatx80 a )
   3511 {
   3512     flag aSign;
   3513     int16 aExp;
   3514     bits64 aSig, zSig0, zSig1;
   3515 
   3516     aSig = extractFloatx80Frac( a );
   3517     aExp = extractFloatx80Exp( a );
   3518     aSign = extractFloatx80Sign( a );
   3519     if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
   3520         return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
   3521     }
   3522     shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
   3523     return packFloat128( aSign, aExp, zSig0, zSig1 );
   3524 
   3525 }
   3526 
   3527 #endif
   3528 
   3529 /*
   3530 -------------------------------------------------------------------------------
   3531 Rounds the extended double-precision floating-point value `a' to an integer,
   3532 and returns the result as an extended quadruple-precision floating-point
   3533 value.  The operation is performed according to the IEC/IEEE Standard for
   3534 Binary Floating-Point Arithmetic.
   3535 -------------------------------------------------------------------------------
   3536 */
   3537 floatx80 floatx80_round_to_int( floatx80 a )
   3538 {
   3539     flag aSign;
   3540     int32 aExp;
   3541     bits64 lastBitMask, roundBitsMask;
   3542     int8 roundingMode;
   3543     floatx80 z;
   3544 
   3545     aExp = extractFloatx80Exp( a );
   3546     if ( 0x403E <= aExp ) {
   3547         if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
   3548             return propagateFloatx80NaN( a, a );
   3549         }
   3550         return a;
   3551     }
   3552     if ( aExp < 0x3FFF ) {
   3553         if (    ( aExp == 0 )
   3554              && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
   3555             return a;
   3556         }
   3557         float_exception_flags |= float_flag_inexact;
   3558         aSign = extractFloatx80Sign( a );
   3559         switch ( float_rounding_mode ) {
   3560          case float_round_nearest_even:
   3561             if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
   3562                ) {
   3563                 return
   3564                     packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
   3565             }
   3566             break;
   3567 	 case float_round_to_zero:
   3568 	    break;
   3569          case float_round_down:
   3570             return
   3571                   aSign ?
   3572                       packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
   3573                 : packFloatx80( 0, 0, 0 );
   3574          case float_round_up:
   3575             return
   3576                   aSign ? packFloatx80( 1, 0, 0 )
   3577                 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
   3578         }
   3579         return packFloatx80( aSign, 0, 0 );
   3580     }
   3581     lastBitMask = 1;
   3582     lastBitMask <<= 0x403E - aExp;
   3583     roundBitsMask = lastBitMask - 1;
   3584     z = a;
   3585     roundingMode = float_rounding_mode;
   3586     if ( roundingMode == float_round_nearest_even ) {
   3587         z.low += lastBitMask>>1;
   3588         if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
   3589     }
   3590     else if ( roundingMode != float_round_to_zero ) {
   3591         if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
   3592             z.low += roundBitsMask;
   3593         }
   3594     }
   3595     z.low &= ~ roundBitsMask;
   3596     if ( z.low == 0 ) {
   3597         ++z.high;
   3598         z.low = LIT64( 0x8000000000000000 );
   3599     }
   3600     if ( z.low != a.low ) float_exception_flags |= float_flag_inexact;
   3601     return z;
   3602 
   3603 }
   3604 
   3605 /*
   3606 -------------------------------------------------------------------------------
   3607 Returns the result of adding the absolute values of the extended double-
   3608 precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
   3609 negated before being returned.  `zSign' is ignored if the result is a NaN.
   3610 The addition is performed according to the IEC/IEEE Standard for Binary
   3611 Floating-Point Arithmetic.
   3612 -------------------------------------------------------------------------------
   3613 */
   3614 static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
   3615 {
   3616     int32 aExp, bExp, zExp;
   3617     bits64 aSig, bSig, zSig0, zSig1;
   3618     int32 expDiff;
   3619 
   3620     aSig = extractFloatx80Frac( a );
   3621     aExp = extractFloatx80Exp( a );
   3622     bSig = extractFloatx80Frac( b );
   3623     bExp = extractFloatx80Exp( b );
   3624     expDiff = aExp - bExp;
   3625     if ( 0 < expDiff ) {
   3626         if ( aExp == 0x7FFF ) {
   3627             if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3628             return a;
   3629         }
   3630         if ( bExp == 0 ) --expDiff;
   3631         shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
   3632         zExp = aExp;
   3633     }
   3634     else if ( expDiff < 0 ) {
   3635         if ( bExp == 0x7FFF ) {
   3636             if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3637             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3638         }
   3639         if ( aExp == 0 ) ++expDiff;
   3640         shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
   3641         zExp = bExp;
   3642     }
   3643     else {
   3644         if ( aExp == 0x7FFF ) {
   3645             if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
   3646                 return propagateFloatx80NaN( a, b );
   3647             }
   3648             return a;
   3649         }
   3650         zSig1 = 0;
   3651         zSig0 = aSig + bSig;
   3652         if ( aExp == 0 ) {
   3653             normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
   3654             goto roundAndPack;
   3655         }
   3656         zExp = aExp;
   3657         goto shiftRight1;
   3658     }
   3659     zSig0 = aSig + bSig;
   3660     if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
   3661  shiftRight1:
   3662     shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
   3663     zSig0 |= LIT64( 0x8000000000000000 );
   3664     ++zExp;
   3665  roundAndPack:
   3666     return
   3667         roundAndPackFloatx80(
   3668             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
   3669 
   3670 }
   3671 
   3672 /*
   3673 -------------------------------------------------------------------------------
   3674 Returns the result of subtracting the absolute values of the extended
   3675 double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
   3676 difference is negated before being returned.  `zSign' is ignored if the
   3677 result is a NaN.  The subtraction is performed according to the IEC/IEEE
   3678 Standard for Binary Floating-Point Arithmetic.
   3679 -------------------------------------------------------------------------------
   3680 */
   3681 static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
   3682 {
   3683     int32 aExp, bExp, zExp;
   3684     bits64 aSig, bSig, zSig0, zSig1;
   3685     int32 expDiff;
   3686     floatx80 z;
   3687 
   3688     aSig = extractFloatx80Frac( a );
   3689     aExp = extractFloatx80Exp( a );
   3690     bSig = extractFloatx80Frac( b );
   3691     bExp = extractFloatx80Exp( b );
   3692     expDiff = aExp - bExp;
   3693     if ( 0 < expDiff ) goto aExpBigger;
   3694     if ( expDiff < 0 ) goto bExpBigger;
   3695     if ( aExp == 0x7FFF ) {
   3696         if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
   3697             return propagateFloatx80NaN( a, b );
   3698         }
   3699         float_raise( float_flag_invalid );
   3700         z.low = floatx80_default_nan_low;
   3701         z.high = floatx80_default_nan_high;
   3702         return z;
   3703     }
   3704     if ( aExp == 0 ) {
   3705         aExp = 1;
   3706         bExp = 1;
   3707     }
   3708     zSig1 = 0;
   3709     if ( bSig < aSig ) goto aBigger;
   3710     if ( aSig < bSig ) goto bBigger;
   3711     return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
   3712  bExpBigger:
   3713     if ( bExp == 0x7FFF ) {
   3714         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3715         return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3716     }
   3717     if ( aExp == 0 ) ++expDiff;
   3718     shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
   3719  bBigger:
   3720     sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
   3721     zExp = bExp;
   3722     zSign ^= 1;
   3723     goto normalizeRoundAndPack;
   3724  aExpBigger:
   3725     if ( aExp == 0x7FFF ) {
   3726         if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3727         return a;
   3728     }
   3729     if ( bExp == 0 ) --expDiff;
   3730     shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
   3731  aBigger:
   3732     sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
   3733     zExp = aExp;
   3734  normalizeRoundAndPack:
   3735     return
   3736         normalizeRoundAndPackFloatx80(
   3737             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
   3738 
   3739 }
   3740 
   3741 /*
   3742 -------------------------------------------------------------------------------
   3743 Returns the result of adding the extended double-precision floating-point
   3744 values `a' and `b'.  The operation is performed according to the IEC/IEEE
   3745 Standard for Binary Floating-Point Arithmetic.
   3746 -------------------------------------------------------------------------------
   3747 */
   3748 floatx80 floatx80_add( floatx80 a, floatx80 b )
   3749 {
   3750     flag aSign, bSign;
   3751 
   3752     aSign = extractFloatx80Sign( a );
   3753     bSign = extractFloatx80Sign( b );
   3754     if ( aSign == bSign ) {
   3755         return addFloatx80Sigs( a, b, aSign );
   3756     }
   3757     else {
   3758         return subFloatx80Sigs( a, b, aSign );
   3759     }
   3760 
   3761 }
   3762 
   3763 /*
   3764 -------------------------------------------------------------------------------
   3765 Returns the result of subtracting the extended double-precision floating-
   3766 point values `a' and `b'.  The operation is performed according to the
   3767 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3768 -------------------------------------------------------------------------------
   3769 */
   3770 floatx80 floatx80_sub( floatx80 a, floatx80 b )
   3771 {
   3772     flag aSign, bSign;
   3773 
   3774     aSign = extractFloatx80Sign( a );
   3775     bSign = extractFloatx80Sign( b );
   3776     if ( aSign == bSign ) {
   3777         return subFloatx80Sigs( a, b, aSign );
   3778     }
   3779     else {
   3780         return addFloatx80Sigs( a, b, aSign );
   3781     }
   3782 
   3783 }
   3784 
   3785 /*
   3786 -------------------------------------------------------------------------------
   3787 Returns the result of multiplying the extended double-precision floating-
   3788 point values `a' and `b'.  The operation is performed according to the
   3789 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3790 -------------------------------------------------------------------------------
   3791 */
   3792 floatx80 floatx80_mul( floatx80 a, floatx80 b )
   3793 {
   3794     flag aSign, bSign, zSign;
   3795     int32 aExp, bExp, zExp;
   3796     bits64 aSig, bSig, zSig0, zSig1;
   3797     floatx80 z;
   3798 
   3799     aSig = extractFloatx80Frac( a );
   3800     aExp = extractFloatx80Exp( a );
   3801     aSign = extractFloatx80Sign( a );
   3802     bSig = extractFloatx80Frac( b );
   3803     bExp = extractFloatx80Exp( b );
   3804     bSign = extractFloatx80Sign( b );
   3805     zSign = aSign ^ bSign;
   3806     if ( aExp == 0x7FFF ) {
   3807         if (    (bits64) ( aSig<<1 )
   3808              || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
   3809             return propagateFloatx80NaN( a, b );
   3810         }
   3811         if ( ( bExp | bSig ) == 0 ) goto invalid;
   3812         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3813     }
   3814     if ( bExp == 0x7FFF ) {
   3815         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3816         if ( ( aExp | aSig ) == 0 ) {
   3817  invalid:
   3818             float_raise( float_flag_invalid );
   3819             z.low = floatx80_default_nan_low;
   3820             z.high = floatx80_default_nan_high;
   3821             return z;
   3822         }
   3823         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3824     }
   3825     if ( aExp == 0 ) {
   3826         if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
   3827         normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
   3828     }
   3829     if ( bExp == 0 ) {
   3830         if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
   3831         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
   3832     }
   3833     zExp = aExp + bExp - 0x3FFE;
   3834     mul64To128( aSig, bSig, &zSig0, &zSig1 );
   3835     if ( 0 < (sbits64) zSig0 ) {
   3836         shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
   3837         --zExp;
   3838     }
   3839     return
   3840         roundAndPackFloatx80(
   3841             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
   3842 
   3843 }
   3844 
   3845 /*
   3846 -------------------------------------------------------------------------------
   3847 Returns the result of dividing the extended double-precision floating-point
   3848 value `a' by the corresponding value `b'.  The operation is performed
   3849 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3850 -------------------------------------------------------------------------------
   3851 */
   3852 floatx80 floatx80_div( floatx80 a, floatx80 b )
   3853 {
   3854     flag aSign, bSign, zSign;
   3855     int32 aExp, bExp, zExp;
   3856     bits64 aSig, bSig, zSig0, zSig1;
   3857     bits64 rem0, rem1, rem2, term0, term1, term2;
   3858     floatx80 z;
   3859 
   3860     aSig = extractFloatx80Frac( a );
   3861     aExp = extractFloatx80Exp( a );
   3862     aSign = extractFloatx80Sign( a );
   3863     bSig = extractFloatx80Frac( b );
   3864     bExp = extractFloatx80Exp( b );
   3865     bSign = extractFloatx80Sign( b );
   3866     zSign = aSign ^ bSign;
   3867     if ( aExp == 0x7FFF ) {
   3868         if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3869         if ( bExp == 0x7FFF ) {
   3870             if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3871             goto invalid;
   3872         }
   3873         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3874     }
   3875     if ( bExp == 0x7FFF ) {
   3876         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3877         return packFloatx80( zSign, 0, 0 );
   3878     }
   3879     if ( bExp == 0 ) {
   3880         if ( bSig == 0 ) {
   3881             if ( ( aExp | aSig ) == 0 ) {
   3882  invalid:
   3883                 float_raise( float_flag_invalid );
   3884                 z.low = floatx80_default_nan_low;
   3885                 z.high = floatx80_default_nan_high;
   3886                 return z;
   3887             }
   3888             float_raise( float_flag_divbyzero );
   3889             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3890         }
   3891         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
   3892     }
   3893     if ( aExp == 0 ) {
   3894         if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
   3895         normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
   3896     }
   3897     zExp = aExp - bExp + 0x3FFE;
   3898     rem1 = 0;
   3899     if ( bSig <= aSig ) {
   3900         shift128Right( aSig, 0, 1, &aSig, &rem1 );
   3901         ++zExp;
   3902     }
   3903     zSig0 = estimateDiv128To64( aSig, rem1, bSig );
   3904     mul64To128( bSig, zSig0, &term0, &term1 );
   3905     sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
   3906     while ( (sbits64) rem0 < 0 ) {
   3907         --zSig0;
   3908         add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
   3909     }
   3910     zSig1 = estimateDiv128To64( rem1, 0, bSig );
   3911     if ( (bits64) ( zSig1<<1 ) <= 8 ) {
   3912         mul64To128( bSig, zSig1, &term1, &term2 );
   3913         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
   3914         while ( (sbits64) rem1 < 0 ) {
   3915             --zSig1;
   3916             add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
   3917         }
   3918         zSig1 |= ( ( rem1 | rem2 ) != 0 );
   3919     }
   3920     return
   3921         roundAndPackFloatx80(
   3922             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
   3923 
   3924 }
   3925 
   3926 /*
   3927 -------------------------------------------------------------------------------
   3928 Returns the remainder of the extended double-precision floating-point value
   3929 `a' with respect to the corresponding value `b'.  The operation is performed
   3930 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3931 -------------------------------------------------------------------------------
   3932 */
   3933 floatx80 floatx80_rem( floatx80 a, floatx80 b )
   3934 {
   3935     flag aSign, bSign, zSign;
   3936     int32 aExp, bExp, expDiff;
   3937     bits64 aSig0, aSig1, bSig;
   3938     bits64 q, term0, term1, alternateASig0, alternateASig1;
   3939     floatx80 z;
   3940 
   3941     aSig0 = extractFloatx80Frac( a );
   3942     aExp = extractFloatx80Exp( a );
   3943     aSign = extractFloatx80Sign( a );
   3944     bSig = extractFloatx80Frac( b );
   3945     bExp = extractFloatx80Exp( b );
   3946     bSign = extractFloatx80Sign( b );
   3947     if ( aExp == 0x7FFF ) {
   3948         if (    (bits64) ( aSig0<<1 )
   3949              || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
   3950             return propagateFloatx80NaN( a, b );
   3951         }
   3952         goto invalid;
   3953     }
   3954     if ( bExp == 0x7FFF ) {
   3955         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3956         return a;
   3957     }
   3958     if ( bExp == 0 ) {
   3959         if ( bSig == 0 ) {
   3960  invalid:
   3961             float_raise( float_flag_invalid );
   3962             z.low = floatx80_default_nan_low;
   3963             z.high = floatx80_default_nan_high;
   3964             return z;
   3965         }
   3966         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
   3967     }
   3968     if ( aExp == 0 ) {
   3969         if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
   3970         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
   3971     }
   3972     bSig |= LIT64( 0x8000000000000000 );
   3973     zSign = aSign;
   3974     expDiff = aExp - bExp;
   3975     aSig1 = 0;
   3976     if ( expDiff < 0 ) {
   3977         if ( expDiff < -1 ) return a;
   3978         shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
   3979         expDiff = 0;
   3980     }
   3981     q = ( bSig <= aSig0 );
   3982     if ( q ) aSig0 -= bSig;
   3983     expDiff -= 64;
   3984     while ( 0 < expDiff ) {
   3985         q = estimateDiv128To64( aSig0, aSig1, bSig );
   3986         q = ( 2 < q ) ? q - 2 : 0;
   3987         mul64To128( bSig, q, &term0, &term1 );
   3988         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
   3989         shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
   3990         expDiff -= 62;
   3991     }
   3992     expDiff += 64;
   3993     if ( 0 < expDiff ) {
   3994         q = estimateDiv128To64( aSig0, aSig1, bSig );
   3995         q = ( 2 < q ) ? q - 2 : 0;
   3996         q >>= 64 - expDiff;
   3997         mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
   3998         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
   3999         shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
   4000         while ( le128( term0, term1, aSig0, aSig1 ) ) {
   4001             ++q;
   4002             sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
   4003         }
   4004     }
   4005     else {
   4006         term1 = 0;
   4007         term0 = bSig;
   4008     }
   4009     sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
   4010     if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
   4011          || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
   4012               && ( q & 1 ) )
   4013        ) {
   4014         aSig0 = alternateASig0;
   4015         aSig1 = alternateASig1;
   4016         zSign = ! zSign;
   4017     }
   4018     return
   4019         normalizeRoundAndPackFloatx80(
   4020             80, zSign, bExp + expDiff, aSig0, aSig1 );
   4021 
   4022 }
   4023 
   4024 /*
   4025 -------------------------------------------------------------------------------
   4026 Returns the square root of the extended double-precision floating-point
   4027 value `a'.  The operation is performed according to the IEC/IEEE Standard
   4028 for Binary Floating-Point Arithmetic.
   4029 -------------------------------------------------------------------------------
   4030 */
   4031 floatx80 floatx80_sqrt( floatx80 a )
   4032 {
   4033     flag aSign;
   4034     int32 aExp, zExp;
   4035     bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
   4036     bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
   4037     floatx80 z;
   4038 
   4039     aSig0 = extractFloatx80Frac( a );
   4040     aExp = extractFloatx80Exp( a );
   4041     aSign = extractFloatx80Sign( a );
   4042     if ( aExp == 0x7FFF ) {
   4043         if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
   4044         if ( ! aSign ) return a;
   4045         goto invalid;
   4046     }
   4047     if ( aSign ) {
   4048         if ( ( aExp | aSig0 ) == 0 ) return a;
   4049  invalid:
   4050         float_raise( float_flag_invalid );
   4051         z.low = floatx80_default_nan_low;
   4052         z.high = floatx80_default_nan_high;
   4053         return z;
   4054     }
   4055     if ( aExp == 0 ) {
   4056         if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
   4057         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
   4058     }
   4059     zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
   4060     zSig0 = estimateSqrt32( aExp, aSig0>>32 );
   4061     shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
   4062     zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
   4063     doubleZSig0 = zSig0<<1;
   4064     mul64To128( zSig0, zSig0, &term0, &term1 );
   4065     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
   4066     while ( (sbits64) rem0 < 0 ) {
   4067         --zSig0;
   4068         doubleZSig0 -= 2;
   4069         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
   4070     }
   4071     zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
   4072     if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
   4073         if ( zSig1 == 0 ) zSig1 = 1;
   4074         mul64To128( doubleZSig0, zSig1, &term1, &term2 );
   4075         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
   4076         mul64To128( zSig1, zSig1, &term2, &term3 );
   4077         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
   4078         while ( (sbits64) rem1 < 0 ) {
   4079             --zSig1;
   4080             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
   4081             term3 |= 1;
   4082             term2 |= doubleZSig0;
   4083             add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
   4084         }
   4085         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
   4086     }
   4087     shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
   4088     zSig0 |= doubleZSig0;
   4089     return
   4090         roundAndPackFloatx80(
   4091             floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
   4092 
   4093 }
   4094 
   4095 /*
   4096 -------------------------------------------------------------------------------
   4097 Returns 1 if the extended double-precision floating-point value `a' is
   4098 equal to the corresponding value `b', and 0 otherwise.  The comparison is
   4099 performed according to the IEC/IEEE Standard for Binary Floating-Point
   4100 Arithmetic.
   4101 -------------------------------------------------------------------------------
   4102 */
   4103 flag floatx80_eq( floatx80 a, floatx80 b )
   4104 {
   4105 
   4106     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4107               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4108          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4109               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4110        ) {
   4111         if (    floatx80_is_signaling_nan( a )
   4112              || floatx80_is_signaling_nan( b ) ) {
   4113             float_raise( float_flag_invalid );
   4114         }
   4115         return 0;
   4116     }
   4117     return
   4118            ( a.low == b.low )
   4119         && (    ( a.high == b.high )
   4120              || (    ( a.low == 0 )
   4121                   && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
   4122            );
   4123 
   4124 }
   4125 
   4126 /*
   4127 -------------------------------------------------------------------------------
   4128 Returns 1 if the extended double-precision floating-point value `a' is
   4129 less than or equal to the corresponding value `b', and 0 otherwise.  The
   4130 comparison is performed according to the IEC/IEEE Standard for Binary
   4131 Floating-Point Arithmetic.
   4132 -------------------------------------------------------------------------------
   4133 */
   4134 flag floatx80_le( floatx80 a, floatx80 b )
   4135 {
   4136     flag aSign, bSign;
   4137 
   4138     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4139               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4140          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4141               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4142        ) {
   4143         float_raise( float_flag_invalid );
   4144         return 0;
   4145     }
   4146     aSign = extractFloatx80Sign( a );
   4147     bSign = extractFloatx80Sign( b );
   4148     if ( aSign != bSign ) {
   4149         return
   4150                aSign
   4151             || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   4152                  == 0 );
   4153     }
   4154     return
   4155           aSign ? le128( b.high, b.low, a.high, a.low )
   4156         : le128( a.high, a.low, b.high, b.low );
   4157 
   4158 }
   4159 
   4160 /*
   4161 -------------------------------------------------------------------------------
   4162 Returns 1 if the extended double-precision floating-point value `a' is
   4163 less than the corresponding value `b', and 0 otherwise.  The comparison
   4164 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4165 Arithmetic.
   4166 -------------------------------------------------------------------------------
   4167 */
   4168 flag floatx80_lt( floatx80 a, floatx80 b )
   4169 {
   4170     flag aSign, bSign;
   4171 
   4172     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4173               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4174          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4175               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4176        ) {
   4177         float_raise( float_flag_invalid );
   4178         return 0;
   4179     }
   4180     aSign = extractFloatx80Sign( a );
   4181     bSign = extractFloatx80Sign( b );
   4182     if ( aSign != bSign ) {
   4183         return
   4184                aSign
   4185             && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   4186                  != 0 );
   4187     }
   4188     return
   4189           aSign ? lt128( b.high, b.low, a.high, a.low )
   4190         : lt128( a.high, a.low, b.high, b.low );
   4191 
   4192 }
   4193 
   4194 /*
   4195 -------------------------------------------------------------------------------
   4196 Returns 1 if the extended double-precision floating-point value `a' is equal
   4197 to the corresponding value `b', and 0 otherwise.  The invalid exception is
   4198 raised if either operand is a NaN.  Otherwise, the comparison is performed
   4199 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   4200 -------------------------------------------------------------------------------
   4201 */
   4202 flag floatx80_eq_signaling( floatx80 a, floatx80 b )
   4203 {
   4204 
   4205     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4206               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4207          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4208               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4209        ) {
   4210         float_raise( float_flag_invalid );
   4211         return 0;
   4212     }
   4213     return
   4214            ( a.low == b.low )
   4215         && (    ( a.high == b.high )
   4216              || (    ( a.low == 0 )
   4217                   && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
   4218            );
   4219 
   4220 }
   4221 
   4222 /*
   4223 -------------------------------------------------------------------------------
   4224 Returns 1 if the extended double-precision floating-point value `a' is less
   4225 than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
   4226 do not cause an exception.  Otherwise, the comparison is performed according
   4227 to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   4228 -------------------------------------------------------------------------------
   4229 */
   4230 flag floatx80_le_quiet( floatx80 a, floatx80 b )
   4231 {
   4232     flag aSign, bSign;
   4233 
   4234     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4235               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4236          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4237               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4238        ) {
   4239         if (    floatx80_is_signaling_nan( a )
   4240              || floatx80_is_signaling_nan( b ) ) {
   4241             float_raise( float_flag_invalid );
   4242         }
   4243         return 0;
   4244     }
   4245     aSign = extractFloatx80Sign( a );
   4246     bSign = extractFloatx80Sign( b );
   4247     if ( aSign != bSign ) {
   4248         return
   4249                aSign
   4250             || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   4251                  == 0 );
   4252     }
   4253     return
   4254           aSign ? le128( b.high, b.low, a.high, a.low )
   4255         : le128( a.high, a.low, b.high, b.low );
   4256 
   4257 }
   4258 
   4259 /*
   4260 -------------------------------------------------------------------------------
   4261 Returns 1 if the extended double-precision floating-point value `a' is less
   4262 than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
   4263 an exception.  Otherwise, the comparison is performed according to the
   4264 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   4265 -------------------------------------------------------------------------------
   4266 */
   4267 flag floatx80_lt_quiet( floatx80 a, floatx80 b )
   4268 {
   4269     flag aSign, bSign;
   4270 
   4271     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4272               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4273          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4274               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4275        ) {
   4276         if (    floatx80_is_signaling_nan( a )
   4277              || floatx80_is_signaling_nan( b ) ) {
   4278             float_raise( float_flag_invalid );
   4279         }
   4280         return 0;
   4281     }
   4282     aSign = extractFloatx80Sign( a );
   4283     bSign = extractFloatx80Sign( b );
   4284     if ( aSign != bSign ) {
   4285         return
   4286                aSign
   4287             && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   4288                  != 0 );
   4289     }
   4290     return
   4291           aSign ? lt128( b.high, b.low, a.high, a.low )
   4292         : lt128( a.high, a.low, b.high, b.low );
   4293 
   4294 }
   4295 
   4296 #endif
   4297 
   4298 #ifdef FLOAT128
   4299 
   4300 /*
   4301 -------------------------------------------------------------------------------
   4302 Returns the result of converting the quadruple-precision floating-point
   4303 value `a' to the 32-bit two's complement integer format.  The conversion
   4304 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4305 Arithmetic---which means in particular that the conversion is rounded
   4306 according to the current rounding mode.  If `a' is a NaN, the largest
   4307 positive integer is returned.  Otherwise, if the conversion overflows, the
   4308 largest integer with the same sign as `a' is returned.
   4309 -------------------------------------------------------------------------------
   4310 */
   4311 int32 float128_to_int32( float128 a )
   4312 {
   4313     flag aSign;
   4314     int32 aExp, shiftCount;
   4315     bits64 aSig0, aSig1;
   4316 
   4317     aSig1 = extractFloat128Frac1( a );
   4318     aSig0 = extractFloat128Frac0( a );
   4319     aExp = extractFloat128Exp( a );
   4320     aSign = extractFloat128Sign( a );
   4321     if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
   4322     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
   4323     aSig0 |= ( aSig1 != 0 );
   4324     shiftCount = 0x4028 - aExp;
   4325     if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
   4326     return roundAndPackInt32( aSign, aSig0 );
   4327 
   4328 }
   4329 
   4330 /*
   4331 -------------------------------------------------------------------------------
   4332 Returns the result of converting the quadruple-precision floating-point
   4333 value `a' to the 32-bit two's complement integer format.  The conversion
   4334 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4335 Arithmetic, except that the conversion is always rounded toward zero.  If
   4336 `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
   4337 conversion overflows, the largest integer with the same sign as `a' is
   4338 returned.
   4339 -------------------------------------------------------------------------------
   4340 */
   4341 int32 float128_to_int32_round_to_zero( float128 a )
   4342 {
   4343     flag aSign;
   4344     int32 aExp, shiftCount;
   4345     bits64 aSig0, aSig1, savedASig;
   4346     int32 z;
   4347 
   4348     aSig1 = extractFloat128Frac1( a );
   4349     aSig0 = extractFloat128Frac0( a );
   4350     aExp = extractFloat128Exp( a );
   4351     aSign = extractFloat128Sign( a );
   4352     aSig0 |= ( aSig1 != 0 );
   4353     if ( 0x401E < aExp ) {
   4354         if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
   4355         goto invalid;
   4356     }
   4357     else if ( aExp < 0x3FFF ) {
   4358         if ( aExp || aSig0 ) float_exception_flags |= float_flag_inexact;
   4359         return 0;
   4360     }
   4361     aSig0 |= LIT64( 0x0001000000000000 );
   4362     shiftCount = 0x402F - aExp;
   4363     savedASig = aSig0;
   4364     aSig0 >>= shiftCount;
   4365     z = (int32)aSig0;
   4366     if ( aSign ) z = - z;
   4367     if ( ( z < 0 ) ^ aSign ) {
   4368  invalid:
   4369         float_raise( float_flag_invalid );
   4370         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
   4371     }
   4372     if ( ( aSig0<<shiftCount ) != savedASig ) {
   4373         float_exception_flags |= float_flag_inexact;
   4374     }
   4375     return z;
   4376 
   4377 }
   4378 
   4379 /*
   4380 -------------------------------------------------------------------------------
   4381 Returns the result of converting the quadruple-precision floating-point
   4382 value `a' to the 64-bit two's complement integer format.  The conversion
   4383 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4384 Arithmetic---which means in particular that the conversion is rounded
   4385 according to the current rounding mode.  If `a' is a NaN, the largest
   4386 positive integer is returned.  Otherwise, if the conversion overflows, the
   4387 largest integer with the same sign as `a' is returned.
   4388 -------------------------------------------------------------------------------
   4389 */
   4390 int64 float128_to_int64( float128 a )
   4391 {
   4392     flag aSign;
   4393     int32 aExp, shiftCount;
   4394     bits64 aSig0, aSig1;
   4395 
   4396     aSig1 = extractFloat128Frac1( a );
   4397     aSig0 = extractFloat128Frac0( a );
   4398     aExp = extractFloat128Exp( a );
   4399     aSign = extractFloat128Sign( a );
   4400     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
   4401     shiftCount = 0x402F - aExp;
   4402     if ( shiftCount <= 0 ) {
   4403         if ( 0x403E < aExp ) {
   4404             float_raise( float_flag_invalid );
   4405             if (    ! aSign
   4406                  || (    ( aExp == 0x7FFF )
   4407                       && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
   4408                     )
   4409                ) {
   4410                 return LIT64( 0x7FFFFFFFFFFFFFFF );
   4411             }
   4412             return (sbits64) LIT64( 0x8000000000000000 );
   4413         }
   4414         shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
   4415     }
   4416     else {
   4417         shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
   4418     }
   4419     return roundAndPackInt64( aSign, aSig0, aSig1 );
   4420 
   4421 }
   4422 
   4423 /*
   4424 -------------------------------------------------------------------------------
   4425 Returns the result of converting the quadruple-precision floating-point
   4426 value `a' to the 64-bit two's complement integer format.  The conversion
   4427 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4428 Arithmetic, except that the conversion is always rounded toward zero.
   4429 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
   4430 the conversion overflows, the largest integer with the same sign as `a' is
   4431 returned.
   4432 -------------------------------------------------------------------------------
   4433 */
   4434 int64 float128_to_int64_round_to_zero( float128 a )
   4435 {
   4436     flag aSign;
   4437     int32 aExp, shiftCount;
   4438     bits64 aSig0, aSig1;
   4439     int64 z;
   4440 
   4441     aSig1 = extractFloat128Frac1( a );
   4442     aSig0 = extractFloat128Frac0( a );
   4443     aExp = extractFloat128Exp( a );
   4444     aSign = extractFloat128Sign( a );
   4445     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
   4446     shiftCount = aExp - 0x402F;
   4447     if ( 0 < shiftCount ) {
   4448         if ( 0x403E <= aExp ) {
   4449             aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
   4450             if (    ( a.high == LIT64( 0xC03E000000000000 ) )
   4451                  && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
   4452                 if ( aSig1 ) float_exception_flags |= float_flag_inexact;
   4453             }
   4454             else {
   4455                 float_raise( float_flag_invalid );
   4456                 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
   4457                     return LIT64( 0x7FFFFFFFFFFFFFFF );
   4458                 }
   4459             }
   4460             return (sbits64) LIT64( 0x8000000000000000 );
   4461         }
   4462         z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
   4463         if ( (bits64) ( aSig1<<shiftCount ) ) {
   4464             float_exception_flags |= float_flag_inexact;
   4465         }
   4466     }
   4467     else {
   4468         if ( aExp < 0x3FFF ) {
   4469             if ( aExp | aSig0 | aSig1 ) {
   4470                 float_exception_flags |= float_flag_inexact;
   4471             }
   4472             return 0;
   4473         }
   4474         z = aSig0>>( - shiftCount );
   4475         if (    aSig1
   4476              || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
   4477             float_exception_flags |= float_flag_inexact;
   4478         }
   4479     }
   4480     if ( aSign ) z = - z;
   4481     return z;
   4482 
   4483 }
   4484 
   4485 #if (defined(SOFTFLOATSPARC64_FOR_GCC) || defined(SOFTFLOAT_FOR_GCC)) \
   4486     && defined(SOFTFLOAT_NEED_FIXUNS)
   4487 /*
   4488  * just like above - but do not care for overflow of signed results
   4489  */
   4490 uint64 float128_to_uint64_round_to_zero( float128 a )
   4491 {
   4492     flag aSign;
   4493     int32 aExp, shiftCount;
   4494     bits64 aSig0, aSig1;
   4495     uint64 z;
   4496 
   4497     aSig1 = extractFloat128Frac1( a );
   4498     aSig0 = extractFloat128Frac0( a );
   4499     aExp = extractFloat128Exp( a );
   4500     aSign = extractFloat128Sign( a );
   4501     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
   4502     shiftCount = aExp - 0x402F;
   4503     if ( 0 < shiftCount ) {
   4504         if ( 0x403F <= aExp ) {
   4505             aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
   4506             if (    ( a.high == LIT64( 0xC03E000000000000 ) )
   4507                  && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
   4508                 if ( aSig1 ) float_exception_flags |= float_flag_inexact;
   4509             }
   4510             else {
   4511                 float_raise( float_flag_invalid );
   4512             }
   4513             return LIT64( 0xFFFFFFFFFFFFFFFF );
   4514         }
   4515         z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
   4516         if ( (bits64) ( aSig1<<shiftCount ) ) {
   4517             float_exception_flags |= float_flag_inexact;
   4518         }
   4519     }
   4520     else {
   4521         if ( aExp < 0x3FFF ) {
   4522             if ( aExp | aSig0 | aSig1 ) {
   4523                 float_exception_flags |= float_flag_inexact;
   4524             }
   4525             return 0;
   4526         }
   4527         z = aSig0>>( - shiftCount );
   4528         if (aSig1 || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
   4529             float_exception_flags |= float_flag_inexact;
   4530         }
   4531     }
   4532     if ( aSign ) z = - z;
   4533     return z;
   4534 
   4535 }
   4536 #endif /* (SOFTFLOATSPARC64_FOR_GCC || SOFTFLOAT_FOR_GCC) && SOFTFLOAT_NEED_FIXUNS */
   4537 
   4538 /*
   4539 -------------------------------------------------------------------------------
   4540 Returns the result of converting the quadruple-precision floating-point
   4541 value `a' to the single-precision floating-point format.  The conversion
   4542 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4543 Arithmetic.
   4544 -------------------------------------------------------------------------------
   4545 */
   4546 float32 float128_to_float32( float128 a )
   4547 {
   4548     flag aSign;
   4549     int32 aExp;
   4550     bits64 aSig0, aSig1;
   4551     bits32 zSig;
   4552 
   4553     aSig1 = extractFloat128Frac1( a );
   4554     aSig0 = extractFloat128Frac0( a );
   4555     aExp = extractFloat128Exp( a );
   4556     aSign = extractFloat128Sign( a );
   4557     if ( aExp == 0x7FFF ) {
   4558         if ( aSig0 | aSig1 ) {
   4559             return commonNaNToFloat32( float128ToCommonNaN( a ) );
   4560         }
   4561         return packFloat32( aSign, 0xFF, 0 );
   4562     }
   4563     aSig0 |= ( aSig1 != 0 );
   4564     shift64RightJamming( aSig0, 18, &aSig0 );
   4565     zSig = (bits32)aSig0;
   4566     if ( aExp || zSig ) {
   4567         zSig |= 0x40000000;
   4568         aExp -= 0x3F81;
   4569     }
   4570     return roundAndPackFloat32( aSign, aExp, zSig );
   4571 
   4572 }
   4573 
   4574 /*
   4575 -------------------------------------------------------------------------------
   4576 Returns the result of converting the quadruple-precision floating-point
   4577 value `a' to the double-precision floating-point format.  The conversion
   4578 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4579 Arithmetic.
   4580 -------------------------------------------------------------------------------
   4581 */
   4582 float64 float128_to_float64( float128 a )
   4583 {
   4584     flag aSign;
   4585     int32 aExp;
   4586     bits64 aSig0, aSig1;
   4587 
   4588     aSig1 = extractFloat128Frac1( a );
   4589     aSig0 = extractFloat128Frac0( a );
   4590     aExp = extractFloat128Exp( a );
   4591     aSign = extractFloat128Sign( a );
   4592     if ( aExp == 0x7FFF ) {
   4593         if ( aSig0 | aSig1 ) {
   4594             return commonNaNToFloat64( float128ToCommonNaN( a ) );
   4595         }
   4596         return packFloat64( aSign, 0x7FF, 0 );
   4597     }
   4598     shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
   4599     aSig0 |= ( aSig1 != 0 );
   4600     if ( aExp || aSig0 ) {
   4601         aSig0 |= LIT64( 0x4000000000000000 );
   4602         aExp -= 0x3C01;
   4603     }
   4604     return roundAndPackFloat64( aSign, aExp, aSig0 );
   4605 
   4606 }
   4607 
   4608 #ifdef FLOATX80
   4609 
   4610 /*
   4611 -------------------------------------------------------------------------------
   4612 Returns the result of converting the quadruple-precision floating-point
   4613 value `a' to the extended double-precision floating-point format.  The
   4614 conversion is performed according to the IEC/IEEE Standard for Binary
   4615 Floating-Point Arithmetic.
   4616 -------------------------------------------------------------------------------
   4617 */
   4618 floatx80 float128_to_floatx80( float128 a )
   4619 {
   4620     flag aSign;
   4621     int32 aExp;
   4622     bits64 aSig0, aSig1;
   4623 
   4624     aSig1 = extractFloat128Frac1( a );
   4625     aSig0 = extractFloat128Frac0( a );
   4626     aExp = extractFloat128Exp( a );
   4627     aSign = extractFloat128Sign( a );
   4628     if ( aExp == 0x7FFF ) {
   4629         if ( aSig0 | aSig1 ) {
   4630             return commonNaNToFloatx80( float128ToCommonNaN( a ) );
   4631         }
   4632         return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   4633     }
   4634     if ( aExp == 0 ) {
   4635         if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
   4636         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   4637     }
   4638     else {
   4639         aSig0 |= LIT64( 0x0001000000000000 );
   4640     }
   4641     shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
   4642     return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
   4643 
   4644 }
   4645 
   4646 #endif
   4647 
   4648 /*
   4649 -------------------------------------------------------------------------------
   4650 Rounds the quadruple-precision floating-point value `a' to an integer, and
   4651 returns the result as a quadruple-precision floating-point value.  The
   4652 operation is performed according to the IEC/IEEE Standard for Binary
   4653 Floating-Point Arithmetic.
   4654 -------------------------------------------------------------------------------
   4655 */
   4656 float128 float128_round_to_int( float128 a )
   4657 {
   4658     flag aSign;
   4659     int32 aExp;
   4660     bits64 lastBitMask, roundBitsMask;
   4661     int8 roundingMode;
   4662     float128 z;
   4663 
   4664     aExp = extractFloat128Exp( a );
   4665     if ( 0x402F <= aExp ) {
   4666         if ( 0x406F <= aExp ) {
   4667             if (    ( aExp == 0x7FFF )
   4668                  && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
   4669                ) {
   4670                 return propagateFloat128NaN( a, a );
   4671             }
   4672             return a;
   4673         }
   4674         lastBitMask = 1;
   4675         lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
   4676         roundBitsMask = lastBitMask - 1;
   4677         z = a;
   4678         roundingMode = float_rounding_mode;
   4679         if ( roundingMode == float_round_nearest_even ) {
   4680             if ( lastBitMask ) {
   4681                 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
   4682                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
   4683             }
   4684             else {
   4685                 if ( (sbits64) z.low < 0 ) {
   4686                     ++z.high;
   4687                     if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
   4688                 }
   4689             }
   4690         }
   4691         else if ( roundingMode != float_round_to_zero ) {
   4692             if (   extractFloat128Sign( z )
   4693                  ^ ( roundingMode == float_round_up ) ) {
   4694                 add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
   4695             }
   4696         }
   4697         z.low &= ~ roundBitsMask;
   4698     }
   4699     else {
   4700         if ( aExp < 0x3FFF ) {
   4701             if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
   4702             float_exception_flags |= float_flag_inexact;
   4703             aSign = extractFloat128Sign( a );
   4704             switch ( float_rounding_mode ) {
   4705              case float_round_nearest_even:
   4706                 if (    ( aExp == 0x3FFE )
   4707                      && (   extractFloat128Frac0( a )
   4708                           | extractFloat128Frac1( a ) )
   4709                    ) {
   4710                     return packFloat128( aSign, 0x3FFF, 0, 0 );
   4711                 }
   4712                 break;
   4713 	     case float_round_to_zero:
   4714 		break;
   4715              case float_round_down:
   4716                 return
   4717                       aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
   4718                     : packFloat128( 0, 0, 0, 0 );
   4719              case float_round_up:
   4720                 return
   4721                       aSign ? packFloat128( 1, 0, 0, 0 )
   4722                     : packFloat128( 0, 0x3FFF, 0, 0 );
   4723             }
   4724             return packFloat128( aSign, 0, 0, 0 );
   4725         }
   4726         lastBitMask = 1;
   4727         lastBitMask <<= 0x402F - aExp;
   4728         roundBitsMask = lastBitMask - 1;
   4729         z.low = 0;
   4730         z.high = a.high;
   4731         roundingMode = float_rounding_mode;
   4732         if ( roundingMode == float_round_nearest_even ) {
   4733             z.high += lastBitMask>>1;
   4734             if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
   4735                 z.high &= ~ lastBitMask;
   4736             }
   4737         }
   4738         else if ( roundingMode != float_round_to_zero ) {
   4739             if (   extractFloat128Sign( z )
   4740                  ^ ( roundingMode == float_round_up ) ) {
   4741                 z.high |= ( a.low != 0 );
   4742                 z.high += roundBitsMask;
   4743             }
   4744         }
   4745         z.high &= ~ roundBitsMask;
   4746     }
   4747     if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
   4748         float_exception_flags |= float_flag_inexact;
   4749     }
   4750     return z;
   4751 
   4752 }
   4753 
   4754 /*
   4755 -------------------------------------------------------------------------------
   4756 Returns the result of adding the absolute values of the quadruple-precision
   4757 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
   4758 before being returned.  `zSign' is ignored if the result is a NaN.
   4759 The addition is performed according to the IEC/IEEE Standard for Binary
   4760 Floating-Point Arithmetic.
   4761 -------------------------------------------------------------------------------
   4762 */
   4763 static float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
   4764 {
   4765     int32 aExp, bExp, zExp;
   4766     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
   4767     int32 expDiff;
   4768 
   4769     aSig1 = extractFloat128Frac1( a );
   4770     aSig0 = extractFloat128Frac0( a );
   4771     aExp = extractFloat128Exp( a );
   4772     bSig1 = extractFloat128Frac1( b );
   4773     bSig0 = extractFloat128Frac0( b );
   4774     bExp = extractFloat128Exp( b );
   4775     expDiff = aExp - bExp;
   4776     if ( 0 < expDiff ) {
   4777         if ( aExp == 0x7FFF ) {
   4778             if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
   4779             return a;
   4780         }
   4781         if ( bExp == 0 ) {
   4782             --expDiff;
   4783         }
   4784         else {
   4785             bSig0 |= LIT64( 0x0001000000000000 );
   4786         }
   4787         shift128ExtraRightJamming(
   4788             bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
   4789         zExp = aExp;
   4790     }
   4791     else if ( expDiff < 0 ) {
   4792         if ( bExp == 0x7FFF ) {
   4793             if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   4794             return packFloat128( zSign, 0x7FFF, 0, 0 );
   4795         }
   4796         if ( aExp == 0 ) {
   4797             ++expDiff;
   4798         }
   4799         else {
   4800             aSig0 |= LIT64( 0x0001000000000000 );
   4801         }
   4802         shift128ExtraRightJamming(
   4803             aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
   4804         zExp = bExp;
   4805     }
   4806     else {
   4807         if ( aExp == 0x7FFF ) {
   4808             if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
   4809                 return propagateFloat128NaN( a, b );
   4810             }
   4811             return a;
   4812         }
   4813         add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
   4814         if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
   4815         zSig2 = 0;
   4816         zSig0 |= LIT64( 0x0002000000000000 );
   4817         zExp = aExp;
   4818         goto shiftRight1;
   4819     }
   4820     aSig0 |= LIT64( 0x0001000000000000 );
   4821     add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
   4822     --zExp;
   4823     if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
   4824     ++zExp;
   4825  shiftRight1:
   4826     shift128ExtraRightJamming(
   4827         zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
   4828  roundAndPack:
   4829     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
   4830 
   4831 }
   4832 
   4833 /*
   4834 -------------------------------------------------------------------------------
   4835 Returns the result of subtracting the absolute values of the quadruple-
   4836 precision floating-point values `a' and `b'.  If `zSign' is 1, the
   4837 difference is negated before being returned.  `zSign' is ignored if the
   4838 result is a NaN.  The subtraction is performed according to the IEC/IEEE
   4839 Standard for Binary Floating-Point Arithmetic.
   4840 -------------------------------------------------------------------------------
   4841 */
   4842 static float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
   4843 {
   4844     int32 aExp, bExp, zExp;
   4845     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
   4846     int32 expDiff;
   4847     float128 z;
   4848 
   4849     aSig1 = extractFloat128Frac1( a );
   4850     aSig0 = extractFloat128Frac0( a );
   4851     aExp = extractFloat128Exp( a );
   4852     bSig1 = extractFloat128Frac1( b );
   4853     bSig0 = extractFloat128Frac0( b );
   4854     bExp = extractFloat128Exp( b );
   4855     expDiff = aExp - bExp;
   4856     shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
   4857     shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
   4858     if ( 0 < expDiff ) goto aExpBigger;
   4859     if ( expDiff < 0 ) goto bExpBigger;
   4860     if ( aExp == 0x7FFF ) {
   4861         if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
   4862             return propagateFloat128NaN( a, b );
   4863         }
   4864         float_raise( float_flag_invalid );
   4865         z.low = float128_default_nan_low;
   4866         z.high = float128_default_nan_high;
   4867         return z;
   4868     }
   4869     if ( aExp == 0 ) {
   4870         aExp = 1;
   4871         bExp = 1;
   4872     }
   4873     if ( bSig0 < aSig0 ) goto aBigger;
   4874     if ( aSig0 < bSig0 ) goto bBigger;
   4875     if ( bSig1 < aSig1 ) goto aBigger;
   4876     if ( aSig1 < bSig1 ) goto bBigger;
   4877     return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 );
   4878  bExpBigger:
   4879     if ( bExp == 0x7FFF ) {
   4880         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   4881         return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
   4882     }
   4883     if ( aExp == 0 ) {
   4884         ++expDiff;
   4885     }
   4886     else {
   4887         aSig0 |= LIT64( 0x4000000000000000 );
   4888     }
   4889     shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
   4890     bSig0 |= LIT64( 0x4000000000000000 );
   4891  bBigger:
   4892     sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
   4893     zExp = bExp;
   4894     zSign ^= 1;
   4895     goto normalizeRoundAndPack;
   4896  aExpBigger:
   4897     if ( aExp == 0x7FFF ) {
   4898         if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
   4899         return a;
   4900     }
   4901     if ( bExp == 0 ) {
   4902         --expDiff;
   4903     }
   4904     else {
   4905         bSig0 |= LIT64( 0x4000000000000000 );
   4906     }
   4907     shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
   4908     aSig0 |= LIT64( 0x4000000000000000 );
   4909  aBigger:
   4910     sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
   4911     zExp = aExp;
   4912  normalizeRoundAndPack:
   4913     --zExp;
   4914     return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
   4915 
   4916 }
   4917 
   4918 /*
   4919 -------------------------------------------------------------------------------
   4920 Returns the result of adding the quadruple-precision floating-point values
   4921 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   4922 for Binary Floating-Point Arithmetic.
   4923 -------------------------------------------------------------------------------
   4924 */
   4925 float128 float128_add( float128 a, float128 b )
   4926 {
   4927     flag aSign, bSign;
   4928 
   4929     aSign = extractFloat128Sign( a );
   4930     bSign = extractFloat128Sign( b );
   4931     if ( aSign == bSign ) {
   4932         return addFloat128Sigs( a, b, aSign );
   4933     }
   4934     else {
   4935         return subFloat128Sigs( a, b, aSign );
   4936     }
   4937 
   4938 }
   4939 
   4940 /*
   4941 -------------------------------------------------------------------------------
   4942 Returns the result of subtracting the quadruple-precision floating-point
   4943 values `a' and `b'.  The operation is performed according to the IEC/IEEE
   4944 Standard for Binary Floating-Point Arithmetic.
   4945 -------------------------------------------------------------------------------
   4946 */
   4947 float128 float128_sub( float128 a, float128 b )
   4948 {
   4949     flag aSign, bSign;
   4950 
   4951     aSign = extractFloat128Sign( a );
   4952     bSign = extractFloat128Sign( b );
   4953     if ( aSign == bSign ) {
   4954         return subFloat128Sigs( a, b, aSign );
   4955     }
   4956     else {
   4957         return addFloat128Sigs( a, b, aSign );
   4958     }
   4959 
   4960 }
   4961 
   4962 /*
   4963 -------------------------------------------------------------------------------
   4964 Returns the result of multiplying the quadruple-precision floating-point
   4965 values `a' and `b'.  The operation is performed according to the IEC/IEEE
   4966 Standard for Binary Floating-Point Arithmetic.
   4967 -------------------------------------------------------------------------------
   4968 */
   4969 float128 float128_mul( float128 a, float128 b )
   4970 {
   4971     flag aSign, bSign, zSign;
   4972     int32 aExp, bExp, zExp;
   4973     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
   4974     float128 z;
   4975 
   4976     aSig1 = extractFloat128Frac1( a );
   4977     aSig0 = extractFloat128Frac0( a );
   4978     aExp = extractFloat128Exp( a );
   4979     aSign = extractFloat128Sign( a );
   4980     bSig1 = extractFloat128Frac1( b );
   4981     bSig0 = extractFloat128Frac0( b );
   4982     bExp = extractFloat128Exp( b );
   4983     bSign = extractFloat128Sign( b );
   4984     zSign = aSign ^ bSign;
   4985     if ( aExp == 0x7FFF ) {
   4986         if (    ( aSig0 | aSig1 )
   4987              || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
   4988             return propagateFloat128NaN( a, b );
   4989         }
   4990         if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
   4991         return packFloat128( zSign, 0x7FFF, 0, 0 );
   4992     }
   4993     if ( bExp == 0x7FFF ) {
   4994         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   4995         if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
   4996  invalid:
   4997             float_raise( float_flag_invalid );
   4998             z.low = float128_default_nan_low;
   4999             z.high = float128_default_nan_high;
   5000             return z;
   5001         }
   5002         return packFloat128( zSign, 0x7FFF, 0, 0 );
   5003     }
   5004     if ( aExp == 0 ) {
   5005         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
   5006         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   5007     }
   5008     if ( bExp == 0 ) {
   5009         if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
   5010         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
   5011     }
   5012     zExp = aExp + bExp - 0x4000;
   5013     aSig0 |= LIT64( 0x0001000000000000 );
   5014     shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
   5015     mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
   5016     add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
   5017     zSig2 |= ( zSig3 != 0 );
   5018     if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
   5019         shift128ExtraRightJamming(
   5020             zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
   5021         ++zExp;
   5022     }
   5023     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
   5024 
   5025 }
   5026 
   5027 /*
   5028 -------------------------------------------------------------------------------
   5029 Returns the result of dividing the quadruple-precision floating-point value
   5030 `a' by the corresponding value `b'.  The operation is performed according to
   5031 the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5032 -------------------------------------------------------------------------------
   5033 */
   5034 float128 float128_div( float128 a, float128 b )
   5035 {
   5036     flag aSign, bSign, zSign;
   5037     int32 aExp, bExp, zExp;
   5038     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
   5039     bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
   5040     float128 z;
   5041 
   5042     aSig1 = extractFloat128Frac1( a );
   5043     aSig0 = extractFloat128Frac0( a );
   5044     aExp = extractFloat128Exp( a );
   5045     aSign = extractFloat128Sign( a );
   5046     bSig1 = extractFloat128Frac1( b );
   5047     bSig0 = extractFloat128Frac0( b );
   5048     bExp = extractFloat128Exp( b );
   5049     bSign = extractFloat128Sign( b );
   5050     zSign = aSign ^ bSign;
   5051     if ( aExp == 0x7FFF ) {
   5052         if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
   5053         if ( bExp == 0x7FFF ) {
   5054             if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   5055             goto invalid;
   5056         }
   5057         return packFloat128( zSign, 0x7FFF, 0, 0 );
   5058     }
   5059     if ( bExp == 0x7FFF ) {
   5060         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   5061         return packFloat128( zSign, 0, 0, 0 );
   5062     }
   5063     if ( bExp == 0 ) {
   5064         if ( ( bSig0 | bSig1 ) == 0 ) {
   5065             if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
   5066  invalid:
   5067                 float_raise( float_flag_invalid );
   5068                 z.low = float128_default_nan_low;
   5069                 z.high = float128_default_nan_high;
   5070                 return z;
   5071             }
   5072             float_raise( float_flag_divbyzero );
   5073             return packFloat128( zSign, 0x7FFF, 0, 0 );
   5074         }
   5075         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
   5076     }
   5077     if ( aExp == 0 ) {
   5078         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
   5079         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   5080     }
   5081     zExp = aExp - bExp + 0x3FFD;
   5082     shortShift128Left(
   5083         aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
   5084     shortShift128Left(
   5085         bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
   5086     if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
   5087         shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
   5088         ++zExp;
   5089     }
   5090     zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
   5091     mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
   5092     sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
   5093     while ( (sbits64) rem0 < 0 ) {
   5094         --zSig0;
   5095         add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
   5096     }
   5097     zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
   5098     if ( ( zSig1 & 0x3FFF ) <= 4 ) {
   5099         mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
   5100         sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
   5101         while ( (sbits64) rem1 < 0 ) {
   5102             --zSig1;
   5103             add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
   5104         }
   5105         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
   5106     }
   5107     shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
   5108     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
   5109 
   5110 }
   5111 
   5112 /*
   5113 -------------------------------------------------------------------------------
   5114 Returns the remainder of the quadruple-precision floating-point value `a'
   5115 with respect to the corresponding value `b'.  The operation is performed
   5116 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5117 -------------------------------------------------------------------------------
   5118 */
   5119 float128 float128_rem( float128 a, float128 b )
   5120 {
   5121     flag aSign, zSign;
   5122     int32 aExp, bExp, expDiff;
   5123     bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
   5124     bits64 allZero, alternateASig0, alternateASig1, sigMean1;
   5125     sbits64 sigMean0;
   5126     float128 z;
   5127 
   5128     aSig1 = extractFloat128Frac1( a );
   5129     aSig0 = extractFloat128Frac0( a );
   5130     aExp = extractFloat128Exp( a );
   5131     aSign = extractFloat128Sign( a );
   5132     bSig1 = extractFloat128Frac1( b );
   5133     bSig0 = extractFloat128Frac0( b );
   5134     bExp = extractFloat128Exp( b );
   5135     if ( aExp == 0x7FFF ) {
   5136         if (    ( aSig0 | aSig1 )
   5137              || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
   5138             return propagateFloat128NaN( a, b );
   5139         }
   5140         goto invalid;
   5141     }
   5142     if ( bExp == 0x7FFF ) {
   5143         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   5144         return a;
   5145     }
   5146     if ( bExp == 0 ) {
   5147         if ( ( bSig0 | bSig1 ) == 0 ) {
   5148  invalid:
   5149             float_raise( float_flag_invalid );
   5150             z.low = float128_default_nan_low;
   5151             z.high = float128_default_nan_high;
   5152             return z;
   5153         }
   5154         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
   5155     }
   5156     if ( aExp == 0 ) {
   5157         if ( ( aSig0 | aSig1 ) == 0 ) return a;
   5158         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   5159     }
   5160     expDiff = aExp - bExp;
   5161     if ( expDiff < -1 ) return a;
   5162     shortShift128Left(
   5163         aSig0 | LIT64( 0x0001000000000000 ),
   5164         aSig1,
   5165         15 - ( expDiff < 0 ),
   5166         &aSig0,
   5167         &aSig1
   5168     );
   5169     shortShift128Left(
   5170         bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
   5171     q = le128( bSig0, bSig1, aSig0, aSig1 );
   5172     if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
   5173     expDiff -= 64;
   5174     while ( 0 < expDiff ) {
   5175         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
   5176         q = ( 4 < q ) ? q - 4 : 0;
   5177         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
   5178         shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
   5179         shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
   5180         sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
   5181         expDiff -= 61;
   5182     }
   5183     if ( -64 < expDiff ) {
   5184         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
   5185         q = ( 4 < q ) ? q - 4 : 0;
   5186         q >>= - expDiff;
   5187         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
   5188         expDiff += 52;
   5189         if ( expDiff < 0 ) {
   5190             shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
   5191         }
   5192         else {
   5193             shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
   5194         }
   5195         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
   5196         sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
   5197     }
   5198     else {
   5199         shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
   5200         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
   5201     }
   5202     do {
   5203         alternateASig0 = aSig0;
   5204         alternateASig1 = aSig1;
   5205         ++q;
   5206         sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
   5207     } while ( 0 <= (sbits64) aSig0 );
   5208     add128(
   5209         aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 );
   5210     if (    ( sigMean0 < 0 )
   5211          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
   5212         aSig0 = alternateASig0;
   5213         aSig1 = alternateASig1;
   5214     }
   5215     zSign = ( (sbits64) aSig0 < 0 );
   5216     if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
   5217     return
   5218         normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
   5219 
   5220 }
   5221 
   5222 /*
   5223 -------------------------------------------------------------------------------
   5224 Returns the square root of the quadruple-precision floating-point value `a'.
   5225 The operation is performed according to the IEC/IEEE Standard for Binary
   5226 Floating-Point Arithmetic.
   5227 -------------------------------------------------------------------------------
   5228 */
   5229 float128 float128_sqrt( float128 a )
   5230 {
   5231     flag aSign;
   5232     int32 aExp, zExp;
   5233     bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
   5234     bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
   5235     float128 z;
   5236 
   5237     aSig1 = extractFloat128Frac1( a );
   5238     aSig0 = extractFloat128Frac0( a );
   5239     aExp = extractFloat128Exp( a );
   5240     aSign = extractFloat128Sign( a );
   5241     if ( aExp == 0x7FFF ) {
   5242         if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a );
   5243         if ( ! aSign ) return a;
   5244         goto invalid;
   5245     }
   5246     if ( aSign ) {
   5247         if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
   5248  invalid:
   5249         float_raise( float_flag_invalid );
   5250         z.low = float128_default_nan_low;
   5251         z.high = float128_default_nan_high;
   5252         return z;
   5253     }
   5254     if ( aExp == 0 ) {
   5255         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
   5256         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   5257     }
   5258     zExp = ( (unsigned int)(aExp - 0x3FFF) >> 1) + 0x3FFE;
   5259     aSig0 |= LIT64( 0x0001000000000000 );
   5260     zSig0 = estimateSqrt32((int16)aExp, (bits32)(aSig0>>17));
   5261     shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
   5262     zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
   5263     doubleZSig0 = zSig0<<1;
   5264     mul64To128( zSig0, zSig0, &term0, &term1 );
   5265     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
   5266     while ( (sbits64) rem0 < 0 ) {
   5267         --zSig0;
   5268         doubleZSig0 -= 2;
   5269         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
   5270     }
   5271     zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
   5272     if ( ( zSig1 & 0x1FFF ) <= 5 ) {
   5273         if ( zSig1 == 0 ) zSig1 = 1;
   5274         mul64To128( doubleZSig0, zSig1, &term1, &term2 );
   5275         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
   5276         mul64To128( zSig1, zSig1, &term2, &term3 );
   5277         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
   5278         while ( (sbits64) rem1 < 0 ) {
   5279             --zSig1;
   5280             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
   5281             term3 |= 1;
   5282             term2 |= doubleZSig0;
   5283             add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
   5284         }
   5285         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
   5286     }
   5287     shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
   5288     return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 );
   5289 
   5290 }
   5291 
   5292 /*
   5293 -------------------------------------------------------------------------------
   5294 Returns 1 if the quadruple-precision floating-point value `a' is equal to
   5295 the corresponding value `b', and 0 otherwise.  The comparison is performed
   5296 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5297 -------------------------------------------------------------------------------
   5298 */
   5299 flag float128_eq( float128 a, float128 b )
   5300 {
   5301 
   5302     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5303               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5304          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5305               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5306        ) {
   5307         if (    float128_is_signaling_nan( a )
   5308              || float128_is_signaling_nan( b ) ) {
   5309             float_raise( float_flag_invalid );
   5310         }
   5311         return 0;
   5312     }
   5313     return
   5314            ( a.low == b.low )
   5315         && (    ( a.high == b.high )
   5316              || (    ( a.low == 0 )
   5317                   && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
   5318            );
   5319 
   5320 }
   5321 
   5322 /*
   5323 -------------------------------------------------------------------------------
   5324 Returns 1 if the quadruple-precision floating-point value `a' is less than
   5325 or equal to the corresponding value `b', and 0 otherwise.  The comparison
   5326 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   5327 Arithmetic.
   5328 -------------------------------------------------------------------------------
   5329 */
   5330 flag float128_le( float128 a, float128 b )
   5331 {
   5332     flag aSign, bSign;
   5333 
   5334     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5335               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5336          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5337               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5338        ) {
   5339         float_raise( float_flag_invalid );
   5340         return 0;
   5341     }
   5342     aSign = extractFloat128Sign( a );
   5343     bSign = extractFloat128Sign( b );
   5344     if ( aSign != bSign ) {
   5345         return
   5346                aSign
   5347             || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   5348                  == 0 );
   5349     }
   5350     return
   5351           aSign ? le128( b.high, b.low, a.high, a.low )
   5352         : le128( a.high, a.low, b.high, b.low );
   5353 
   5354 }
   5355 
   5356 /*
   5357 -------------------------------------------------------------------------------
   5358 Returns 1 if the quadruple-precision floating-point value `a' is less than
   5359 the corresponding value `b', and 0 otherwise.  The comparison is performed
   5360 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5361 -------------------------------------------------------------------------------
   5362 */
   5363 flag float128_lt( float128 a, float128 b )
   5364 {
   5365     flag aSign, bSign;
   5366 
   5367     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5368               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5369          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5370               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5371        ) {
   5372         float_raise( float_flag_invalid );
   5373         return 0;
   5374     }
   5375     aSign = extractFloat128Sign( a );
   5376     bSign = extractFloat128Sign( b );
   5377     if ( aSign != bSign ) {
   5378         return
   5379                aSign
   5380             && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   5381                  != 0 );
   5382     }
   5383     return
   5384           aSign ? lt128( b.high, b.low, a.high, a.low )
   5385         : lt128( a.high, a.low, b.high, b.low );
   5386 
   5387 }
   5388 
   5389 /*
   5390 -------------------------------------------------------------------------------
   5391 Returns 1 if the quadruple-precision floating-point value `a' is equal to
   5392 the corresponding value `b', and 0 otherwise.  The invalid exception is
   5393 raised if either operand is a NaN.  Otherwise, the comparison is performed
   5394 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5395 -------------------------------------------------------------------------------
   5396 */
   5397 flag float128_eq_signaling( float128 a, float128 b )
   5398 {
   5399 
   5400     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5401               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5402          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5403               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5404        ) {
   5405         float_raise( float_flag_invalid );
   5406         return 0;
   5407     }
   5408     return
   5409            ( a.low == b.low )
   5410         && (    ( a.high == b.high )
   5411              || (    ( a.low == 0 )
   5412                   && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
   5413            );
   5414 
   5415 }
   5416 
   5417 /*
   5418 -------------------------------------------------------------------------------
   5419 Returns 1 if the quadruple-precision floating-point value `a' is less than
   5420 or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
   5421 cause an exception.  Otherwise, the comparison is performed according to the
   5422 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5423 -------------------------------------------------------------------------------
   5424 */
   5425 flag float128_le_quiet( float128 a, float128 b )
   5426 {
   5427     flag aSign, bSign;
   5428 
   5429     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5430               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5431          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5432               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5433        ) {
   5434         if (    float128_is_signaling_nan( a )
   5435              || float128_is_signaling_nan( b ) ) {
   5436             float_raise( float_flag_invalid );
   5437         }
   5438         return 0;
   5439     }
   5440     aSign = extractFloat128Sign( a );
   5441     bSign = extractFloat128Sign( b );
   5442     if ( aSign != bSign ) {
   5443         return
   5444                aSign
   5445             || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   5446                  == 0 );
   5447     }
   5448     return
   5449           aSign ? le128( b.high, b.low, a.high, a.low )
   5450         : le128( a.high, a.low, b.high, b.low );
   5451 
   5452 }
   5453 
   5454 /*
   5455 -------------------------------------------------------------------------------
   5456 Returns 1 if the quadruple-precision floating-point value `a' is less than
   5457 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
   5458 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
   5459 Standard for Binary Floating-Point Arithmetic.
   5460 -------------------------------------------------------------------------------
   5461 */
   5462 flag float128_lt_quiet( float128 a, float128 b )
   5463 {
   5464     flag aSign, bSign;
   5465 
   5466     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5467               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5468          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5469               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5470        ) {
   5471         if (    float128_is_signaling_nan( a )
   5472              || float128_is_signaling_nan( b ) ) {
   5473             float_raise( float_flag_invalid );
   5474         }
   5475         return 0;
   5476     }
   5477     aSign = extractFloat128Sign( a );
   5478     bSign = extractFloat128Sign( b );
   5479     if ( aSign != bSign ) {
   5480         return
   5481                aSign
   5482             && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   5483                  != 0 );
   5484     }
   5485     return
   5486           aSign ? lt128( b.high, b.low, a.high, a.low )
   5487         : lt128( a.high, a.low, b.high, b.low );
   5488 
   5489 }
   5490 
   5491 #endif
   5492 
   5493 
   5494 #if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS)
   5495 
   5496 /*
   5497  * These two routines are not part of the original softfloat distribution.
   5498  *
   5499  * They are based on the corresponding conversions to integer but return
   5500  * unsigned numbers instead since these functions are required by GCC.
   5501  *
   5502  * Added by Mark Brinicombe <mark (at) NetBSD.org>	27/09/97
   5503  *
   5504  * float64 version overhauled for SoftFloat 2a [bjh21 2000-07-15]
   5505  */
   5506 
   5507 /*
   5508 -------------------------------------------------------------------------------
   5509 Returns the result of converting the double-precision floating-point value
   5510 `a' to the 32-bit unsigned integer format.  The conversion is
   5511 performed according to the IEC/IEEE Standard for Binary Floating-point
   5512 Arithmetic, except that the conversion is always rounded toward zero.  If
   5513 `a' is a NaN, the largest positive integer is returned.  If the conversion
   5514 overflows, the largest integer positive is returned.
   5515 -------------------------------------------------------------------------------
   5516 */
   5517 uint32 float64_to_uint32_round_to_zero( float64 a )
   5518 {
   5519     flag aSign;
   5520     int16 aExp, shiftCount;
   5521     bits64 aSig, savedASig;
   5522     uint32 z;
   5523 
   5524     aSig = extractFloat64Frac( a );
   5525     aExp = extractFloat64Exp( a );
   5526     aSign = extractFloat64Sign( a );
   5527 
   5528     if (aSign) {
   5529         float_raise( float_flag_invalid );
   5530     	return(0);
   5531     }
   5532 
   5533     if ( 0x41E < aExp ) {
   5534         float_raise( float_flag_invalid );
   5535         return 0xffffffff;
   5536     }
   5537     else if ( aExp < 0x3FF ) {
   5538         if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
   5539         return 0;
   5540     }
   5541     aSig |= LIT64( 0x0010000000000000 );
   5542     shiftCount = 0x433 - aExp;
   5543     savedASig = aSig;
   5544     aSig >>= shiftCount;
   5545     z = (uint32)aSig;
   5546     if ( ( aSig<<shiftCount ) != savedASig ) {
   5547         float_exception_flags |= float_flag_inexact;
   5548     }
   5549     return z;
   5550 
   5551 }
   5552 
   5553 /*
   5554 -------------------------------------------------------------------------------
   5555 Returns the result of converting the single-precision floating-point value
   5556 `a' to the 32-bit unsigned integer format.  The conversion is
   5557 performed according to the IEC/IEEE Standard for Binary Floating-point
   5558 Arithmetic, except that the conversion is always rounded toward zero.  If
   5559 `a' is a NaN, the largest positive integer is returned.  If the conversion
   5560 overflows, the largest positive integer is returned.
   5561 -------------------------------------------------------------------------------
   5562 */
   5563 uint32 float32_to_uint32_round_to_zero( float32 a )
   5564 {
   5565     flag aSign;
   5566     int16 aExp, shiftCount;
   5567     bits32 aSig;
   5568     uint32 z;
   5569 
   5570     aSig = extractFloat32Frac( a );
   5571     aExp = extractFloat32Exp( a );
   5572     aSign = extractFloat32Sign( a );
   5573     shiftCount = aExp - 0x9E;
   5574 
   5575     if (aSign) {
   5576         float_raise( float_flag_invalid );
   5577     	return(0);
   5578     }
   5579     if ( 0 < shiftCount ) {
   5580         float_raise( float_flag_invalid );
   5581         return 0xFFFFFFFF;
   5582     }
   5583     else if ( aExp <= 0x7E ) {
   5584         if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
   5585         return 0;
   5586     }
   5587     aSig = ( aSig | 0x800000 )<<8;
   5588     z = aSig>>( - shiftCount );
   5589     if ( aSig<<( shiftCount & 31 ) ) {
   5590         float_exception_flags |= float_flag_inexact;
   5591     }
   5592     return z;
   5593 
   5594 }
   5595 
   5596 #endif
   5597