Home | History | Annotate | Line # | Download | only in bits64
      1 /* $NetBSD: softfloat.c,v 1.18 2025/09/29 02:50:20 nat Exp $ */
      2 
      3 /*
      4  * This version hacked for use with gcc -msoft-float by bjh21.
      5  * (Mostly a case of #ifdefing out things GCC doesn't need or provides
      6  *  itself).
      7  */
      8 
      9 /*
     10  * Things you may want to define:
     11  *
     12  * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
     13  *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
     14  *   properly renamed.
     15  */
     16 
     17 /*
     18 ===============================================================================
     19 
     20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
     21 Arithmetic Package, Release 2a.
     22 
     23 Written by John R. Hauser.  This work was made possible in part by the
     24 International Computer Science Institute, located at Suite 600, 1947 Center
     25 Street, Berkeley, California 94704.  Funding was partially provided by the
     26 National Science Foundation under grant MIP-9311980.  The original version
     27 of this code was written as part of a project to build a fixed-point vector
     28 processor in collaboration with the University of California at Berkeley,
     29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
     30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
     31 arithmetic/SoftFloat.html'.
     32 
     33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
     34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
     35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
     36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
     37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
     38 
     39 Derivative works are acceptable, even for commercial purposes, so long as
     40 (1) they include prominent notice that the work is derivative, and (2) they
     41 include prominent notice akin to these four paragraphs for those parts of
     42 this code that are retained.
     43 
     44 ===============================================================================
     45 */
     46 
     47 #include <sys/cdefs.h>
     48 #if defined(LIBC_SCCS) && !defined(lint)
     49 __RCSID("$NetBSD: softfloat.c,v 1.18 2025/09/29 02:50:20 nat Exp $");
     50 #endif /* LIBC_SCCS and not lint */
     51 
     52 #ifdef SOFTFLOAT_FOR_GCC
     53 #include "softfloat-for-gcc.h"
     54 #endif
     55 
     56 #include "milieu.h"
     57 #include "softfloat.h"
     58 
     59 /*
     60  * Conversions between floats as stored in memory and floats as
     61  * SoftFloat uses them
     62  */
     63 #ifndef FLOAT64_DEMANGLE
     64 #define FLOAT64_DEMANGLE(a)	(a)
     65 #endif
     66 #ifndef FLOAT64_MANGLE
     67 #define FLOAT64_MANGLE(a)	(a)
     68 #endif
     69 
     70 #ifndef X80SHIFT
     71 #define X80SHIFT		0
     72 #endif
     73 /*
     74 -------------------------------------------------------------------------------
     75 Floating-point rounding mode, extended double-precision rounding precision,
     76 and exception flags.
     77 -------------------------------------------------------------------------------
     78 */
     79 #ifndef set_float_rounding_mode
     80 fp_rnd float_rounding_mode = float_round_nearest_even;
     81 fp_except float_exception_flags = 0;
     82 #endif
     83 #ifndef set_float_exception_inexact_flag
     84 #define	set_float_exception_inexact_flag() \
     85 	((void)(float_exception_flags |= float_flag_inexact))
     86 #endif
     87 #ifdef FLOATX80
     88 int8 floatx80_rounding_precision = 80;
     89 #endif
     90 
     91 /*
     92 -------------------------------------------------------------------------------
     93 Primitive arithmetic functions, including multi-word arithmetic, and
     94 division and square root approximations.  (Can be specialized to target if
     95 desired.)
     96 -------------------------------------------------------------------------------
     97 */
     98 #include "softfloat-macros"
     99 
    100 /*
    101 -------------------------------------------------------------------------------
    102 Functions and definitions to determine:  (1) whether tininess for underflow
    103 is detected before or after rounding by default, (2) what (if anything)
    104 happens when exceptions are raised, (3) how signaling NaNs are distinguished
    105 from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
    106 are propagated from function inputs to output.  These details are target-
    107 specific.
    108 -------------------------------------------------------------------------------
    109 */
    110 #include "softfloat-specialize"
    111 
    112 #if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128)
    113 /*
    114 -------------------------------------------------------------------------------
    115 Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
    116 and 7, and returns the properly rounded 32-bit integer corresponding to the
    117 input.  If `zSign' is 1, the input is negated before being converted to an
    118 integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
    119 is simply rounded to an integer, with the inexact exception raised if the
    120 input cannot be represented exactly as an integer.  However, if the fixed-
    121 point input is too large, the invalid exception is raised and the largest
    122 positive or negative integer is returned.
    123 -------------------------------------------------------------------------------
    124 */
    125 static int32 roundAndPackInt32( flag zSign, bits64 absZ )
    126 {
    127     int8 roundingMode;
    128     flag roundNearestEven;
    129     int8 roundIncrement, roundBits;
    130     int32 z;
    131 
    132     roundingMode = float_rounding_mode;
    133     roundNearestEven = ( roundingMode == float_round_nearest_even );
    134     roundIncrement = 0x40;
    135     if ( ! roundNearestEven ) {
    136         if ( roundingMode == float_round_to_zero ) {
    137             roundIncrement = 0;
    138         }
    139         else {
    140             roundIncrement = 0x7F;
    141             if ( zSign ) {
    142                 if ( roundingMode == float_round_up ) roundIncrement = 0;
    143             }
    144             else {
    145                 if ( roundingMode == float_round_down ) roundIncrement = 0;
    146             }
    147         }
    148     }
    149     roundBits = (int8)(absZ & 0x7F);
    150     absZ = ( absZ + roundIncrement )>>7;
    151     absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
    152     z = (int32)absZ;
    153     if ( zSign ) z = - z;
    154     if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
    155         float_raise( float_flag_invalid );
    156         return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
    157     }
    158     if ( roundBits ) set_float_exception_inexact_flag();
    159     return z;
    160 
    161 }
    162 
    163 /*
    164 -------------------------------------------------------------------------------
    165 Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
    166 `absZ1', with binary point between bits 63 and 64 (between the input words),
    167 and returns the properly rounded 64-bit integer corresponding to the input.
    168 If `zSign' is 1, the input is negated before being converted to an integer.
    169 Ordinarily, the fixed-point input is simply rounded to an integer, with
    170 the inexact exception raised if the input cannot be represented exactly as
    171 an integer.  However, if the fixed-point input is too large, the invalid
    172 exception is raised and the largest positive or negative integer is
    173 returned.
    174 -------------------------------------------------------------------------------
    175 */
    176 static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 )
    177 {
    178     int8 roundingMode;
    179     flag roundNearestEven, increment;
    180     int64 z;
    181 
    182     roundingMode = float_rounding_mode;
    183     roundNearestEven = ( roundingMode == float_round_nearest_even );
    184     increment = ( (sbits64) absZ1 < 0 );
    185     if ( ! roundNearestEven ) {
    186         if ( roundingMode == float_round_to_zero ) {
    187             increment = 0;
    188         }
    189         else {
    190             if ( zSign ) {
    191                 increment = ( roundingMode == float_round_down ) && absZ1;
    192             }
    193             else {
    194                 increment = ( roundingMode == float_round_up ) && absZ1;
    195             }
    196         }
    197     }
    198     if ( increment ) {
    199         ++absZ0;
    200         if ( absZ0 == 0 ) goto overflow;
    201         absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
    202     }
    203     z = absZ0;
    204     if ( zSign ) z = - z;
    205     if ( z && ( ( z < 0 ) ^ zSign ) ) {
    206  overflow:
    207         float_raise( float_flag_invalid );
    208         return
    209               zSign ? (sbits64) LIT64( 0x8000000000000000 )
    210             : LIT64( 0x7FFFFFFFFFFFFFFF );
    211     }
    212     if ( absZ1 ) set_float_exception_inexact_flag();
    213     return z;
    214 
    215 }
    216 #endif
    217 
    218 /*
    219 -------------------------------------------------------------------------------
    220 Returns the fraction bits of the single-precision floating-point value `a'.
    221 -------------------------------------------------------------------------------
    222 */
    223 INLINE bits32 extractFloat32Frac( float32 a )
    224 {
    225 
    226     return a & 0x007FFFFF;
    227 
    228 }
    229 
    230 /*
    231 -------------------------------------------------------------------------------
    232 Returns the exponent bits of the single-precision floating-point value `a'.
    233 -------------------------------------------------------------------------------
    234 */
    235 INLINE int16 extractFloat32Exp( float32 a )
    236 {
    237 
    238     return ( a>>23 ) & 0xFF;
    239 
    240 }
    241 
    242 /*
    243 -------------------------------------------------------------------------------
    244 Returns the sign bit of the single-precision floating-point value `a'.
    245 -------------------------------------------------------------------------------
    246 */
    247 INLINE flag extractFloat32Sign( float32 a )
    248 {
    249 
    250     return a>>31;
    251 
    252 }
    253 
    254 /*
    255 -------------------------------------------------------------------------------
    256 Normalizes the subnormal single-precision floating-point value represented
    257 by the denormalized significand `aSig'.  The normalized exponent and
    258 significand are stored at the locations pointed to by `zExpPtr' and
    259 `zSigPtr', respectively.
    260 -------------------------------------------------------------------------------
    261 */
    262 static void
    263  normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
    264 {
    265     int8 shiftCount;
    266 
    267     shiftCount = countLeadingZeros32( aSig ) - 8;
    268     *zSigPtr = aSig<<shiftCount;
    269     *zExpPtr = 1 - shiftCount;
    270 
    271 }
    272 
    273 /*
    274 -------------------------------------------------------------------------------
    275 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
    276 single-precision floating-point value, returning the result.  After being
    277 shifted into the proper positions, the three fields are simply added
    278 together to form the result.  This means that any integer portion of `zSig'
    279 will be added into the exponent.  Since a properly normalized significand
    280 will have an integer portion equal to 1, the `zExp' input should be 1 less
    281 than the desired result exponent whenever `zSig' is a complete, normalized
    282 significand.
    283 -------------------------------------------------------------------------------
    284 */
    285 INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
    286 {
    287 
    288     return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
    289 
    290 }
    291 
    292 /*
    293 -------------------------------------------------------------------------------
    294 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    295 and significand `zSig', and returns the proper single-precision floating-
    296 point value corresponding to the abstract input.  Ordinarily, the abstract
    297 value is simply rounded and packed into the single-precision format, with
    298 the inexact exception raised if the abstract input cannot be represented
    299 exactly.  However, if the abstract value is too large, the overflow and
    300 inexact exceptions are raised and an infinity or maximal finite value is
    301 returned.  If the abstract value is too small, the input value is rounded to
    302 a subnormal number, and the underflow and inexact exceptions are raised if
    303 the abstract input cannot be represented exactly as a subnormal single-
    304 precision floating-point number.
    305     The input significand `zSig' has its binary point between bits 30
    306 and 29, which is 7 bits to the left of the usual location.  This shifted
    307 significand must be normalized or smaller.  If `zSig' is not normalized,
    308 `zExp' must be 0; in that case, the result returned is a subnormal number,
    309 and it must not require rounding.  In the usual case that `zSig' is
    310 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
    311 The handling of underflow and overflow follows the IEC/IEEE Standard for
    312 Binary Floating-Point Arithmetic.
    313 -------------------------------------------------------------------------------
    314 */
    315 static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
    316 {
    317     int8 roundingMode;
    318     flag roundNearestEven;
    319     int8 roundIncrement, roundBits;
    320     flag isTiny;
    321 
    322     roundingMode = float_rounding_mode;
    323     roundNearestEven = ( roundingMode == float_round_nearest_even );
    324     roundIncrement = 0x40;
    325     if ( ! roundNearestEven ) {
    326         if ( roundingMode == float_round_to_zero ) {
    327             roundIncrement = 0;
    328         }
    329         else {
    330             roundIncrement = 0x7F;
    331             if ( zSign ) {
    332                 if ( roundingMode == float_round_up ) roundIncrement = 0;
    333             }
    334             else {
    335                 if ( roundingMode == float_round_down ) roundIncrement = 0;
    336             }
    337         }
    338     }
    339     roundBits = zSig & 0x7F;
    340     if ( 0xFD <= (bits16) zExp ) {
    341         if (    ( 0xFD < zExp )
    342              || (    ( zExp == 0xFD )
    343                   && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
    344            ) {
    345             float_raise( float_flag_overflow | float_flag_inexact );
    346             return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
    347         }
    348         if ( zExp < 0 ) {
    349             isTiny =
    350                    ( float_detect_tininess == float_tininess_before_rounding )
    351                 || ( zExp < -1 )
    352                 || ( zSig + roundIncrement < 0x80000000U );
    353             shift32RightJamming( zSig, - zExp, &zSig );
    354             zExp = 0;
    355             roundBits = zSig & 0x7F;
    356             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
    357         }
    358     }
    359     if ( roundBits ) set_float_exception_inexact_flag();
    360     zSig = ( zSig + roundIncrement )>>7;
    361     zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
    362     if ( zSig == 0 ) zExp = 0;
    363     return packFloat32( zSign, zExp, zSig );
    364 
    365 }
    366 
    367 /*
    368 -------------------------------------------------------------------------------
    369 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    370 and significand `zSig', and returns the proper single-precision floating-
    371 point value corresponding to the abstract input.  This routine is just like
    372 `roundAndPackFloat32' except that `zSig' does not have to be normalized.
    373 Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
    374 floating-point exponent.
    375 -------------------------------------------------------------------------------
    376 */
    377 static float32
    378  normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
    379 {
    380     int8 shiftCount;
    381 
    382     shiftCount = countLeadingZeros32( zSig ) - 1;
    383     return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
    384 
    385 }
    386 
    387 /*
    388 -------------------------------------------------------------------------------
    389 Returns the fraction bits of the double-precision floating-point value `a'.
    390 -------------------------------------------------------------------------------
    391 */
    392 INLINE bits64 extractFloat64Frac( float64 a )
    393 {
    394 
    395     return FLOAT64_DEMANGLE(a) & LIT64( 0x000FFFFFFFFFFFFF );
    396 
    397 }
    398 
    399 /*
    400 -------------------------------------------------------------------------------
    401 Returns the exponent bits of the double-precision floating-point value `a'.
    402 -------------------------------------------------------------------------------
    403 */
    404 INLINE int16 extractFloat64Exp( float64 a )
    405 {
    406 
    407     return (int16)((FLOAT64_DEMANGLE(a) >> 52) & 0x7FF);
    408 
    409 }
    410 
    411 /*
    412 -------------------------------------------------------------------------------
    413 Returns the sign bit of the double-precision floating-point value `a'.
    414 -------------------------------------------------------------------------------
    415 */
    416 INLINE flag extractFloat64Sign( float64 a )
    417 {
    418 
    419     return (flag)(FLOAT64_DEMANGLE(a) >> 63);
    420 
    421 }
    422 
    423 /*
    424 -------------------------------------------------------------------------------
    425 Normalizes the subnormal double-precision floating-point value represented
    426 by the denormalized significand `aSig'.  The normalized exponent and
    427 significand are stored at the locations pointed to by `zExpPtr' and
    428 `zSigPtr', respectively.
    429 -------------------------------------------------------------------------------
    430 */
    431 static void
    432  normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
    433 {
    434     int8 shiftCount;
    435 
    436     shiftCount = countLeadingZeros64( aSig ) - 11;
    437     *zSigPtr = aSig<<shiftCount;
    438     *zExpPtr = 1 - shiftCount;
    439 
    440 }
    441 
    442 /*
    443 -------------------------------------------------------------------------------
    444 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
    445 double-precision floating-point value, returning the result.  After being
    446 shifted into the proper positions, the three fields are simply added
    447 together to form the result.  This means that any integer portion of `zSig'
    448 will be added into the exponent.  Since a properly normalized significand
    449 will have an integer portion equal to 1, the `zExp' input should be 1 less
    450 than the desired result exponent whenever `zSig' is a complete, normalized
    451 significand.
    452 -------------------------------------------------------------------------------
    453 */
    454 INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
    455 {
    456 
    457     return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
    458 			   ( ( (bits64) zExp )<<52 ) + zSig );
    459 
    460 }
    461 
    462 /*
    463 -------------------------------------------------------------------------------
    464 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    465 and significand `zSig', and returns the proper double-precision floating-
    466 point value corresponding to the abstract input.  Ordinarily, the abstract
    467 value is simply rounded and packed into the double-precision format, with
    468 the inexact exception raised if the abstract input cannot be represented
    469 exactly.  However, if the abstract value is too large, the overflow and
    470 inexact exceptions are raised and an infinity or maximal finite value is
    471 returned.  If the abstract value is too small, the input value is rounded to
    472 a subnormal number, and the underflow and inexact exceptions are raised if
    473 the abstract input cannot be represented exactly as a subnormal double-
    474 precision floating-point number.
    475     The input significand `zSig' has its binary point between bits 62
    476 and 61, which is 10 bits to the left of the usual location.  This shifted
    477 significand must be normalized or smaller.  If `zSig' is not normalized,
    478 `zExp' must be 0; in that case, the result returned is a subnormal number,
    479 and it must not require rounding.  In the usual case that `zSig' is
    480 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
    481 The handling of underflow and overflow follows the IEC/IEEE Standard for
    482 Binary Floating-Point Arithmetic.
    483 -------------------------------------------------------------------------------
    484 */
    485 static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
    486 {
    487     int8 roundingMode;
    488     flag roundNearestEven;
    489     int16 roundIncrement, roundBits;
    490     flag isTiny;
    491 
    492     roundingMode = float_rounding_mode;
    493     roundNearestEven = ( roundingMode == float_round_nearest_even );
    494     roundIncrement = 0x200;
    495     if ( ! roundNearestEven ) {
    496         if ( roundingMode == float_round_to_zero ) {
    497             roundIncrement = 0;
    498         }
    499         else {
    500             roundIncrement = 0x3FF;
    501             if ( zSign ) {
    502                 if ( roundingMode == float_round_up ) roundIncrement = 0;
    503             }
    504             else {
    505                 if ( roundingMode == float_round_down ) roundIncrement = 0;
    506             }
    507         }
    508     }
    509     roundBits = (int16)(zSig & 0x3FF);
    510     if ( 0x7FD <= (bits16) zExp ) {
    511         if (    ( 0x7FD < zExp )
    512              || (    ( zExp == 0x7FD )
    513                   && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
    514            ) {
    515             float_raise( float_flag_overflow | float_flag_inexact );
    516             return FLOAT64_MANGLE(
    517 		FLOAT64_DEMANGLE(packFloat64( zSign, 0x7FF, 0 )) -
    518 		( roundIncrement == 0 ));
    519         }
    520         if ( zExp < 0 ) {
    521             isTiny =
    522                    ( float_detect_tininess == float_tininess_before_rounding )
    523                 || ( zExp < -1 )
    524                 || ( zSig + roundIncrement < (bits64)LIT64( 0x8000000000000000 ) );
    525             shift64RightJamming( zSig, - zExp, &zSig );
    526             zExp = 0;
    527             roundBits = (int16)(zSig & 0x3FF);
    528             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
    529         }
    530     }
    531     if ( roundBits ) set_float_exception_inexact_flag();
    532     zSig = ( zSig + roundIncrement )>>10;
    533     zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
    534     if ( zSig == 0 ) zExp = 0;
    535     return packFloat64( zSign, zExp, zSig );
    536 
    537 }
    538 
    539 /*
    540 -------------------------------------------------------------------------------
    541 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    542 and significand `zSig', and returns the proper double-precision floating-
    543 point value corresponding to the abstract input.  This routine is just like
    544 `roundAndPackFloat64' except that `zSig' does not have to be normalized.
    545 Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
    546 floating-point exponent.
    547 -------------------------------------------------------------------------------
    548 */
    549 static float64
    550  normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
    551 {
    552     int8 shiftCount;
    553 
    554     shiftCount = countLeadingZeros64( zSig ) - 1;
    555     return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
    556 
    557 }
    558 
    559 #ifdef FLOATX80
    560 
    561 /*
    562 -------------------------------------------------------------------------------
    563 Returns the fraction bits of the extended double-precision floating-point
    564 value `a'.
    565 -------------------------------------------------------------------------------
    566 */
    567 INLINE bits64 extractFloatx80Frac( floatx80 a )
    568 {
    569 
    570     return a.low;
    571 
    572 }
    573 
    574 /*
    575 -------------------------------------------------------------------------------
    576 Returns the exponent bits of the extended double-precision floating-point
    577 value `a'.
    578 -------------------------------------------------------------------------------
    579 */
    580 INLINE int32 extractFloatx80Exp( floatx80 a )
    581 {
    582 
    583     return (a.high>>X80SHIFT) & 0x7FFF;
    584 
    585 }
    586 
    587 /*
    588 -------------------------------------------------------------------------------
    589 Returns the sign bit of the extended double-precision floating-point value
    590 `a'.
    591 -------------------------------------------------------------------------------
    592 */
    593 INLINE flag extractFloatx80Sign( floatx80 a )
    594 {
    595 
    596     return a.high>>(15 + X80SHIFT);
    597 
    598 }
    599 
    600 /*
    601 -------------------------------------------------------------------------------
    602 Normalizes the subnormal extended double-precision floating-point value
    603 represented by the denormalized significand `aSig'.  The normalized exponent
    604 and significand are stored at the locations pointed to by `zExpPtr' and
    605 `zSigPtr', respectively.
    606 -------------------------------------------------------------------------------
    607 */
    608 static void
    609  normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
    610 {
    611     int8 shiftCount;
    612 
    613     shiftCount = countLeadingZeros64( aSig );
    614     *zSigPtr = aSig<<shiftCount;
    615     *zExpPtr = 1 - shiftCount;
    616 
    617 }
    618 
    619 /*
    620 -------------------------------------------------------------------------------
    621 Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
    622 extended double-precision floating-point value, returning the result.
    623 -------------------------------------------------------------------------------
    624 */
    625 INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
    626 {
    627     floatx80 z;
    628 
    629     z.low = (bits64)zSig;
    630     z.high = (bits32)( ( ( (bits32) zSign )<<15 ) + zExp )<<X80SHIFT;
    631     return z;
    632 
    633 }
    634 
    635 /*
    636 -------------------------------------------------------------------------------
    637 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    638 and extended significand formed by the concatenation of `zSig0' and `zSig1',
    639 and returns the proper extended double-precision floating-point value
    640 corresponding to the abstract input.  Ordinarily, the abstract value is
    641 rounded and packed into the extended double-precision format, with the
    642 inexact exception raised if the abstract input cannot be represented
    643 exactly.  However, if the abstract value is too large, the overflow and
    644 inexact exceptions are raised and an infinity or maximal finite value is
    645 returned.  If the abstract value is too small, the input value is rounded to
    646 a subnormal number, and the underflow and inexact exceptions are raised if
    647 the abstract input cannot be represented exactly as a subnormal extended
    648 double-precision floating-point number.
    649     If `roundingPrecision' is 32 or 64, the result is rounded to the same
    650 number of bits as single or double precision, respectively.  Otherwise, the
    651 result is rounded to the full precision of the extended double-precision
    652 format.
    653     The input significand must be normalized or smaller.  If the input
    654 significand is not normalized, `zExp' must be 0; in that case, the result
    655 returned is a subnormal number, and it must not require rounding.  The
    656 handling of underflow and overflow follows the IEC/IEEE Standard for Binary
    657 Floating-Point Arithmetic.
    658 -------------------------------------------------------------------------------
    659 */
    660 static floatx80
    661  roundAndPackFloatx80(
    662      int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
    663  )
    664 {
    665     int8 roundingMode;
    666     flag roundNearestEven, increment, isTiny;
    667     uint64 roundIncrement, roundMask, roundBits;
    668 
    669     roundingMode = float_rounding_mode;
    670     roundNearestEven = ( roundingMode == float_round_nearest_even );
    671     if ( roundingPrecision == 80 ) goto precision80;
    672     if ( roundingPrecision == 64 ) {
    673         roundIncrement = LIT64( 0x0000000000000400 );
    674         roundMask = LIT64( 0x00000000000007FF );
    675     }
    676     else if ( roundingPrecision == 32 ) {
    677         roundIncrement = LIT64( 0x0000008000000000 );
    678         roundMask = LIT64( 0x000000FFFFFFFFFF );
    679     }
    680     else {
    681         goto precision80;
    682     }
    683     zSig0 |= ( zSig1 != 0 );
    684     if ( ! roundNearestEven ) {
    685         if ( roundingMode == float_round_to_zero ) {
    686             roundIncrement = 0;
    687         }
    688         else {
    689             roundIncrement = roundMask;
    690             if ( zSign ) {
    691                 if ( roundingMode == float_round_up ) roundIncrement = 0;
    692             }
    693             else {
    694                 if ( roundingMode == float_round_down ) roundIncrement = 0;
    695             }
    696         }
    697     }
    698     roundBits = zSig0 & roundMask;
    699     if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
    700         if (    ( 0x7FFE < zExp )
    701              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
    702            ) {
    703             goto overflow;
    704         }
    705         if ( zExp <= 0 ) {
    706             isTiny =
    707                    ( float_detect_tininess == float_tininess_before_rounding )
    708                 || ( zExp < 0 )
    709                 || ( zSig0 <= zSig0 + roundIncrement );
    710             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
    711             zExp = 0;
    712             roundBits = zSig0 & roundMask;
    713             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
    714             if ( roundBits ) set_float_exception_inexact_flag();
    715             zSig0 += roundIncrement;
    716             if ( (sbits64) zSig0 < 0 ) zExp = 1;
    717             roundIncrement = roundMask + 1;
    718             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
    719                 roundMask |= roundIncrement;
    720             }
    721             zSig0 &= ~ roundMask;
    722             return packFloatx80( zSign, zExp, zSig0 );
    723         }
    724     }
    725     if ( roundBits ) set_float_exception_inexact_flag();
    726     zSig0 += roundIncrement;
    727     if ( zSig0 < roundIncrement ) {
    728         ++zExp;
    729         zSig0 = LIT64( 0x8000000000000000 );
    730     }
    731     roundIncrement = roundMask + 1;
    732     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
    733         roundMask |= roundIncrement;
    734     }
    735     zSig0 &= ~ roundMask;
    736     if ( zSig0 == 0 ) zExp = 0;
    737     return packFloatx80( zSign, zExp, zSig0 );
    738  precision80:
    739     increment = ( (sbits64) zSig1 < 0 );
    740     if ( ! roundNearestEven ) {
    741         if ( roundingMode == float_round_to_zero ) {
    742             increment = 0;
    743         }
    744         else {
    745             if ( zSign ) {
    746                 increment = ( roundingMode == float_round_down ) && zSig1;
    747             }
    748             else {
    749                 increment = ( roundingMode == float_round_up ) && zSig1;
    750             }
    751         }
    752     }
    753     if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
    754         if (    ( 0x7FFE < zExp )
    755              || (    ( zExp == 0x7FFE )
    756                   && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
    757                   && increment
    758                 )
    759            ) {
    760             roundMask = 0;
    761  overflow:
    762             float_raise( float_flag_overflow | float_flag_inexact );
    763             if (    ( roundingMode == float_round_to_zero )
    764                  || ( zSign && ( roundingMode == float_round_up ) )
    765                  || ( ! zSign && ( roundingMode == float_round_down ) )
    766                ) {
    767                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
    768             }
    769             return packFloatx80( zSign, 0x7FFF, 0 );
    770         }
    771         if ( zExp <= 0 ) {
    772             isTiny =
    773                    ( float_detect_tininess == float_tininess_before_rounding )
    774                 || ( zExp < 0 )
    775                 || ! increment
    776                 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
    777             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
    778             zExp = 0;
    779             if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
    780             if ( zSig1 ) set_float_exception_inexact_flag();
    781             if ( roundNearestEven ) {
    782                 increment = ( (sbits64) zSig1 < 0 );
    783             }
    784             else {
    785                 if ( zSign ) {
    786                     increment = ( roundingMode == float_round_down ) && zSig1;
    787                 }
    788                 else {
    789                     increment = ( roundingMode == float_round_up ) && zSig1;
    790                 }
    791             }
    792             if ( increment ) {
    793                 ++zSig0;
    794                 zSig0 &=
    795                     ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
    796                 if ( (sbits64) zSig0 < 0 ) zExp = 1;
    797             }
    798             return packFloatx80( zSign, zExp, zSig0 );
    799         }
    800     }
    801     if ( zSig1 ) set_float_exception_inexact_flag();
    802     if ( increment ) {
    803         ++zSig0;
    804         if ( zSig0 == 0 ) {
    805             ++zExp;
    806             zSig0 = LIT64( 0x8000000000000000 );
    807         }
    808         else {
    809             zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
    810         }
    811     }
    812     else {
    813         if ( zSig0 == 0 ) zExp = 0;
    814     }
    815     return packFloatx80( zSign, zExp, zSig0 );
    816 
    817 }
    818 
    819 /*
    820 -------------------------------------------------------------------------------
    821 Takes an abstract floating-point value having sign `zSign', exponent
    822 `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
    823 and returns the proper extended double-precision floating-point value
    824 corresponding to the abstract input.  This routine is just like
    825 `roundAndPackFloatx80' except that the input significand does not have to be
    826 normalized.
    827 -------------------------------------------------------------------------------
    828 */
    829 static floatx80
    830  normalizeRoundAndPackFloatx80(
    831      int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
    832  )
    833 {
    834     int8 shiftCount;
    835 
    836     if ( zSig0 == 0 ) {
    837         zSig0 = zSig1;
    838         zSig1 = 0;
    839         zExp -= 64;
    840     }
    841     shiftCount = countLeadingZeros64( zSig0 );
    842     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    843     zExp -= shiftCount;
    844     return
    845         roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
    846 
    847 }
    848 
    849 #endif
    850 
    851 #ifdef FLOAT128
    852 
    853 /*
    854 -------------------------------------------------------------------------------
    855 Returns the least-significant 64 fraction bits of the quadruple-precision
    856 floating-point value `a'.
    857 -------------------------------------------------------------------------------
    858 */
    859 INLINE bits64 extractFloat128Frac1( float128 a )
    860 {
    861 
    862     return a.low;
    863 
    864 }
    865 
    866 /*
    867 -------------------------------------------------------------------------------
    868 Returns the most-significant 48 fraction bits of the quadruple-precision
    869 floating-point value `a'.
    870 -------------------------------------------------------------------------------
    871 */
    872 INLINE bits64 extractFloat128Frac0( float128 a )
    873 {
    874 
    875     return a.high & LIT64( 0x0000FFFFFFFFFFFF );
    876 
    877 }
    878 
    879 /*
    880 -------------------------------------------------------------------------------
    881 Returns the exponent bits of the quadruple-precision floating-point value
    882 `a'.
    883 -------------------------------------------------------------------------------
    884 */
    885 INLINE int32 extractFloat128Exp( float128 a )
    886 {
    887 
    888     return (int32)((a.high >> 48) & 0x7FFF);
    889 
    890 }
    891 
    892 /*
    893 -------------------------------------------------------------------------------
    894 Returns the sign bit of the quadruple-precision floating-point value `a'.
    895 -------------------------------------------------------------------------------
    896 */
    897 INLINE flag extractFloat128Sign( float128 a )
    898 {
    899 
    900     return (flag)(a.high >> 63);
    901 
    902 }
    903 
    904 /*
    905 -------------------------------------------------------------------------------
    906 Normalizes the subnormal quadruple-precision floating-point value
    907 represented by the denormalized significand formed by the concatenation of
    908 `aSig0' and `aSig1'.  The normalized exponent is stored at the location
    909 pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
    910 significand are stored at the location pointed to by `zSig0Ptr', and the
    911 least significant 64 bits of the normalized significand are stored at the
    912 location pointed to by `zSig1Ptr'.
    913 -------------------------------------------------------------------------------
    914 */
    915 static void
    916  normalizeFloat128Subnormal(
    917      bits64 aSig0,
    918      bits64 aSig1,
    919      int32 *zExpPtr,
    920      bits64 *zSig0Ptr,
    921      bits64 *zSig1Ptr
    922  )
    923 {
    924     int8 shiftCount;
    925 
    926     if ( aSig0 == 0 ) {
    927         shiftCount = countLeadingZeros64( aSig1 ) - 15;
    928         if ( shiftCount < 0 ) {
    929             *zSig0Ptr = aSig1>>( - shiftCount );
    930             *zSig1Ptr = aSig1<<( shiftCount & 63 );
    931         }
    932         else {
    933             *zSig0Ptr = aSig1<<shiftCount;
    934             *zSig1Ptr = 0;
    935         }
    936         *zExpPtr = - shiftCount - 63;
    937     }
    938     else {
    939         shiftCount = countLeadingZeros64( aSig0 ) - 15;
    940         shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
    941         *zExpPtr = 1 - shiftCount;
    942     }
    943 
    944 }
    945 
    946 /*
    947 -------------------------------------------------------------------------------
    948 Packs the sign `zSign', the exponent `zExp', and the significand formed
    949 by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
    950 floating-point value, returning the result.  After being shifted into the
    951 proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
    952 added together to form the most significant 32 bits of the result.  This
    953 means that any integer portion of `zSig0' will be added into the exponent.
    954 Since a properly normalized significand will have an integer portion equal
    955 to 1, the `zExp' input should be 1 less than the desired result exponent
    956 whenever `zSig0' and `zSig1' concatenated form a complete, normalized
    957 significand.
    958 -------------------------------------------------------------------------------
    959 */
    960 INLINE float128
    961  packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
    962 {
    963     float128 z;
    964 
    965     z.low = zSig1;
    966     z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
    967     return z;
    968 
    969 }
    970 
    971 /*
    972 -------------------------------------------------------------------------------
    973 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    974 and extended significand formed by the concatenation of `zSig0', `zSig1',
    975 and `zSig2', and returns the proper quadruple-precision floating-point value
    976 corresponding to the abstract input.  Ordinarily, the abstract value is
    977 simply rounded and packed into the quadruple-precision format, with the
    978 inexact exception raised if the abstract input cannot be represented
    979 exactly.  However, if the abstract value is too large, the overflow and
    980 inexact exceptions are raised and an infinity or maximal finite value is
    981 returned.  If the abstract value is too small, the input value is rounded to
    982 a subnormal number, and the underflow and inexact exceptions are raised if
    983 the abstract input cannot be represented exactly as a subnormal quadruple-
    984 precision floating-point number.
    985     The input significand must be normalized or smaller.  If the input
    986 significand is not normalized, `zExp' must be 0; in that case, the result
    987 returned is a subnormal number, and it must not require rounding.  In the
    988 usual case that the input significand is normalized, `zExp' must be 1 less
    989 than the ``true'' floating-point exponent.  The handling of underflow and
    990 overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
    991 -------------------------------------------------------------------------------
    992 */
    993 static float128
    994  roundAndPackFloat128(
    995      flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
    996 {
    997     int8 roundingMode;
    998     flag roundNearestEven, increment, isTiny;
    999 
   1000     roundingMode = float_rounding_mode;
   1001     roundNearestEven = ( roundingMode == float_round_nearest_even );
   1002     increment = ( (sbits64) zSig2 < 0 );
   1003     if ( ! roundNearestEven ) {
   1004         if ( roundingMode == float_round_to_zero ) {
   1005             increment = 0;
   1006         }
   1007         else {
   1008             if ( zSign ) {
   1009                 increment = ( roundingMode == float_round_down ) && zSig2;
   1010             }
   1011             else {
   1012                 increment = ( roundingMode == float_round_up ) && zSig2;
   1013             }
   1014         }
   1015     }
   1016     if ( 0x7FFD <= (bits32) zExp ) {
   1017         if (    ( 0x7FFD < zExp )
   1018              || (    ( zExp == 0x7FFD )
   1019                   && eq128(
   1020                          LIT64( 0x0001FFFFFFFFFFFF ),
   1021                          LIT64( 0xFFFFFFFFFFFFFFFF ),
   1022                          zSig0,
   1023                          zSig1
   1024                      )
   1025                   && increment
   1026                 )
   1027            ) {
   1028             float_raise( float_flag_overflow | float_flag_inexact );
   1029             if (    ( roundingMode == float_round_to_zero )
   1030                  || ( zSign && ( roundingMode == float_round_up ) )
   1031                  || ( ! zSign && ( roundingMode == float_round_down ) )
   1032                ) {
   1033                 return
   1034                     packFloat128(
   1035                         zSign,
   1036                         0x7FFE,
   1037                         LIT64( 0x0000FFFFFFFFFFFF ),
   1038                         LIT64( 0xFFFFFFFFFFFFFFFF )
   1039                     );
   1040             }
   1041             return packFloat128( zSign, 0x7FFF, 0, 0 );
   1042         }
   1043         if ( zExp < 0 ) {
   1044             isTiny =
   1045                    ( float_detect_tininess == float_tininess_before_rounding )
   1046                 || ( zExp < -1 )
   1047                 || ! increment
   1048                 || lt128(
   1049                        zSig0,
   1050                        zSig1,
   1051                        LIT64( 0x0001FFFFFFFFFFFF ),
   1052                        LIT64( 0xFFFFFFFFFFFFFFFF )
   1053                    );
   1054             shift128ExtraRightJamming(
   1055                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
   1056             zExp = 0;
   1057             if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
   1058             if ( roundNearestEven ) {
   1059                 increment = ( (sbits64) zSig2 < 0 );
   1060             }
   1061             else {
   1062                 if ( zSign ) {
   1063                     increment = ( roundingMode == float_round_down ) && zSig2;
   1064                 }
   1065                 else {
   1066                     increment = ( roundingMode == float_round_up ) && zSig2;
   1067                 }
   1068             }
   1069         }
   1070     }
   1071     if ( zSig2 ) set_float_exception_inexact_flag();
   1072     if ( increment ) {
   1073         add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
   1074         zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
   1075     }
   1076     else {
   1077         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
   1078     }
   1079     return packFloat128( zSign, zExp, zSig0, zSig1 );
   1080 
   1081 }
   1082 
   1083 /*
   1084 -------------------------------------------------------------------------------
   1085 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
   1086 and significand formed by the concatenation of `zSig0' and `zSig1', and
   1087 returns the proper quadruple-precision floating-point value corresponding
   1088 to the abstract input.  This routine is just like `roundAndPackFloat128'
   1089 except that the input significand has fewer bits and does not have to be
   1090 normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
   1091 point exponent.
   1092 -------------------------------------------------------------------------------
   1093 */
   1094 static float128
   1095  normalizeRoundAndPackFloat128(
   1096      flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
   1097 {
   1098     int8 shiftCount;
   1099     bits64 zSig2;
   1100 
   1101     if ( zSig0 == 0 ) {
   1102         zSig0 = zSig1;
   1103         zSig1 = 0;
   1104         zExp -= 64;
   1105     }
   1106     shiftCount = countLeadingZeros64( zSig0 ) - 15;
   1107     if ( 0 <= shiftCount ) {
   1108         zSig2 = 0;
   1109         shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
   1110     }
   1111     else {
   1112         shift128ExtraRightJamming(
   1113             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
   1114     }
   1115     zExp -= shiftCount;
   1116     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
   1117 
   1118 }
   1119 
   1120 #endif
   1121 
   1122 /*
   1123 -------------------------------------------------------------------------------
   1124 Returns the result of converting the 32-bit two's complement integer `a'
   1125 to the single-precision floating-point format.  The conversion is performed
   1126 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1127 -------------------------------------------------------------------------------
   1128 */
   1129 float32 int32_to_float32( int32 a )
   1130 {
   1131     flag zSign;
   1132 
   1133     if ( a == 0 ) return 0;
   1134     if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
   1135     zSign = ( a < 0 );
   1136     return normalizeRoundAndPackFloat32(zSign, 0x9C, (uint32)(zSign ? - a : a));
   1137 
   1138 }
   1139 
   1140 float32 uint32_to_float32( uint32 a )
   1141 {
   1142     if ( a == 0 ) return 0;
   1143     if ( a & (bits32) 0x80000000 )
   1144 	return normalizeRoundAndPackFloat32( 0, 0x9D, a >> 1 );
   1145     return normalizeRoundAndPackFloat32( 0, 0x9C, a );
   1146 }
   1147 
   1148 
   1149 /*
   1150 -------------------------------------------------------------------------------
   1151 Returns the result of converting the 32-bit two's complement integer `a'
   1152 to the double-precision floating-point format.  The conversion is performed
   1153 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1154 -------------------------------------------------------------------------------
   1155 */
   1156 float64 int32_to_float64( int32 a )
   1157 {
   1158     flag zSign;
   1159     uint32 absA;
   1160     int8 shiftCount;
   1161     bits64 zSig;
   1162 
   1163     if ( a == 0 ) return 0;
   1164     zSign = ( a < 0 );
   1165     absA = zSign ? - a : a;
   1166     shiftCount = countLeadingZeros32( absA ) + 21;
   1167     zSig = absA;
   1168     return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
   1169 
   1170 }
   1171 
   1172 float64 uint32_to_float64( uint32 a )
   1173 {
   1174     int8 shiftCount;
   1175     bits64 zSig = a;
   1176 
   1177     if ( a == 0 ) return 0;
   1178     shiftCount = countLeadingZeros32( a ) + 21;
   1179     return packFloat64( 0, 0x432 - shiftCount, zSig<<shiftCount );
   1180 
   1181 }
   1182 
   1183 #ifdef FLOATX80
   1184 
   1185 /*
   1186 -------------------------------------------------------------------------------
   1187 Returns the result of converting the 32-bit two's complement integer `a'
   1188 to the extended double-precision floating-point format.  The conversion
   1189 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   1190 Arithmetic.
   1191 -------------------------------------------------------------------------------
   1192 */
   1193 floatx80 int32_to_floatx80( int32 a )
   1194 {
   1195     flag zSign;
   1196     uint32 absA;
   1197     int8 shiftCount;
   1198     bits64 zSig;
   1199 
   1200     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
   1201     zSign = ( a < 0 );
   1202     absA = zSign ? - a : a;
   1203     shiftCount = countLeadingZeros32( absA ) + 32;
   1204     zSig = absA;
   1205     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
   1206 
   1207 }
   1208 
   1209 floatx80 uint32_to_floatx80( uint32 a )
   1210 {
   1211     int8 shiftCount;
   1212     bits64 zSig = a;
   1213 
   1214     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
   1215     shiftCount = countLeadingZeros32( a ) + 32;
   1216     return packFloatx80( 0, 0x403E - shiftCount, zSig<<shiftCount );
   1217 
   1218 }
   1219 
   1220 #endif
   1221 
   1222 #ifdef FLOAT128
   1223 
   1224 /*
   1225 -------------------------------------------------------------------------------
   1226 Returns the result of converting the 32-bit two's complement integer `a' to
   1227 the quadruple-precision floating-point format.  The conversion is performed
   1228 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1229 -------------------------------------------------------------------------------
   1230 */
   1231 float128 int32_to_float128( int32 a )
   1232 {
   1233     flag zSign;
   1234     uint32 absA;
   1235     int8 shiftCount;
   1236     bits64 zSig0;
   1237 
   1238     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
   1239     zSign = ( a < 0 );
   1240     absA = zSign ? - a : a;
   1241     shiftCount = countLeadingZeros32( absA ) + 17;
   1242     zSig0 = absA;
   1243     return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
   1244 
   1245 }
   1246 
   1247 float128 uint32_to_float128( uint32 a )
   1248 {
   1249     int8 shiftCount;
   1250     bits64 zSig0 = a;
   1251 
   1252     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
   1253     shiftCount = countLeadingZeros32( a ) + 17;
   1254     return packFloat128( 0, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
   1255 
   1256 }
   1257 
   1258 #endif
   1259 
   1260 #ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */
   1261 /*
   1262 -------------------------------------------------------------------------------
   1263 Returns the result of converting the 64-bit two's complement integer `a'
   1264 to the single-precision floating-point format.  The conversion is performed
   1265 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1266 -------------------------------------------------------------------------------
   1267 */
   1268 float32 int64_to_float32( int64 a )
   1269 {
   1270     flag zSign;
   1271     uint64 absA;
   1272     int8 shiftCount;
   1273 
   1274     if ( a == 0 ) return 0;
   1275     zSign = ( a < 0 );
   1276     absA = zSign ? - a : a;
   1277     shiftCount = countLeadingZeros64( absA ) - 40;
   1278     if ( 0 <= shiftCount ) {
   1279         return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
   1280     }
   1281     else {
   1282         shiftCount += 7;
   1283         if ( shiftCount < 0 ) {
   1284             shift64RightJamming( absA, - shiftCount, &absA );
   1285         }
   1286         else {
   1287             absA <<= shiftCount;
   1288         }
   1289         return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA );
   1290     }
   1291 
   1292 }
   1293 
   1294 /*
   1295 -------------------------------------------------------------------------------
   1296 Returns the result of converting the 64-bit two's complement integer `a'
   1297 to the double-precision floating-point format.  The conversion is performed
   1298 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1299 -------------------------------------------------------------------------------
   1300 */
   1301 float64 int64_to_float64( int64 a )
   1302 {
   1303     flag zSign;
   1304 
   1305     if ( a == 0 ) return 0;
   1306     if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
   1307         return packFloat64( 1, 0x43E, 0 );
   1308     }
   1309     zSign = ( a < 0 );
   1310     return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a );
   1311 
   1312 }
   1313 
   1314 #ifdef FLOATX80
   1315 
   1316 /*
   1317 -------------------------------------------------------------------------------
   1318 Returns the result of converting the 64-bit two's complement integer `a'
   1319 to the extended double-precision floating-point format.  The conversion
   1320 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   1321 Arithmetic.
   1322 -------------------------------------------------------------------------------
   1323 */
   1324 floatx80 int64_to_floatx80( int64 a )
   1325 {
   1326     flag zSign;
   1327     uint64 absA;
   1328     int8 shiftCount;
   1329 
   1330     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
   1331     zSign = ( a < 0 );
   1332     absA = zSign ? - a : a;
   1333     shiftCount = countLeadingZeros64( absA );
   1334     return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
   1335 
   1336 }
   1337 
   1338 #endif
   1339 
   1340 #endif /* !SOFTFLOAT_FOR_GCC */
   1341 
   1342 #ifdef FLOAT128
   1343 
   1344 /*
   1345 -------------------------------------------------------------------------------
   1346 Returns the result of converting the 64-bit two's complement integer `a' to
   1347 the quadruple-precision floating-point format.  The conversion is performed
   1348 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1349 -------------------------------------------------------------------------------
   1350 */
   1351 float128 int64_to_float128( int64 a )
   1352 {
   1353     flag zSign;
   1354     uint64 absA;
   1355     int8 shiftCount;
   1356     int32 zExp;
   1357     bits64 zSig0, zSig1;
   1358 
   1359     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
   1360     zSign = ( a < 0 );
   1361     absA = zSign ? - a : a;
   1362     shiftCount = countLeadingZeros64( absA ) + 49;
   1363     zExp = 0x406E - shiftCount;
   1364     if ( 64 <= shiftCount ) {
   1365         zSig1 = 0;
   1366         zSig0 = absA;
   1367         shiftCount -= 64;
   1368     }
   1369     else {
   1370         zSig1 = absA;
   1371         zSig0 = 0;
   1372     }
   1373     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
   1374     return packFloat128( zSign, zExp, zSig0, zSig1 );
   1375 
   1376 }
   1377 
   1378 #endif
   1379 
   1380 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   1381 /*
   1382 -------------------------------------------------------------------------------
   1383 Returns the result of converting the single-precision floating-point value
   1384 `a' to the 32-bit two's complement integer format.  The conversion is
   1385 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1386 Arithmetic---which means in particular that the conversion is rounded
   1387 according to the current rounding mode.  If `a' is a NaN, the largest
   1388 positive integer is returned.  Otherwise, if the conversion overflows, the
   1389 largest integer with the same sign as `a' is returned.
   1390 -------------------------------------------------------------------------------
   1391 */
   1392 int32 float32_to_int32( float32 a )
   1393 {
   1394     flag aSign;
   1395     int16 aExp, shiftCount;
   1396     bits32 aSig;
   1397     bits64 aSig64;
   1398 
   1399     aSig = extractFloat32Frac( a );
   1400     aExp = extractFloat32Exp( a );
   1401     aSign = extractFloat32Sign( a );
   1402     if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
   1403     if ( aExp ) aSig |= 0x00800000;
   1404     shiftCount = 0xAF - aExp;
   1405     aSig64 = aSig;
   1406     aSig64 <<= 32;
   1407     if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
   1408     return roundAndPackInt32( aSign, aSig64 );
   1409 
   1410 }
   1411 #endif /* !SOFTFLOAT_FOR_GCC */
   1412 
   1413 /*
   1414 -------------------------------------------------------------------------------
   1415 Returns the result of converting the single-precision floating-point value
   1416 `a' to the 32-bit two's complement integer format.  The conversion is
   1417 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1418 Arithmetic, except that the conversion is always rounded toward zero.
   1419 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
   1420 the conversion overflows, the largest integer with the same sign as `a' is
   1421 returned.
   1422 -------------------------------------------------------------------------------
   1423 */
   1424 int32 float32_to_int32_round_to_zero( float32 a )
   1425 {
   1426     flag aSign;
   1427     int16 aExp, shiftCount;
   1428     bits32 aSig;
   1429     int32 z;
   1430 
   1431     aSig = extractFloat32Frac( a );
   1432     aExp = extractFloat32Exp( a );
   1433     aSign = extractFloat32Sign( a );
   1434     shiftCount = aExp - 0x9E;
   1435     if ( 0 <= shiftCount ) {
   1436         if ( a != 0xCF000000 ) {
   1437             float_raise( float_flag_invalid );
   1438             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
   1439         }
   1440         return (sbits32) 0x80000000;
   1441     }
   1442     else if ( aExp <= 0x7E ) {
   1443         if ( aExp | aSig ) set_float_exception_inexact_flag();
   1444         return 0;
   1445     }
   1446     aSig = ( aSig | 0x00800000 )<<8;
   1447     z = aSig>>( - shiftCount );
   1448     if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
   1449         set_float_exception_inexact_flag();
   1450     }
   1451     if ( aSign ) z = - z;
   1452     return z;
   1453 
   1454 }
   1455 
   1456 #ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */
   1457 /*
   1458 -------------------------------------------------------------------------------
   1459 Returns the result of converting the single-precision floating-point value
   1460 `a' to the 64-bit two's complement integer format.  The conversion is
   1461 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1462 Arithmetic---which means in particular that the conversion is rounded
   1463 according to the current rounding mode.  If `a' is a NaN, the largest
   1464 positive integer is returned.  Otherwise, if the conversion overflows, the
   1465 largest integer with the same sign as `a' is returned.
   1466 -------------------------------------------------------------------------------
   1467 */
   1468 int64 float32_to_int64( float32 a )
   1469 {
   1470     flag aSign;
   1471     int16 aExp, shiftCount;
   1472     bits32 aSig;
   1473     bits64 aSig64, aSigExtra;
   1474 
   1475     aSig = extractFloat32Frac( a );
   1476     aExp = extractFloat32Exp( a );
   1477     aSign = extractFloat32Sign( a );
   1478     shiftCount = 0xBE - aExp;
   1479     if ( shiftCount < 0 ) {
   1480         float_raise( float_flag_invalid );
   1481         if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
   1482             return LIT64( 0x7FFFFFFFFFFFFFFF );
   1483         }
   1484         return (sbits64) LIT64( 0x8000000000000000 );
   1485     }
   1486     if ( aExp ) aSig |= 0x00800000;
   1487     aSig64 = aSig;
   1488     aSig64 <<= 40;
   1489     shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
   1490     return roundAndPackInt64( aSign, aSig64, aSigExtra );
   1491 
   1492 }
   1493 
   1494 /*
   1495 -------------------------------------------------------------------------------
   1496 Returns the result of converting the single-precision floating-point value
   1497 `a' to the 64-bit two's complement integer format.  The conversion is
   1498 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1499 Arithmetic, except that the conversion is always rounded toward zero.  If
   1500 `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
   1501 conversion overflows, the largest integer with the same sign as `a' is
   1502 returned.
   1503 -------------------------------------------------------------------------------
   1504 */
   1505 int64 float32_to_int64_round_to_zero( float32 a )
   1506 {
   1507     flag aSign;
   1508     int16 aExp, shiftCount;
   1509     bits32 aSig;
   1510     bits64 aSig64;
   1511     int64 z;
   1512 
   1513     aSig = extractFloat32Frac( a );
   1514     aExp = extractFloat32Exp( a );
   1515     aSign = extractFloat32Sign( a );
   1516     shiftCount = aExp - 0xBE;
   1517     if ( 0 <= shiftCount ) {
   1518         if ( a != 0xDF000000 ) {
   1519             float_raise( float_flag_invalid );
   1520             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
   1521                 return LIT64( 0x7FFFFFFFFFFFFFFF );
   1522             }
   1523         }
   1524         return (sbits64) LIT64( 0x8000000000000000 );
   1525     }
   1526     else if ( aExp <= 0x7E ) {
   1527         if ( aExp | aSig ) set_float_exception_inexact_flag();
   1528         return 0;
   1529     }
   1530     aSig64 = aSig | 0x00800000;
   1531     aSig64 <<= 40;
   1532     z = aSig64>>( - shiftCount );
   1533     if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
   1534         set_float_exception_inexact_flag();
   1535     }
   1536     if ( aSign ) z = - z;
   1537     return z;
   1538 
   1539 }
   1540 #endif /* !SOFTFLOAT_FOR_GCC */
   1541 
   1542 /*
   1543 -------------------------------------------------------------------------------
   1544 Returns the result of converting the single-precision floating-point value
   1545 `a' to the double-precision floating-point format.  The conversion is
   1546 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1547 Arithmetic.
   1548 -------------------------------------------------------------------------------
   1549 */
   1550 float64 float32_to_float64( float32 a )
   1551 {
   1552     flag aSign;
   1553     int16 aExp;
   1554     bits32 aSig;
   1555 
   1556     aSig = extractFloat32Frac( a );
   1557     aExp = extractFloat32Exp( a );
   1558     aSign = extractFloat32Sign( a );
   1559     if ( aExp == 0xFF ) {
   1560         if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
   1561         return packFloat64( aSign, 0x7FF, 0 );
   1562     }
   1563     if ( aExp == 0 ) {
   1564         if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
   1565         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1566         --aExp;
   1567     }
   1568     return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
   1569 
   1570 }
   1571 
   1572 #ifdef FLOATX80
   1573 
   1574 /*
   1575 -------------------------------------------------------------------------------
   1576 Returns the result of converting the single-precision floating-point value
   1577 `a' to the extended double-precision floating-point format.  The conversion
   1578 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   1579 Arithmetic.
   1580 -------------------------------------------------------------------------------
   1581 */
   1582 floatx80 float32_to_floatx80( float32 a )
   1583 {
   1584     flag aSign;
   1585     int16 aExp;
   1586     bits32 aSig;
   1587 
   1588     aSig = extractFloat32Frac( a );
   1589     aExp = extractFloat32Exp( a );
   1590     aSign = extractFloat32Sign( a );
   1591     if ( aExp == 0xFF ) {
   1592         if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
   1593         return packFloatx80( aSign, 0x7FFF, 0 );
   1594     }
   1595     if ( aExp == 0 ) {
   1596         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
   1597         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1598     }
   1599     aSig |= 0x00800000;
   1600     return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
   1601 
   1602 }
   1603 
   1604 #endif
   1605 
   1606 #ifdef FLOAT128
   1607 
   1608 /*
   1609 -------------------------------------------------------------------------------
   1610 Returns the result of converting the single-precision floating-point value
   1611 `a' to the double-precision floating-point format.  The conversion is
   1612 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1613 Arithmetic.
   1614 -------------------------------------------------------------------------------
   1615 */
   1616 float128 float32_to_float128( float32 a )
   1617 {
   1618     flag aSign;
   1619     int16 aExp;
   1620     bits32 aSig;
   1621 
   1622     aSig = extractFloat32Frac( a );
   1623     aExp = extractFloat32Exp( a );
   1624     aSign = extractFloat32Sign( a );
   1625     if ( aExp == 0xFF ) {
   1626         if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) );
   1627         return packFloat128( aSign, 0x7FFF, 0, 0 );
   1628     }
   1629     if ( aExp == 0 ) {
   1630         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
   1631         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1632         --aExp;
   1633     }
   1634     return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
   1635 
   1636 }
   1637 
   1638 #endif
   1639 
   1640 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   1641 /*
   1642 -------------------------------------------------------------------------------
   1643 Rounds the single-precision floating-point value `a' to an integer, and
   1644 returns the result as a single-precision floating-point value.  The
   1645 operation is performed according to the IEC/IEEE Standard for Binary
   1646 Floating-Point Arithmetic.
   1647 -------------------------------------------------------------------------------
   1648 */
   1649 float32 float32_round_to_int( float32 a )
   1650 {
   1651     flag aSign;
   1652     int16 aExp;
   1653     bits32 lastBitMask, roundBitsMask;
   1654     int8 roundingMode;
   1655     float32 z;
   1656 
   1657     aExp = extractFloat32Exp( a );
   1658     if ( 0x96 <= aExp ) {
   1659         if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
   1660             return propagateFloat32NaN( a, a );
   1661         }
   1662         return a;
   1663     }
   1664     if ( aExp <= 0x7E ) {
   1665         if ( (bits32) ( a<<1 ) == 0 ) return a;
   1666         set_float_exception_inexact_flag();
   1667         aSign = extractFloat32Sign( a );
   1668         switch ( float_rounding_mode ) {
   1669          case float_round_nearest_even:
   1670             if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
   1671                 return packFloat32( aSign, 0x7F, 0 );
   1672             }
   1673             break;
   1674 	 case float_round_to_zero:
   1675 	    break;
   1676          case float_round_down:
   1677             return aSign ? 0xBF800000 : 0;
   1678          case float_round_up:
   1679             return aSign ? 0x80000000 : 0x3F800000;
   1680         }
   1681         return packFloat32( aSign, 0, 0 );
   1682     }
   1683     lastBitMask = 1;
   1684     lastBitMask <<= 0x96 - aExp;
   1685     roundBitsMask = lastBitMask - 1;
   1686     z = a;
   1687     roundingMode = float_rounding_mode;
   1688     if ( roundingMode == float_round_nearest_even ) {
   1689         z += lastBitMask>>1;
   1690         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
   1691     }
   1692     else if ( roundingMode != float_round_to_zero ) {
   1693         if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
   1694             z += roundBitsMask;
   1695         }
   1696     }
   1697     z &= ~ roundBitsMask;
   1698     if ( z != a ) set_float_exception_inexact_flag();
   1699     return z;
   1700 
   1701 }
   1702 #endif /* !SOFTFLOAT_FOR_GCC */
   1703 
   1704 /*
   1705 -------------------------------------------------------------------------------
   1706 Returns the result of adding the absolute values of the single-precision
   1707 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
   1708 before being returned.  `zSign' is ignored if the result is a NaN.
   1709 The addition is performed according to the IEC/IEEE Standard for Binary
   1710 Floating-Point Arithmetic.
   1711 -------------------------------------------------------------------------------
   1712 */
   1713 static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
   1714 {
   1715     int16 aExp, bExp, zExp;
   1716     bits32 aSig, bSig, zSig;
   1717     int16 expDiff;
   1718 
   1719     aSig = extractFloat32Frac( a );
   1720     aExp = extractFloat32Exp( a );
   1721     bSig = extractFloat32Frac( b );
   1722     bExp = extractFloat32Exp( b );
   1723     expDiff = aExp - bExp;
   1724     aSig <<= 6;
   1725     bSig <<= 6;
   1726     if ( 0 < expDiff ) {
   1727         if ( aExp == 0xFF ) {
   1728             if ( aSig ) return propagateFloat32NaN( a, b );
   1729             return a;
   1730         }
   1731         if ( bExp == 0 ) {
   1732             --expDiff;
   1733         }
   1734         else {
   1735             bSig |= 0x20000000;
   1736         }
   1737         shift32RightJamming( bSig, expDiff, &bSig );
   1738         zExp = aExp;
   1739     }
   1740     else if ( expDiff < 0 ) {
   1741         if ( bExp == 0xFF ) {
   1742             if ( bSig ) return propagateFloat32NaN( a, b );
   1743             return packFloat32( zSign, 0xFF, 0 );
   1744         }
   1745         if ( aExp == 0 ) {
   1746             ++expDiff;
   1747         }
   1748         else {
   1749             aSig |= 0x20000000;
   1750         }
   1751         shift32RightJamming( aSig, - expDiff, &aSig );
   1752         zExp = bExp;
   1753     }
   1754     else {
   1755         if ( aExp == 0xFF ) {
   1756             if ( aSig | bSig ) return propagateFloat32NaN( a, b );
   1757             return a;
   1758         }
   1759         if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
   1760         zSig = 0x40000000 + aSig + bSig;
   1761         zExp = aExp;
   1762         goto roundAndPack;
   1763     }
   1764     aSig |= 0x20000000;
   1765     zSig = ( aSig + bSig )<<1;
   1766     --zExp;
   1767     if ( (sbits32) zSig < 0 ) {
   1768         zSig = aSig + bSig;
   1769         ++zExp;
   1770     }
   1771  roundAndPack:
   1772     return roundAndPackFloat32( zSign, zExp, zSig );
   1773 
   1774 }
   1775 
   1776 /*
   1777 -------------------------------------------------------------------------------
   1778 Returns the result of subtracting the absolute values of the single-
   1779 precision floating-point values `a' and `b'.  If `zSign' is 1, the
   1780 difference is negated before being returned.  `zSign' is ignored if the
   1781 result is a NaN.  The subtraction is performed according to the IEC/IEEE
   1782 Standard for Binary Floating-Point Arithmetic.
   1783 -------------------------------------------------------------------------------
   1784 */
   1785 static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
   1786 {
   1787     int16 aExp, bExp, zExp;
   1788     bits32 aSig, bSig, zSig;
   1789     int16 expDiff;
   1790 
   1791     aSig = extractFloat32Frac( a );
   1792     aExp = extractFloat32Exp( a );
   1793     bSig = extractFloat32Frac( b );
   1794     bExp = extractFloat32Exp( b );
   1795     expDiff = aExp - bExp;
   1796     aSig <<= 7;
   1797     bSig <<= 7;
   1798     if ( 0 < expDiff ) goto aExpBigger;
   1799     if ( expDiff < 0 ) goto bExpBigger;
   1800     if ( aExp == 0xFF ) {
   1801         if ( aSig | bSig ) return propagateFloat32NaN( a, b );
   1802         float_raise( float_flag_invalid );
   1803         return float32_default_nan;
   1804     }
   1805     if ( aExp == 0 ) {
   1806         aExp = 1;
   1807         bExp = 1;
   1808     }
   1809     if ( bSig < aSig ) goto aBigger;
   1810     if ( aSig < bSig ) goto bBigger;
   1811     return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
   1812  bExpBigger:
   1813     if ( bExp == 0xFF ) {
   1814         if ( bSig ) return propagateFloat32NaN( a, b );
   1815         return packFloat32( zSign ^ 1, 0xFF, 0 );
   1816     }
   1817     if ( aExp == 0 ) {
   1818         ++expDiff;
   1819     }
   1820     else {
   1821         aSig |= 0x40000000;
   1822     }
   1823     shift32RightJamming( aSig, - expDiff, &aSig );
   1824     bSig |= 0x40000000;
   1825  bBigger:
   1826     zSig = bSig - aSig;
   1827     zExp = bExp;
   1828     zSign ^= 1;
   1829     goto normalizeRoundAndPack;
   1830  aExpBigger:
   1831     if ( aExp == 0xFF ) {
   1832         if ( aSig ) return propagateFloat32NaN( a, b );
   1833         return a;
   1834     }
   1835     if ( bExp == 0 ) {
   1836         --expDiff;
   1837     }
   1838     else {
   1839         bSig |= 0x40000000;
   1840     }
   1841     shift32RightJamming( bSig, expDiff, &bSig );
   1842     aSig |= 0x40000000;
   1843  aBigger:
   1844     zSig = aSig - bSig;
   1845     zExp = aExp;
   1846  normalizeRoundAndPack:
   1847     --zExp;
   1848     return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
   1849 
   1850 }
   1851 
   1852 /*
   1853 -------------------------------------------------------------------------------
   1854 Returns the result of adding the single-precision floating-point values `a'
   1855 and `b'.  The operation is performed according to the IEC/IEEE Standard for
   1856 Binary Floating-Point Arithmetic.
   1857 -------------------------------------------------------------------------------
   1858 */
   1859 float32 float32_add( float32 a, float32 b )
   1860 {
   1861     flag aSign, bSign;
   1862 
   1863     aSign = extractFloat32Sign( a );
   1864     bSign = extractFloat32Sign( b );
   1865     if ( aSign == bSign ) {
   1866         return addFloat32Sigs( a, b, aSign );
   1867     }
   1868     else {
   1869         return subFloat32Sigs( a, b, aSign );
   1870     }
   1871 
   1872 }
   1873 
   1874 /*
   1875 -------------------------------------------------------------------------------
   1876 Returns the result of subtracting the single-precision floating-point values
   1877 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   1878 for Binary Floating-Point Arithmetic.
   1879 -------------------------------------------------------------------------------
   1880 */
   1881 float32 float32_sub( float32 a, float32 b )
   1882 {
   1883     flag aSign, bSign;
   1884 
   1885     aSign = extractFloat32Sign( a );
   1886     bSign = extractFloat32Sign( b );
   1887     if ( aSign == bSign ) {
   1888         return subFloat32Sigs( a, b, aSign );
   1889     }
   1890     else {
   1891         return addFloat32Sigs( a, b, aSign );
   1892     }
   1893 
   1894 }
   1895 
   1896 /*
   1897 -------------------------------------------------------------------------------
   1898 Returns the result of multiplying the single-precision floating-point values
   1899 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   1900 for Binary Floating-Point Arithmetic.
   1901 -------------------------------------------------------------------------------
   1902 */
   1903 float32 float32_mul( float32 a, float32 b )
   1904 {
   1905     flag aSign, bSign, zSign;
   1906     int16 aExp, bExp, zExp;
   1907     bits32 aSig, bSig;
   1908     bits64 zSig64;
   1909     bits32 zSig;
   1910 
   1911     aSig = extractFloat32Frac( a );
   1912     aExp = extractFloat32Exp( a );
   1913     aSign = extractFloat32Sign( a );
   1914     bSig = extractFloat32Frac( b );
   1915     bExp = extractFloat32Exp( b );
   1916     bSign = extractFloat32Sign( b );
   1917     zSign = aSign ^ bSign;
   1918     if ( aExp == 0xFF ) {
   1919         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
   1920             return propagateFloat32NaN( a, b );
   1921         }
   1922         if ( ( bExp | bSig ) == 0 ) {
   1923             float_raise( float_flag_invalid );
   1924             return float32_default_nan;
   1925         }
   1926         return packFloat32( zSign, 0xFF, 0 );
   1927     }
   1928     if ( bExp == 0xFF ) {
   1929         if ( bSig ) return propagateFloat32NaN( a, b );
   1930         if ( ( aExp | aSig ) == 0 ) {
   1931             float_raise( float_flag_invalid );
   1932             return float32_default_nan;
   1933         }
   1934         return packFloat32( zSign, 0xFF, 0 );
   1935     }
   1936     if ( aExp == 0 ) {
   1937         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
   1938         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1939     }
   1940     if ( bExp == 0 ) {
   1941         if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
   1942         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
   1943     }
   1944     zExp = aExp + bExp - 0x7F;
   1945     aSig = ( aSig | 0x00800000 )<<7;
   1946     bSig = ( bSig | 0x00800000 )<<8;
   1947     shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
   1948     zSig = (bits32)zSig64;
   1949     if ( 0 <= (sbits32) ( zSig<<1 ) ) {
   1950         zSig <<= 1;
   1951         --zExp;
   1952     }
   1953     return roundAndPackFloat32( zSign, zExp, zSig );
   1954 
   1955 }
   1956 
   1957 /*
   1958 -------------------------------------------------------------------------------
   1959 Returns the result of dividing the single-precision floating-point value `a'
   1960 by the corresponding value `b'.  The operation is performed according to the
   1961 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1962 -------------------------------------------------------------------------------
   1963 */
   1964 float32 float32_div( float32 a, float32 b )
   1965 {
   1966     flag aSign, bSign, zSign;
   1967     int16 aExp, bExp, zExp;
   1968     bits32 aSig, bSig, zSig;
   1969 
   1970     aSig = extractFloat32Frac( a );
   1971     aExp = extractFloat32Exp( a );
   1972     aSign = extractFloat32Sign( a );
   1973     bSig = extractFloat32Frac( b );
   1974     bExp = extractFloat32Exp( b );
   1975     bSign = extractFloat32Sign( b );
   1976     zSign = aSign ^ bSign;
   1977     if ( aExp == 0xFF ) {
   1978         if ( aSig ) return propagateFloat32NaN( a, b );
   1979         if ( bExp == 0xFF ) {
   1980             if ( bSig ) return propagateFloat32NaN( a, b );
   1981             float_raise( float_flag_invalid );
   1982             return float32_default_nan;
   1983         }
   1984         return packFloat32( zSign, 0xFF, 0 );
   1985     }
   1986     if ( bExp == 0xFF ) {
   1987         if ( bSig ) return propagateFloat32NaN( a, b );
   1988         return packFloat32( zSign, 0, 0 );
   1989     }
   1990     if ( bExp == 0 ) {
   1991         if ( bSig == 0 ) {
   1992             if ( ( aExp | aSig ) == 0 ) {
   1993                 float_raise( float_flag_invalid );
   1994                 return float32_default_nan;
   1995             }
   1996             float_raise( float_flag_divbyzero );
   1997             return packFloat32( zSign, 0xFF, 0 );
   1998         }
   1999         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
   2000     }
   2001     if ( aExp == 0 ) {
   2002         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
   2003         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   2004     }
   2005     zExp = aExp - bExp + 0x7D;
   2006     aSig = ( aSig | 0x00800000 )<<7;
   2007     bSig = ( bSig | 0x00800000 )<<8;
   2008     if ( bSig <= ( aSig + aSig ) ) {
   2009         aSig >>= 1;
   2010         ++zExp;
   2011     }
   2012     zSig = (bits32)((((bits64) aSig) << 32) / bSig);
   2013     if ( ( zSig & 0x3F ) == 0 ) {
   2014         zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
   2015     }
   2016     return roundAndPackFloat32( zSign, zExp, zSig );
   2017 
   2018 }
   2019 
   2020 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   2021 /*
   2022 -------------------------------------------------------------------------------
   2023 Returns the remainder of the single-precision floating-point value `a'
   2024 with respect to the corresponding value `b'.  The operation is performed
   2025 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2026 -------------------------------------------------------------------------------
   2027 */
   2028 float32 float32_rem( float32 a, float32 b )
   2029 {
   2030     flag aSign, bSign, zSign;
   2031     int16 aExp, bExp, expDiff;
   2032     bits32 aSig, bSig;
   2033     bits32 q;
   2034     bits64 aSig64, bSig64, q64;
   2035     bits32 alternateASig;
   2036     sbits32 sigMean;
   2037 
   2038     aSig = extractFloat32Frac( a );
   2039     aExp = extractFloat32Exp( a );
   2040     aSign = extractFloat32Sign( a );
   2041     bSig = extractFloat32Frac( b );
   2042     bExp = extractFloat32Exp( b );
   2043     bSign = extractFloat32Sign( b );
   2044     if ( aExp == 0xFF ) {
   2045         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
   2046             return propagateFloat32NaN( a, b );
   2047         }
   2048         float_raise( float_flag_invalid );
   2049         return float32_default_nan;
   2050     }
   2051     if ( bExp == 0xFF ) {
   2052         if ( bSig ) return propagateFloat32NaN( a, b );
   2053         return a;
   2054     }
   2055     if ( bExp == 0 ) {
   2056         if ( bSig == 0 ) {
   2057             float_raise( float_flag_invalid );
   2058             return float32_default_nan;
   2059         }
   2060         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
   2061     }
   2062     if ( aExp == 0 ) {
   2063         if ( aSig == 0 ) return a;
   2064         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   2065     }
   2066     expDiff = aExp - bExp;
   2067     aSig |= 0x00800000;
   2068     bSig |= 0x00800000;
   2069     if ( expDiff < 32 ) {
   2070         aSig <<= 8;
   2071         bSig <<= 8;
   2072         if ( expDiff < 0 ) {
   2073             if ( expDiff < -1 ) return a;
   2074             aSig >>= 1;
   2075         }
   2076         q = ( bSig <= aSig );
   2077         if ( q ) aSig -= bSig;
   2078         if ( 0 < expDiff ) {
   2079             q = ( ( (bits64) aSig )<<32 ) / bSig;
   2080             q >>= 32 - expDiff;
   2081             bSig >>= 2;
   2082             aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
   2083         }
   2084         else {
   2085             aSig >>= 2;
   2086             bSig >>= 2;
   2087         }
   2088     }
   2089     else {
   2090         if ( bSig <= aSig ) aSig -= bSig;
   2091         aSig64 = ( (bits64) aSig )<<40;
   2092         bSig64 = ( (bits64) bSig )<<40;
   2093         expDiff -= 64;
   2094         while ( 0 < expDiff ) {
   2095             q64 = estimateDiv128To64( aSig64, 0, bSig64 );
   2096             q64 = ( 2 < q64 ) ? q64 - 2 : 0;
   2097             aSig64 = - ( ( bSig * q64 )<<38 );
   2098             expDiff -= 62;
   2099         }
   2100         expDiff += 64;
   2101         q64 = estimateDiv128To64( aSig64, 0, bSig64 );
   2102         q64 = ( 2 < q64 ) ? q64 - 2 : 0;
   2103         q = q64>>( 64 - expDiff );
   2104         bSig <<= 6;
   2105         aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
   2106     }
   2107     do {
   2108         alternateASig = aSig;
   2109         ++q;
   2110         aSig -= bSig;
   2111     } while ( 0 <= (sbits32) aSig );
   2112     sigMean = aSig + alternateASig;
   2113     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
   2114         aSig = alternateASig;
   2115     }
   2116     zSign = ( (sbits32) aSig < 0 );
   2117     if ( zSign ) aSig = - aSig;
   2118     return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
   2119 
   2120 }
   2121 #endif /* !SOFTFLOAT_FOR_GCC */
   2122 
   2123 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   2124 /*
   2125 -------------------------------------------------------------------------------
   2126 Returns the square root of the single-precision floating-point value `a'.
   2127 The operation is performed according to the IEC/IEEE Standard for Binary
   2128 Floating-Point Arithmetic.
   2129 -------------------------------------------------------------------------------
   2130 */
   2131 float32 float32_sqrt( float32 a )
   2132 {
   2133     flag aSign;
   2134     int16 aExp, zExp;
   2135     bits32 aSig, zSig;
   2136     bits64 rem, term;
   2137 
   2138     aSig = extractFloat32Frac( a );
   2139     aExp = extractFloat32Exp( a );
   2140     aSign = extractFloat32Sign( a );
   2141     if ( aExp == 0xFF ) {
   2142         if ( aSig ) return propagateFloat32NaN( a, 0 );
   2143         if ( ! aSign ) return a;
   2144         float_raise( float_flag_invalid );
   2145         return float32_default_nan;
   2146     }
   2147     if ( aSign ) {
   2148         if ( ( aExp | aSig ) == 0 ) return a;
   2149         float_raise( float_flag_invalid );
   2150         return float32_default_nan;
   2151     }
   2152     if ( aExp == 0 ) {
   2153         if ( aSig == 0 ) return 0;
   2154         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   2155     }
   2156     zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
   2157     aSig = ( aSig | 0x00800000 )<<8;
   2158     zSig = estimateSqrt32( aExp, aSig ) + 2;
   2159     if ( ( zSig & 0x7F ) <= 5 ) {
   2160         if ( zSig < 2 ) {
   2161             zSig = 0x7FFFFFFF;
   2162             goto roundAndPack;
   2163         }
   2164         aSig >>= aExp & 1;
   2165         term = ( (bits64) zSig ) * zSig;
   2166         rem = ( ( (bits64) aSig )<<32 ) - term;
   2167         while ( (sbits64) rem < 0 ) {
   2168             --zSig;
   2169             rem += ( ( (bits64) zSig )<<1 ) | 1;
   2170         }
   2171         zSig |= ( rem != 0 );
   2172     }
   2173     shift32RightJamming( zSig, 1, &zSig );
   2174  roundAndPack:
   2175     return roundAndPackFloat32( 0, zExp, zSig );
   2176 
   2177 }
   2178 #endif /* !SOFTFLOAT_FOR_GCC */
   2179 
   2180 /*
   2181 -------------------------------------------------------------------------------
   2182 Returns 1 if the single-precision floating-point value `a' is equal to
   2183 the corresponding value `b', and 0 otherwise.  The comparison is performed
   2184 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2185 -------------------------------------------------------------------------------
   2186 */
   2187 flag float32_eq( float32 a, float32 b )
   2188 {
   2189 
   2190     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2191          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2192        ) {
   2193         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
   2194             float_raise( float_flag_invalid );
   2195         }
   2196         return 0;
   2197     }
   2198     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
   2199 
   2200 }
   2201 
   2202 /*
   2203 -------------------------------------------------------------------------------
   2204 Returns 1 if the single-precision floating-point value `a' is less than
   2205 or equal to the corresponding value `b', and 0 otherwise.  The comparison
   2206 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   2207 Arithmetic.
   2208 -------------------------------------------------------------------------------
   2209 */
   2210 flag float32_le( float32 a, float32 b )
   2211 {
   2212     flag aSign, bSign;
   2213 
   2214     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2215          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2216        ) {
   2217         float_raise( float_flag_invalid );
   2218         return 0;
   2219     }
   2220     aSign = extractFloat32Sign( a );
   2221     bSign = extractFloat32Sign( b );
   2222     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
   2223     return ( a == b ) || ( aSign ^ ( a < b ) );
   2224 
   2225 }
   2226 
   2227 /*
   2228 -------------------------------------------------------------------------------
   2229 Returns 1 if the single-precision floating-point value `a' is less than
   2230 the corresponding value `b', and 0 otherwise.  The comparison is performed
   2231 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2232 -------------------------------------------------------------------------------
   2233 */
   2234 flag float32_lt( float32 a, float32 b )
   2235 {
   2236     flag aSign, bSign;
   2237 
   2238     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2239          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2240        ) {
   2241         float_raise( float_flag_invalid );
   2242         return 0;
   2243     }
   2244     aSign = extractFloat32Sign( a );
   2245     bSign = extractFloat32Sign( b );
   2246     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
   2247     return ( a != b ) && ( aSign ^ ( a < b ) );
   2248 
   2249 }
   2250 
   2251 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   2252 /*
   2253 -------------------------------------------------------------------------------
   2254 Returns 1 if the single-precision floating-point value `a' is equal to
   2255 the corresponding value `b', and 0 otherwise.  The invalid exception is
   2256 raised if either operand is a NaN.  Otherwise, the comparison is performed
   2257 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2258 -------------------------------------------------------------------------------
   2259 */
   2260 flag float32_eq_signaling( float32 a, float32 b )
   2261 {
   2262 
   2263     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2264          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2265        ) {
   2266         float_raise( float_flag_invalid );
   2267         return 0;
   2268     }
   2269     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
   2270 
   2271 }
   2272 
   2273 /*
   2274 -------------------------------------------------------------------------------
   2275 Returns 1 if the single-precision floating-point value `a' is less than or
   2276 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
   2277 cause an exception.  Otherwise, the comparison is performed according to the
   2278 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2279 -------------------------------------------------------------------------------
   2280 */
   2281 flag float32_le_quiet( float32 a, float32 b )
   2282 {
   2283     flag aSign, bSign;
   2284 
   2285     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2286          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2287        ) {
   2288         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
   2289             float_raise( float_flag_invalid );
   2290         }
   2291         return 0;
   2292     }
   2293     aSign = extractFloat32Sign( a );
   2294     bSign = extractFloat32Sign( b );
   2295     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
   2296     return ( a == b ) || ( aSign ^ ( a < b ) );
   2297 
   2298 }
   2299 
   2300 /*
   2301 -------------------------------------------------------------------------------
   2302 Returns 1 if the single-precision floating-point value `a' is less than
   2303 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
   2304 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
   2305 Standard for Binary Floating-Point Arithmetic.
   2306 -------------------------------------------------------------------------------
   2307 */
   2308 flag float32_lt_quiet( float32 a, float32 b )
   2309 {
   2310     flag aSign, bSign;
   2311 
   2312     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2313          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2314        ) {
   2315         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
   2316             float_raise( float_flag_invalid );
   2317         }
   2318         return 0;
   2319     }
   2320     aSign = extractFloat32Sign( a );
   2321     bSign = extractFloat32Sign( b );
   2322     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
   2323     return ( a != b ) && ( aSign ^ ( a < b ) );
   2324 
   2325 }
   2326 #endif /* !SOFTFLOAT_FOR_GCC */
   2327 
   2328 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   2329 /*
   2330 -------------------------------------------------------------------------------
   2331 Returns the result of converting the double-precision floating-point value
   2332 `a' to the 32-bit two's complement integer format.  The conversion is
   2333 performed according to the IEC/IEEE Standard for Binary Floating-Point
   2334 Arithmetic---which means in particular that the conversion is rounded
   2335 according to the current rounding mode.  If `a' is a NaN, the largest
   2336 positive integer is returned.  Otherwise, if the conversion overflows, the
   2337 largest integer with the same sign as `a' is returned.
   2338 -------------------------------------------------------------------------------
   2339 */
   2340 int32 float64_to_int32( float64 a )
   2341 {
   2342     flag aSign;
   2343     int16 aExp, shiftCount;
   2344     bits64 aSig;
   2345 
   2346     aSig = extractFloat64Frac( a );
   2347     aExp = extractFloat64Exp( a );
   2348     aSign = extractFloat64Sign( a );
   2349     if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
   2350     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
   2351     shiftCount = 0x42C - aExp;
   2352     if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
   2353     return roundAndPackInt32( aSign, aSig );
   2354 
   2355 }
   2356 #endif /* !SOFTFLOAT_FOR_GCC */
   2357 
   2358 /*
   2359 -------------------------------------------------------------------------------
   2360 Returns the result of converting the double-precision floating-point value
   2361 `a' to the 32-bit two's complement integer format.  The conversion is
   2362 performed according to the IEC/IEEE Standard for Binary Floating-Point
   2363 Arithmetic, except that the conversion is always rounded toward zero.
   2364 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
   2365 the conversion overflows, the largest integer with the same sign as `a' is
   2366 returned.
   2367 -------------------------------------------------------------------------------
   2368 */
   2369 int32 float64_to_int32_round_to_zero( float64 a )
   2370 {
   2371     flag aSign;
   2372     int16 aExp, shiftCount;
   2373     bits64 aSig, savedASig;
   2374     int32 z;
   2375 
   2376     aSig = extractFloat64Frac( a );
   2377     aExp = extractFloat64Exp( a );
   2378     aSign = extractFloat64Sign( a );
   2379     if ( 0x41E < aExp ) {
   2380         if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
   2381         goto invalid;
   2382     }
   2383     else if ( aExp < 0x3FF ) {
   2384         if ( aExp || aSig ) set_float_exception_inexact_flag();
   2385         return 0;
   2386     }
   2387     aSig |= LIT64( 0x0010000000000000 );
   2388     shiftCount = 0x433 - aExp;
   2389     savedASig = aSig;
   2390     aSig >>= shiftCount;
   2391     z = (int32)aSig;
   2392     if ( aSign ) z = - z;
   2393     if ( ( z < 0 ) ^ aSign ) {
   2394  invalid:
   2395         float_raise( float_flag_invalid );
   2396         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
   2397     }
   2398     if ( ( aSig<<shiftCount ) != savedASig ) {
   2399         set_float_exception_inexact_flag();
   2400     }
   2401     return z;
   2402 
   2403 }
   2404 
   2405 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   2406 /*
   2407 -------------------------------------------------------------------------------
   2408 Returns the result of converting the double-precision floating-point value
   2409 `a' to the 64-bit two's complement integer format.  The conversion is
   2410 performed according to the IEC/IEEE Standard for Binary Floating-Point
   2411 Arithmetic---which means in particular that the conversion is rounded
   2412 according to the current rounding mode.  If `a' is a NaN, the largest
   2413 positive integer is returned.  Otherwise, if the conversion overflows, the
   2414 largest integer with the same sign as `a' is returned.
   2415 -------------------------------------------------------------------------------
   2416 */
   2417 int64 float64_to_int64( float64 a )
   2418 {
   2419     flag aSign;
   2420     int16 aExp, shiftCount;
   2421     bits64 aSig, aSigExtra;
   2422 
   2423     aSig = extractFloat64Frac( a );
   2424     aExp = extractFloat64Exp( a );
   2425     aSign = extractFloat64Sign( a );
   2426     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
   2427     shiftCount = 0x433 - aExp;
   2428     if ( shiftCount <= 0 ) {
   2429         if ( 0x43E < aExp ) {
   2430             float_raise( float_flag_invalid );
   2431             if (    ! aSign
   2432                  || (    ( aExp == 0x7FF )
   2433                       && ( aSig != LIT64( 0x0010000000000000 ) ) )
   2434                ) {
   2435                 return LIT64( 0x7FFFFFFFFFFFFFFF );
   2436             }
   2437             return (sbits64) LIT64( 0x8000000000000000 );
   2438         }
   2439         aSigExtra = 0;
   2440         aSig <<= - shiftCount;
   2441     }
   2442     else {
   2443         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
   2444     }
   2445     return roundAndPackInt64( aSign, aSig, aSigExtra );
   2446 
   2447 }
   2448 
   2449 /*
   2450 -------------------------------------------------------------------------------
   2451 Returns the result of converting the double-precision floating-point value
   2452 `a' to the 64-bit two's complement integer format.  The conversion is
   2453 performed according to the IEC/IEEE Standard for Binary Floating-Point
   2454 Arithmetic, except that the conversion is always rounded toward zero.
   2455 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
   2456 the conversion overflows, the largest integer with the same sign as `a' is
   2457 returned.
   2458 -------------------------------------------------------------------------------
   2459 */
   2460 int64 float64_to_int64_round_to_zero( float64 a )
   2461 {
   2462     flag aSign;
   2463     int16 aExp, shiftCount;
   2464     bits64 aSig;
   2465     int64 z;
   2466 
   2467     aSig = extractFloat64Frac( a );
   2468     aExp = extractFloat64Exp( a );
   2469     aSign = extractFloat64Sign( a );
   2470     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
   2471     shiftCount = aExp - 0x433;
   2472     if ( 0 <= shiftCount ) {
   2473         if ( 0x43E <= aExp ) {
   2474             if ( a != LIT64( 0xC3E0000000000000 ) ) {
   2475                 float_raise( float_flag_invalid );
   2476                 if (    ! aSign
   2477                      || (    ( aExp == 0x7FF )
   2478                           && ( aSig != LIT64( 0x0010000000000000 ) ) )
   2479                    ) {
   2480                     return LIT64( 0x7FFFFFFFFFFFFFFF );
   2481                 }
   2482             }
   2483             return (sbits64) LIT64( 0x8000000000000000 );
   2484         }
   2485         z = aSig<<shiftCount;
   2486     }
   2487     else {
   2488         if ( aExp < 0x3FE ) {
   2489             if ( aExp | aSig ) set_float_exception_inexact_flag();
   2490             return 0;
   2491         }
   2492         z = aSig>>( - shiftCount );
   2493         if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
   2494             set_float_exception_inexact_flag();
   2495         }
   2496     }
   2497     if ( aSign ) z = - z;
   2498     return z;
   2499 
   2500 }
   2501 #endif /* !SOFTFLOAT_FOR_GCC */
   2502 
   2503 /*
   2504 -------------------------------------------------------------------------------
   2505 Returns the result of converting the double-precision floating-point value
   2506 `a' to the single-precision floating-point format.  The conversion is
   2507 performed according to the IEC/IEEE Standard for Binary Floating-Point
   2508 Arithmetic.
   2509 -------------------------------------------------------------------------------
   2510 */
   2511 float32 float64_to_float32( float64 a )
   2512 {
   2513     flag aSign;
   2514     int16 aExp;
   2515     bits64 aSig;
   2516     bits32 zSig;
   2517 
   2518     aSig = extractFloat64Frac( a );
   2519     aExp = extractFloat64Exp( a );
   2520     aSign = extractFloat64Sign( a );
   2521     if ( aExp == 0x7FF ) {
   2522         if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
   2523         return packFloat32( aSign, 0xFF, 0 );
   2524     }
   2525     shift64RightJamming( aSig, 22, &aSig );
   2526     zSig = (bits32)aSig;
   2527     if ( aExp || zSig ) {
   2528         zSig |= 0x40000000;
   2529         aExp -= 0x381;
   2530     }
   2531     return roundAndPackFloat32( aSign, aExp, zSig );
   2532 
   2533 }
   2534 
   2535 #ifdef FLOATX80
   2536 
   2537 /*
   2538 -------------------------------------------------------------------------------
   2539 Returns the result of converting the double-precision floating-point value
   2540 `a' to the extended double-precision floating-point format.  The conversion
   2541 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   2542 Arithmetic.
   2543 -------------------------------------------------------------------------------
   2544 */
   2545 floatx80 float64_to_floatx80( float64 a )
   2546 {
   2547     flag aSign;
   2548     int16 aExp;
   2549     bits64 aSig;
   2550 
   2551     aSig = extractFloat64Frac( a );
   2552     aExp = extractFloat64Exp( a );
   2553     aSign = extractFloat64Sign( a );
   2554     if ( aExp == 0x7FF ) {
   2555         if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
   2556         return packFloatx80( aSign, 0x7FFF, 0 );
   2557     }
   2558     if ( aExp == 0 ) {
   2559         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
   2560         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   2561     }
   2562     return
   2563         packFloatx80(
   2564             aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
   2565 
   2566 }
   2567 
   2568 #endif
   2569 
   2570 #ifdef FLOAT128
   2571 
   2572 /*
   2573 -------------------------------------------------------------------------------
   2574 Returns the result of converting the double-precision floating-point value
   2575 `a' to the quadruple-precision floating-point format.  The conversion is
   2576 performed according to the IEC/IEEE Standard for Binary Floating-Point
   2577 Arithmetic.
   2578 -------------------------------------------------------------------------------
   2579 */
   2580 float128 float64_to_float128( float64 a )
   2581 {
   2582     flag aSign;
   2583     int16 aExp;
   2584     bits64 aSig, zSig0, zSig1;
   2585 
   2586     aSig = extractFloat64Frac( a );
   2587     aExp = extractFloat64Exp( a );
   2588     aSign = extractFloat64Sign( a );
   2589     if ( aExp == 0x7FF ) {
   2590         if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) );
   2591         return packFloat128( aSign, 0x7FFF, 0, 0 );
   2592     }
   2593     if ( aExp == 0 ) {
   2594         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
   2595         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   2596         --aExp;
   2597     }
   2598     shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
   2599     return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
   2600 
   2601 }
   2602 
   2603 #endif
   2604 
   2605 #ifndef SOFTFLOAT_FOR_GCC
   2606 /*
   2607 -------------------------------------------------------------------------------
   2608 Rounds the double-precision floating-point value `a' to an integer, and
   2609 returns the result as a double-precision floating-point value.  The
   2610 operation is performed according to the IEC/IEEE Standard for Binary
   2611 Floating-Point Arithmetic.
   2612 -------------------------------------------------------------------------------
   2613 */
   2614 float64 float64_round_to_int( float64 a )
   2615 {
   2616     flag aSign;
   2617     int16 aExp;
   2618     bits64 lastBitMask, roundBitsMask;
   2619     int8 roundingMode;
   2620     float64 z;
   2621 
   2622     aExp = extractFloat64Exp( a );
   2623     if ( 0x433 <= aExp ) {
   2624         if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
   2625             return propagateFloat64NaN( a, a );
   2626         }
   2627         return a;
   2628     }
   2629     if ( aExp < 0x3FF ) {
   2630         if ( (bits64) ( a<<1 ) == 0 ) return a;
   2631         set_float_exception_inexact_flag();
   2632         aSign = extractFloat64Sign( a );
   2633         switch ( float_rounding_mode ) {
   2634          case float_round_nearest_even:
   2635             if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
   2636                 return packFloat64( aSign, 0x3FF, 0 );
   2637             }
   2638             break;
   2639 	 case float_round_to_zero:
   2640 	    break;
   2641          case float_round_down:
   2642             return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
   2643          case float_round_up:
   2644             return
   2645             aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
   2646         }
   2647         return packFloat64( aSign, 0, 0 );
   2648     }
   2649     lastBitMask = 1;
   2650     lastBitMask <<= 0x433 - aExp;
   2651     roundBitsMask = lastBitMask - 1;
   2652     z = a;
   2653     roundingMode = float_rounding_mode;
   2654     if ( roundingMode == float_round_nearest_even ) {
   2655         z += lastBitMask>>1;
   2656         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
   2657     }
   2658     else if ( roundingMode != float_round_to_zero ) {
   2659         if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
   2660             z += roundBitsMask;
   2661         }
   2662     }
   2663     z &= ~ roundBitsMask;
   2664     if ( z != a ) set_float_exception_inexact_flag();
   2665     return z;
   2666 
   2667 }
   2668 #endif
   2669 
   2670 /*
   2671 -------------------------------------------------------------------------------
   2672 Returns the result of adding the absolute values of the double-precision
   2673 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
   2674 before being returned.  `zSign' is ignored if the result is a NaN.
   2675 The addition is performed according to the IEC/IEEE Standard for Binary
   2676 Floating-Point Arithmetic.
   2677 -------------------------------------------------------------------------------
   2678 */
   2679 static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
   2680 {
   2681     int16 aExp, bExp, zExp;
   2682     bits64 aSig, bSig, zSig;
   2683     int16 expDiff;
   2684 
   2685     aSig = extractFloat64Frac( a );
   2686     aExp = extractFloat64Exp( a );
   2687     bSig = extractFloat64Frac( b );
   2688     bExp = extractFloat64Exp( b );
   2689     expDiff = aExp - bExp;
   2690     aSig <<= 9;
   2691     bSig <<= 9;
   2692     if ( 0 < expDiff ) {
   2693         if ( aExp == 0x7FF ) {
   2694             if ( aSig ) return propagateFloat64NaN( a, b );
   2695             return a;
   2696         }
   2697         if ( bExp == 0 ) {
   2698             --expDiff;
   2699         }
   2700         else {
   2701             bSig |= LIT64( 0x2000000000000000 );
   2702         }
   2703         shift64RightJamming( bSig, expDiff, &bSig );
   2704         zExp = aExp;
   2705     }
   2706     else if ( expDiff < 0 ) {
   2707         if ( bExp == 0x7FF ) {
   2708             if ( bSig ) return propagateFloat64NaN( a, b );
   2709             return packFloat64( zSign, 0x7FF, 0 );
   2710         }
   2711         if ( aExp == 0 ) {
   2712             ++expDiff;
   2713         }
   2714         else {
   2715             aSig |= LIT64( 0x2000000000000000 );
   2716         }
   2717         shift64RightJamming( aSig, - expDiff, &aSig );
   2718         zExp = bExp;
   2719     }
   2720     else {
   2721         if ( aExp == 0x7FF ) {
   2722             if ( aSig | bSig ) return propagateFloat64NaN( a, b );
   2723             return a;
   2724         }
   2725         if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
   2726         zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
   2727         zExp = aExp;
   2728         goto roundAndPack;
   2729     }
   2730     aSig |= LIT64( 0x2000000000000000 );
   2731     zSig = ( aSig + bSig )<<1;
   2732     --zExp;
   2733     if ( (sbits64) zSig < 0 ) {
   2734         zSig = aSig + bSig;
   2735         ++zExp;
   2736     }
   2737  roundAndPack:
   2738     return roundAndPackFloat64( zSign, zExp, zSig );
   2739 
   2740 }
   2741 
   2742 /*
   2743 -------------------------------------------------------------------------------
   2744 Returns the result of subtracting the absolute values of the double-
   2745 precision floating-point values `a' and `b'.  If `zSign' is 1, the
   2746 difference is negated before being returned.  `zSign' is ignored if the
   2747 result is a NaN.  The subtraction is performed according to the IEC/IEEE
   2748 Standard for Binary Floating-Point Arithmetic.
   2749 -------------------------------------------------------------------------------
   2750 */
   2751 static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
   2752 {
   2753     int16 aExp, bExp, zExp;
   2754     bits64 aSig, bSig, zSig;
   2755     int16 expDiff;
   2756 
   2757     aSig = extractFloat64Frac( a );
   2758     aExp = extractFloat64Exp( a );
   2759     bSig = extractFloat64Frac( b );
   2760     bExp = extractFloat64Exp( b );
   2761     expDiff = aExp - bExp;
   2762     aSig <<= 10;
   2763     bSig <<= 10;
   2764     if ( 0 < expDiff ) goto aExpBigger;
   2765     if ( expDiff < 0 ) goto bExpBigger;
   2766     if ( aExp == 0x7FF ) {
   2767         if ( aSig | bSig ) return propagateFloat64NaN( a, b );
   2768         float_raise( float_flag_invalid );
   2769         return float64_default_nan;
   2770     }
   2771     if ( aExp == 0 ) {
   2772         aExp = 1;
   2773         bExp = 1;
   2774     }
   2775     if ( bSig < aSig ) goto aBigger;
   2776     if ( aSig < bSig ) goto bBigger;
   2777     return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
   2778  bExpBigger:
   2779     if ( bExp == 0x7FF ) {
   2780         if ( bSig ) return propagateFloat64NaN( a, b );
   2781         return packFloat64( zSign ^ 1, 0x7FF, 0 );
   2782     }
   2783     if ( aExp == 0 ) {
   2784         ++expDiff;
   2785     }
   2786     else {
   2787         aSig |= LIT64( 0x4000000000000000 );
   2788     }
   2789     shift64RightJamming( aSig, - expDiff, &aSig );
   2790     bSig |= LIT64( 0x4000000000000000 );
   2791  bBigger:
   2792     zSig = bSig - aSig;
   2793     zExp = bExp;
   2794     zSign ^= 1;
   2795     goto normalizeRoundAndPack;
   2796  aExpBigger:
   2797     if ( aExp == 0x7FF ) {
   2798         if ( aSig ) return propagateFloat64NaN( a, b );
   2799         return a;
   2800     }
   2801     if ( bExp == 0 ) {
   2802         --expDiff;
   2803     }
   2804     else {
   2805         bSig |= LIT64( 0x4000000000000000 );
   2806     }
   2807     shift64RightJamming( bSig, expDiff, &bSig );
   2808     aSig |= LIT64( 0x4000000000000000 );
   2809  aBigger:
   2810     zSig = aSig - bSig;
   2811     zExp = aExp;
   2812  normalizeRoundAndPack:
   2813     --zExp;
   2814     return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
   2815 
   2816 }
   2817 
   2818 /*
   2819 -------------------------------------------------------------------------------
   2820 Returns the result of adding the double-precision floating-point values `a'
   2821 and `b'.  The operation is performed according to the IEC/IEEE Standard for
   2822 Binary Floating-Point Arithmetic.
   2823 -------------------------------------------------------------------------------
   2824 */
   2825 float64 float64_add( float64 a, float64 b )
   2826 {
   2827     flag aSign, bSign;
   2828 
   2829     aSign = extractFloat64Sign( a );
   2830     bSign = extractFloat64Sign( b );
   2831     if ( aSign == bSign ) {
   2832         return addFloat64Sigs( a, b, aSign );
   2833     }
   2834     else {
   2835         return subFloat64Sigs( a, b, aSign );
   2836     }
   2837 
   2838 }
   2839 
   2840 /*
   2841 -------------------------------------------------------------------------------
   2842 Returns the result of subtracting the double-precision floating-point values
   2843 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   2844 for Binary Floating-Point Arithmetic.
   2845 -------------------------------------------------------------------------------
   2846 */
   2847 float64 float64_sub( float64 a, float64 b )
   2848 {
   2849     flag aSign, bSign;
   2850 
   2851     aSign = extractFloat64Sign( a );
   2852     bSign = extractFloat64Sign( b );
   2853     if ( aSign == bSign ) {
   2854         return subFloat64Sigs( a, b, aSign );
   2855     }
   2856     else {
   2857         return addFloat64Sigs( a, b, aSign );
   2858     }
   2859 
   2860 }
   2861 
   2862 /*
   2863 -------------------------------------------------------------------------------
   2864 Returns the result of multiplying the double-precision floating-point values
   2865 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   2866 for Binary Floating-Point Arithmetic.
   2867 -------------------------------------------------------------------------------
   2868 */
   2869 float64 float64_mul( float64 a, float64 b )
   2870 {
   2871     flag aSign, bSign, zSign;
   2872     int16 aExp, bExp, zExp;
   2873     bits64 aSig, bSig, zSig0, zSig1;
   2874 
   2875     aSig = extractFloat64Frac( a );
   2876     aExp = extractFloat64Exp( a );
   2877     aSign = extractFloat64Sign( a );
   2878     bSig = extractFloat64Frac( b );
   2879     bExp = extractFloat64Exp( b );
   2880     bSign = extractFloat64Sign( b );
   2881     zSign = aSign ^ bSign;
   2882     if ( aExp == 0x7FF ) {
   2883         if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
   2884             return propagateFloat64NaN( a, b );
   2885         }
   2886         if ( ( bExp | bSig ) == 0 ) {
   2887             float_raise( float_flag_invalid );
   2888             return float64_default_nan;
   2889         }
   2890         return packFloat64( zSign, 0x7FF, 0 );
   2891     }
   2892     if ( bExp == 0x7FF ) {
   2893         if ( bSig ) return propagateFloat64NaN( a, b );
   2894         if ( ( aExp | aSig ) == 0 ) {
   2895             float_raise( float_flag_invalid );
   2896             return float64_default_nan;
   2897         }
   2898         return packFloat64( zSign, 0x7FF, 0 );
   2899     }
   2900     if ( aExp == 0 ) {
   2901         if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
   2902         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   2903     }
   2904     if ( bExp == 0 ) {
   2905         if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
   2906         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
   2907     }
   2908     zExp = aExp + bExp - 0x3FF;
   2909     aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
   2910     bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
   2911     mul64To128( aSig, bSig, &zSig0, &zSig1 );
   2912     zSig0 |= ( zSig1 != 0 );
   2913     if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
   2914         zSig0 <<= 1;
   2915         --zExp;
   2916     }
   2917     return roundAndPackFloat64( zSign, zExp, zSig0 );
   2918 
   2919 }
   2920 
   2921 /*
   2922 -------------------------------------------------------------------------------
   2923 Returns the result of dividing the double-precision floating-point value `a'
   2924 by the corresponding value `b'.  The operation is performed according to
   2925 the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2926 -------------------------------------------------------------------------------
   2927 */
   2928 float64 float64_div( float64 a, float64 b )
   2929 {
   2930     flag aSign, bSign, zSign;
   2931     int16 aExp, bExp, zExp;
   2932     bits64 aSig, bSig, zSig;
   2933     bits64 rem0, rem1;
   2934     bits64 term0, term1;
   2935 
   2936     aSig = extractFloat64Frac( a );
   2937     aExp = extractFloat64Exp( a );
   2938     aSign = extractFloat64Sign( a );
   2939     bSig = extractFloat64Frac( b );
   2940     bExp = extractFloat64Exp( b );
   2941     bSign = extractFloat64Sign( b );
   2942     zSign = aSign ^ bSign;
   2943     if ( aExp == 0x7FF ) {
   2944         if ( aSig ) return propagateFloat64NaN( a, b );
   2945         if ( bExp == 0x7FF ) {
   2946             if ( bSig ) return propagateFloat64NaN( a, b );
   2947             float_raise( float_flag_invalid );
   2948             return float64_default_nan;
   2949         }
   2950         return packFloat64( zSign, 0x7FF, 0 );
   2951     }
   2952     if ( bExp == 0x7FF ) {
   2953         if ( bSig ) return propagateFloat64NaN( a, b );
   2954         return packFloat64( zSign, 0, 0 );
   2955     }
   2956     if ( bExp == 0 ) {
   2957         if ( bSig == 0 ) {
   2958             if ( ( aExp | aSig ) == 0 ) {
   2959                 float_raise( float_flag_invalid );
   2960                 return float64_default_nan;
   2961             }
   2962             float_raise( float_flag_divbyzero );
   2963             return packFloat64( zSign, 0x7FF, 0 );
   2964         }
   2965         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
   2966     }
   2967     if ( aExp == 0 ) {
   2968         if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
   2969         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   2970     }
   2971     zExp = aExp - bExp + 0x3FD;
   2972     aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
   2973     bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
   2974     if ( bSig <= ( aSig + aSig ) ) {
   2975         aSig >>= 1;
   2976         ++zExp;
   2977     }
   2978     zSig = estimateDiv128To64( aSig, 0, bSig );
   2979     if ( ( zSig & 0x1FF ) <= 2 ) {
   2980         mul64To128( bSig, zSig, &term0, &term1 );
   2981         sub128( aSig, 0, term0, term1, &rem0, &rem1 );
   2982         while ( (sbits64) rem0 < 0 ) {
   2983             --zSig;
   2984             add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
   2985         }
   2986         zSig |= ( rem1 != 0 );
   2987     }
   2988     return roundAndPackFloat64( zSign, zExp, zSig );
   2989 
   2990 }
   2991 
   2992 #ifndef SOFTFLOAT_FOR_GCC
   2993 /*
   2994 -------------------------------------------------------------------------------
   2995 Returns the remainder of the double-precision floating-point value `a'
   2996 with respect to the corresponding value `b'.  The operation is performed
   2997 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2998 -------------------------------------------------------------------------------
   2999 */
   3000 float64 float64_rem( float64 a, float64 b )
   3001 {
   3002     flag aSign, bSign, zSign;
   3003     int16 aExp, bExp, expDiff;
   3004     bits64 aSig, bSig;
   3005     bits64 q, alternateASig;
   3006     sbits64 sigMean;
   3007 
   3008     aSig = extractFloat64Frac( a );
   3009     aExp = extractFloat64Exp( a );
   3010     aSign = extractFloat64Sign( a );
   3011     bSig = extractFloat64Frac( b );
   3012     bExp = extractFloat64Exp( b );
   3013     bSign = extractFloat64Sign( b );
   3014     if ( aExp == 0x7FF ) {
   3015         if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
   3016             return propagateFloat64NaN( a, b );
   3017         }
   3018         float_raise( float_flag_invalid );
   3019         return float64_default_nan;
   3020     }
   3021     if ( bExp == 0x7FF ) {
   3022         if ( bSig ) return propagateFloat64NaN( a, b );
   3023         return a;
   3024     }
   3025     if ( bExp == 0 ) {
   3026         if ( bSig == 0 ) {
   3027             float_raise( float_flag_invalid );
   3028             return float64_default_nan;
   3029         }
   3030         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
   3031     }
   3032     if ( aExp == 0 ) {
   3033         if ( aSig == 0 ) return a;
   3034         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   3035     }
   3036     expDiff = aExp - bExp;
   3037     aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
   3038     bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
   3039     if ( expDiff < 0 ) {
   3040         if ( expDiff < -1 ) return a;
   3041         aSig >>= 1;
   3042     }
   3043     q = ( bSig <= aSig );
   3044     if ( q ) aSig -= bSig;
   3045     expDiff -= 64;
   3046     while ( 0 < expDiff ) {
   3047         q = estimateDiv128To64( aSig, 0, bSig );
   3048         q = ( 2 < q ) ? q - 2 : 0;
   3049         aSig = - ( ( bSig>>2 ) * q );
   3050         expDiff -= 62;
   3051     }
   3052     expDiff += 64;
   3053     if ( 0 < expDiff ) {
   3054         q = estimateDiv128To64( aSig, 0, bSig );
   3055         q = ( 2 < q ) ? q - 2 : 0;
   3056         q >>= 64 - expDiff;
   3057         bSig >>= 2;
   3058         aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
   3059     }
   3060     else {
   3061         aSig >>= 2;
   3062         bSig >>= 2;
   3063     }
   3064     do {
   3065         alternateASig = aSig;
   3066         ++q;
   3067         aSig -= bSig;
   3068     } while ( 0 <= (sbits64) aSig );
   3069     sigMean = aSig + alternateASig;
   3070     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
   3071         aSig = alternateASig;
   3072     }
   3073     zSign = ( (sbits64) aSig < 0 );
   3074     if ( zSign ) aSig = - aSig;
   3075     return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
   3076 
   3077 }
   3078 
   3079 /*
   3080 -------------------------------------------------------------------------------
   3081 Returns the square root of the double-precision floating-point value `a'.
   3082 The operation is performed according to the IEC/IEEE Standard for Binary
   3083 Floating-Point Arithmetic.
   3084 -------------------------------------------------------------------------------
   3085 */
   3086 float64 float64_sqrt( float64 a )
   3087 {
   3088     flag aSign;
   3089     int16 aExp, zExp;
   3090     bits64 aSig, zSig, doubleZSig;
   3091     bits64 rem0, rem1, term0, term1;
   3092 
   3093     aSig = extractFloat64Frac( a );
   3094     aExp = extractFloat64Exp( a );
   3095     aSign = extractFloat64Sign( a );
   3096     if ( aExp == 0x7FF ) {
   3097         if ( aSig ) return propagateFloat64NaN( a, a );
   3098         if ( ! aSign ) return a;
   3099         float_raise( float_flag_invalid );
   3100         return float64_default_nan;
   3101     }
   3102     if ( aSign ) {
   3103         if ( ( aExp | aSig ) == 0 ) return a;
   3104         float_raise( float_flag_invalid );
   3105         return float64_default_nan;
   3106     }
   3107     if ( aExp == 0 ) {
   3108         if ( aSig == 0 ) return 0;
   3109         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   3110     }
   3111     zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
   3112     aSig |= LIT64( 0x0010000000000000 );
   3113     zSig = estimateSqrt32( aExp, aSig>>21 );
   3114     aSig <<= 9 - ( aExp & 1 );
   3115     zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
   3116     if ( ( zSig & 0x1FF ) <= 5 ) {
   3117         doubleZSig = zSig<<1;
   3118         mul64To128( zSig, zSig, &term0, &term1 );
   3119         sub128( aSig, 0, term0, term1, &rem0, &rem1 );
   3120         while ( (sbits64) rem0 < 0 ) {
   3121             --zSig;
   3122             doubleZSig -= 2;
   3123             add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
   3124         }
   3125         zSig |= ( ( rem0 | rem1 ) != 0 );
   3126     }
   3127     return roundAndPackFloat64( 0, zExp, zSig );
   3128 
   3129 }
   3130 #endif
   3131 
   3132 /*
   3133 -------------------------------------------------------------------------------
   3134 Returns 1 if the double-precision floating-point value `a' is equal to the
   3135 corresponding value `b', and 0 otherwise.  The comparison is performed
   3136 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3137 -------------------------------------------------------------------------------
   3138 */
   3139 flag float64_eq( float64 a, float64 b )
   3140 {
   3141 
   3142     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3143          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3144        ) {
   3145         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
   3146             float_raise( float_flag_invalid );
   3147         }
   3148         return 0;
   3149     }
   3150     return ( a == b ) ||
   3151 	( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
   3152 
   3153 }
   3154 
   3155 /*
   3156 -------------------------------------------------------------------------------
   3157 Returns 1 if the double-precision floating-point value `a' is less than or
   3158 equal to the corresponding value `b', and 0 otherwise.  The comparison is
   3159 performed according to the IEC/IEEE Standard for Binary Floating-Point
   3160 Arithmetic.
   3161 -------------------------------------------------------------------------------
   3162 */
   3163 flag float64_le( float64 a, float64 b )
   3164 {
   3165     flag aSign, bSign;
   3166 
   3167     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3168          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3169        ) {
   3170         float_raise( float_flag_invalid );
   3171         return 0;
   3172     }
   3173     aSign = extractFloat64Sign( a );
   3174     bSign = extractFloat64Sign( b );
   3175     if ( aSign != bSign )
   3176 	return aSign ||
   3177 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
   3178 	      0 );
   3179     return ( a == b ) ||
   3180 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
   3181 
   3182 }
   3183 
   3184 /*
   3185 -------------------------------------------------------------------------------
   3186 Returns 1 if the double-precision floating-point value `a' is less than
   3187 the corresponding value `b', and 0 otherwise.  The comparison is performed
   3188 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3189 -------------------------------------------------------------------------------
   3190 */
   3191 flag float64_lt( float64 a, float64 b )
   3192 {
   3193     flag aSign, bSign;
   3194 
   3195     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3196          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3197        ) {
   3198         float_raise( float_flag_invalid );
   3199         return 0;
   3200     }
   3201     aSign = extractFloat64Sign( a );
   3202     bSign = extractFloat64Sign( b );
   3203     if ( aSign != bSign )
   3204 	return aSign &&
   3205 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
   3206 	      0 );
   3207     return ( a != b ) &&
   3208 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
   3209 
   3210 }
   3211 
   3212 #ifndef SOFTFLOAT_FOR_GCC
   3213 /*
   3214 -------------------------------------------------------------------------------
   3215 Returns 1 if the double-precision floating-point value `a' is equal to the
   3216 corresponding value `b', and 0 otherwise.  The invalid exception is raised
   3217 if either operand is a NaN.  Otherwise, the comparison is performed
   3218 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3219 -------------------------------------------------------------------------------
   3220 */
   3221 flag float64_eq_signaling( float64 a, float64 b )
   3222 {
   3223 
   3224     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3225          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3226        ) {
   3227         float_raise( float_flag_invalid );
   3228         return 0;
   3229     }
   3230     return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
   3231 
   3232 }
   3233 
   3234 /*
   3235 -------------------------------------------------------------------------------
   3236 Returns 1 if the double-precision floating-point value `a' is less than or
   3237 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
   3238 cause an exception.  Otherwise, the comparison is performed according to the
   3239 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3240 -------------------------------------------------------------------------------
   3241 */
   3242 flag float64_le_quiet( float64 a, float64 b )
   3243 {
   3244     flag aSign, bSign;
   3245 
   3246     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3247          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3248        ) {
   3249         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
   3250             float_raise( float_flag_invalid );
   3251         }
   3252         return 0;
   3253     }
   3254     aSign = extractFloat64Sign( a );
   3255     bSign = extractFloat64Sign( b );
   3256     if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
   3257     return ( a == b ) || ( aSign ^ ( a < b ) );
   3258 
   3259 }
   3260 
   3261 /*
   3262 -------------------------------------------------------------------------------
   3263 Returns 1 if the double-precision floating-point value `a' is less than
   3264 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
   3265 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
   3266 Standard for Binary Floating-Point Arithmetic.
   3267 -------------------------------------------------------------------------------
   3268 */
   3269 flag float64_lt_quiet( float64 a, float64 b )
   3270 {
   3271     flag aSign, bSign;
   3272 
   3273     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3274          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3275        ) {
   3276         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
   3277             float_raise( float_flag_invalid );
   3278         }
   3279         return 0;
   3280     }
   3281     aSign = extractFloat64Sign( a );
   3282     bSign = extractFloat64Sign( b );
   3283     if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
   3284     return ( a != b ) && ( aSign ^ ( a < b ) );
   3285 
   3286 }
   3287 #endif
   3288 
   3289 #ifdef FLOATX80
   3290 #ifdef X80M68K
   3291 flag floatx80_eq( floatx80 a, floatx80 b )
   3292 {
   3293     return !floatx80_eq_internal(a, b);
   3294 }
   3295 
   3296 flag floatx80_le_quiet( floatx80 a, floatx80 b )
   3297 {
   3298     return !floatx80_le_quiet_internal(a, b);
   3299 }
   3300 
   3301 flag floatx80_lt_quiet( floatx80 a, floatx80 b )
   3302 {
   3303     return floatx80_lt_quiet_internal(a, b) ? -1 : 1;
   3304 }
   3305 
   3306 flag floatx80_le( floatx80 a, floatx80 b )
   3307 {
   3308     return !floatx80_le_internal(a, b);
   3309 }
   3310 
   3311 flag floatx80_lt( floatx80 a, floatx80 b )
   3312 {
   3313     return floatx80_lt_internal(a, b) ? -1 : 1;
   3314 }
   3315 
   3316 flag floatx80_eq_signaling( floatx80 a, floatx80 b )
   3317 {
   3318     return !floatx80_eq_signaling_internal(a, b);
   3319 }
   3320 #else
   3321 
   3322 #define floatx80_eq_internal		floatx80_eq
   3323 #define floatx80_le_internal		floatx80_le
   3324 #define floatx80_lt_internal		floatx80_lt
   3325 #define floatx80_le_quiet_internal	floatx80_le_quiet
   3326 #define floatx80_lt_quiet_internal	floatx80_lt_quiet
   3327 #define floatx80_eq_signaling_internal	floatx80_eq_signaling
   3328 
   3329 #endif
   3330 
   3331 /*
   3332 -------------------------------------------------------------------------------
   3333 Returns the result of converting the extended double-precision floating-
   3334 point value `a' to the 32-bit two's complement integer format.  The
   3335 conversion is performed according to the IEC/IEEE Standard for Binary
   3336 Floating-Point Arithmetic---which means in particular that the conversion
   3337 is rounded according to the current rounding mode.  If `a' is a NaN, the
   3338 largest positive integer is returned.  Otherwise, if the conversion
   3339 overflows, the largest integer with the same sign as `a' is returned.
   3340 -------------------------------------------------------------------------------
   3341 */
   3342 int32 floatx80_to_int32( floatx80 a )
   3343 {
   3344     flag aSign;
   3345     int32 aExp, shiftCount;
   3346     bits64 aSig;
   3347 
   3348     aSig = extractFloatx80Frac( a );
   3349     aExp = extractFloatx80Exp( a );
   3350     aSign = extractFloatx80Sign( a );
   3351     if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
   3352     shiftCount = 0x4037 - aExp;
   3353     if ( shiftCount <= 0 ) shiftCount = 1;
   3354     shift64RightJamming( aSig, shiftCount, &aSig );
   3355     return roundAndPackInt32( aSign, aSig );
   3356 
   3357 }
   3358 
   3359 /*
   3360 -------------------------------------------------------------------------------
   3361 Returns the result of converting the extended double-precision floating-
   3362 point value `a' to the 32-bit two's complement integer format.  The
   3363 conversion is performed according to the IEC/IEEE Standard for Binary
   3364 Floating-Point Arithmetic, except that the conversion is always rounded
   3365 toward zero.  If `a' is a NaN, the largest positive integer is returned.
   3366 Otherwise, if the conversion overflows, the largest integer with the same
   3367 sign as `a' is returned.
   3368 -------------------------------------------------------------------------------
   3369 */
   3370 int32 floatx80_to_int32_round_to_zero( floatx80 a )
   3371 {
   3372     flag aSign;
   3373     int32 aExp, shiftCount;
   3374     bits64 aSig, savedASig;
   3375     int32 z;
   3376 
   3377     aSig = extractFloatx80Frac( a );
   3378     aExp = extractFloatx80Exp( a );
   3379     aSign = extractFloatx80Sign( a );
   3380     if ( 0x401E < aExp ) {
   3381         if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
   3382         goto invalid;
   3383     }
   3384     else if ( aExp < 0x3FFF ) {
   3385         if ( aExp || aSig ) set_float_exception_inexact_flag();
   3386         return 0;
   3387     }
   3388     shiftCount = 0x403E - aExp;
   3389     savedASig = aSig;
   3390     aSig >>= shiftCount;
   3391     z = aSig;
   3392     if ( aSign ) z = - z;
   3393     if ( ( z < 0 ) ^ aSign ) {
   3394  invalid:
   3395         float_raise( float_flag_invalid );
   3396         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
   3397     }
   3398     if ( ( aSig<<shiftCount ) != savedASig ) {
   3399         set_float_exception_inexact_flag();
   3400     }
   3401     return z;
   3402 
   3403 }
   3404 
   3405 /*
   3406 -------------------------------------------------------------------------------
   3407 Returns the result of converting the extended double-precision floating-
   3408 point value `a' to the 64-bit two's complement integer format.  The
   3409 conversion is performed according to the IEC/IEEE Standard for Binary
   3410 Floating-Point Arithmetic---which means in particular that the conversion
   3411 is rounded according to the current rounding mode.  If `a' is a NaN,
   3412 the largest positive integer is returned.  Otherwise, if the conversion
   3413 overflows, the largest integer with the same sign as `a' is returned.
   3414 -------------------------------------------------------------------------------
   3415 */
   3416 int64 floatx80_to_int64( floatx80 a )
   3417 {
   3418     flag aSign;
   3419     int32 aExp, shiftCount;
   3420     bits64 aSig, aSigExtra;
   3421 
   3422     aSig = extractFloatx80Frac( a );
   3423     aExp = extractFloatx80Exp( a );
   3424     aSign = extractFloatx80Sign( a );
   3425     shiftCount = 0x403E - aExp;
   3426     if ( shiftCount <= 0 ) {
   3427         if ( shiftCount ) {
   3428             float_raise( float_flag_invalid );
   3429             if (    ! aSign
   3430                  || (    ( aExp == 0x7FFF )
   3431                       && ( aSig != LIT64( 0x8000000000000000 ) ) )
   3432                ) {
   3433                 return LIT64( 0x7FFFFFFFFFFFFFFF );
   3434             }
   3435             return (sbits64) LIT64( 0x8000000000000000 );
   3436         }
   3437         aSigExtra = 0;
   3438     }
   3439     else {
   3440         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
   3441     }
   3442     return roundAndPackInt64( aSign, aSig, aSigExtra );
   3443 
   3444 }
   3445 
   3446 /*
   3447 -------------------------------------------------------------------------------
   3448 Returns the result of converting the extended double-precision floating-
   3449 point value `a' to the 64-bit two's complement integer format.  The
   3450 conversion is performed according to the IEC/IEEE Standard for Binary
   3451 Floating-Point Arithmetic, except that the conversion is always rounded
   3452 toward zero.  If `a' is a NaN, the largest positive integer is returned.
   3453 Otherwise, if the conversion overflows, the largest integer with the same
   3454 sign as `a' is returned.
   3455 -------------------------------------------------------------------------------
   3456 */
   3457 int64 floatx80_to_int64_round_to_zero( floatx80 a )
   3458 {
   3459     flag aSign;
   3460     int32 aExp, shiftCount;
   3461     bits64 aSig;
   3462     int64 z;
   3463 
   3464     aSig = extractFloatx80Frac( a );
   3465     aExp = extractFloatx80Exp( a );
   3466     aSign = extractFloatx80Sign( a );
   3467     shiftCount = aExp - 0x403E;
   3468     if ( 0 <= shiftCount ) {
   3469         aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
   3470         if ( ( (a.high>>X80SHIFT) != 0xC03E ) || aSig ) {
   3471             float_raise( float_flag_invalid );
   3472             if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
   3473                 return LIT64( 0x7FFFFFFFFFFFFFFF );
   3474             }
   3475         }
   3476         return (sbits64) LIT64( 0x8000000000000000 );
   3477     }
   3478     else if ( aExp < 0x3FFF ) {
   3479         if ( aExp | aSig ) set_float_exception_inexact_flag();
   3480         return 0;
   3481     }
   3482     z = aSig>>( - shiftCount );
   3483     if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
   3484         set_float_exception_inexact_flag();
   3485     }
   3486     if ( aSign ) z = - z;
   3487     return z;
   3488 
   3489 }
   3490 
   3491 /*
   3492 -------------------------------------------------------------------------------
   3493 Returns the result of converting the extended double-precision floating-
   3494 point value `a' to the single-precision floating-point format.  The
   3495 conversion is performed according to the IEC/IEEE Standard for Binary
   3496 Floating-Point Arithmetic.
   3497 -------------------------------------------------------------------------------
   3498 */
   3499 float32 floatx80_to_float32( floatx80 a )
   3500 {
   3501     flag aSign;
   3502     int32 aExp;
   3503     bits64 aSig;
   3504 
   3505     aSig = extractFloatx80Frac( a );
   3506     aExp = extractFloatx80Exp( a );
   3507     aSign = extractFloatx80Sign( a );
   3508     if ( aExp == 0x7FFF ) {
   3509         if ( (bits64) ( aSig<<1 ) ) {
   3510             return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
   3511         }
   3512         return packFloat32( aSign, 0xFF, 0 );
   3513     }
   3514     shift64RightJamming( aSig, 33, &aSig );
   3515     if ( aExp || aSig ) aExp -= 0x3F81;
   3516     return roundAndPackFloat32( aSign, aExp, aSig );
   3517 
   3518 }
   3519 
   3520 /*
   3521 -------------------------------------------------------------------------------
   3522 Returns the result of converting the extended double-precision floating-
   3523 point value `a' to the double-precision floating-point format.  The
   3524 conversion is performed according to the IEC/IEEE Standard for Binary
   3525 Floating-Point Arithmetic.
   3526 -------------------------------------------------------------------------------
   3527 */
   3528 float64 floatx80_to_float64( floatx80 a )
   3529 {
   3530     flag aSign;
   3531     int32 aExp;
   3532     bits64 aSig, zSig;
   3533 
   3534     aSig = extractFloatx80Frac( a );
   3535     aExp = extractFloatx80Exp( a );
   3536     aSign = extractFloatx80Sign( a );
   3537     if ( aExp == 0x7FFF ) {
   3538         if ( (bits64) ( aSig<<1 ) ) {
   3539             return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
   3540         }
   3541         return packFloat64( aSign, 0x7FF, 0 );
   3542     }
   3543     shift64RightJamming( aSig, 1, &zSig );
   3544     if ( aExp || aSig ) aExp -= 0x3C01;
   3545     return roundAndPackFloat64( aSign, aExp, zSig );
   3546 
   3547 }
   3548 
   3549 #ifdef FLOAT128
   3550 
   3551 /*
   3552 -------------------------------------------------------------------------------
   3553 Returns the result of converting the extended double-precision floating-
   3554 point value `a' to the quadruple-precision floating-point format.  The
   3555 conversion is performed according to the IEC/IEEE Standard for Binary
   3556 Floating-Point Arithmetic.
   3557 -------------------------------------------------------------------------------
   3558 */
   3559 float128 floatx80_to_float128( floatx80 a )
   3560 {
   3561     flag aSign;
   3562     int16 aExp;
   3563     bits64 aSig, zSig0, zSig1;
   3564 
   3565     aSig = extractFloatx80Frac( a );
   3566     aExp = extractFloatx80Exp( a );
   3567     aSign = extractFloatx80Sign( a );
   3568     if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
   3569         return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
   3570     }
   3571     shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
   3572     return packFloat128( aSign, aExp, zSig0, zSig1 );
   3573 
   3574 }
   3575 
   3576 #endif
   3577 
   3578 /*
   3579 -------------------------------------------------------------------------------
   3580 Rounds the extended double-precision floating-point value `a' to an integer,
   3581 and returns the result as an extended quadruple-precision floating-point
   3582 value.  The operation is performed according to the IEC/IEEE Standard for
   3583 Binary Floating-Point Arithmetic.
   3584 -------------------------------------------------------------------------------
   3585 */
   3586 floatx80 floatx80_round_to_int( floatx80 a )
   3587 {
   3588     flag aSign;
   3589     int32 aExp;
   3590     bits64 lastBitMask, roundBitsMask;
   3591     int8 roundingMode;
   3592     floatx80 z;
   3593 
   3594     aExp = extractFloatx80Exp( a );
   3595     if ( 0x403E <= aExp ) {
   3596         if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
   3597             return propagateFloatx80NaN( a, a );
   3598         }
   3599         return a;
   3600     }
   3601     if ( aExp < 0x3FFF ) {
   3602         if (    ( aExp == 0 )
   3603              && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
   3604             return a;
   3605         }
   3606         set_float_exception_inexact_flag();
   3607         aSign = extractFloatx80Sign( a );
   3608         switch ( float_rounding_mode ) {
   3609          case float_round_nearest_even:
   3610             if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
   3611                ) {
   3612                 return
   3613                     packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
   3614             }
   3615             break;
   3616 	 case float_round_to_zero:
   3617 	    break;
   3618          case float_round_down:
   3619             return
   3620                   aSign ?
   3621                       packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
   3622                 : packFloatx80( 0, 0, 0 );
   3623          case float_round_up:
   3624             return
   3625                   aSign ? packFloatx80( 1, 0, 0 )
   3626                 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
   3627         }
   3628         return packFloatx80( aSign, 0, 0 );
   3629     }
   3630     lastBitMask = 1;
   3631     lastBitMask <<= 0x403E - aExp;
   3632     roundBitsMask = lastBitMask - 1;
   3633     z = a;
   3634     roundingMode = float_rounding_mode;
   3635     if ( roundingMode == float_round_nearest_even ) {
   3636         z.low += lastBitMask>>1;
   3637         if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
   3638     }
   3639     else if ( roundingMode != float_round_to_zero ) {
   3640         if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
   3641             z.low += roundBitsMask;
   3642         }
   3643     }
   3644     z.low &= ~ roundBitsMask;
   3645     if ( z.low == 0 ) {
   3646         ++z.high;
   3647         z.low = LIT64( 0x8000000000000000 );
   3648     }
   3649     z.high <<= X80SHIFT;
   3650     if ( z.low != a.low ) set_float_exception_inexact_flag();
   3651     return z;
   3652 
   3653 }
   3654 
   3655 /*
   3656 -------------------------------------------------------------------------------
   3657 Returns the result of adding the absolute values of the extended double-
   3658 precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
   3659 negated before being returned.  `zSign' is ignored if the result is a NaN.
   3660 The addition is performed according to the IEC/IEEE Standard for Binary
   3661 Floating-Point Arithmetic.
   3662 -------------------------------------------------------------------------------
   3663 */
   3664 static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
   3665 {
   3666     int32 aExp, bExp, zExp;
   3667     bits64 aSig, bSig, zSig0, zSig1;
   3668     int32 expDiff;
   3669 
   3670     aSig = extractFloatx80Frac( a );
   3671     aExp = extractFloatx80Exp( a );
   3672     bSig = extractFloatx80Frac( b );
   3673     bExp = extractFloatx80Exp( b );
   3674     expDiff = aExp - bExp;
   3675     if ( 0 < expDiff ) {
   3676         if ( aExp == 0x7FFF ) {
   3677             if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3678             return a;
   3679         }
   3680         if ( bExp == 0 ) --expDiff;
   3681         shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
   3682         zExp = aExp;
   3683     }
   3684     else if ( expDiff < 0 ) {
   3685         if ( bExp == 0x7FFF ) {
   3686             if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3687             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3688         }
   3689         if ( aExp == 0 ) ++expDiff;
   3690         shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
   3691         zExp = bExp;
   3692     }
   3693     else {
   3694         if ( aExp == 0x7FFF ) {
   3695             if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
   3696                 return propagateFloatx80NaN( a, b );
   3697             }
   3698             return a;
   3699         }
   3700         zSig1 = 0;
   3701         zSig0 = aSig + bSig;
   3702         if ( aExp == 0 ) {
   3703             normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
   3704             goto roundAndPack;
   3705         }
   3706         zExp = aExp;
   3707         goto shiftRight1;
   3708     }
   3709     zSig0 = aSig + bSig;
   3710     if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
   3711  shiftRight1:
   3712     shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
   3713     zSig0 |= LIT64( 0x8000000000000000 );
   3714     ++zExp;
   3715  roundAndPack:
   3716     return
   3717         roundAndPackFloatx80(
   3718             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
   3719 
   3720 }
   3721 
   3722 /*
   3723 -------------------------------------------------------------------------------
   3724 Returns the result of subtracting the absolute values of the extended
   3725 double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
   3726 difference is negated before being returned.  `zSign' is ignored if the
   3727 result is a NaN.  The subtraction is performed according to the IEC/IEEE
   3728 Standard for Binary Floating-Point Arithmetic.
   3729 -------------------------------------------------------------------------------
   3730 */
   3731 static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
   3732 {
   3733     int32 aExp, bExp, zExp;
   3734     bits64 aSig, bSig, zSig0, zSig1;
   3735     int32 expDiff;
   3736     floatx80 z;
   3737 
   3738     aSig = extractFloatx80Frac( a );
   3739     aExp = extractFloatx80Exp( a );
   3740     bSig = extractFloatx80Frac( b );
   3741     bExp = extractFloatx80Exp( b );
   3742     expDiff = aExp - bExp;
   3743     if ( 0 < expDiff ) goto aExpBigger;
   3744     if ( expDiff < 0 ) goto bExpBigger;
   3745     if ( aExp == 0x7FFF ) {
   3746         if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
   3747             return propagateFloatx80NaN( a, b );
   3748         }
   3749         float_raise( float_flag_invalid );
   3750         z.low = floatx80_default_nan_low;
   3751         z.high = floatx80_default_nan_high<<X80SHIFT;
   3752         return z;
   3753     }
   3754     if ( aExp == 0 ) {
   3755         aExp = 1;
   3756         bExp = 1;
   3757     }
   3758     zSig1 = 0;
   3759     if ( bSig < aSig ) goto aBigger;
   3760     if ( aSig < bSig ) goto bBigger;
   3761     return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
   3762  bExpBigger:
   3763     if ( bExp == 0x7FFF ) {
   3764         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3765         return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3766     }
   3767     if ( aExp == 0 ) ++expDiff;
   3768     shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
   3769  bBigger:
   3770     sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
   3771     zExp = bExp;
   3772     zSign ^= 1;
   3773     goto normalizeRoundAndPack;
   3774  aExpBigger:
   3775     if ( aExp == 0x7FFF ) {
   3776         if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3777         return a;
   3778     }
   3779     if ( bExp == 0 ) --expDiff;
   3780     shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
   3781  aBigger:
   3782     sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
   3783     zExp = aExp;
   3784  normalizeRoundAndPack:
   3785     return
   3786         normalizeRoundAndPackFloatx80(
   3787             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
   3788 
   3789 }
   3790 
   3791 /*
   3792 -------------------------------------------------------------------------------
   3793 Returns the result of adding the extended double-precision floating-point
   3794 values `a' and `b'.  The operation is performed according to the IEC/IEEE
   3795 Standard for Binary Floating-Point Arithmetic.
   3796 -------------------------------------------------------------------------------
   3797 */
   3798 floatx80 floatx80_add( floatx80 a, floatx80 b )
   3799 {
   3800     flag aSign, bSign;
   3801 
   3802     aSign = extractFloatx80Sign( a );
   3803     bSign = extractFloatx80Sign( b );
   3804     if ( aSign == bSign ) {
   3805         return addFloatx80Sigs( a, b, aSign );
   3806     }
   3807     else {
   3808         return subFloatx80Sigs( a, b, aSign );
   3809     }
   3810 
   3811 }
   3812 
   3813 /*
   3814 -------------------------------------------------------------------------------
   3815 Returns the result of subtracting the extended double-precision floating-
   3816 point values `a' and `b'.  The operation is performed according to the
   3817 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3818 -------------------------------------------------------------------------------
   3819 */
   3820 floatx80 floatx80_sub( floatx80 a, floatx80 b )
   3821 {
   3822     flag aSign, bSign;
   3823 
   3824     aSign = extractFloatx80Sign( a );
   3825     bSign = extractFloatx80Sign( b );
   3826     if ( aSign == bSign ) {
   3827         return subFloatx80Sigs( a, b, aSign );
   3828     }
   3829     else {
   3830         return addFloatx80Sigs( a, b, aSign );
   3831     }
   3832 
   3833 }
   3834 
   3835 /*
   3836 -------------------------------------------------------------------------------
   3837 Returns the result of multiplying the extended double-precision floating-
   3838 point values `a' and `b'.  The operation is performed according to the
   3839 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3840 -------------------------------------------------------------------------------
   3841 */
   3842 floatx80 floatx80_mul( floatx80 a, floatx80 b )
   3843 {
   3844     flag aSign, bSign, zSign;
   3845     int32 aExp, bExp, zExp;
   3846     bits64 aSig, bSig, zSig0, zSig1;
   3847     floatx80 z;
   3848 
   3849     aSig = extractFloatx80Frac( a );
   3850     aExp = extractFloatx80Exp( a );
   3851     aSign = extractFloatx80Sign( a );
   3852     bSig = extractFloatx80Frac( b );
   3853     bExp = extractFloatx80Exp( b );
   3854     bSign = extractFloatx80Sign( b );
   3855     zSign = aSign ^ bSign;
   3856     if ( aExp == 0x7FFF ) {
   3857         if (    (bits64) ( aSig<<1 )
   3858              || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
   3859             return propagateFloatx80NaN( a, b );
   3860         }
   3861         if ( ( bExp | bSig ) == 0 ) goto invalid;
   3862         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3863     }
   3864     if ( bExp == 0x7FFF ) {
   3865         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3866         if ( ( aExp | aSig ) == 0 ) {
   3867  invalid:
   3868             float_raise( float_flag_invalid );
   3869             z.low = floatx80_default_nan_low;
   3870             z.high = floatx80_default_nan_high<<X80SHIFT;
   3871             return z;
   3872         }
   3873         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3874     }
   3875     if ( aExp == 0 ) {
   3876         if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
   3877         normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
   3878     }
   3879     if ( bExp == 0 ) {
   3880         if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
   3881         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
   3882     }
   3883     zExp = aExp + bExp - 0x3FFE;
   3884     mul64To128( aSig, bSig, &zSig0, &zSig1 );
   3885     if ( 0 < (sbits64) zSig0 ) {
   3886         shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
   3887         --zExp;
   3888     }
   3889     return
   3890         roundAndPackFloatx80(
   3891             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
   3892 
   3893 }
   3894 
   3895 /*
   3896 -------------------------------------------------------------------------------
   3897 Returns the result of dividing the extended double-precision floating-point
   3898 value `a' by the corresponding value `b'.  The operation is performed
   3899 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3900 -------------------------------------------------------------------------------
   3901 */
   3902 floatx80 floatx80_div( floatx80 a, floatx80 b )
   3903 {
   3904     flag aSign, bSign, zSign;
   3905     int32 aExp, bExp, zExp;
   3906     bits64 aSig, bSig, zSig0, zSig1;
   3907     bits64 rem0, rem1, rem2, term0, term1, term2;
   3908     floatx80 z;
   3909 
   3910     aSig = extractFloatx80Frac( a );
   3911     aExp = extractFloatx80Exp( a );
   3912     aSign = extractFloatx80Sign( a );
   3913     bSig = extractFloatx80Frac( b );
   3914     bExp = extractFloatx80Exp( b );
   3915     bSign = extractFloatx80Sign( b );
   3916     zSign = aSign ^ bSign;
   3917     if ( aExp == 0x7FFF ) {
   3918         if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3919         if ( bExp == 0x7FFF ) {
   3920             if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3921             goto invalid;
   3922         }
   3923         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3924     }
   3925     if ( bExp == 0x7FFF ) {
   3926         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3927         return packFloatx80( zSign, 0, 0 );
   3928     }
   3929     if ( bExp == 0 ) {
   3930         if ( bSig == 0 ) {
   3931             if ( ( aExp | aSig ) == 0 ) {
   3932  invalid:
   3933                 float_raise( float_flag_invalid );
   3934                 z.low = floatx80_default_nan_low;
   3935                 z.high = floatx80_default_nan_high<<X80SHIFT;
   3936                 return z;
   3937             }
   3938             float_raise( float_flag_divbyzero );
   3939             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3940         }
   3941         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
   3942     }
   3943     if ( aExp == 0 ) {
   3944         if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
   3945         normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
   3946     }
   3947     zExp = aExp - bExp + 0x3FFE;
   3948     rem1 = 0;
   3949     if ( bSig <= aSig ) {
   3950         shift128Right( aSig, 0, 1, &aSig, &rem1 );
   3951         ++zExp;
   3952     }
   3953     zSig0 = estimateDiv128To64( aSig, rem1, bSig );
   3954     mul64To128( bSig, zSig0, &term0, &term1 );
   3955     sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
   3956     while ( (sbits64) rem0 < 0 ) {
   3957         --zSig0;
   3958         add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
   3959     }
   3960     zSig1 = estimateDiv128To64( rem1, 0, bSig );
   3961     if ( (bits64) ( zSig1<<1 ) <= 8 ) {
   3962         mul64To128( bSig, zSig1, &term1, &term2 );
   3963         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
   3964         while ( (sbits64) rem1 < 0 ) {
   3965             --zSig1;
   3966             add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
   3967         }
   3968         zSig1 |= ( ( rem1 | rem2 ) != 0 );
   3969     }
   3970     return
   3971         roundAndPackFloatx80(
   3972             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
   3973 
   3974 }
   3975 
   3976 /*
   3977 -------------------------------------------------------------------------------
   3978 Returns the remainder of the extended double-precision floating-point value
   3979 `a' with respect to the corresponding value `b'.  The operation is performed
   3980 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3981 -------------------------------------------------------------------------------
   3982 */
   3983 floatx80 floatx80_rem( floatx80 a, floatx80 b )
   3984 {
   3985     flag aSign, zSign;
   3986     int32 aExp, bExp, expDiff;
   3987     bits64 aSig0, aSig1, bSig;
   3988     bits64 q, term0, term1, alternateASig0, alternateASig1;
   3989     floatx80 z;
   3990 
   3991     aSig0 = extractFloatx80Frac( a );
   3992     aExp = extractFloatx80Exp( a );
   3993     aSign = extractFloatx80Sign( a );
   3994     bSig = extractFloatx80Frac( b );
   3995     bExp = extractFloatx80Exp( b );
   3996 
   3997     if ( aExp == 0x7FFF ) {
   3998         if (    (bits64) ( aSig0<<1 )
   3999              || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
   4000             return propagateFloatx80NaN( a, b );
   4001         }
   4002         goto invalid;
   4003     }
   4004     if ( bExp == 0x7FFF ) {
   4005         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   4006         return a;
   4007     }
   4008     if ( bExp == 0 ) {
   4009         if ( bSig == 0 ) {
   4010  invalid:
   4011             float_raise( float_flag_invalid );
   4012             z.low = floatx80_default_nan_low;
   4013             z.high = floatx80_default_nan_high<<X80SHIFT;
   4014             return z;
   4015         }
   4016         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
   4017     }
   4018     if ( aExp == 0 ) {
   4019         if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
   4020         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
   4021     }
   4022     bSig |= LIT64( 0x8000000000000000 );
   4023     zSign = aSign;
   4024     expDiff = aExp - bExp;
   4025     aSig1 = 0;
   4026     if ( expDiff < 0 ) {
   4027         if ( expDiff < -1 ) return a;
   4028         shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
   4029         expDiff = 0;
   4030     }
   4031     q = ( bSig <= aSig0 );
   4032     if ( q ) aSig0 -= bSig;
   4033     expDiff -= 64;
   4034     while ( 0 < expDiff ) {
   4035         q = estimateDiv128To64( aSig0, aSig1, bSig );
   4036         q = ( 2 < q ) ? q - 2 : 0;
   4037         mul64To128( bSig, q, &term0, &term1 );
   4038         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
   4039         shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
   4040         expDiff -= 62;
   4041     }
   4042     expDiff += 64;
   4043     if ( 0 < expDiff ) {
   4044         q = estimateDiv128To64( aSig0, aSig1, bSig );
   4045         q = ( 2 < q ) ? q - 2 : 0;
   4046         q >>= 64 - expDiff;
   4047         mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
   4048         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
   4049         shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
   4050         while ( le128( term0, term1, aSig0, aSig1 ) ) {
   4051             ++q;
   4052             sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
   4053         }
   4054     }
   4055     else {
   4056         term1 = 0;
   4057         term0 = bSig;
   4058     }
   4059     sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
   4060     if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
   4061          || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
   4062               && ( q & 1 ) )
   4063        ) {
   4064         aSig0 = alternateASig0;
   4065         aSig1 = alternateASig1;
   4066         zSign = ! zSign;
   4067     }
   4068     return
   4069         normalizeRoundAndPackFloatx80(
   4070             80, zSign, bExp + expDiff, aSig0, aSig1 );
   4071 
   4072 }
   4073 
   4074 /*
   4075 -------------------------------------------------------------------------------
   4076 Returns the square root of the extended double-precision floating-point
   4077 value `a'.  The operation is performed according to the IEC/IEEE Standard
   4078 for Binary Floating-Point Arithmetic.
   4079 -------------------------------------------------------------------------------
   4080 */
   4081 floatx80 floatx80_sqrt( floatx80 a )
   4082 {
   4083     flag aSign;
   4084     int32 aExp, zExp;
   4085     bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
   4086     bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
   4087     floatx80 z;
   4088 
   4089     aSig0 = extractFloatx80Frac( a );
   4090     aExp = extractFloatx80Exp( a );
   4091     aSign = extractFloatx80Sign( a );
   4092     if ( aExp == 0x7FFF ) {
   4093         if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
   4094         if ( ! aSign ) return a;
   4095         goto invalid;
   4096     }
   4097     if ( aSign ) {
   4098         if ( ( aExp | aSig0 ) == 0 ) return a;
   4099  invalid:
   4100         float_raise( float_flag_invalid );
   4101         z.low = floatx80_default_nan_low;
   4102         z.high = floatx80_default_nan_high<<X80SHIFT;
   4103         return z;
   4104     }
   4105     if ( aExp == 0 ) {
   4106         if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
   4107         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
   4108     }
   4109     zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
   4110     zSig0 = estimateSqrt32( aExp, aSig0>>32 );
   4111     shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
   4112     zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
   4113     doubleZSig0 = zSig0<<1;
   4114     mul64To128( zSig0, zSig0, &term0, &term1 );
   4115     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
   4116     while ( (sbits64) rem0 < 0 ) {
   4117         --zSig0;
   4118         doubleZSig0 -= 2;
   4119         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
   4120     }
   4121     zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
   4122     if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
   4123         if ( zSig1 == 0 ) zSig1 = 1;
   4124         mul64To128( doubleZSig0, zSig1, &term1, &term2 );
   4125         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
   4126         mul64To128( zSig1, zSig1, &term2, &term3 );
   4127         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
   4128         while ( (sbits64) rem1 < 0 ) {
   4129             --zSig1;
   4130             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
   4131             term3 |= 1;
   4132             term2 |= doubleZSig0;
   4133             add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
   4134         }
   4135         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
   4136     }
   4137     shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
   4138     zSig0 |= doubleZSig0;
   4139     return
   4140         roundAndPackFloatx80(
   4141             floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
   4142 
   4143 }
   4144 
   4145 /*
   4146 -------------------------------------------------------------------------------
   4147 Returns 1 if the extended double-precision floating-point value `a' is
   4148 equal to the corresponding value `b', and 0 otherwise.  The comparison is
   4149 performed according to the IEC/IEEE Standard for Binary Floating-Point
   4150 Arithmetic.
   4151 -------------------------------------------------------------------------------
   4152 */
   4153 flag floatx80_eq_internal( floatx80 a, floatx80 b )
   4154 {
   4155 
   4156     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4157               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4158          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4159               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4160        ) {
   4161         if (    floatx80_is_signaling_nan( a )
   4162              || floatx80_is_signaling_nan( b ) ) {
   4163             float_raise( float_flag_invalid );
   4164         }
   4165         return 0;
   4166     }
   4167     return
   4168            ( a.low == b.low )
   4169         && (    ( a.high == b.high )
   4170              || (    ( a.low == 0 )
   4171                   && ( (bits16) ( ((bits32)( a.high | b.high )>>X80SHIFT)<<1 )
   4172 	   == 0 ) ) );
   4173 
   4174 }
   4175 
   4176 /*
   4177 -------------------------------------------------------------------------------
   4178 Returns 1 if the extended double-precision floating-point value `a' is
   4179 less than or equal to the corresponding value `b', and 0 otherwise.  The
   4180 comparison is performed according to the IEC/IEEE Standard for Binary
   4181 Floating-Point Arithmetic.
   4182 -------------------------------------------------------------------------------
   4183 */
   4184 flag floatx80_le_internal( floatx80 a, floatx80 b )
   4185 {
   4186     flag aSign, bSign;
   4187 
   4188     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4189               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4190          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4191               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4192        ) {
   4193         float_raise( float_flag_invalid );
   4194         return 0;
   4195     }
   4196     aSign = extractFloatx80Sign( a );
   4197     bSign = extractFloatx80Sign( b );
   4198     if ( aSign != bSign ) {
   4199         return
   4200                aSign
   4201             || (    ( ( (bits16) ( ((bits32)( a.high | b.high )>>X80SHIFT)
   4202 	         <<1 ) ) | a.low | b.low ) == 0 );
   4203     }
   4204     return
   4205           aSign ? le128( b.high>>X80SHIFT, b.low, a.high>>X80SHIFT, a.low )
   4206         : le128( a.high>>X80SHIFT, a.low, b.high>>X80SHIFT, b.low );
   4207 
   4208 }
   4209 
   4210 /*
   4211 -------------------------------------------------------------------------------
   4212 Returns 1 if the extended double-precision floating-point value `a' is
   4213 less than the corresponding value `b', and 0 otherwise.  The comparison
   4214 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4215 Arithmetic.
   4216 -------------------------------------------------------------------------------
   4217 */
   4218 flag floatx80_lt_internal( floatx80 a, floatx80 b )
   4219 {
   4220     flag aSign, bSign;
   4221 
   4222     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4223               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4224          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4225               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4226        ) {
   4227         float_raise( float_flag_invalid );
   4228         return 0;
   4229     }
   4230     aSign = extractFloatx80Sign( a );
   4231     bSign = extractFloatx80Sign( b );
   4232     if ( aSign != bSign ) {
   4233         return
   4234                aSign
   4235             && (    ( ( (bits16) ( ((bits32)( a.high | b.high )>>X80SHIFT)
   4236 		 <<1 ) ) | a.low | b.low ) != 0 );
   4237     }
   4238     return
   4239           aSign ? lt128( b.high>>X80SHIFT, b.low, a.high>>X80SHIFT, a.low )
   4240         : lt128( a.high>>X80SHIFT, a.low, b.high>>X80SHIFT, b.low );
   4241 
   4242 }
   4243 
   4244 /*
   4245 -------------------------------------------------------------------------------
   4246 Returns 1 if the extended double-precision floating-point value `a' is equal
   4247 to the corresponding value `b', and 0 otherwise.  The invalid exception is
   4248 raised if either operand is a NaN.  Otherwise, the comparison is performed
   4249 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   4250 -------------------------------------------------------------------------------
   4251 */
   4252 flag floatx80_eq_signaling_internal( floatx80 a, floatx80 b )
   4253 {
   4254 
   4255     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4256               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4257          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4258               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4259        ) {
   4260         float_raise( float_flag_invalid );
   4261         return 0;
   4262     }
   4263     return
   4264            ( a.low == b.low )
   4265         && (    ( (a.high>>X80SHIFT) == (b.high>>X80SHIFT) )
   4266              || (    ( a.low == 0 )
   4267                   && ( (bits16) ( ((bits32)( a.high | b.high )>>X80SHIFT)<<1 )
   4268 	    == 0 ) ) );
   4269 
   4270 }
   4271 
   4272 /*
   4273 -------------------------------------------------------------------------------
   4274 Returns 1 if the extended double-precision floating-point value `a' is less
   4275 than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
   4276 do not cause an exception.  Otherwise, the comparison is performed according
   4277 to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   4278 -------------------------------------------------------------------------------
   4279 */
   4280 flag floatx80_le_quiet_internal( floatx80 a, floatx80 b )
   4281 {
   4282     flag aSign, bSign;
   4283 
   4284     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4285               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4286          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4287               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4288        ) {
   4289         if (    floatx80_is_signaling_nan( a )
   4290              || floatx80_is_signaling_nan( b ) ) {
   4291             float_raise( float_flag_invalid );
   4292         }
   4293         return 0;
   4294     }
   4295     aSign = extractFloatx80Sign( a );
   4296     bSign = extractFloatx80Sign( b );
   4297     if ( aSign != bSign ) {
   4298         return
   4299                aSign
   4300             || (    ( ( (bits16) ( ((bits32)( a.high | b.high )>>X80SHIFT)<<1
   4301 		 ) ) | a.low | b.low ) == 0 );
   4302     }
   4303     return
   4304           aSign ? le128( b.high>>X80SHIFT, b.low, a.high>>X80SHIFT, a.low )
   4305         : le128( a.high>>X80SHIFT, a.low, b.high>>X80SHIFT, b.low );
   4306 
   4307 }
   4308 
   4309 /*
   4310 -------------------------------------------------------------------------------
   4311 Returns 1 if the extended double-precision floating-point value `a' is less
   4312 than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
   4313 an exception.  Otherwise, the comparison is performed according to the
   4314 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   4315 -------------------------------------------------------------------------------
   4316 */
   4317 flag floatx80_lt_quiet_internal( floatx80 a, floatx80 b )
   4318 {
   4319     flag aSign, bSign;
   4320 
   4321     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4322               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4323          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4324               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4325        ) {
   4326         if (    floatx80_is_signaling_nan( a )
   4327              || floatx80_is_signaling_nan( b ) ) {
   4328             float_raise( float_flag_invalid );
   4329         }
   4330         return 0;
   4331     }
   4332     aSign = extractFloatx80Sign( a );
   4333     bSign = extractFloatx80Sign( b );
   4334     if ( aSign != bSign ) {
   4335         return
   4336                aSign
   4337             && (    ( ( (bits16) ( ((bits32)( a.high | b.high )>>X80SHIFT)<<1 )
   4338 		 ) | a.low | b.low ) != 0 );
   4339     }
   4340     return
   4341           aSign ? lt128( b.high>>X80SHIFT, b.low, a.high>>X80SHIFT, a.low )
   4342         : lt128( a.high>>X80SHIFT, a.low, b.high>>X80SHIFT, b.low );
   4343 
   4344 }
   4345 
   4346 #endif
   4347 
   4348 #ifdef FLOAT128
   4349 
   4350 /*
   4351 -------------------------------------------------------------------------------
   4352 Returns the result of converting the quadruple-precision floating-point
   4353 value `a' to the 32-bit two's complement integer format.  The conversion
   4354 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4355 Arithmetic---which means in particular that the conversion is rounded
   4356 according to the current rounding mode.  If `a' is a NaN, the largest
   4357 positive integer is returned.  Otherwise, if the conversion overflows, the
   4358 largest integer with the same sign as `a' is returned.
   4359 -------------------------------------------------------------------------------
   4360 */
   4361 int32 float128_to_int32( float128 a )
   4362 {
   4363     flag aSign;
   4364     int32 aExp, shiftCount;
   4365     bits64 aSig0, aSig1;
   4366 
   4367     aSig1 = extractFloat128Frac1( a );
   4368     aSig0 = extractFloat128Frac0( a );
   4369     aExp = extractFloat128Exp( a );
   4370     aSign = extractFloat128Sign( a );
   4371     if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
   4372     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
   4373     aSig0 |= ( aSig1 != 0 );
   4374     shiftCount = 0x4028 - aExp;
   4375     if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
   4376     return roundAndPackInt32( aSign, aSig0 );
   4377 
   4378 }
   4379 
   4380 /*
   4381 -------------------------------------------------------------------------------
   4382 Returns the result of converting the quadruple-precision floating-point
   4383 value `a' to the 32-bit two's complement integer format.  The conversion
   4384 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4385 Arithmetic, except that the conversion is always rounded toward zero.  If
   4386 `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
   4387 conversion overflows, the largest integer with the same sign as `a' is
   4388 returned.
   4389 -------------------------------------------------------------------------------
   4390 */
   4391 int32 float128_to_int32_round_to_zero( float128 a )
   4392 {
   4393     flag aSign;
   4394     int32 aExp, shiftCount;
   4395     bits64 aSig0, aSig1, savedASig;
   4396     int32 z;
   4397 
   4398     aSig1 = extractFloat128Frac1( a );
   4399     aSig0 = extractFloat128Frac0( a );
   4400     aExp = extractFloat128Exp( a );
   4401     aSign = extractFloat128Sign( a );
   4402     aSig0 |= ( aSig1 != 0 );
   4403     if ( 0x401E < aExp ) {
   4404         if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
   4405         goto invalid;
   4406     }
   4407     else if ( aExp < 0x3FFF ) {
   4408         if ( aExp || aSig0 ) set_float_exception_inexact_flag();
   4409         return 0;
   4410     }
   4411     aSig0 |= LIT64( 0x0001000000000000 );
   4412     shiftCount = 0x402F - aExp;
   4413     savedASig = aSig0;
   4414     aSig0 >>= shiftCount;
   4415     z = (int32)aSig0;
   4416     if ( aSign ) z = - z;
   4417     if ( ( z < 0 ) ^ aSign ) {
   4418  invalid:
   4419         float_raise( float_flag_invalid );
   4420         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
   4421     }
   4422     if ( ( aSig0<<shiftCount ) != savedASig ) {
   4423         set_float_exception_inexact_flag();
   4424     }
   4425     return z;
   4426 
   4427 }
   4428 
   4429 /*
   4430 -------------------------------------------------------------------------------
   4431 Returns the result of converting the quadruple-precision floating-point
   4432 value `a' to the 64-bit two's complement integer format.  The conversion
   4433 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4434 Arithmetic---which means in particular that the conversion is rounded
   4435 according to the current rounding mode.  If `a' is a NaN, the largest
   4436 positive integer is returned.  Otherwise, if the conversion overflows, the
   4437 largest integer with the same sign as `a' is returned.
   4438 -------------------------------------------------------------------------------
   4439 */
   4440 int64 float128_to_int64( float128 a )
   4441 {
   4442     flag aSign;
   4443     int32 aExp, shiftCount;
   4444     bits64 aSig0, aSig1;
   4445 
   4446     aSig1 = extractFloat128Frac1( a );
   4447     aSig0 = extractFloat128Frac0( a );
   4448     aExp = extractFloat128Exp( a );
   4449     aSign = extractFloat128Sign( a );
   4450     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
   4451     shiftCount = 0x402F - aExp;
   4452     if ( shiftCount <= 0 ) {
   4453         if ( 0x403E < aExp ) {
   4454             float_raise( float_flag_invalid );
   4455             if (    ! aSign
   4456                  || (    ( aExp == 0x7FFF )
   4457                       && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
   4458                     )
   4459                ) {
   4460                 return LIT64( 0x7FFFFFFFFFFFFFFF );
   4461             }
   4462             return (sbits64) LIT64( 0x8000000000000000 );
   4463         }
   4464         shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
   4465     }
   4466     else {
   4467         shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
   4468     }
   4469     return roundAndPackInt64( aSign, aSig0, aSig1 );
   4470 
   4471 }
   4472 
   4473 /*
   4474 -------------------------------------------------------------------------------
   4475 Returns the result of converting the quadruple-precision floating-point
   4476 value `a' to the 64-bit two's complement integer format.  The conversion
   4477 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4478 Arithmetic, except that the conversion is always rounded toward zero.
   4479 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
   4480 the conversion overflows, the largest integer with the same sign as `a' is
   4481 returned.
   4482 -------------------------------------------------------------------------------
   4483 */
   4484 int64 float128_to_int64_round_to_zero( float128 a )
   4485 {
   4486     flag aSign;
   4487     int32 aExp, shiftCount;
   4488     bits64 aSig0, aSig1;
   4489     int64 z;
   4490 
   4491     aSig1 = extractFloat128Frac1( a );
   4492     aSig0 = extractFloat128Frac0( a );
   4493     aExp = extractFloat128Exp( a );
   4494     aSign = extractFloat128Sign( a );
   4495     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
   4496     shiftCount = aExp - 0x402F;
   4497     if ( 0 < shiftCount ) {
   4498         if ( 0x403E <= aExp ) {
   4499             aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
   4500             if (    ( a.high == LIT64( 0xC03E000000000000 ) )
   4501                  && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
   4502                 if ( aSig1 ) set_float_exception_inexact_flag();
   4503             }
   4504             else {
   4505                 float_raise( float_flag_invalid );
   4506                 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
   4507                     return LIT64( 0x7FFFFFFFFFFFFFFF );
   4508                 }
   4509             }
   4510             return (sbits64) LIT64( 0x8000000000000000 );
   4511         }
   4512         z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
   4513         if ( (bits64) ( aSig1<<shiftCount ) ) {
   4514             set_float_exception_inexact_flag();
   4515         }
   4516     }
   4517     else {
   4518         if ( aExp < 0x3FFF ) {
   4519             if ( aExp | aSig0 | aSig1 ) {
   4520                 set_float_exception_inexact_flag();
   4521             }
   4522             return 0;
   4523         }
   4524         z = aSig0>>( - shiftCount );
   4525         if (    aSig1
   4526              || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
   4527             set_float_exception_inexact_flag();
   4528         }
   4529     }
   4530     if ( aSign ) z = - z;
   4531     return z;
   4532 
   4533 }
   4534 
   4535 #if (defined(SOFTFLOATSPARC64_FOR_GCC) || defined(SOFTFLOAT_FOR_GCC)) \
   4536     && defined(SOFTFLOAT_NEED_FIXUNS)
   4537 /*
   4538  * just like above - but do not care for overflow of signed results
   4539  */
   4540 uint64 float128_to_uint64_round_to_zero( float128 a )
   4541 {
   4542     flag aSign;
   4543     int32 aExp, shiftCount;
   4544     bits64 aSig0, aSig1;
   4545     uint64 z;
   4546 
   4547     aSig1 = extractFloat128Frac1( a );
   4548     aSig0 = extractFloat128Frac0( a );
   4549     aExp = extractFloat128Exp( a );
   4550     aSign = extractFloat128Sign( a );
   4551     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
   4552     shiftCount = aExp - 0x402F;
   4553     if ( 0 < shiftCount ) {
   4554         if ( 0x403F <= aExp ) {
   4555             aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
   4556             if (    ( a.high == LIT64( 0xC03E000000000000 ) )
   4557                  && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
   4558                 if ( aSig1 ) set_float_exception_inexact_flag();
   4559             }
   4560             else {
   4561                 float_raise( float_flag_invalid );
   4562             }
   4563             return LIT64( 0xFFFFFFFFFFFFFFFF );
   4564         }
   4565         z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
   4566         if ( (bits64) ( aSig1<<shiftCount ) ) {
   4567             set_float_exception_inexact_flag();
   4568         }
   4569     }
   4570     else {
   4571         if ( aExp < 0x3FFF ) {
   4572             if ( aExp | aSig0 | aSig1 ) {
   4573                 set_float_exception_inexact_flag();
   4574             }
   4575             return 0;
   4576         }
   4577         z = aSig0>>( - shiftCount );
   4578         if (aSig1 || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
   4579             set_float_exception_inexact_flag();
   4580         }
   4581     }
   4582     if ( aSign ) z = - z;
   4583     return z;
   4584 
   4585 }
   4586 #endif /* (SOFTFLOATSPARC64_FOR_GCC || SOFTFLOAT_FOR_GCC) && SOFTFLOAT_NEED_FIXUNS */
   4587 
   4588 /*
   4589 -------------------------------------------------------------------------------
   4590 Returns the result of converting the quadruple-precision floating-point
   4591 value `a' to the single-precision floating-point format.  The conversion
   4592 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4593 Arithmetic.
   4594 -------------------------------------------------------------------------------
   4595 */
   4596 float32 float128_to_float32( float128 a )
   4597 {
   4598     flag aSign;
   4599     int32 aExp;
   4600     bits64 aSig0, aSig1;
   4601     bits32 zSig;
   4602 
   4603     aSig1 = extractFloat128Frac1( a );
   4604     aSig0 = extractFloat128Frac0( a );
   4605     aExp = extractFloat128Exp( a );
   4606     aSign = extractFloat128Sign( a );
   4607     if ( aExp == 0x7FFF ) {
   4608         if ( aSig0 | aSig1 ) {
   4609             return commonNaNToFloat32( float128ToCommonNaN( a ) );
   4610         }
   4611         return packFloat32( aSign, 0xFF, 0 );
   4612     }
   4613     aSig0 |= ( aSig1 != 0 );
   4614     shift64RightJamming( aSig0, 18, &aSig0 );
   4615     zSig = (bits32)aSig0;
   4616     if ( aExp || zSig ) {
   4617         zSig |= 0x40000000;
   4618         aExp -= 0x3F81;
   4619     }
   4620     return roundAndPackFloat32( aSign, aExp, zSig );
   4621 
   4622 }
   4623 
   4624 /*
   4625 -------------------------------------------------------------------------------
   4626 Returns the result of converting the quadruple-precision floating-point
   4627 value `a' to the double-precision floating-point format.  The conversion
   4628 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4629 Arithmetic.
   4630 -------------------------------------------------------------------------------
   4631 */
   4632 float64 float128_to_float64( float128 a )
   4633 {
   4634     flag aSign;
   4635     int32 aExp;
   4636     bits64 aSig0, aSig1;
   4637 
   4638     aSig1 = extractFloat128Frac1( a );
   4639     aSig0 = extractFloat128Frac0( a );
   4640     aExp = extractFloat128Exp( a );
   4641     aSign = extractFloat128Sign( a );
   4642     if ( aExp == 0x7FFF ) {
   4643         if ( aSig0 | aSig1 ) {
   4644             return commonNaNToFloat64( float128ToCommonNaN( a ) );
   4645         }
   4646         return packFloat64( aSign, 0x7FF, 0 );
   4647     }
   4648     shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
   4649     aSig0 |= ( aSig1 != 0 );
   4650     if ( aExp || aSig0 ) {
   4651         aSig0 |= LIT64( 0x4000000000000000 );
   4652         aExp -= 0x3C01;
   4653     }
   4654     return roundAndPackFloat64( aSign, aExp, aSig0 );
   4655 
   4656 }
   4657 
   4658 #ifdef FLOATX80
   4659 
   4660 /*
   4661 -------------------------------------------------------------------------------
   4662 Returns the result of converting the quadruple-precision floating-point
   4663 value `a' to the extended double-precision floating-point format.  The
   4664 conversion is performed according to the IEC/IEEE Standard for Binary
   4665 Floating-Point Arithmetic.
   4666 -------------------------------------------------------------------------------
   4667 */
   4668 floatx80 float128_to_floatx80( float128 a )
   4669 {
   4670     flag aSign;
   4671     int32 aExp;
   4672     bits64 aSig0, aSig1;
   4673 
   4674     aSig1 = extractFloat128Frac1( a );
   4675     aSig0 = extractFloat128Frac0( a );
   4676     aExp = extractFloat128Exp( a );
   4677     aSign = extractFloat128Sign( a );
   4678     if ( aExp == 0x7FFF ) {
   4679         if ( aSig0 | aSig1 ) {
   4680             return commonNaNToFloatx80( float128ToCommonNaN( a ) );
   4681         }
   4682         return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   4683     }
   4684     if ( aExp == 0 ) {
   4685         if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
   4686         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   4687     }
   4688     else {
   4689         aSig0 |= LIT64( 0x0001000000000000 );
   4690     }
   4691     shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
   4692     return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
   4693 
   4694 }
   4695 
   4696 #endif
   4697 
   4698 /*
   4699 -------------------------------------------------------------------------------
   4700 Rounds the quadruple-precision floating-point value `a' to an integer, and
   4701 returns the result as a quadruple-precision floating-point value.  The
   4702 operation is performed according to the IEC/IEEE Standard for Binary
   4703 Floating-Point Arithmetic.
   4704 -------------------------------------------------------------------------------
   4705 */
   4706 float128 float128_round_to_int( float128 a )
   4707 {
   4708     flag aSign;
   4709     int32 aExp;
   4710     bits64 lastBitMask, roundBitsMask;
   4711     int8 roundingMode;
   4712     float128 z;
   4713 
   4714     aExp = extractFloat128Exp( a );
   4715     if ( 0x402F <= aExp ) {
   4716         if ( 0x406F <= aExp ) {
   4717             if (    ( aExp == 0x7FFF )
   4718                  && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
   4719                ) {
   4720                 return propagateFloat128NaN( a, a );
   4721             }
   4722             return a;
   4723         }
   4724         lastBitMask = 1;
   4725         lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
   4726         roundBitsMask = lastBitMask - 1;
   4727         z = a;
   4728         roundingMode = float_rounding_mode;
   4729         if ( roundingMode == float_round_nearest_even ) {
   4730             if ( lastBitMask ) {
   4731                 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
   4732                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
   4733             }
   4734             else {
   4735                 if ( (sbits64) z.low < 0 ) {
   4736                     ++z.high;
   4737                     if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
   4738                 }
   4739             }
   4740         }
   4741         else if ( roundingMode != float_round_to_zero ) {
   4742             if (   extractFloat128Sign( z )
   4743                  ^ ( roundingMode == float_round_up ) ) {
   4744                 add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
   4745             }
   4746         }
   4747         z.low &= ~ roundBitsMask;
   4748     }
   4749     else {
   4750         if ( aExp < 0x3FFF ) {
   4751             if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
   4752             set_float_exception_inexact_flag();
   4753             aSign = extractFloat128Sign( a );
   4754             switch ( float_rounding_mode ) {
   4755              case float_round_nearest_even:
   4756                 if (    ( aExp == 0x3FFE )
   4757                      && (   extractFloat128Frac0( a )
   4758                           | extractFloat128Frac1( a ) )
   4759                    ) {
   4760                     return packFloat128( aSign, 0x3FFF, 0, 0 );
   4761                 }
   4762                 break;
   4763 	     case float_round_to_zero:
   4764 		break;
   4765              case float_round_down:
   4766                 return
   4767                       aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
   4768                     : packFloat128( 0, 0, 0, 0 );
   4769              case float_round_up:
   4770                 return
   4771                       aSign ? packFloat128( 1, 0, 0, 0 )
   4772                     : packFloat128( 0, 0x3FFF, 0, 0 );
   4773             }
   4774             return packFloat128( aSign, 0, 0, 0 );
   4775         }
   4776         lastBitMask = 1;
   4777         lastBitMask <<= 0x402F - aExp;
   4778         roundBitsMask = lastBitMask - 1;
   4779         z.low = 0;
   4780         z.high = a.high;
   4781         roundingMode = float_rounding_mode;
   4782         if ( roundingMode == float_round_nearest_even ) {
   4783             z.high += lastBitMask>>1;
   4784             if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
   4785                 z.high &= ~ lastBitMask;
   4786             }
   4787         }
   4788         else if ( roundingMode != float_round_to_zero ) {
   4789             if (   extractFloat128Sign( z )
   4790                  ^ ( roundingMode == float_round_up ) ) {
   4791                 z.high |= ( a.low != 0 );
   4792                 z.high += roundBitsMask;
   4793             }
   4794         }
   4795         z.high &= ~ roundBitsMask;
   4796     }
   4797     if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
   4798         set_float_exception_inexact_flag();
   4799     }
   4800     return z;
   4801 
   4802 }
   4803 
   4804 /*
   4805 -------------------------------------------------------------------------------
   4806 Returns the result of adding the absolute values of the quadruple-precision
   4807 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
   4808 before being returned.  `zSign' is ignored if the result is a NaN.
   4809 The addition is performed according to the IEC/IEEE Standard for Binary
   4810 Floating-Point Arithmetic.
   4811 -------------------------------------------------------------------------------
   4812 */
   4813 static float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
   4814 {
   4815     int32 aExp, bExp, zExp;
   4816     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
   4817     int32 expDiff;
   4818 
   4819     aSig1 = extractFloat128Frac1( a );
   4820     aSig0 = extractFloat128Frac0( a );
   4821     aExp = extractFloat128Exp( a );
   4822     bSig1 = extractFloat128Frac1( b );
   4823     bSig0 = extractFloat128Frac0( b );
   4824     bExp = extractFloat128Exp( b );
   4825     expDiff = aExp - bExp;
   4826     if ( 0 < expDiff ) {
   4827         if ( aExp == 0x7FFF ) {
   4828             if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
   4829             return a;
   4830         }
   4831         if ( bExp == 0 ) {
   4832             --expDiff;
   4833         }
   4834         else {
   4835             bSig0 |= LIT64( 0x0001000000000000 );
   4836         }
   4837         shift128ExtraRightJamming(
   4838             bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
   4839         zExp = aExp;
   4840     }
   4841     else if ( expDiff < 0 ) {
   4842         if ( bExp == 0x7FFF ) {
   4843             if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   4844             return packFloat128( zSign, 0x7FFF, 0, 0 );
   4845         }
   4846         if ( aExp == 0 ) {
   4847             ++expDiff;
   4848         }
   4849         else {
   4850             aSig0 |= LIT64( 0x0001000000000000 );
   4851         }
   4852         shift128ExtraRightJamming(
   4853             aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
   4854         zExp = bExp;
   4855     }
   4856     else {
   4857         if ( aExp == 0x7FFF ) {
   4858             if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
   4859                 return propagateFloat128NaN( a, b );
   4860             }
   4861             return a;
   4862         }
   4863         add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
   4864         if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
   4865         zSig2 = 0;
   4866         zSig0 |= LIT64( 0x0002000000000000 );
   4867         zExp = aExp;
   4868         goto shiftRight1;
   4869     }
   4870     aSig0 |= LIT64( 0x0001000000000000 );
   4871     add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
   4872     --zExp;
   4873     if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
   4874     ++zExp;
   4875  shiftRight1:
   4876     shift128ExtraRightJamming(
   4877         zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
   4878  roundAndPack:
   4879     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
   4880 
   4881 }
   4882 
   4883 /*
   4884 -------------------------------------------------------------------------------
   4885 Returns the result of subtracting the absolute values of the quadruple-
   4886 precision floating-point values `a' and `b'.  If `zSign' is 1, the
   4887 difference is negated before being returned.  `zSign' is ignored if the
   4888 result is a NaN.  The subtraction is performed according to the IEC/IEEE
   4889 Standard for Binary Floating-Point Arithmetic.
   4890 -------------------------------------------------------------------------------
   4891 */
   4892 static float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
   4893 {
   4894     int32 aExp, bExp, zExp;
   4895     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
   4896     int32 expDiff;
   4897     float128 z;
   4898 
   4899     aSig1 = extractFloat128Frac1( a );
   4900     aSig0 = extractFloat128Frac0( a );
   4901     aExp = extractFloat128Exp( a );
   4902     bSig1 = extractFloat128Frac1( b );
   4903     bSig0 = extractFloat128Frac0( b );
   4904     bExp = extractFloat128Exp( b );
   4905     expDiff = aExp - bExp;
   4906     shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
   4907     shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
   4908     if ( 0 < expDiff ) goto aExpBigger;
   4909     if ( expDiff < 0 ) goto bExpBigger;
   4910     if ( aExp == 0x7FFF ) {
   4911         if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
   4912             return propagateFloat128NaN( a, b );
   4913         }
   4914         float_raise( float_flag_invalid );
   4915         z.low = float128_default_nan_low;
   4916         z.high = float128_default_nan_high;
   4917         return z;
   4918     }
   4919     if ( aExp == 0 ) {
   4920         aExp = 1;
   4921         bExp = 1;
   4922     }
   4923     if ( bSig0 < aSig0 ) goto aBigger;
   4924     if ( aSig0 < bSig0 ) goto bBigger;
   4925     if ( bSig1 < aSig1 ) goto aBigger;
   4926     if ( aSig1 < bSig1 ) goto bBigger;
   4927     return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 );
   4928  bExpBigger:
   4929     if ( bExp == 0x7FFF ) {
   4930         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   4931         return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
   4932     }
   4933     if ( aExp == 0 ) {
   4934         ++expDiff;
   4935     }
   4936     else {
   4937         aSig0 |= LIT64( 0x4000000000000000 );
   4938     }
   4939     shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
   4940     bSig0 |= LIT64( 0x4000000000000000 );
   4941  bBigger:
   4942     sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
   4943     zExp = bExp;
   4944     zSign ^= 1;
   4945     goto normalizeRoundAndPack;
   4946  aExpBigger:
   4947     if ( aExp == 0x7FFF ) {
   4948         if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
   4949         return a;
   4950     }
   4951     if ( bExp == 0 ) {
   4952         --expDiff;
   4953     }
   4954     else {
   4955         bSig0 |= LIT64( 0x4000000000000000 );
   4956     }
   4957     shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
   4958     aSig0 |= LIT64( 0x4000000000000000 );
   4959  aBigger:
   4960     sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
   4961     zExp = aExp;
   4962  normalizeRoundAndPack:
   4963     --zExp;
   4964     return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
   4965 
   4966 }
   4967 
   4968 /*
   4969 -------------------------------------------------------------------------------
   4970 Returns the result of adding the quadruple-precision floating-point values
   4971 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   4972 for Binary Floating-Point Arithmetic.
   4973 -------------------------------------------------------------------------------
   4974 */
   4975 float128 float128_add( float128 a, float128 b )
   4976 {
   4977     flag aSign, bSign;
   4978 
   4979     aSign = extractFloat128Sign( a );
   4980     bSign = extractFloat128Sign( b );
   4981     if ( aSign == bSign ) {
   4982         return addFloat128Sigs( a, b, aSign );
   4983     }
   4984     else {
   4985         return subFloat128Sigs( a, b, aSign );
   4986     }
   4987 
   4988 }
   4989 
   4990 /*
   4991 -------------------------------------------------------------------------------
   4992 Returns the result of subtracting the quadruple-precision floating-point
   4993 values `a' and `b'.  The operation is performed according to the IEC/IEEE
   4994 Standard for Binary Floating-Point Arithmetic.
   4995 -------------------------------------------------------------------------------
   4996 */
   4997 float128 float128_sub( float128 a, float128 b )
   4998 {
   4999     flag aSign, bSign;
   5000 
   5001     aSign = extractFloat128Sign( a );
   5002     bSign = extractFloat128Sign( b );
   5003     if ( aSign == bSign ) {
   5004         return subFloat128Sigs( a, b, aSign );
   5005     }
   5006     else {
   5007         return addFloat128Sigs( a, b, aSign );
   5008     }
   5009 
   5010 }
   5011 
   5012 /*
   5013 -------------------------------------------------------------------------------
   5014 Returns the result of multiplying the quadruple-precision floating-point
   5015 values `a' and `b'.  The operation is performed according to the IEC/IEEE
   5016 Standard for Binary Floating-Point Arithmetic.
   5017 -------------------------------------------------------------------------------
   5018 */
   5019 float128 float128_mul( float128 a, float128 b )
   5020 {
   5021     flag aSign, bSign, zSign;
   5022     int32 aExp, bExp, zExp;
   5023     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
   5024     float128 z;
   5025 
   5026     aSig1 = extractFloat128Frac1( a );
   5027     aSig0 = extractFloat128Frac0( a );
   5028     aExp = extractFloat128Exp( a );
   5029     aSign = extractFloat128Sign( a );
   5030     bSig1 = extractFloat128Frac1( b );
   5031     bSig0 = extractFloat128Frac0( b );
   5032     bExp = extractFloat128Exp( b );
   5033     bSign = extractFloat128Sign( b );
   5034     zSign = aSign ^ bSign;
   5035     if ( aExp == 0x7FFF ) {
   5036         if (    ( aSig0 | aSig1 )
   5037              || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
   5038             return propagateFloat128NaN( a, b );
   5039         }
   5040         if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
   5041         return packFloat128( zSign, 0x7FFF, 0, 0 );
   5042     }
   5043     if ( bExp == 0x7FFF ) {
   5044         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   5045         if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
   5046  invalid:
   5047             float_raise( float_flag_invalid );
   5048             z.low = float128_default_nan_low;
   5049             z.high = float128_default_nan_high;
   5050             return z;
   5051         }
   5052         return packFloat128( zSign, 0x7FFF, 0, 0 );
   5053     }
   5054     if ( aExp == 0 ) {
   5055         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
   5056         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   5057     }
   5058     if ( bExp == 0 ) {
   5059         if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
   5060         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
   5061     }
   5062     zExp = aExp + bExp - 0x4000;
   5063     aSig0 |= LIT64( 0x0001000000000000 );
   5064     shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
   5065     mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
   5066     add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
   5067     zSig2 |= ( zSig3 != 0 );
   5068     if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
   5069         shift128ExtraRightJamming(
   5070             zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
   5071         ++zExp;
   5072     }
   5073     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
   5074 
   5075 }
   5076 
   5077 /*
   5078 -------------------------------------------------------------------------------
   5079 Returns the result of dividing the quadruple-precision floating-point value
   5080 `a' by the corresponding value `b'.  The operation is performed according to
   5081 the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5082 -------------------------------------------------------------------------------
   5083 */
   5084 float128 float128_div( float128 a, float128 b )
   5085 {
   5086     flag aSign, bSign, zSign;
   5087     int32 aExp, bExp, zExp;
   5088     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
   5089     bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
   5090     float128 z;
   5091 
   5092     aSig1 = extractFloat128Frac1( a );
   5093     aSig0 = extractFloat128Frac0( a );
   5094     aExp = extractFloat128Exp( a );
   5095     aSign = extractFloat128Sign( a );
   5096     bSig1 = extractFloat128Frac1( b );
   5097     bSig0 = extractFloat128Frac0( b );
   5098     bExp = extractFloat128Exp( b );
   5099     bSign = extractFloat128Sign( b );
   5100     zSign = aSign ^ bSign;
   5101     if ( aExp == 0x7FFF ) {
   5102         if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
   5103         if ( bExp == 0x7FFF ) {
   5104             if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   5105             goto invalid;
   5106         }
   5107         return packFloat128( zSign, 0x7FFF, 0, 0 );
   5108     }
   5109     if ( bExp == 0x7FFF ) {
   5110         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   5111         return packFloat128( zSign, 0, 0, 0 );
   5112     }
   5113     if ( bExp == 0 ) {
   5114         if ( ( bSig0 | bSig1 ) == 0 ) {
   5115             if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
   5116  invalid:
   5117                 float_raise( float_flag_invalid );
   5118                 z.low = float128_default_nan_low;
   5119                 z.high = float128_default_nan_high;
   5120                 return z;
   5121             }
   5122             float_raise( float_flag_divbyzero );
   5123             return packFloat128( zSign, 0x7FFF, 0, 0 );
   5124         }
   5125         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
   5126     }
   5127     if ( aExp == 0 ) {
   5128         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
   5129         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   5130     }
   5131     zExp = aExp - bExp + 0x3FFD;
   5132     shortShift128Left(
   5133         aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
   5134     shortShift128Left(
   5135         bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
   5136     if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
   5137         shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
   5138         ++zExp;
   5139     }
   5140     zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
   5141     mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
   5142     sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
   5143     while ( (sbits64) rem0 < 0 ) {
   5144         --zSig0;
   5145         add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
   5146     }
   5147     zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
   5148     if ( ( zSig1 & 0x3FFF ) <= 4 ) {
   5149         mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
   5150         sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
   5151         while ( (sbits64) rem1 < 0 ) {
   5152             --zSig1;
   5153             add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
   5154         }
   5155         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
   5156     }
   5157     shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
   5158     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
   5159 
   5160 }
   5161 
   5162 /*
   5163 -------------------------------------------------------------------------------
   5164 Returns the remainder of the quadruple-precision floating-point value `a'
   5165 with respect to the corresponding value `b'.  The operation is performed
   5166 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5167 -------------------------------------------------------------------------------
   5168 */
   5169 float128 float128_rem( float128 a, float128 b )
   5170 {
   5171     flag aSign, zSign;
   5172     int32 aExp, bExp, expDiff;
   5173     bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
   5174     bits64 allZero, alternateASig0, alternateASig1, sigMean1;
   5175     sbits64 sigMean0;
   5176     float128 z;
   5177 
   5178     aSig1 = extractFloat128Frac1( a );
   5179     aSig0 = extractFloat128Frac0( a );
   5180     aExp = extractFloat128Exp( a );
   5181     aSign = extractFloat128Sign( a );
   5182     bSig1 = extractFloat128Frac1( b );
   5183     bSig0 = extractFloat128Frac0( b );
   5184     bExp = extractFloat128Exp( b );
   5185     if ( aExp == 0x7FFF ) {
   5186         if (    ( aSig0 | aSig1 )
   5187              || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
   5188             return propagateFloat128NaN( a, b );
   5189         }
   5190         goto invalid;
   5191     }
   5192     if ( bExp == 0x7FFF ) {
   5193         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   5194         return a;
   5195     }
   5196     if ( bExp == 0 ) {
   5197         if ( ( bSig0 | bSig1 ) == 0 ) {
   5198  invalid:
   5199             float_raise( float_flag_invalid );
   5200             z.low = float128_default_nan_low;
   5201             z.high = float128_default_nan_high;
   5202             return z;
   5203         }
   5204         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
   5205     }
   5206     if ( aExp == 0 ) {
   5207         if ( ( aSig0 | aSig1 ) == 0 ) return a;
   5208         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   5209     }
   5210     expDiff = aExp - bExp;
   5211     if ( expDiff < -1 ) return a;
   5212     shortShift128Left(
   5213         aSig0 | LIT64( 0x0001000000000000 ),
   5214         aSig1,
   5215         15 - ( expDiff < 0 ),
   5216         &aSig0,
   5217         &aSig1
   5218     );
   5219     shortShift128Left(
   5220         bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
   5221     q = le128( bSig0, bSig1, aSig0, aSig1 );
   5222     if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
   5223     expDiff -= 64;
   5224     while ( 0 < expDiff ) {
   5225         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
   5226         q = ( 4 < q ) ? q - 4 : 0;
   5227         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
   5228         shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
   5229         shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
   5230         sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
   5231         expDiff -= 61;
   5232     }
   5233     if ( -64 < expDiff ) {
   5234         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
   5235         q = ( 4 < q ) ? q - 4 : 0;
   5236         q >>= - expDiff;
   5237         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
   5238         expDiff += 52;
   5239         if ( expDiff < 0 ) {
   5240             shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
   5241         }
   5242         else {
   5243             shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
   5244         }
   5245         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
   5246         sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
   5247     }
   5248     else {
   5249         shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
   5250         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
   5251     }
   5252     do {
   5253         alternateASig0 = aSig0;
   5254         alternateASig1 = aSig1;
   5255         ++q;
   5256         sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
   5257     } while ( 0 <= (sbits64) aSig0 );
   5258     add128(
   5259         aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 );
   5260     if (    ( sigMean0 < 0 )
   5261          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
   5262         aSig0 = alternateASig0;
   5263         aSig1 = alternateASig1;
   5264     }
   5265     zSign = ( (sbits64) aSig0 < 0 );
   5266     if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
   5267     return
   5268         normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
   5269 
   5270 }
   5271 
   5272 /*
   5273 -------------------------------------------------------------------------------
   5274 Returns the square root of the quadruple-precision floating-point value `a'.
   5275 The operation is performed according to the IEC/IEEE Standard for Binary
   5276 Floating-Point Arithmetic.
   5277 -------------------------------------------------------------------------------
   5278 */
   5279 float128 float128_sqrt( float128 a )
   5280 {
   5281     flag aSign;
   5282     int32 aExp, zExp;
   5283     bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
   5284     bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
   5285     float128 z;
   5286 
   5287     aSig1 = extractFloat128Frac1( a );
   5288     aSig0 = extractFloat128Frac0( a );
   5289     aExp = extractFloat128Exp( a );
   5290     aSign = extractFloat128Sign( a );
   5291     if ( aExp == 0x7FFF ) {
   5292         if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a );
   5293         if ( ! aSign ) return a;
   5294         goto invalid;
   5295     }
   5296     if ( aSign ) {
   5297         if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
   5298  invalid:
   5299         float_raise( float_flag_invalid );
   5300         z.low = float128_default_nan_low;
   5301         z.high = float128_default_nan_high;
   5302         return z;
   5303     }
   5304     if ( aExp == 0 ) {
   5305         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
   5306         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   5307     }
   5308     zExp = (int32) ( (bits32)(aExp - 0x3FFF) >> 1) + 0x3FFE;
   5309     aSig0 |= LIT64( 0x0001000000000000 );
   5310     zSig0 = estimateSqrt32((int16)aExp, (bits32)(aSig0>>17));
   5311     shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
   5312     zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
   5313     doubleZSig0 = zSig0<<1;
   5314     mul64To128( zSig0, zSig0, &term0, &term1 );
   5315     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
   5316     while ( (sbits64) rem0 < 0 ) {
   5317         --zSig0;
   5318         doubleZSig0 -= 2;
   5319         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
   5320     }
   5321     zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
   5322     if ( ( zSig1 & 0x1FFF ) <= 5 ) {
   5323         if ( zSig1 == 0 ) zSig1 = 1;
   5324         mul64To128( doubleZSig0, zSig1, &term1, &term2 );
   5325         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
   5326         mul64To128( zSig1, zSig1, &term2, &term3 );
   5327         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
   5328         while ( (sbits64) rem1 < 0 ) {
   5329             --zSig1;
   5330             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
   5331             term3 |= 1;
   5332             term2 |= doubleZSig0;
   5333             add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
   5334         }
   5335         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
   5336     }
   5337     shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
   5338     return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 );
   5339 
   5340 }
   5341 
   5342 /*
   5343 -------------------------------------------------------------------------------
   5344 Returns 1 if the quadruple-precision floating-point value `a' is equal to
   5345 the corresponding value `b', and 0 otherwise.  The comparison is performed
   5346 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5347 -------------------------------------------------------------------------------
   5348 */
   5349 flag float128_eq( float128 a, float128 b )
   5350 {
   5351 
   5352     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5353               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5354          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5355               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5356        ) {
   5357         if (    float128_is_signaling_nan( a )
   5358              || float128_is_signaling_nan( b ) ) {
   5359             float_raise( float_flag_invalid );
   5360         }
   5361         return 0;
   5362     }
   5363     return
   5364            ( a.low == b.low )
   5365         && (    ( a.high == b.high )
   5366              || (    ( a.low == 0 )
   5367                   && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
   5368            );
   5369 
   5370 }
   5371 
   5372 /*
   5373 -------------------------------------------------------------------------------
   5374 Returns 1 if the quadruple-precision floating-point value `a' is less than
   5375 or equal to the corresponding value `b', and 0 otherwise.  The comparison
   5376 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   5377 Arithmetic.
   5378 -------------------------------------------------------------------------------
   5379 */
   5380 flag float128_le( float128 a, float128 b )
   5381 {
   5382     flag aSign, bSign;
   5383 
   5384     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5385               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5386          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5387               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5388        ) {
   5389         float_raise( float_flag_invalid );
   5390         return 0;
   5391     }
   5392     aSign = extractFloat128Sign( a );
   5393     bSign = extractFloat128Sign( b );
   5394     if ( aSign != bSign ) {
   5395         return
   5396                aSign
   5397             || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   5398                  == 0 );
   5399     }
   5400     return
   5401           aSign ? le128( b.high, b.low, a.high, a.low )
   5402         : le128( a.high, a.low, b.high, b.low );
   5403 
   5404 }
   5405 
   5406 /*
   5407 -------------------------------------------------------------------------------
   5408 Returns 1 if the quadruple-precision floating-point value `a' is less than
   5409 the corresponding value `b', and 0 otherwise.  The comparison is performed
   5410 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5411 -------------------------------------------------------------------------------
   5412 */
   5413 flag float128_lt( float128 a, float128 b )
   5414 {
   5415     flag aSign, bSign;
   5416 
   5417     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5418               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5419          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5420               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5421        ) {
   5422         float_raise( float_flag_invalid );
   5423         return 0;
   5424     }
   5425     aSign = extractFloat128Sign( a );
   5426     bSign = extractFloat128Sign( b );
   5427     if ( aSign != bSign ) {
   5428         return
   5429                aSign
   5430             && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   5431                  != 0 );
   5432     }
   5433     return
   5434           aSign ? lt128( b.high, b.low, a.high, a.low )
   5435         : lt128( a.high, a.low, b.high, b.low );
   5436 
   5437 }
   5438 
   5439 /*
   5440 -------------------------------------------------------------------------------
   5441 Returns 1 if the quadruple-precision floating-point value `a' is equal to
   5442 the corresponding value `b', and 0 otherwise.  The invalid exception is
   5443 raised if either operand is a NaN.  Otherwise, the comparison is performed
   5444 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5445 -------------------------------------------------------------------------------
   5446 */
   5447 flag float128_eq_signaling( float128 a, float128 b )
   5448 {
   5449 
   5450     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5451               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5452          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5453               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5454        ) {
   5455         float_raise( float_flag_invalid );
   5456         return 0;
   5457     }
   5458     return
   5459            ( a.low == b.low )
   5460         && (    ( a.high == b.high )
   5461              || (    ( a.low == 0 )
   5462                   && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
   5463            );
   5464 
   5465 }
   5466 
   5467 /*
   5468 -------------------------------------------------------------------------------
   5469 Returns 1 if the quadruple-precision floating-point value `a' is less than
   5470 or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
   5471 cause an exception.  Otherwise, the comparison is performed according to the
   5472 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5473 -------------------------------------------------------------------------------
   5474 */
   5475 flag float128_le_quiet( float128 a, float128 b )
   5476 {
   5477     flag aSign, bSign;
   5478 
   5479     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5480               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5481          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5482               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5483        ) {
   5484         if (    float128_is_signaling_nan( a )
   5485              || float128_is_signaling_nan( b ) ) {
   5486             float_raise( float_flag_invalid );
   5487         }
   5488         return 0;
   5489     }
   5490     aSign = extractFloat128Sign( a );
   5491     bSign = extractFloat128Sign( b );
   5492     if ( aSign != bSign ) {
   5493         return
   5494                aSign
   5495             || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   5496                  == 0 );
   5497     }
   5498     return
   5499           aSign ? le128( b.high, b.low, a.high, a.low )
   5500         : le128( a.high, a.low, b.high, b.low );
   5501 
   5502 }
   5503 
   5504 /*
   5505 -------------------------------------------------------------------------------
   5506 Returns 1 if the quadruple-precision floating-point value `a' is less than
   5507 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
   5508 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
   5509 Standard for Binary Floating-Point Arithmetic.
   5510 -------------------------------------------------------------------------------
   5511 */
   5512 flag float128_lt_quiet( float128 a, float128 b )
   5513 {
   5514     flag aSign, bSign;
   5515 
   5516     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5517               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5518          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5519               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5520        ) {
   5521         if (    float128_is_signaling_nan( a )
   5522              || float128_is_signaling_nan( b ) ) {
   5523             float_raise( float_flag_invalid );
   5524         }
   5525         return 0;
   5526     }
   5527     aSign = extractFloat128Sign( a );
   5528     bSign = extractFloat128Sign( b );
   5529     if ( aSign != bSign ) {
   5530         return
   5531                aSign
   5532             && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   5533                  != 0 );
   5534     }
   5535     return
   5536           aSign ? lt128( b.high, b.low, a.high, a.low )
   5537         : lt128( a.high, a.low, b.high, b.low );
   5538 
   5539 }
   5540 
   5541 #endif
   5542 
   5543 
   5544 #if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS)
   5545 
   5546 /*
   5547  * These two routines are not part of the original softfloat distribution.
   5548  *
   5549  * They are based on the corresponding conversions to integer but return
   5550  * unsigned numbers instead since these functions are required by GCC.
   5551  *
   5552  * Added by Mark Brinicombe <mark (at) NetBSD.org>	27/09/97
   5553  *
   5554  * float64 version overhauled for SoftFloat 2a [bjh21 2000-07-15]
   5555  */
   5556 
   5557 /*
   5558 -------------------------------------------------------------------------------
   5559 Returns the result of converting the double-precision floating-point value
   5560 `a' to the 32-bit unsigned integer format.  The conversion is
   5561 performed according to the IEC/IEEE Standard for Binary Floating-point
   5562 Arithmetic, except that the conversion is always rounded toward zero.  If
   5563 `a' is a NaN, the largest positive integer is returned.  If the conversion
   5564 overflows, the largest integer positive is returned.
   5565 -------------------------------------------------------------------------------
   5566 */
   5567 uint32 float64_to_uint32_round_to_zero( float64 a )
   5568 {
   5569     flag aSign;
   5570     int16 aExp, shiftCount;
   5571     bits64 aSig, savedASig;
   5572     uint32 z;
   5573 
   5574     aSig = extractFloat64Frac( a );
   5575     aExp = extractFloat64Exp( a );
   5576     aSign = extractFloat64Sign( a );
   5577 
   5578     if (aSign) {
   5579         float_raise( float_flag_invalid );
   5580     	return(0);
   5581     }
   5582 
   5583     if ( 0x41E < aExp ) {
   5584         float_raise( float_flag_invalid );
   5585         return 0xffffffff;
   5586     }
   5587     else if ( aExp < 0x3FF ) {
   5588         if ( aExp || aSig ) set_float_exception_inexact_flag();
   5589         return 0;
   5590     }
   5591     aSig |= LIT64( 0x0010000000000000 );
   5592     shiftCount = 0x433 - aExp;
   5593     savedASig = aSig;
   5594     aSig >>= shiftCount;
   5595     z = (uint32)aSig;
   5596     if ( ( aSig<<shiftCount ) != savedASig ) {
   5597         set_float_exception_inexact_flag();
   5598     }
   5599     return z;
   5600 
   5601 }
   5602 
   5603 /*
   5604 -------------------------------------------------------------------------------
   5605 Returns the result of converting the single-precision floating-point value
   5606 `a' to the 32-bit unsigned integer format.  The conversion is
   5607 performed according to the IEC/IEEE Standard for Binary Floating-point
   5608 Arithmetic, except that the conversion is always rounded toward zero.  If
   5609 `a' is a NaN, the largest positive integer is returned.  If the conversion
   5610 overflows, the largest positive integer is returned.
   5611 -------------------------------------------------------------------------------
   5612 */
   5613 uint32 float32_to_uint32_round_to_zero( float32 a )
   5614 {
   5615     flag aSign;
   5616     int16 aExp, shiftCount;
   5617     bits32 aSig;
   5618     uint32 z;
   5619 
   5620     aSig = extractFloat32Frac( a );
   5621     aExp = extractFloat32Exp( a );
   5622     aSign = extractFloat32Sign( a );
   5623     shiftCount = aExp - 0x9E;
   5624 
   5625     if (aSign) {
   5626         float_raise( float_flag_invalid );
   5627     	return(0);
   5628     }
   5629     if ( 0 < shiftCount ) {
   5630         float_raise( float_flag_invalid );
   5631         return 0xFFFFFFFF;
   5632     }
   5633     else if ( aExp <= 0x7E ) {
   5634         if ( aExp | aSig ) set_float_exception_inexact_flag();
   5635         return 0;
   5636     }
   5637     aSig = ( aSig | 0x800000 )<<8;
   5638     z = aSig>>( - shiftCount );
   5639     if ( aSig<<( shiftCount & 31 ) ) {
   5640         set_float_exception_inexact_flag();
   5641     }
   5642     return z;
   5643 
   5644 }
   5645 
   5646 #endif
   5647