Home | History | Annotate | Line # | Download | only in bits64
softfloat.c revision 1.16
      1 /* $NetBSD: softfloat.c,v 1.16 2025/09/17 11:37:38 nat Exp $ */
      2 
      3 /*
      4  * This version hacked for use with gcc -msoft-float by bjh21.
      5  * (Mostly a case of #ifdefing out things GCC doesn't need or provides
      6  *  itself).
      7  */
      8 
      9 /*
     10  * Things you may want to define:
     11  *
     12  * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
     13  *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
     14  *   properly renamed.
     15  */
     16 
     17 /*
     18 ===============================================================================
     19 
     20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
     21 Arithmetic Package, Release 2a.
     22 
     23 Written by John R. Hauser.  This work was made possible in part by the
     24 International Computer Science Institute, located at Suite 600, 1947 Center
     25 Street, Berkeley, California 94704.  Funding was partially provided by the
     26 National Science Foundation under grant MIP-9311980.  The original version
     27 of this code was written as part of a project to build a fixed-point vector
     28 processor in collaboration with the University of California at Berkeley,
     29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
     30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
     31 arithmetic/SoftFloat.html'.
     32 
     33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
     34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
     35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
     36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
     37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
     38 
     39 Derivative works are acceptable, even for commercial purposes, so long as
     40 (1) they include prominent notice that the work is derivative, and (2) they
     41 include prominent notice akin to these four paragraphs for those parts of
     42 this code that are retained.
     43 
     44 ===============================================================================
     45 */
     46 
     47 #include <sys/cdefs.h>
     48 #if defined(LIBC_SCCS) && !defined(lint)
     49 __RCSID("$NetBSD: softfloat.c,v 1.16 2025/09/17 11:37:38 nat Exp $");
     50 #endif /* LIBC_SCCS and not lint */
     51 
     52 #ifdef SOFTFLOAT_FOR_GCC
     53 #include "softfloat-for-gcc.h"
     54 #endif
     55 
     56 #include "milieu.h"
     57 #include "softfloat.h"
     58 
     59 /*
     60  * Conversions between floats as stored in memory and floats as
     61  * SoftFloat uses them
     62  */
     63 #ifndef FLOAT64_DEMANGLE
     64 #define FLOAT64_DEMANGLE(a)	(a)
     65 #endif
     66 #ifndef FLOAT64_MANGLE
     67 #define FLOAT64_MANGLE(a)	(a)
     68 #endif
     69 
     70 #ifndef X80SHIFT
     71 #define X80SHIFT		0
     72 #endif
     73 /*
     74 -------------------------------------------------------------------------------
     75 Floating-point rounding mode, extended double-precision rounding precision,
     76 and exception flags.
     77 -------------------------------------------------------------------------------
     78 */
     79 #ifndef set_float_rounding_mode
     80 fp_rnd float_rounding_mode = float_round_nearest_even;
     81 fp_except float_exception_flags = 0;
     82 #endif
     83 #ifndef set_float_exception_inexact_flag
     84 #define	set_float_exception_inexact_flag() \
     85 	((void)(float_exception_flags |= float_flag_inexact))
     86 #endif
     87 #ifdef FLOATX80
     88 int8 floatx80_rounding_precision = 80;
     89 #endif
     90 
     91 /*
     92 -------------------------------------------------------------------------------
     93 Primitive arithmetic functions, including multi-word arithmetic, and
     94 division and square root approximations.  (Can be specialized to target if
     95 desired.)
     96 -------------------------------------------------------------------------------
     97 */
     98 #include "softfloat-macros"
     99 
    100 /*
    101 -------------------------------------------------------------------------------
    102 Functions and definitions to determine:  (1) whether tininess for underflow
    103 is detected before or after rounding by default, (2) what (if anything)
    104 happens when exceptions are raised, (3) how signaling NaNs are distinguished
    105 from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
    106 are propagated from function inputs to output.  These details are target-
    107 specific.
    108 -------------------------------------------------------------------------------
    109 */
    110 #include "softfloat-specialize"
    111 
    112 #if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128)
    113 /*
    114 -------------------------------------------------------------------------------
    115 Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
    116 and 7, and returns the properly rounded 32-bit integer corresponding to the
    117 input.  If `zSign' is 1, the input is negated before being converted to an
    118 integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
    119 is simply rounded to an integer, with the inexact exception raised if the
    120 input cannot be represented exactly as an integer.  However, if the fixed-
    121 point input is too large, the invalid exception is raised and the largest
    122 positive or negative integer is returned.
    123 -------------------------------------------------------------------------------
    124 */
    125 static int32 roundAndPackInt32( flag zSign, bits64 absZ )
    126 {
    127     int8 roundingMode;
    128     flag roundNearestEven;
    129     int8 roundIncrement, roundBits;
    130     int32 z;
    131 
    132     roundingMode = float_rounding_mode;
    133     roundNearestEven = ( roundingMode == float_round_nearest_even );
    134     roundIncrement = 0x40;
    135     if ( ! roundNearestEven ) {
    136         if ( roundingMode == float_round_to_zero ) {
    137             roundIncrement = 0;
    138         }
    139         else {
    140             roundIncrement = 0x7F;
    141             if ( zSign ) {
    142                 if ( roundingMode == float_round_up ) roundIncrement = 0;
    143             }
    144             else {
    145                 if ( roundingMode == float_round_down ) roundIncrement = 0;
    146             }
    147         }
    148     }
    149     roundBits = (int8)(absZ & 0x7F);
    150     absZ = ( absZ + roundIncrement )>>7;
    151     absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
    152     z = (int32)absZ;
    153     if ( zSign ) z = - z;
    154     if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
    155         float_raise( float_flag_invalid );
    156         return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
    157     }
    158     if ( roundBits ) set_float_exception_inexact_flag();
    159     return z;
    160 
    161 }
    162 
    163 /*
    164 -------------------------------------------------------------------------------
    165 Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
    166 `absZ1', with binary point between bits 63 and 64 (between the input words),
    167 and returns the properly rounded 64-bit integer corresponding to the input.
    168 If `zSign' is 1, the input is negated before being converted to an integer.
    169 Ordinarily, the fixed-point input is simply rounded to an integer, with
    170 the inexact exception raised if the input cannot be represented exactly as
    171 an integer.  However, if the fixed-point input is too large, the invalid
    172 exception is raised and the largest positive or negative integer is
    173 returned.
    174 -------------------------------------------------------------------------------
    175 */
    176 static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 )
    177 {
    178     int8 roundingMode;
    179     flag roundNearestEven, increment;
    180     int64 z;
    181 
    182     roundingMode = float_rounding_mode;
    183     roundNearestEven = ( roundingMode == float_round_nearest_even );
    184     increment = ( (sbits64) absZ1 < 0 );
    185     if ( ! roundNearestEven ) {
    186         if ( roundingMode == float_round_to_zero ) {
    187             increment = 0;
    188         }
    189         else {
    190             if ( zSign ) {
    191                 increment = ( roundingMode == float_round_down ) && absZ1;
    192             }
    193             else {
    194                 increment = ( roundingMode == float_round_up ) && absZ1;
    195             }
    196         }
    197     }
    198     if ( increment ) {
    199         ++absZ0;
    200         if ( absZ0 == 0 ) goto overflow;
    201         absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
    202     }
    203     z = absZ0;
    204     if ( zSign ) z = - z;
    205     if ( z && ( ( z < 0 ) ^ zSign ) ) {
    206  overflow:
    207         float_raise( float_flag_invalid );
    208         return
    209               zSign ? (sbits64) LIT64( 0x8000000000000000 )
    210             : LIT64( 0x7FFFFFFFFFFFFFFF );
    211     }
    212     if ( absZ1 ) set_float_exception_inexact_flag();
    213     return z;
    214 
    215 }
    216 #endif
    217 
    218 /*
    219 -------------------------------------------------------------------------------
    220 Returns the fraction bits of the single-precision floating-point value `a'.
    221 -------------------------------------------------------------------------------
    222 */
    223 INLINE bits32 extractFloat32Frac( float32 a )
    224 {
    225 
    226     return a & 0x007FFFFF;
    227 
    228 }
    229 
    230 /*
    231 -------------------------------------------------------------------------------
    232 Returns the exponent bits of the single-precision floating-point value `a'.
    233 -------------------------------------------------------------------------------
    234 */
    235 INLINE int16 extractFloat32Exp( float32 a )
    236 {
    237 
    238     return ( a>>23 ) & 0xFF;
    239 
    240 }
    241 
    242 /*
    243 -------------------------------------------------------------------------------
    244 Returns the sign bit of the single-precision floating-point value `a'.
    245 -------------------------------------------------------------------------------
    246 */
    247 INLINE flag extractFloat32Sign( float32 a )
    248 {
    249 
    250     return a>>31;
    251 
    252 }
    253 
    254 /*
    255 -------------------------------------------------------------------------------
    256 Normalizes the subnormal single-precision floating-point value represented
    257 by the denormalized significand `aSig'.  The normalized exponent and
    258 significand are stored at the locations pointed to by `zExpPtr' and
    259 `zSigPtr', respectively.
    260 -------------------------------------------------------------------------------
    261 */
    262 static void
    263  normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
    264 {
    265     int8 shiftCount;
    266 
    267     shiftCount = countLeadingZeros32( aSig ) - 8;
    268     *zSigPtr = aSig<<shiftCount;
    269     *zExpPtr = 1 - shiftCount;
    270 
    271 }
    272 
    273 /*
    274 -------------------------------------------------------------------------------
    275 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
    276 single-precision floating-point value, returning the result.  After being
    277 shifted into the proper positions, the three fields are simply added
    278 together to form the result.  This means that any integer portion of `zSig'
    279 will be added into the exponent.  Since a properly normalized significand
    280 will have an integer portion equal to 1, the `zExp' input should be 1 less
    281 than the desired result exponent whenever `zSig' is a complete, normalized
    282 significand.
    283 -------------------------------------------------------------------------------
    284 */
    285 INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
    286 {
    287 
    288     return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
    289 
    290 }
    291 
    292 /*
    293 -------------------------------------------------------------------------------
    294 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    295 and significand `zSig', and returns the proper single-precision floating-
    296 point value corresponding to the abstract input.  Ordinarily, the abstract
    297 value is simply rounded and packed into the single-precision format, with
    298 the inexact exception raised if the abstract input cannot be represented
    299 exactly.  However, if the abstract value is too large, the overflow and
    300 inexact exceptions are raised and an infinity or maximal finite value is
    301 returned.  If the abstract value is too small, the input value is rounded to
    302 a subnormal number, and the underflow and inexact exceptions are raised if
    303 the abstract input cannot be represented exactly as a subnormal single-
    304 precision floating-point number.
    305     The input significand `zSig' has its binary point between bits 30
    306 and 29, which is 7 bits to the left of the usual location.  This shifted
    307 significand must be normalized or smaller.  If `zSig' is not normalized,
    308 `zExp' must be 0; in that case, the result returned is a subnormal number,
    309 and it must not require rounding.  In the usual case that `zSig' is
    310 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
    311 The handling of underflow and overflow follows the IEC/IEEE Standard for
    312 Binary Floating-Point Arithmetic.
    313 -------------------------------------------------------------------------------
    314 */
    315 static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
    316 {
    317     int8 roundingMode;
    318     flag roundNearestEven;
    319     int8 roundIncrement, roundBits;
    320     flag isTiny;
    321 
    322     roundingMode = float_rounding_mode;
    323     roundNearestEven = ( roundingMode == float_round_nearest_even );
    324     roundIncrement = 0x40;
    325     if ( ! roundNearestEven ) {
    326         if ( roundingMode == float_round_to_zero ) {
    327             roundIncrement = 0;
    328         }
    329         else {
    330             roundIncrement = 0x7F;
    331             if ( zSign ) {
    332                 if ( roundingMode == float_round_up ) roundIncrement = 0;
    333             }
    334             else {
    335                 if ( roundingMode == float_round_down ) roundIncrement = 0;
    336             }
    337         }
    338     }
    339     roundBits = zSig & 0x7F;
    340     if ( 0xFD <= (bits16) zExp ) {
    341         if (    ( 0xFD < zExp )
    342              || (    ( zExp == 0xFD )
    343                   && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
    344            ) {
    345             float_raise( float_flag_overflow | float_flag_inexact );
    346             return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
    347         }
    348         if ( zExp < 0 ) {
    349             isTiny =
    350                    ( float_detect_tininess == float_tininess_before_rounding )
    351                 || ( zExp < -1 )
    352                 || ( zSig + roundIncrement < 0x80000000U );
    353             shift32RightJamming( zSig, - zExp, &zSig );
    354             zExp = 0;
    355             roundBits = zSig & 0x7F;
    356             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
    357         }
    358     }
    359     if ( roundBits ) set_float_exception_inexact_flag();
    360     zSig = ( zSig + roundIncrement )>>7;
    361     zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
    362     if ( zSig == 0 ) zExp = 0;
    363     return packFloat32( zSign, zExp, zSig );
    364 
    365 }
    366 
    367 /*
    368 -------------------------------------------------------------------------------
    369 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    370 and significand `zSig', and returns the proper single-precision floating-
    371 point value corresponding to the abstract input.  This routine is just like
    372 `roundAndPackFloat32' except that `zSig' does not have to be normalized.
    373 Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
    374 floating-point exponent.
    375 -------------------------------------------------------------------------------
    376 */
    377 static float32
    378  normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
    379 {
    380     int8 shiftCount;
    381 
    382     shiftCount = countLeadingZeros32( zSig ) - 1;
    383     return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
    384 
    385 }
    386 
    387 /*
    388 -------------------------------------------------------------------------------
    389 Returns the fraction bits of the double-precision floating-point value `a'.
    390 -------------------------------------------------------------------------------
    391 */
    392 INLINE bits64 extractFloat64Frac( float64 a )
    393 {
    394 
    395     return FLOAT64_DEMANGLE(a) & LIT64( 0x000FFFFFFFFFFFFF );
    396 
    397 }
    398 
    399 /*
    400 -------------------------------------------------------------------------------
    401 Returns the exponent bits of the double-precision floating-point value `a'.
    402 -------------------------------------------------------------------------------
    403 */
    404 INLINE int16 extractFloat64Exp( float64 a )
    405 {
    406 
    407     return (int16)((FLOAT64_DEMANGLE(a) >> 52) & 0x7FF);
    408 
    409 }
    410 
    411 /*
    412 -------------------------------------------------------------------------------
    413 Returns the sign bit of the double-precision floating-point value `a'.
    414 -------------------------------------------------------------------------------
    415 */
    416 INLINE flag extractFloat64Sign( float64 a )
    417 {
    418 
    419     return (flag)(FLOAT64_DEMANGLE(a) >> 63);
    420 
    421 }
    422 
    423 /*
    424 -------------------------------------------------------------------------------
    425 Normalizes the subnormal double-precision floating-point value represented
    426 by the denormalized significand `aSig'.  The normalized exponent and
    427 significand are stored at the locations pointed to by `zExpPtr' and
    428 `zSigPtr', respectively.
    429 -------------------------------------------------------------------------------
    430 */
    431 static void
    432  normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
    433 {
    434     int8 shiftCount;
    435 
    436     shiftCount = countLeadingZeros64( aSig ) - 11;
    437     *zSigPtr = aSig<<shiftCount;
    438     *zExpPtr = 1 - shiftCount;
    439 
    440 }
    441 
    442 /*
    443 -------------------------------------------------------------------------------
    444 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
    445 double-precision floating-point value, returning the result.  After being
    446 shifted into the proper positions, the three fields are simply added
    447 together to form the result.  This means that any integer portion of `zSig'
    448 will be added into the exponent.  Since a properly normalized significand
    449 will have an integer portion equal to 1, the `zExp' input should be 1 less
    450 than the desired result exponent whenever `zSig' is a complete, normalized
    451 significand.
    452 -------------------------------------------------------------------------------
    453 */
    454 INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
    455 {
    456 
    457     return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
    458 			   ( ( (bits64) zExp )<<52 ) + zSig );
    459 
    460 }
    461 
    462 /*
    463 -------------------------------------------------------------------------------
    464 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    465 and significand `zSig', and returns the proper double-precision floating-
    466 point value corresponding to the abstract input.  Ordinarily, the abstract
    467 value is simply rounded and packed into the double-precision format, with
    468 the inexact exception raised if the abstract input cannot be represented
    469 exactly.  However, if the abstract value is too large, the overflow and
    470 inexact exceptions are raised and an infinity or maximal finite value is
    471 returned.  If the abstract value is too small, the input value is rounded to
    472 a subnormal number, and the underflow and inexact exceptions are raised if
    473 the abstract input cannot be represented exactly as a subnormal double-
    474 precision floating-point number.
    475     The input significand `zSig' has its binary point between bits 62
    476 and 61, which is 10 bits to the left of the usual location.  This shifted
    477 significand must be normalized or smaller.  If `zSig' is not normalized,
    478 `zExp' must be 0; in that case, the result returned is a subnormal number,
    479 and it must not require rounding.  In the usual case that `zSig' is
    480 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
    481 The handling of underflow and overflow follows the IEC/IEEE Standard for
    482 Binary Floating-Point Arithmetic.
    483 -------------------------------------------------------------------------------
    484 */
    485 static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
    486 {
    487     int8 roundingMode;
    488     flag roundNearestEven;
    489     int16 roundIncrement, roundBits;
    490     flag isTiny;
    491 
    492     roundingMode = float_rounding_mode;
    493     roundNearestEven = ( roundingMode == float_round_nearest_even );
    494     roundIncrement = 0x200;
    495     if ( ! roundNearestEven ) {
    496         if ( roundingMode == float_round_to_zero ) {
    497             roundIncrement = 0;
    498         }
    499         else {
    500             roundIncrement = 0x3FF;
    501             if ( zSign ) {
    502                 if ( roundingMode == float_round_up ) roundIncrement = 0;
    503             }
    504             else {
    505                 if ( roundingMode == float_round_down ) roundIncrement = 0;
    506             }
    507         }
    508     }
    509     roundBits = (int16)(zSig & 0x3FF);
    510     if ( 0x7FD <= (bits16) zExp ) {
    511         if (    ( 0x7FD < zExp )
    512              || (    ( zExp == 0x7FD )
    513                   && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
    514            ) {
    515             float_raise( float_flag_overflow | float_flag_inexact );
    516             return FLOAT64_MANGLE(
    517 		FLOAT64_DEMANGLE(packFloat64( zSign, 0x7FF, 0 )) -
    518 		( roundIncrement == 0 ));
    519         }
    520         if ( zExp < 0 ) {
    521             isTiny =
    522                    ( float_detect_tininess == float_tininess_before_rounding )
    523                 || ( zExp < -1 )
    524                 || ( zSig + roundIncrement < (bits64)LIT64( 0x8000000000000000 ) );
    525             shift64RightJamming( zSig, - zExp, &zSig );
    526             zExp = 0;
    527             roundBits = (int16)(zSig & 0x3FF);
    528             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
    529         }
    530     }
    531     if ( roundBits ) set_float_exception_inexact_flag();
    532     zSig = ( zSig + roundIncrement )>>10;
    533     zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
    534     if ( zSig == 0 ) zExp = 0;
    535     return packFloat64( zSign, zExp, zSig );
    536 
    537 }
    538 
    539 /*
    540 -------------------------------------------------------------------------------
    541 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    542 and significand `zSig', and returns the proper double-precision floating-
    543 point value corresponding to the abstract input.  This routine is just like
    544 `roundAndPackFloat64' except that `zSig' does not have to be normalized.
    545 Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
    546 floating-point exponent.
    547 -------------------------------------------------------------------------------
    548 */
    549 static float64
    550  normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
    551 {
    552     int8 shiftCount;
    553 
    554     shiftCount = countLeadingZeros64( zSig ) - 1;
    555     return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
    556 
    557 }
    558 
    559 #ifdef FLOATX80
    560 
    561 /*
    562 -------------------------------------------------------------------------------
    563 Returns the fraction bits of the extended double-precision floating-point
    564 value `a'.
    565 -------------------------------------------------------------------------------
    566 */
    567 INLINE bits64 extractFloatx80Frac( floatx80 a )
    568 {
    569 
    570     return a.low;
    571 
    572 }
    573 
    574 /*
    575 -------------------------------------------------------------------------------
    576 Returns the exponent bits of the extended double-precision floating-point
    577 value `a'.
    578 -------------------------------------------------------------------------------
    579 */
    580 INLINE int32 extractFloatx80Exp( floatx80 a )
    581 {
    582 
    583     return (a.high>>X80SHIFT) & 0x7FFF;
    584 
    585 }
    586 
    587 /*
    588 -------------------------------------------------------------------------------
    589 Returns the sign bit of the extended double-precision floating-point value
    590 `a'.
    591 -------------------------------------------------------------------------------
    592 */
    593 INLINE flag extractFloatx80Sign( floatx80 a )
    594 {
    595 
    596     return a.high>>(15 + X80SHIFT);
    597 
    598 }
    599 
    600 /*
    601 -------------------------------------------------------------------------------
    602 Normalizes the subnormal extended double-precision floating-point value
    603 represented by the denormalized significand `aSig'.  The normalized exponent
    604 and significand are stored at the locations pointed to by `zExpPtr' and
    605 `zSigPtr', respectively.
    606 -------------------------------------------------------------------------------
    607 */
    608 static void
    609  normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
    610 {
    611     int8 shiftCount;
    612 
    613     shiftCount = countLeadingZeros64( aSig );
    614     *zSigPtr = aSig<<shiftCount;
    615     *zExpPtr = 1 - shiftCount;
    616 
    617 }
    618 
    619 /*
    620 -------------------------------------------------------------------------------
    621 Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
    622 extended double-precision floating-point value, returning the result.
    623 -------------------------------------------------------------------------------
    624 */
    625 INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
    626 {
    627     floatx80 z;
    628 
    629     z.low = (bits64)zSig;
    630     z.high = (bits32)( ( ( (bits32) zSign )<<15 ) + zExp )<<X80SHIFT;
    631     return z;
    632 
    633 }
    634 
    635 /*
    636 -------------------------------------------------------------------------------
    637 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    638 and extended significand formed by the concatenation of `zSig0' and `zSig1',
    639 and returns the proper extended double-precision floating-point value
    640 corresponding to the abstract input.  Ordinarily, the abstract value is
    641 rounded and packed into the extended double-precision format, with the
    642 inexact exception raised if the abstract input cannot be represented
    643 exactly.  However, if the abstract value is too large, the overflow and
    644 inexact exceptions are raised and an infinity or maximal finite value is
    645 returned.  If the abstract value is too small, the input value is rounded to
    646 a subnormal number, and the underflow and inexact exceptions are raised if
    647 the abstract input cannot be represented exactly as a subnormal extended
    648 double-precision floating-point number.
    649     If `roundingPrecision' is 32 or 64, the result is rounded to the same
    650 number of bits as single or double precision, respectively.  Otherwise, the
    651 result is rounded to the full precision of the extended double-precision
    652 format.
    653     The input significand must be normalized or smaller.  If the input
    654 significand is not normalized, `zExp' must be 0; in that case, the result
    655 returned is a subnormal number, and it must not require rounding.  The
    656 handling of underflow and overflow follows the IEC/IEEE Standard for Binary
    657 Floating-Point Arithmetic.
    658 -------------------------------------------------------------------------------
    659 */
    660 static floatx80
    661  roundAndPackFloatx80(
    662      int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
    663  )
    664 {
    665     int8 roundingMode;
    666     flag roundNearestEven, increment, isTiny;
    667     uint64 roundIncrement, roundMask, roundBits;
    668 
    669     roundingMode = float_rounding_mode;
    670     roundNearestEven = ( roundingMode == float_round_nearest_even );
    671     if ( roundingPrecision == 80 ) goto precision80;
    672     if ( roundingPrecision == 64 ) {
    673         roundIncrement = LIT64( 0x0000000000000400 );
    674         roundMask = LIT64( 0x00000000000007FF );
    675     }
    676     else if ( roundingPrecision == 32 ) {
    677         roundIncrement = LIT64( 0x0000008000000000 );
    678         roundMask = LIT64( 0x000000FFFFFFFFFF );
    679     }
    680     else {
    681         goto precision80;
    682     }
    683     zSig0 |= ( zSig1 != 0 );
    684     if ( ! roundNearestEven ) {
    685         if ( roundingMode == float_round_to_zero ) {
    686             roundIncrement = 0;
    687         }
    688         else {
    689             roundIncrement = roundMask;
    690             if ( zSign ) {
    691                 if ( roundingMode == float_round_up ) roundIncrement = 0;
    692             }
    693             else {
    694                 if ( roundingMode == float_round_down ) roundIncrement = 0;
    695             }
    696         }
    697     }
    698     roundBits = zSig0 & roundMask;
    699     if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
    700         if (    ( 0x7FFE < zExp )
    701              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
    702            ) {
    703             goto overflow;
    704         }
    705         if ( zExp <= 0 ) {
    706             isTiny =
    707                    ( float_detect_tininess == float_tininess_before_rounding )
    708                 || ( zExp < 0 )
    709                 || ( zSig0 <= zSig0 + roundIncrement );
    710             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
    711             zExp = 0;
    712             roundBits = zSig0 & roundMask;
    713             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
    714             if ( roundBits ) set_float_exception_inexact_flag();
    715             zSig0 += roundIncrement;
    716             if ( (sbits64) zSig0 < 0 ) zExp = 1;
    717             roundIncrement = roundMask + 1;
    718             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
    719                 roundMask |= roundIncrement;
    720             }
    721             zSig0 &= ~ roundMask;
    722             return packFloatx80( zSign, zExp, zSig0 );
    723         }
    724     }
    725     if ( roundBits ) set_float_exception_inexact_flag();
    726     zSig0 += roundIncrement;
    727     if ( zSig0 < roundIncrement ) {
    728         ++zExp;
    729         zSig0 = LIT64( 0x8000000000000000 );
    730     }
    731     roundIncrement = roundMask + 1;
    732     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
    733         roundMask |= roundIncrement;
    734     }
    735     zSig0 &= ~ roundMask;
    736     if ( zSig0 == 0 ) zExp = 0;
    737     return packFloatx80( zSign, zExp, zSig0 );
    738  precision80:
    739     increment = ( (sbits64) zSig1 < 0 );
    740     if ( ! roundNearestEven ) {
    741         if ( roundingMode == float_round_to_zero ) {
    742             increment = 0;
    743         }
    744         else {
    745             if ( zSign ) {
    746                 increment = ( roundingMode == float_round_down ) && zSig1;
    747             }
    748             else {
    749                 increment = ( roundingMode == float_round_up ) && zSig1;
    750             }
    751         }
    752     }
    753     if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
    754         if (    ( 0x7FFE < zExp )
    755              || (    ( zExp == 0x7FFE )
    756                   && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
    757                   && increment
    758                 )
    759            ) {
    760             roundMask = 0;
    761  overflow:
    762             float_raise( float_flag_overflow | float_flag_inexact );
    763             if (    ( roundingMode == float_round_to_zero )
    764                  || ( zSign && ( roundingMode == float_round_up ) )
    765                  || ( ! zSign && ( roundingMode == float_round_down ) )
    766                ) {
    767                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
    768             }
    769             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
    770         }
    771         if ( zExp <= 0 ) {
    772             isTiny =
    773                    ( float_detect_tininess == float_tininess_before_rounding )
    774                 || ( zExp < 0 )
    775                 || ! increment
    776                 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
    777             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
    778             zExp = 0;
    779             if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
    780             if ( zSig1 ) set_float_exception_inexact_flag();
    781             if ( roundNearestEven ) {
    782                 increment = ( (sbits64) zSig1 < 0 );
    783             }
    784             else {
    785                 if ( zSign ) {
    786                     increment = ( roundingMode == float_round_down ) && zSig1;
    787                 }
    788                 else {
    789                     increment = ( roundingMode == float_round_up ) && zSig1;
    790                 }
    791             }
    792             if ( increment ) {
    793                 ++zSig0;
    794                 zSig0 &=
    795                     ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
    796                 if ( (sbits64) zSig0 < 0 ) zExp = 1;
    797             }
    798             return packFloatx80( zSign, zExp, zSig0 );
    799         }
    800     }
    801     if ( zSig1 ) set_float_exception_inexact_flag();
    802     if ( increment ) {
    803         ++zSig0;
    804         if ( zSig0 == 0 ) {
    805             ++zExp;
    806             zSig0 = LIT64( 0x8000000000000000 );
    807         }
    808         else {
    809             zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
    810         }
    811     }
    812     else {
    813         if ( zSig0 == 0 ) zExp = 0;
    814     }
    815     return packFloatx80( zSign, zExp, zSig0 );
    816 
    817 }
    818 
    819 /*
    820 -------------------------------------------------------------------------------
    821 Takes an abstract floating-point value having sign `zSign', exponent
    822 `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
    823 and returns the proper extended double-precision floating-point value
    824 corresponding to the abstract input.  This routine is just like
    825 `roundAndPackFloatx80' except that the input significand does not have to be
    826 normalized.
    827 -------------------------------------------------------------------------------
    828 */
    829 static floatx80
    830  normalizeRoundAndPackFloatx80(
    831      int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
    832  )
    833 {
    834     int8 shiftCount;
    835 
    836     if ( zSig0 == 0 ) {
    837         zSig0 = zSig1;
    838         zSig1 = 0;
    839         zExp -= 64;
    840     }
    841     shiftCount = countLeadingZeros64( zSig0 );
    842     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    843     zExp -= shiftCount;
    844     return
    845         roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
    846 
    847 }
    848 
    849 #endif
    850 
    851 #ifdef FLOAT128
    852 
    853 /*
    854 -------------------------------------------------------------------------------
    855 Returns the least-significant 64 fraction bits of the quadruple-precision
    856 floating-point value `a'.
    857 -------------------------------------------------------------------------------
    858 */
    859 INLINE bits64 extractFloat128Frac1( float128 a )
    860 {
    861 
    862     return a.low;
    863 
    864 }
    865 
    866 /*
    867 -------------------------------------------------------------------------------
    868 Returns the most-significant 48 fraction bits of the quadruple-precision
    869 floating-point value `a'.
    870 -------------------------------------------------------------------------------
    871 */
    872 INLINE bits64 extractFloat128Frac0( float128 a )
    873 {
    874 
    875     return a.high & LIT64( 0x0000FFFFFFFFFFFF );
    876 
    877 }
    878 
    879 /*
    880 -------------------------------------------------------------------------------
    881 Returns the exponent bits of the quadruple-precision floating-point value
    882 `a'.
    883 -------------------------------------------------------------------------------
    884 */
    885 INLINE int32 extractFloat128Exp( float128 a )
    886 {
    887 
    888     return (int32)((a.high >> 48) & 0x7FFF);
    889 
    890 }
    891 
    892 /*
    893 -------------------------------------------------------------------------------
    894 Returns the sign bit of the quadruple-precision floating-point value `a'.
    895 -------------------------------------------------------------------------------
    896 */
    897 INLINE flag extractFloat128Sign( float128 a )
    898 {
    899 
    900     return (flag)(a.high >> 63);
    901 
    902 }
    903 
    904 /*
    905 -------------------------------------------------------------------------------
    906 Normalizes the subnormal quadruple-precision floating-point value
    907 represented by the denormalized significand formed by the concatenation of
    908 `aSig0' and `aSig1'.  The normalized exponent is stored at the location
    909 pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
    910 significand are stored at the location pointed to by `zSig0Ptr', and the
    911 least significant 64 bits of the normalized significand are stored at the
    912 location pointed to by `zSig1Ptr'.
    913 -------------------------------------------------------------------------------
    914 */
    915 static void
    916  normalizeFloat128Subnormal(
    917      bits64 aSig0,
    918      bits64 aSig1,
    919      int32 *zExpPtr,
    920      bits64 *zSig0Ptr,
    921      bits64 *zSig1Ptr
    922  )
    923 {
    924     int8 shiftCount;
    925 
    926     if ( aSig0 == 0 ) {
    927         shiftCount = countLeadingZeros64( aSig1 ) - 15;
    928         if ( shiftCount < 0 ) {
    929             *zSig0Ptr = aSig1>>( - shiftCount );
    930             *zSig1Ptr = aSig1<<( shiftCount & 63 );
    931         }
    932         else {
    933             *zSig0Ptr = aSig1<<shiftCount;
    934             *zSig1Ptr = 0;
    935         }
    936         *zExpPtr = - shiftCount - 63;
    937     }
    938     else {
    939         shiftCount = countLeadingZeros64( aSig0 ) - 15;
    940         shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
    941         *zExpPtr = 1 - shiftCount;
    942     }
    943 
    944 }
    945 
    946 /*
    947 -------------------------------------------------------------------------------
    948 Packs the sign `zSign', the exponent `zExp', and the significand formed
    949 by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
    950 floating-point value, returning the result.  After being shifted into the
    951 proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
    952 added together to form the most significant 32 bits of the result.  This
    953 means that any integer portion of `zSig0' will be added into the exponent.
    954 Since a properly normalized significand will have an integer portion equal
    955 to 1, the `zExp' input should be 1 less than the desired result exponent
    956 whenever `zSig0' and `zSig1' concatenated form a complete, normalized
    957 significand.
    958 -------------------------------------------------------------------------------
    959 */
    960 INLINE float128
    961  packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
    962 {
    963     float128 z;
    964 
    965     z.low = zSig1;
    966     z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
    967     return z;
    968 
    969 }
    970 
    971 /*
    972 -------------------------------------------------------------------------------
    973 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    974 and extended significand formed by the concatenation of `zSig0', `zSig1',
    975 and `zSig2', and returns the proper quadruple-precision floating-point value
    976 corresponding to the abstract input.  Ordinarily, the abstract value is
    977 simply rounded and packed into the quadruple-precision format, with the
    978 inexact exception raised if the abstract input cannot be represented
    979 exactly.  However, if the abstract value is too large, the overflow and
    980 inexact exceptions are raised and an infinity or maximal finite value is
    981 returned.  If the abstract value is too small, the input value is rounded to
    982 a subnormal number, and the underflow and inexact exceptions are raised if
    983 the abstract input cannot be represented exactly as a subnormal quadruple-
    984 precision floating-point number.
    985     The input significand must be normalized or smaller.  If the input
    986 significand is not normalized, `zExp' must be 0; in that case, the result
    987 returned is a subnormal number, and it must not require rounding.  In the
    988 usual case that the input significand is normalized, `zExp' must be 1 less
    989 than the ``true'' floating-point exponent.  The handling of underflow and
    990 overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
    991 -------------------------------------------------------------------------------
    992 */
    993 static float128
    994  roundAndPackFloat128(
    995      flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
    996 {
    997     int8 roundingMode;
    998     flag roundNearestEven, increment, isTiny;
    999 
   1000     roundingMode = float_rounding_mode;
   1001     roundNearestEven = ( roundingMode == float_round_nearest_even );
   1002     increment = ( (sbits64) zSig2 < 0 );
   1003     if ( ! roundNearestEven ) {
   1004         if ( roundingMode == float_round_to_zero ) {
   1005             increment = 0;
   1006         }
   1007         else {
   1008             if ( zSign ) {
   1009                 increment = ( roundingMode == float_round_down ) && zSig2;
   1010             }
   1011             else {
   1012                 increment = ( roundingMode == float_round_up ) && zSig2;
   1013             }
   1014         }
   1015     }
   1016     if ( 0x7FFD <= (bits32) zExp ) {
   1017         if (    ( 0x7FFD < zExp )
   1018              || (    ( zExp == 0x7FFD )
   1019                   && eq128(
   1020                          LIT64( 0x0001FFFFFFFFFFFF ),
   1021                          LIT64( 0xFFFFFFFFFFFFFFFF ),
   1022                          zSig0,
   1023                          zSig1
   1024                      )
   1025                   && increment
   1026                 )
   1027            ) {
   1028             float_raise( float_flag_overflow | float_flag_inexact );
   1029             if (    ( roundingMode == float_round_to_zero )
   1030                  || ( zSign && ( roundingMode == float_round_up ) )
   1031                  || ( ! zSign && ( roundingMode == float_round_down ) )
   1032                ) {
   1033                 return
   1034                     packFloat128(
   1035                         zSign,
   1036                         0x7FFE,
   1037                         LIT64( 0x0000FFFFFFFFFFFF ),
   1038                         LIT64( 0xFFFFFFFFFFFFFFFF )
   1039                     );
   1040             }
   1041             return packFloat128( zSign, 0x7FFF, 0, 0 );
   1042         }
   1043         if ( zExp < 0 ) {
   1044             isTiny =
   1045                    ( float_detect_tininess == float_tininess_before_rounding )
   1046                 || ( zExp < -1 )
   1047                 || ! increment
   1048                 || lt128(
   1049                        zSig0,
   1050                        zSig1,
   1051                        LIT64( 0x0001FFFFFFFFFFFF ),
   1052                        LIT64( 0xFFFFFFFFFFFFFFFF )
   1053                    );
   1054             shift128ExtraRightJamming(
   1055                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
   1056             zExp = 0;
   1057             if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
   1058             if ( roundNearestEven ) {
   1059                 increment = ( (sbits64) zSig2 < 0 );
   1060             }
   1061             else {
   1062                 if ( zSign ) {
   1063                     increment = ( roundingMode == float_round_down ) && zSig2;
   1064                 }
   1065                 else {
   1066                     increment = ( roundingMode == float_round_up ) && zSig2;
   1067                 }
   1068             }
   1069         }
   1070     }
   1071     if ( zSig2 ) set_float_exception_inexact_flag();
   1072     if ( increment ) {
   1073         add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
   1074         zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
   1075     }
   1076     else {
   1077         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
   1078     }
   1079     return packFloat128( zSign, zExp, zSig0, zSig1 );
   1080 
   1081 }
   1082 
   1083 /*
   1084 -------------------------------------------------------------------------------
   1085 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
   1086 and significand formed by the concatenation of `zSig0' and `zSig1', and
   1087 returns the proper quadruple-precision floating-point value corresponding
   1088 to the abstract input.  This routine is just like `roundAndPackFloat128'
   1089 except that the input significand has fewer bits and does not have to be
   1090 normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
   1091 point exponent.
   1092 -------------------------------------------------------------------------------
   1093 */
   1094 static float128
   1095  normalizeRoundAndPackFloat128(
   1096      flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
   1097 {
   1098     int8 shiftCount;
   1099     bits64 zSig2;
   1100 
   1101     if ( zSig0 == 0 ) {
   1102         zSig0 = zSig1;
   1103         zSig1 = 0;
   1104         zExp -= 64;
   1105     }
   1106     shiftCount = countLeadingZeros64( zSig0 ) - 15;
   1107     if ( 0 <= shiftCount ) {
   1108         zSig2 = 0;
   1109         shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
   1110     }
   1111     else {
   1112         shift128ExtraRightJamming(
   1113             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
   1114     }
   1115     zExp -= shiftCount;
   1116     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
   1117 
   1118 }
   1119 
   1120 #endif
   1121 
   1122 /*
   1123 -------------------------------------------------------------------------------
   1124 Returns the result of converting the 32-bit two's complement integer `a'
   1125 to the single-precision floating-point format.  The conversion is performed
   1126 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1127 -------------------------------------------------------------------------------
   1128 */
   1129 float32 int32_to_float32( int32 a )
   1130 {
   1131     flag zSign;
   1132 
   1133     if ( a == 0 ) return 0;
   1134     if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
   1135     zSign = ( a < 0 );
   1136     return normalizeRoundAndPackFloat32(zSign, 0x9C, (uint32)(zSign ? - a : a));
   1137 
   1138 }
   1139 
   1140 float32 uint32_to_float32( uint32 a )
   1141 {
   1142     if ( a == 0 ) return 0;
   1143     if ( a & (bits32) 0x80000000 )
   1144 	return normalizeRoundAndPackFloat32( 0, 0x9D, a >> 1 );
   1145     return normalizeRoundAndPackFloat32( 0, 0x9C, a );
   1146 }
   1147 
   1148 
   1149 /*
   1150 -------------------------------------------------------------------------------
   1151 Returns the result of converting the 32-bit two's complement integer `a'
   1152 to the double-precision floating-point format.  The conversion is performed
   1153 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1154 -------------------------------------------------------------------------------
   1155 */
   1156 float64 int32_to_float64( int32 a )
   1157 {
   1158     flag zSign;
   1159     uint32 absA;
   1160     int8 shiftCount;
   1161     bits64 zSig;
   1162 
   1163     if ( a == 0 ) return 0;
   1164     zSign = ( a < 0 );
   1165     absA = zSign ? - a : a;
   1166     shiftCount = countLeadingZeros32( absA ) + 21;
   1167     zSig = absA;
   1168     return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
   1169 
   1170 }
   1171 
   1172 float64 uint32_to_float64( uint32 a )
   1173 {
   1174     int8 shiftCount;
   1175     bits64 zSig = a;
   1176 
   1177     if ( a == 0 ) return 0;
   1178     shiftCount = countLeadingZeros32( a ) + 21;
   1179     return packFloat64( 0, 0x432 - shiftCount, zSig<<shiftCount );
   1180 
   1181 }
   1182 
   1183 #ifdef FLOATX80
   1184 
   1185 /*
   1186 -------------------------------------------------------------------------------
   1187 Returns the result of converting the 32-bit two's complement integer `a'
   1188 to the extended double-precision floating-point format.  The conversion
   1189 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   1190 Arithmetic.
   1191 -------------------------------------------------------------------------------
   1192 */
   1193 floatx80 int32_to_floatx80( int32 a )
   1194 {
   1195     flag zSign;
   1196     uint32 absA;
   1197     int8 shiftCount;
   1198     bits64 zSig;
   1199 
   1200     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
   1201     zSign = ( a < 0 );
   1202     absA = zSign ? - a : a;
   1203     shiftCount = countLeadingZeros32( absA ) + 32;
   1204     zSig = absA;
   1205     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
   1206 
   1207 }
   1208 
   1209 floatx80 uint32_to_floatx80( uint32 a )
   1210 {
   1211     int8 shiftCount;
   1212     bits64 zSig = a;
   1213 
   1214     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
   1215     shiftCount = countLeadingZeros32( a ) + 32;
   1216     return packFloatx80( 0, 0x403E - shiftCount, zSig<<shiftCount );
   1217 
   1218 }
   1219 
   1220 #endif
   1221 
   1222 #ifdef FLOAT128
   1223 
   1224 /*
   1225 -------------------------------------------------------------------------------
   1226 Returns the result of converting the 32-bit two's complement integer `a' to
   1227 the quadruple-precision floating-point format.  The conversion is performed
   1228 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1229 -------------------------------------------------------------------------------
   1230 */
   1231 float128 int32_to_float128( int32 a )
   1232 {
   1233     flag zSign;
   1234     uint32 absA;
   1235     int8 shiftCount;
   1236     bits64 zSig0;
   1237 
   1238     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
   1239     zSign = ( a < 0 );
   1240     absA = zSign ? - a : a;
   1241     shiftCount = countLeadingZeros32( absA ) + 17;
   1242     zSig0 = absA;
   1243     return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
   1244 
   1245 }
   1246 
   1247 float128 uint32_to_float128( uint32 a )
   1248 {
   1249     int8 shiftCount;
   1250     bits64 zSig0 = a;
   1251 
   1252     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
   1253     shiftCount = countLeadingZeros32( a ) + 17;
   1254     return packFloat128( 0, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
   1255 
   1256 }
   1257 
   1258 #endif
   1259 
   1260 #ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */
   1261 /*
   1262 -------------------------------------------------------------------------------
   1263 Returns the result of converting the 64-bit two's complement integer `a'
   1264 to the single-precision floating-point format.  The conversion is performed
   1265 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1266 -------------------------------------------------------------------------------
   1267 */
   1268 float32 int64_to_float32( int64 a )
   1269 {
   1270     flag zSign;
   1271     uint64 absA;
   1272     int8 shiftCount;
   1273 
   1274     if ( a == 0 ) return 0;
   1275     zSign = ( a < 0 );
   1276     absA = zSign ? - a : a;
   1277     shiftCount = countLeadingZeros64( absA ) - 40;
   1278     if ( 0 <= shiftCount ) {
   1279         return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
   1280     }
   1281     else {
   1282         shiftCount += 7;
   1283         if ( shiftCount < 0 ) {
   1284             shift64RightJamming( absA, - shiftCount, &absA );
   1285         }
   1286         else {
   1287             absA <<= shiftCount;
   1288         }
   1289         return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA );
   1290     }
   1291 
   1292 }
   1293 
   1294 /*
   1295 -------------------------------------------------------------------------------
   1296 Returns the result of converting the 64-bit two's complement integer `a'
   1297 to the double-precision floating-point format.  The conversion is performed
   1298 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1299 -------------------------------------------------------------------------------
   1300 */
   1301 float64 int64_to_float64( int64 a )
   1302 {
   1303     flag zSign;
   1304 
   1305     if ( a == 0 ) return 0;
   1306     if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
   1307         return packFloat64( 1, 0x43E, 0 );
   1308     }
   1309     zSign = ( a < 0 );
   1310     return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a );
   1311 
   1312 }
   1313 
   1314 #ifdef FLOATX80
   1315 
   1316 /*
   1317 -------------------------------------------------------------------------------
   1318 Returns the result of converting the 64-bit two's complement integer `a'
   1319 to the extended double-precision floating-point format.  The conversion
   1320 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   1321 Arithmetic.
   1322 -------------------------------------------------------------------------------
   1323 */
   1324 floatx80 int64_to_floatx80( int64 a )
   1325 {
   1326     flag zSign;
   1327     uint64 absA;
   1328     int8 shiftCount;
   1329 
   1330     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
   1331     zSign = ( a < 0 );
   1332     absA = zSign ? - a : a;
   1333     shiftCount = countLeadingZeros64( absA );
   1334     return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
   1335 
   1336 }
   1337 
   1338 #endif
   1339 
   1340 #endif /* !SOFTFLOAT_FOR_GCC */
   1341 
   1342 #ifdef FLOAT128
   1343 
   1344 /*
   1345 -------------------------------------------------------------------------------
   1346 Returns the result of converting the 64-bit two's complement integer `a' to
   1347 the quadruple-precision floating-point format.  The conversion is performed
   1348 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1349 -------------------------------------------------------------------------------
   1350 */
   1351 float128 int64_to_float128( int64 a )
   1352 {
   1353     flag zSign;
   1354     uint64 absA;
   1355     int8 shiftCount;
   1356     int32 zExp;
   1357     bits64 zSig0, zSig1;
   1358 
   1359     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
   1360     zSign = ( a < 0 );
   1361     absA = zSign ? - a : a;
   1362     shiftCount = countLeadingZeros64( absA ) + 49;
   1363     zExp = 0x406E - shiftCount;
   1364     if ( 64 <= shiftCount ) {
   1365         zSig1 = 0;
   1366         zSig0 = absA;
   1367         shiftCount -= 64;
   1368     }
   1369     else {
   1370         zSig1 = absA;
   1371         zSig0 = 0;
   1372     }
   1373     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
   1374     return packFloat128( zSign, zExp, zSig0, zSig1 );
   1375 
   1376 }
   1377 
   1378 #endif
   1379 
   1380 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   1381 /*
   1382 -------------------------------------------------------------------------------
   1383 Returns the result of converting the single-precision floating-point value
   1384 `a' to the 32-bit two's complement integer format.  The conversion is
   1385 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1386 Arithmetic---which means in particular that the conversion is rounded
   1387 according to the current rounding mode.  If `a' is a NaN, the largest
   1388 positive integer is returned.  Otherwise, if the conversion overflows, the
   1389 largest integer with the same sign as `a' is returned.
   1390 -------------------------------------------------------------------------------
   1391 */
   1392 int32 float32_to_int32( float32 a )
   1393 {
   1394     flag aSign;
   1395     int16 aExp, shiftCount;
   1396     bits32 aSig;
   1397     bits64 aSig64;
   1398 
   1399     aSig = extractFloat32Frac( a );
   1400     aExp = extractFloat32Exp( a );
   1401     aSign = extractFloat32Sign( a );
   1402     if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
   1403     if ( aExp ) aSig |= 0x00800000;
   1404     shiftCount = 0xAF - aExp;
   1405     aSig64 = aSig;
   1406     aSig64 <<= 32;
   1407     if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
   1408     return roundAndPackInt32( aSign, aSig64 );
   1409 
   1410 }
   1411 #endif /* !SOFTFLOAT_FOR_GCC */
   1412 
   1413 /*
   1414 -------------------------------------------------------------------------------
   1415 Returns the result of converting the single-precision floating-point value
   1416 `a' to the 32-bit two's complement integer format.  The conversion is
   1417 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1418 Arithmetic, except that the conversion is always rounded toward zero.
   1419 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
   1420 the conversion overflows, the largest integer with the same sign as `a' is
   1421 returned.
   1422 -------------------------------------------------------------------------------
   1423 */
   1424 int32 float32_to_int32_round_to_zero( float32 a )
   1425 {
   1426     flag aSign;
   1427     int16 aExp, shiftCount;
   1428     bits32 aSig;
   1429     int32 z;
   1430 
   1431     aSig = extractFloat32Frac( a );
   1432     aExp = extractFloat32Exp( a );
   1433     aSign = extractFloat32Sign( a );
   1434     shiftCount = aExp - 0x9E;
   1435     if ( 0 <= shiftCount ) {
   1436         if ( a != 0xCF000000 ) {
   1437             float_raise( float_flag_invalid );
   1438             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
   1439         }
   1440         return (sbits32) 0x80000000;
   1441     }
   1442     else if ( aExp <= 0x7E ) {
   1443         if ( aExp | aSig ) set_float_exception_inexact_flag();
   1444         return 0;
   1445     }
   1446     aSig = ( aSig | 0x00800000 )<<8;
   1447     z = aSig>>( - shiftCount );
   1448     if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
   1449         set_float_exception_inexact_flag();
   1450     }
   1451     if ( aSign ) z = - z;
   1452     return z;
   1453 
   1454 }
   1455 
   1456 #ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */
   1457 /*
   1458 -------------------------------------------------------------------------------
   1459 Returns the result of converting the single-precision floating-point value
   1460 `a' to the 64-bit two's complement integer format.  The conversion is
   1461 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1462 Arithmetic---which means in particular that the conversion is rounded
   1463 according to the current rounding mode.  If `a' is a NaN, the largest
   1464 positive integer is returned.  Otherwise, if the conversion overflows, the
   1465 largest integer with the same sign as `a' is returned.
   1466 -------------------------------------------------------------------------------
   1467 */
   1468 int64 float32_to_int64( float32 a )
   1469 {
   1470     flag aSign;
   1471     int16 aExp, shiftCount;
   1472     bits32 aSig;
   1473     bits64 aSig64, aSigExtra;
   1474 
   1475     aSig = extractFloat32Frac( a );
   1476     aExp = extractFloat32Exp( a );
   1477     aSign = extractFloat32Sign( a );
   1478     shiftCount = 0xBE - aExp;
   1479     if ( shiftCount < 0 ) {
   1480         float_raise( float_flag_invalid );
   1481         if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
   1482             return LIT64( 0x7FFFFFFFFFFFFFFF );
   1483         }
   1484         return (sbits64) LIT64( 0x8000000000000000 );
   1485     }
   1486     if ( aExp ) aSig |= 0x00800000;
   1487     aSig64 = aSig;
   1488     aSig64 <<= 40;
   1489     shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
   1490     return roundAndPackInt64( aSign, aSig64, aSigExtra );
   1491 
   1492 }
   1493 
   1494 /*
   1495 -------------------------------------------------------------------------------
   1496 Returns the result of converting the single-precision floating-point value
   1497 `a' to the 64-bit two's complement integer format.  The conversion is
   1498 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1499 Arithmetic, except that the conversion is always rounded toward zero.  If
   1500 `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
   1501 conversion overflows, the largest integer with the same sign as `a' is
   1502 returned.
   1503 -------------------------------------------------------------------------------
   1504 */
   1505 int64 float32_to_int64_round_to_zero( float32 a )
   1506 {
   1507     flag aSign;
   1508     int16 aExp, shiftCount;
   1509     bits32 aSig;
   1510     bits64 aSig64;
   1511     int64 z;
   1512 
   1513     aSig = extractFloat32Frac( a );
   1514     aExp = extractFloat32Exp( a );
   1515     aSign = extractFloat32Sign( a );
   1516     shiftCount = aExp - 0xBE;
   1517     if ( 0 <= shiftCount ) {
   1518         if ( a != 0xDF000000 ) {
   1519             float_raise( float_flag_invalid );
   1520             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
   1521                 return LIT64( 0x7FFFFFFFFFFFFFFF );
   1522             }
   1523         }
   1524         return (sbits64) LIT64( 0x8000000000000000 );
   1525     }
   1526     else if ( aExp <= 0x7E ) {
   1527         if ( aExp | aSig ) set_float_exception_inexact_flag();
   1528         return 0;
   1529     }
   1530     aSig64 = aSig | 0x00800000;
   1531     aSig64 <<= 40;
   1532     z = aSig64>>( - shiftCount );
   1533     if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
   1534         set_float_exception_inexact_flag();
   1535     }
   1536     if ( aSign ) z = - z;
   1537     return z;
   1538 
   1539 }
   1540 #endif /* !SOFTFLOAT_FOR_GCC */
   1541 
   1542 /*
   1543 -------------------------------------------------------------------------------
   1544 Returns the result of converting the single-precision floating-point value
   1545 `a' to the double-precision floating-point format.  The conversion is
   1546 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1547 Arithmetic.
   1548 -------------------------------------------------------------------------------
   1549 */
   1550 float64 float32_to_float64( float32 a )
   1551 {
   1552     flag aSign;
   1553     int16 aExp;
   1554     bits32 aSig;
   1555 
   1556     aSig = extractFloat32Frac( a );
   1557     aExp = extractFloat32Exp( a );
   1558     aSign = extractFloat32Sign( a );
   1559     if ( aExp == 0xFF ) {
   1560         if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
   1561         return packFloat64( aSign, 0x7FF, 0 );
   1562     }
   1563     if ( aExp == 0 ) {
   1564         if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
   1565         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1566         --aExp;
   1567     }
   1568     return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
   1569 
   1570 }
   1571 
   1572 #ifdef FLOATX80
   1573 
   1574 /*
   1575 -------------------------------------------------------------------------------
   1576 Returns the result of converting the single-precision floating-point value
   1577 `a' to the extended double-precision floating-point format.  The conversion
   1578 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   1579 Arithmetic.
   1580 -------------------------------------------------------------------------------
   1581 */
   1582 floatx80 float32_to_floatx80( float32 a )
   1583 {
   1584     flag aSign;
   1585     int16 aExp;
   1586     bits32 aSig;
   1587 
   1588     aSig = extractFloat32Frac( a );
   1589     aExp = extractFloat32Exp( a );
   1590     aSign = extractFloat32Sign( a );
   1591     if ( aExp == 0xFF ) {
   1592         if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
   1593         return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   1594     }
   1595     if ( aExp == 0 ) {
   1596         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
   1597         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1598     }
   1599     aSig |= 0x00800000;
   1600     return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
   1601 
   1602 }
   1603 
   1604 #endif
   1605 
   1606 #ifdef FLOAT128
   1607 
   1608 /*
   1609 -------------------------------------------------------------------------------
   1610 Returns the result of converting the single-precision floating-point value
   1611 `a' to the double-precision floating-point format.  The conversion is
   1612 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1613 Arithmetic.
   1614 -------------------------------------------------------------------------------
   1615 */
   1616 float128 float32_to_float128( float32 a )
   1617 {
   1618     flag aSign;
   1619     int16 aExp;
   1620     bits32 aSig;
   1621 
   1622     aSig = extractFloat32Frac( a );
   1623     aExp = extractFloat32Exp( a );
   1624     aSign = extractFloat32Sign( a );
   1625     if ( aExp == 0xFF ) {
   1626         if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) );
   1627         return packFloat128( aSign, 0x7FFF, 0, 0 );
   1628     }
   1629     if ( aExp == 0 ) {
   1630         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
   1631         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1632         --aExp;
   1633     }
   1634     return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
   1635 
   1636 }
   1637 
   1638 #endif
   1639 
   1640 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   1641 /*
   1642 -------------------------------------------------------------------------------
   1643 Rounds the single-precision floating-point value `a' to an integer, and
   1644 returns the result as a single-precision floating-point value.  The
   1645 operation is performed according to the IEC/IEEE Standard for Binary
   1646 Floating-Point Arithmetic.
   1647 -------------------------------------------------------------------------------
   1648 */
   1649 float32 float32_round_to_int( float32 a )
   1650 {
   1651     flag aSign;
   1652     int16 aExp;
   1653     bits32 lastBitMask, roundBitsMask;
   1654     int8 roundingMode;
   1655     float32 z;
   1656 
   1657     aExp = extractFloat32Exp( a );
   1658     if ( 0x96 <= aExp ) {
   1659         if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
   1660             return propagateFloat32NaN( a, a );
   1661         }
   1662         return a;
   1663     }
   1664     if ( aExp <= 0x7E ) {
   1665         if ( (bits32) ( a<<1 ) == 0 ) return a;
   1666         set_float_exception_inexact_flag();
   1667         aSign = extractFloat32Sign( a );
   1668         switch ( float_rounding_mode ) {
   1669          case float_round_nearest_even:
   1670             if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
   1671                 return packFloat32( aSign, 0x7F, 0 );
   1672             }
   1673             break;
   1674 	 case float_round_to_zero:
   1675 	    break;
   1676          case float_round_down:
   1677             return aSign ? 0xBF800000 : 0;
   1678          case float_round_up:
   1679             return aSign ? 0x80000000 : 0x3F800000;
   1680         }
   1681         return packFloat32( aSign, 0, 0 );
   1682     }
   1683     lastBitMask = 1;
   1684     lastBitMask <<= 0x96 - aExp;
   1685     roundBitsMask = lastBitMask - 1;
   1686     z = a;
   1687     roundingMode = float_rounding_mode;
   1688     if ( roundingMode == float_round_nearest_even ) {
   1689         z += lastBitMask>>1;
   1690         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
   1691     }
   1692     else if ( roundingMode != float_round_to_zero ) {
   1693         if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
   1694             z += roundBitsMask;
   1695         }
   1696     }
   1697     z &= ~ roundBitsMask;
   1698     if ( z != a ) set_float_exception_inexact_flag();
   1699     return z;
   1700 
   1701 }
   1702 #endif /* !SOFTFLOAT_FOR_GCC */
   1703 
   1704 /*
   1705 -------------------------------------------------------------------------------
   1706 Returns the result of adding the absolute values of the single-precision
   1707 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
   1708 before being returned.  `zSign' is ignored if the result is a NaN.
   1709 The addition is performed according to the IEC/IEEE Standard for Binary
   1710 Floating-Point Arithmetic.
   1711 -------------------------------------------------------------------------------
   1712 */
   1713 static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
   1714 {
   1715     int16 aExp, bExp, zExp;
   1716     bits32 aSig, bSig, zSig;
   1717     int16 expDiff;
   1718 
   1719     aSig = extractFloat32Frac( a );
   1720     aExp = extractFloat32Exp( a );
   1721     bSig = extractFloat32Frac( b );
   1722     bExp = extractFloat32Exp( b );
   1723     expDiff = aExp - bExp;
   1724     aSig <<= 6;
   1725     bSig <<= 6;
   1726     if ( 0 < expDiff ) {
   1727         if ( aExp == 0xFF ) {
   1728             if ( aSig ) return propagateFloat32NaN( a, b );
   1729             return a;
   1730         }
   1731         if ( bExp == 0 ) {
   1732             --expDiff;
   1733         }
   1734         else {
   1735             bSig |= 0x20000000;
   1736         }
   1737         shift32RightJamming( bSig, expDiff, &bSig );
   1738         zExp = aExp;
   1739     }
   1740     else if ( expDiff < 0 ) {
   1741         if ( bExp == 0xFF ) {
   1742             if ( bSig ) return propagateFloat32NaN( a, b );
   1743             return packFloat32( zSign, 0xFF, 0 );
   1744         }
   1745         if ( aExp == 0 ) {
   1746             ++expDiff;
   1747         }
   1748         else {
   1749             aSig |= 0x20000000;
   1750         }
   1751         shift32RightJamming( aSig, - expDiff, &aSig );
   1752         zExp = bExp;
   1753     }
   1754     else {
   1755         if ( aExp == 0xFF ) {
   1756             if ( aSig | bSig ) return propagateFloat32NaN( a, b );
   1757             return a;
   1758         }
   1759         if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
   1760         zSig = 0x40000000 + aSig + bSig;
   1761         zExp = aExp;
   1762         goto roundAndPack;
   1763     }
   1764     aSig |= 0x20000000;
   1765     zSig = ( aSig + bSig )<<1;
   1766     --zExp;
   1767     if ( (sbits32) zSig < 0 ) {
   1768         zSig = aSig + bSig;
   1769         ++zExp;
   1770     }
   1771  roundAndPack:
   1772     return roundAndPackFloat32( zSign, zExp, zSig );
   1773 
   1774 }
   1775 
   1776 /*
   1777 -------------------------------------------------------------------------------
   1778 Returns the result of subtracting the absolute values of the single-
   1779 precision floating-point values `a' and `b'.  If `zSign' is 1, the
   1780 difference is negated before being returned.  `zSign' is ignored if the
   1781 result is a NaN.  The subtraction is performed according to the IEC/IEEE
   1782 Standard for Binary Floating-Point Arithmetic.
   1783 -------------------------------------------------------------------------------
   1784 */
   1785 static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
   1786 {
   1787     int16 aExp, bExp, zExp;
   1788     bits32 aSig, bSig, zSig;
   1789     int16 expDiff;
   1790 
   1791     aSig = extractFloat32Frac( a );
   1792     aExp = extractFloat32Exp( a );
   1793     bSig = extractFloat32Frac( b );
   1794     bExp = extractFloat32Exp( b );
   1795     expDiff = aExp - bExp;
   1796     aSig <<= 7;
   1797     bSig <<= 7;
   1798     if ( 0 < expDiff ) goto aExpBigger;
   1799     if ( expDiff < 0 ) goto bExpBigger;
   1800     if ( aExp == 0xFF ) {
   1801         if ( aSig | bSig ) return propagateFloat32NaN( a, b );
   1802         float_raise( float_flag_invalid );
   1803         return float32_default_nan;
   1804     }
   1805     if ( aExp == 0 ) {
   1806         aExp = 1;
   1807         bExp = 1;
   1808     }
   1809     if ( bSig < aSig ) goto aBigger;
   1810     if ( aSig < bSig ) goto bBigger;
   1811     return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
   1812  bExpBigger:
   1813     if ( bExp == 0xFF ) {
   1814         if ( bSig ) return propagateFloat32NaN( a, b );
   1815         return packFloat32( zSign ^ 1, 0xFF, 0 );
   1816     }
   1817     if ( aExp == 0 ) {
   1818         ++expDiff;
   1819     }
   1820     else {
   1821         aSig |= 0x40000000;
   1822     }
   1823     shift32RightJamming( aSig, - expDiff, &aSig );
   1824     bSig |= 0x40000000;
   1825  bBigger:
   1826     zSig = bSig - aSig;
   1827     zExp = bExp;
   1828     zSign ^= 1;
   1829     goto normalizeRoundAndPack;
   1830  aExpBigger:
   1831     if ( aExp == 0xFF ) {
   1832         if ( aSig ) return propagateFloat32NaN( a, b );
   1833         return a;
   1834     }
   1835     if ( bExp == 0 ) {
   1836         --expDiff;
   1837     }
   1838     else {
   1839         bSig |= 0x40000000;
   1840     }
   1841     shift32RightJamming( bSig, expDiff, &bSig );
   1842     aSig |= 0x40000000;
   1843  aBigger:
   1844     zSig = aSig - bSig;
   1845     zExp = aExp;
   1846  normalizeRoundAndPack:
   1847     --zExp;
   1848     return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
   1849 
   1850 }
   1851 
   1852 /*
   1853 -------------------------------------------------------------------------------
   1854 Returns the result of adding the single-precision floating-point values `a'
   1855 and `b'.  The operation is performed according to the IEC/IEEE Standard for
   1856 Binary Floating-Point Arithmetic.
   1857 -------------------------------------------------------------------------------
   1858 */
   1859 float32 float32_add( float32 a, float32 b )
   1860 {
   1861     flag aSign, bSign;
   1862 
   1863     aSign = extractFloat32Sign( a );
   1864     bSign = extractFloat32Sign( b );
   1865     if ( aSign == bSign ) {
   1866         return addFloat32Sigs( a, b, aSign );
   1867     }
   1868     else {
   1869         return subFloat32Sigs( a, b, aSign );
   1870     }
   1871 
   1872 }
   1873 
   1874 /*
   1875 -------------------------------------------------------------------------------
   1876 Returns the result of subtracting the single-precision floating-point values
   1877 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   1878 for Binary Floating-Point Arithmetic.
   1879 -------------------------------------------------------------------------------
   1880 */
   1881 float32 float32_sub( float32 a, float32 b )
   1882 {
   1883     flag aSign, bSign;
   1884 
   1885     aSign = extractFloat32Sign( a );
   1886     bSign = extractFloat32Sign( b );
   1887     if ( aSign == bSign ) {
   1888         return subFloat32Sigs( a, b, aSign );
   1889     }
   1890     else {
   1891         return addFloat32Sigs( a, b, aSign );
   1892     }
   1893 
   1894 }
   1895 
   1896 /*
   1897 -------------------------------------------------------------------------------
   1898 Returns the result of multiplying the single-precision floating-point values
   1899 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   1900 for Binary Floating-Point Arithmetic.
   1901 -------------------------------------------------------------------------------
   1902 */
   1903 float32 float32_mul( float32 a, float32 b )
   1904 {
   1905     flag aSign, bSign, zSign;
   1906     int16 aExp, bExp, zExp;
   1907     bits32 aSig, bSig;
   1908     bits64 zSig64;
   1909     bits32 zSig;
   1910 
   1911     aSig = extractFloat32Frac( a );
   1912     aExp = extractFloat32Exp( a );
   1913     aSign = extractFloat32Sign( a );
   1914     bSig = extractFloat32Frac( b );
   1915     bExp = extractFloat32Exp( b );
   1916     bSign = extractFloat32Sign( b );
   1917     zSign = aSign ^ bSign;
   1918     if ( aExp == 0xFF ) {
   1919         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
   1920             return propagateFloat32NaN( a, b );
   1921         }
   1922         if ( ( bExp | bSig ) == 0 ) {
   1923             float_raise( float_flag_invalid );
   1924             return float32_default_nan;
   1925         }
   1926         return packFloat32( zSign, 0xFF, 0 );
   1927     }
   1928     if ( bExp == 0xFF ) {
   1929         if ( bSig ) return propagateFloat32NaN( a, b );
   1930         if ( ( aExp | aSig ) == 0 ) {
   1931             float_raise( float_flag_invalid );
   1932             return float32_default_nan;
   1933         }
   1934         return packFloat32( zSign, 0xFF, 0 );
   1935     }
   1936     if ( aExp == 0 ) {
   1937         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
   1938         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1939     }
   1940     if ( bExp == 0 ) {
   1941         if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
   1942         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
   1943     }
   1944     zExp = aExp + bExp - 0x7F;
   1945     aSig = ( aSig | 0x00800000 )<<7;
   1946     bSig = ( bSig | 0x00800000 )<<8;
   1947     shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
   1948     zSig = (bits32)zSig64;
   1949     if ( 0 <= (sbits32) ( zSig<<1 ) ) {
   1950         zSig <<= 1;
   1951         --zExp;
   1952     }
   1953     return roundAndPackFloat32( zSign, zExp, zSig );
   1954 
   1955 }
   1956 
   1957 /*
   1958 -------------------------------------------------------------------------------
   1959 Returns the result of dividing the single-precision floating-point value `a'
   1960 by the corresponding value `b'.  The operation is performed according to the
   1961 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1962 -------------------------------------------------------------------------------
   1963 */
   1964 float32 float32_div( float32 a, float32 b )
   1965 {
   1966     flag aSign, bSign, zSign;
   1967     int16 aExp, bExp, zExp;
   1968     bits32 aSig, bSig, zSig;
   1969 
   1970     aSig = extractFloat32Frac( a );
   1971     aExp = extractFloat32Exp( a );
   1972     aSign = extractFloat32Sign( a );
   1973     bSig = extractFloat32Frac( b );
   1974     bExp = extractFloat32Exp( b );
   1975     bSign = extractFloat32Sign( b );
   1976     zSign = aSign ^ bSign;
   1977     if ( aExp == 0xFF ) {
   1978         if ( aSig ) return propagateFloat32NaN( a, b );
   1979         if ( bExp == 0xFF ) {
   1980             if ( bSig ) return propagateFloat32NaN( a, b );
   1981             float_raise( float_flag_invalid );
   1982             return float32_default_nan;
   1983         }
   1984         return packFloat32( zSign, 0xFF, 0 );
   1985     }
   1986     if ( bExp == 0xFF ) {
   1987         if ( bSig ) return propagateFloat32NaN( a, b );
   1988         return packFloat32( zSign, 0, 0 );
   1989     }
   1990     if ( bExp == 0 ) {
   1991         if ( bSig == 0 ) {
   1992             if ( ( aExp | aSig ) == 0 ) {
   1993                 float_raise( float_flag_invalid );
   1994                 return float32_default_nan;
   1995             }
   1996             float_raise( float_flag_divbyzero );
   1997             return packFloat32( zSign, 0xFF, 0 );
   1998         }
   1999         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
   2000     }
   2001     if ( aExp == 0 ) {
   2002         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
   2003         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   2004     }
   2005     zExp = aExp - bExp + 0x7D;
   2006     aSig = ( aSig | 0x00800000 )<<7;
   2007     bSig = ( bSig | 0x00800000 )<<8;
   2008     if ( bSig <= ( aSig + aSig ) ) {
   2009         aSig >>= 1;
   2010         ++zExp;
   2011     }
   2012     zSig = (bits32)((((bits64) aSig) << 32) / bSig);
   2013     if ( ( zSig & 0x3F ) == 0 ) {
   2014         zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
   2015     }
   2016     return roundAndPackFloat32( zSign, zExp, zSig );
   2017 
   2018 }
   2019 
   2020 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   2021 /*
   2022 -------------------------------------------------------------------------------
   2023 Returns the remainder of the single-precision floating-point value `a'
   2024 with respect to the corresponding value `b'.  The operation is performed
   2025 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2026 -------------------------------------------------------------------------------
   2027 */
   2028 float32 float32_rem( float32 a, float32 b )
   2029 {
   2030     flag aSign, bSign, zSign;
   2031     int16 aExp, bExp, expDiff;
   2032     bits32 aSig, bSig;
   2033     bits32 q;
   2034     bits64 aSig64, bSig64, q64;
   2035     bits32 alternateASig;
   2036     sbits32 sigMean;
   2037 
   2038     aSig = extractFloat32Frac( a );
   2039     aExp = extractFloat32Exp( a );
   2040     aSign = extractFloat32Sign( a );
   2041     bSig = extractFloat32Frac( b );
   2042     bExp = extractFloat32Exp( b );
   2043     bSign = extractFloat32Sign( b );
   2044     if ( aExp == 0xFF ) {
   2045         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
   2046             return propagateFloat32NaN( a, b );
   2047         }
   2048         float_raise( float_flag_invalid );
   2049         return float32_default_nan;
   2050     }
   2051     if ( bExp == 0xFF ) {
   2052         if ( bSig ) return propagateFloat32NaN( a, b );
   2053         return a;
   2054     }
   2055     if ( bExp == 0 ) {
   2056         if ( bSig == 0 ) {
   2057             float_raise( float_flag_invalid );
   2058             return float32_default_nan;
   2059         }
   2060         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
   2061     }
   2062     if ( aExp == 0 ) {
   2063         if ( aSig == 0 ) return a;
   2064         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   2065     }
   2066     expDiff = aExp - bExp;
   2067     aSig |= 0x00800000;
   2068     bSig |= 0x00800000;
   2069     if ( expDiff < 32 ) {
   2070         aSig <<= 8;
   2071         bSig <<= 8;
   2072         if ( expDiff < 0 ) {
   2073             if ( expDiff < -1 ) return a;
   2074             aSig >>= 1;
   2075         }
   2076         q = ( bSig <= aSig );
   2077         if ( q ) aSig -= bSig;
   2078         if ( 0 < expDiff ) {
   2079             q = ( ( (bits64) aSig )<<32 ) / bSig;
   2080             q >>= 32 - expDiff;
   2081             bSig >>= 2;
   2082             aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
   2083         }
   2084         else {
   2085             aSig >>= 2;
   2086             bSig >>= 2;
   2087         }
   2088     }
   2089     else {
   2090         if ( bSig <= aSig ) aSig -= bSig;
   2091         aSig64 = ( (bits64) aSig )<<40;
   2092         bSig64 = ( (bits64) bSig )<<40;
   2093         expDiff -= 64;
   2094         while ( 0 < expDiff ) {
   2095             q64 = estimateDiv128To64( aSig64, 0, bSig64 );
   2096             q64 = ( 2 < q64 ) ? q64 - 2 : 0;
   2097             aSig64 = - ( ( bSig * q64 )<<38 );
   2098             expDiff -= 62;
   2099         }
   2100         expDiff += 64;
   2101         q64 = estimateDiv128To64( aSig64, 0, bSig64 );
   2102         q64 = ( 2 < q64 ) ? q64 - 2 : 0;
   2103         q = q64>>( 64 - expDiff );
   2104         bSig <<= 6;
   2105         aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
   2106     }
   2107     do {
   2108         alternateASig = aSig;
   2109         ++q;
   2110         aSig -= bSig;
   2111     } while ( 0 <= (sbits32) aSig );
   2112     sigMean = aSig + alternateASig;
   2113     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
   2114         aSig = alternateASig;
   2115     }
   2116     zSign = ( (sbits32) aSig < 0 );
   2117     if ( zSign ) aSig = - aSig;
   2118     return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
   2119 
   2120 }
   2121 #endif /* !SOFTFLOAT_FOR_GCC */
   2122 
   2123 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   2124 /*
   2125 -------------------------------------------------------------------------------
   2126 Returns the square root of the single-precision floating-point value `a'.
   2127 The operation is performed according to the IEC/IEEE Standard for Binary
   2128 Floating-Point Arithmetic.
   2129 -------------------------------------------------------------------------------
   2130 */
   2131 float32 float32_sqrt( float32 a )
   2132 {
   2133     flag aSign;
   2134     int16 aExp, zExp;
   2135     bits32 aSig, zSig;
   2136     bits64 rem, term;
   2137 
   2138     aSig = extractFloat32Frac( a );
   2139     aExp = extractFloat32Exp( a );
   2140     aSign = extractFloat32Sign( a );
   2141     if ( aExp == 0xFF ) {
   2142         if ( aSig ) return propagateFloat32NaN( a, 0 );
   2143         if ( ! aSign ) return a;
   2144         float_raise( float_flag_invalid );
   2145         return float32_default_nan;
   2146     }
   2147     if ( aSign ) {
   2148         if ( ( aExp | aSig ) == 0 ) return a;
   2149         float_raise( float_flag_invalid );
   2150         return float32_default_nan;
   2151     }
   2152     if ( aExp == 0 ) {
   2153         if ( aSig == 0 ) return 0;
   2154         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   2155     }
   2156     zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
   2157     aSig = ( aSig | 0x00800000 )<<8;
   2158     zSig = estimateSqrt32( aExp, aSig ) + 2;
   2159     if ( ( zSig & 0x7F ) <= 5 ) {
   2160         if ( zSig < 2 ) {
   2161             zSig = 0x7FFFFFFF;
   2162             goto roundAndPack;
   2163         }
   2164         aSig >>= aExp & 1;
   2165         term = ( (bits64) zSig ) * zSig;
   2166         rem = ( ( (bits64) aSig )<<32 ) - term;
   2167         while ( (sbits64) rem < 0 ) {
   2168             --zSig;
   2169             rem += ( ( (bits64) zSig )<<1 ) | 1;
   2170         }
   2171         zSig |= ( rem != 0 );
   2172     }
   2173     shift32RightJamming( zSig, 1, &zSig );
   2174  roundAndPack:
   2175     return roundAndPackFloat32( 0, zExp, zSig );
   2176 
   2177 }
   2178 #endif /* !SOFTFLOAT_FOR_GCC */
   2179 
   2180 /*
   2181 -------------------------------------------------------------------------------
   2182 Returns 1 if the single-precision floating-point value `a' is equal to
   2183 the corresponding value `b', and 0 otherwise.  The comparison is performed
   2184 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2185 -------------------------------------------------------------------------------
   2186 */
   2187 flag float32_eq( float32 a, float32 b )
   2188 {
   2189 
   2190     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2191          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2192        ) {
   2193         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
   2194             float_raise( float_flag_invalid );
   2195         }
   2196         return 0;
   2197     }
   2198     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
   2199 
   2200 }
   2201 
   2202 /*
   2203 -------------------------------------------------------------------------------
   2204 Returns 1 if the single-precision floating-point value `a' is less than
   2205 or equal to the corresponding value `b', and 0 otherwise.  The comparison
   2206 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   2207 Arithmetic.
   2208 -------------------------------------------------------------------------------
   2209 */
   2210 flag float32_le( float32 a, float32 b )
   2211 {
   2212     flag aSign, bSign;
   2213 
   2214     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2215          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2216        ) {
   2217         float_raise( float_flag_invalid );
   2218         return 0;
   2219     }
   2220     aSign = extractFloat32Sign( a );
   2221     bSign = extractFloat32Sign( b );
   2222     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
   2223     return ( a == b ) || ( aSign ^ ( a < b ) );
   2224 
   2225 }
   2226 
   2227 /*
   2228 -------------------------------------------------------------------------------
   2229 Returns 1 if the single-precision floating-point value `a' is less than
   2230 the corresponding value `b', and 0 otherwise.  The comparison is performed
   2231 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2232 -------------------------------------------------------------------------------
   2233 */
   2234 flag float32_lt( float32 a, float32 b )
   2235 {
   2236     flag aSign, bSign;
   2237 
   2238     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2239          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2240        ) {
   2241         float_raise( float_flag_invalid );
   2242         return 0;
   2243     }
   2244     aSign = extractFloat32Sign( a );
   2245     bSign = extractFloat32Sign( b );
   2246     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
   2247     return ( a != b ) && ( aSign ^ ( a < b ) );
   2248 
   2249 }
   2250 
   2251 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   2252 /*
   2253 -------------------------------------------------------------------------------
   2254 Returns 1 if the single-precision floating-point value `a' is equal to
   2255 the corresponding value `b', and 0 otherwise.  The invalid exception is
   2256 raised if either operand is a NaN.  Otherwise, the comparison is performed
   2257 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2258 -------------------------------------------------------------------------------
   2259 */
   2260 flag float32_eq_signaling( float32 a, float32 b )
   2261 {
   2262 
   2263     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2264          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2265        ) {
   2266         float_raise( float_flag_invalid );
   2267         return 0;
   2268     }
   2269     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
   2270 
   2271 }
   2272 
   2273 /*
   2274 -------------------------------------------------------------------------------
   2275 Returns 1 if the single-precision floating-point value `a' is less than or
   2276 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
   2277 cause an exception.  Otherwise, the comparison is performed according to the
   2278 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2279 -------------------------------------------------------------------------------
   2280 */
   2281 flag float32_le_quiet( float32 a, float32 b )
   2282 {
   2283     flag aSign, bSign;
   2284 
   2285     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2286          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2287        ) {
   2288         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
   2289             float_raise( float_flag_invalid );
   2290         }
   2291         return 0;
   2292     }
   2293     aSign = extractFloat32Sign( a );
   2294     bSign = extractFloat32Sign( b );
   2295     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
   2296     return ( a == b ) || ( aSign ^ ( a < b ) );
   2297 
   2298 }
   2299 
   2300 /*
   2301 -------------------------------------------------------------------------------
   2302 Returns 1 if the single-precision floating-point value `a' is less than
   2303 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
   2304 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
   2305 Standard for Binary Floating-Point Arithmetic.
   2306 -------------------------------------------------------------------------------
   2307 */
   2308 flag float32_lt_quiet( float32 a, float32 b )
   2309 {
   2310     flag aSign, bSign;
   2311 
   2312     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2313          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2314        ) {
   2315         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
   2316             float_raise( float_flag_invalid );
   2317         }
   2318         return 0;
   2319     }
   2320     aSign = extractFloat32Sign( a );
   2321     bSign = extractFloat32Sign( b );
   2322     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
   2323     return ( a != b ) && ( aSign ^ ( a < b ) );
   2324 
   2325 }
   2326 #endif /* !SOFTFLOAT_FOR_GCC */
   2327 
   2328 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   2329 /*
   2330 -------------------------------------------------------------------------------
   2331 Returns the result of converting the double-precision floating-point value
   2332 `a' to the 32-bit two's complement integer format.  The conversion is
   2333 performed according to the IEC/IEEE Standard for Binary Floating-Point
   2334 Arithmetic---which means in particular that the conversion is rounded
   2335 according to the current rounding mode.  If `a' is a NaN, the largest
   2336 positive integer is returned.  Otherwise, if the conversion overflows, the
   2337 largest integer with the same sign as `a' is returned.
   2338 -------------------------------------------------------------------------------
   2339 */
   2340 int32 float64_to_int32( float64 a )
   2341 {
   2342     flag aSign;
   2343     int16 aExp, shiftCount;
   2344     bits64 aSig;
   2345 
   2346     aSig = extractFloat64Frac( a );
   2347     aExp = extractFloat64Exp( a );
   2348     aSign = extractFloat64Sign( a );
   2349     if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
   2350     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
   2351     shiftCount = 0x42C - aExp;
   2352     if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
   2353     return roundAndPackInt32( aSign, aSig );
   2354 
   2355 }
   2356 #endif /* !SOFTFLOAT_FOR_GCC */
   2357 
   2358 /*
   2359 -------------------------------------------------------------------------------
   2360 Returns the result of converting the double-precision floating-point value
   2361 `a' to the 32-bit two's complement integer format.  The conversion is
   2362 performed according to the IEC/IEEE Standard for Binary Floating-Point
   2363 Arithmetic, except that the conversion is always rounded toward zero.
   2364 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
   2365 the conversion overflows, the largest integer with the same sign as `a' is
   2366 returned.
   2367 -------------------------------------------------------------------------------
   2368 */
   2369 int32 float64_to_int32_round_to_zero( float64 a )
   2370 {
   2371     flag aSign;
   2372     int16 aExp, shiftCount;
   2373     bits64 aSig, savedASig;
   2374     int32 z;
   2375 
   2376     aSig = extractFloat64Frac( a );
   2377     aExp = extractFloat64Exp( a );
   2378     aSign = extractFloat64Sign( a );
   2379     if ( 0x41E < aExp ) {
   2380         if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
   2381         goto invalid;
   2382     }
   2383     else if ( aExp < 0x3FF ) {
   2384         if ( aExp || aSig ) set_float_exception_inexact_flag();
   2385         return 0;
   2386     }
   2387     aSig |= LIT64( 0x0010000000000000 );
   2388     shiftCount = 0x433 - aExp;
   2389     savedASig = aSig;
   2390     aSig >>= shiftCount;
   2391     z = (int32)aSig;
   2392     if ( aSign ) z = - z;
   2393     if ( ( z < 0 ) ^ aSign ) {
   2394  invalid:
   2395         float_raise( float_flag_invalid );
   2396         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
   2397     }
   2398     if ( ( aSig<<shiftCount ) != savedASig ) {
   2399         set_float_exception_inexact_flag();
   2400     }
   2401     return z;
   2402 
   2403 }
   2404 
   2405 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   2406 /*
   2407 -------------------------------------------------------------------------------
   2408 Returns the result of converting the double-precision floating-point value
   2409 `a' to the 64-bit two's complement integer format.  The conversion is
   2410 performed according to the IEC/IEEE Standard for Binary Floating-Point
   2411 Arithmetic---which means in particular that the conversion is rounded
   2412 according to the current rounding mode.  If `a' is a NaN, the largest
   2413 positive integer is returned.  Otherwise, if the conversion overflows, the
   2414 largest integer with the same sign as `a' is returned.
   2415 -------------------------------------------------------------------------------
   2416 */
   2417 int64 float64_to_int64( float64 a )
   2418 {
   2419     flag aSign;
   2420     int16 aExp, shiftCount;
   2421     bits64 aSig, aSigExtra;
   2422 
   2423     aSig = extractFloat64Frac( a );
   2424     aExp = extractFloat64Exp( a );
   2425     aSign = extractFloat64Sign( a );
   2426     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
   2427     shiftCount = 0x433 - aExp;
   2428     if ( shiftCount <= 0 ) {
   2429         if ( 0x43E < aExp ) {
   2430             float_raise( float_flag_invalid );
   2431             if (    ! aSign
   2432                  || (    ( aExp == 0x7FF )
   2433                       && ( aSig != LIT64( 0x0010000000000000 ) ) )
   2434                ) {
   2435                 return LIT64( 0x7FFFFFFFFFFFFFFF );
   2436             }
   2437             return (sbits64) LIT64( 0x8000000000000000 );
   2438         }
   2439         aSigExtra = 0;
   2440         aSig <<= - shiftCount;
   2441     }
   2442     else {
   2443         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
   2444     }
   2445     return roundAndPackInt64( aSign, aSig, aSigExtra );
   2446 
   2447 }
   2448 
   2449 /*
   2450 -------------------------------------------------------------------------------
   2451 Returns the result of converting the double-precision floating-point value
   2452 `a' to the 64-bit two's complement integer format.  The conversion is
   2453 performed according to the IEC/IEEE Standard for Binary Floating-Point
   2454 Arithmetic, except that the conversion is always rounded toward zero.
   2455 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
   2456 the conversion overflows, the largest integer with the same sign as `a' is
   2457 returned.
   2458 -------------------------------------------------------------------------------
   2459 */
   2460 int64 float64_to_int64_round_to_zero( float64 a )
   2461 {
   2462     flag aSign;
   2463     int16 aExp, shiftCount;
   2464     bits64 aSig;
   2465     int64 z;
   2466 
   2467     aSig = extractFloat64Frac( a );
   2468     aExp = extractFloat64Exp( a );
   2469     aSign = extractFloat64Sign( a );
   2470     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
   2471     shiftCount = aExp - 0x433;
   2472     if ( 0 <= shiftCount ) {
   2473         if ( 0x43E <= aExp ) {
   2474             if ( a != LIT64( 0xC3E0000000000000 ) ) {
   2475                 float_raise( float_flag_invalid );
   2476                 if (    ! aSign
   2477                      || (    ( aExp == 0x7FF )
   2478                           && ( aSig != LIT64( 0x0010000000000000 ) ) )
   2479                    ) {
   2480                     return LIT64( 0x7FFFFFFFFFFFFFFF );
   2481                 }
   2482             }
   2483             return (sbits64) LIT64( 0x8000000000000000 );
   2484         }
   2485         z = aSig<<shiftCount;
   2486     }
   2487     else {
   2488         if ( aExp < 0x3FE ) {
   2489             if ( aExp | aSig ) set_float_exception_inexact_flag();
   2490             return 0;
   2491         }
   2492         z = aSig>>( - shiftCount );
   2493         if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
   2494             set_float_exception_inexact_flag();
   2495         }
   2496     }
   2497     if ( aSign ) z = - z;
   2498     return z;
   2499 
   2500 }
   2501 #endif /* !SOFTFLOAT_FOR_GCC */
   2502 
   2503 /*
   2504 -------------------------------------------------------------------------------
   2505 Returns the result of converting the double-precision floating-point value
   2506 `a' to the single-precision floating-point format.  The conversion is
   2507 performed according to the IEC/IEEE Standard for Binary Floating-Point
   2508 Arithmetic.
   2509 -------------------------------------------------------------------------------
   2510 */
   2511 float32 float64_to_float32( float64 a )
   2512 {
   2513     flag aSign;
   2514     int16 aExp;
   2515     bits64 aSig;
   2516     bits32 zSig;
   2517 
   2518     aSig = extractFloat64Frac( a );
   2519     aExp = extractFloat64Exp( a );
   2520     aSign = extractFloat64Sign( a );
   2521     if ( aExp == 0x7FF ) {
   2522         if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
   2523         return packFloat32( aSign, 0xFF, 0 );
   2524     }
   2525     shift64RightJamming( aSig, 22, &aSig );
   2526     zSig = (bits32)aSig;
   2527     if ( aExp || zSig ) {
   2528         zSig |= 0x40000000;
   2529         aExp -= 0x381;
   2530     }
   2531     return roundAndPackFloat32( aSign, aExp, zSig );
   2532 
   2533 }
   2534 
   2535 #ifdef FLOATX80
   2536 
   2537 /*
   2538 -------------------------------------------------------------------------------
   2539 Returns the result of converting the double-precision floating-point value
   2540 `a' to the extended double-precision floating-point format.  The conversion
   2541 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   2542 Arithmetic.
   2543 -------------------------------------------------------------------------------
   2544 */
   2545 floatx80 float64_to_floatx80( float64 a )
   2546 {
   2547     flag aSign;
   2548     int16 aExp;
   2549     bits64 aSig;
   2550 
   2551     aSig = extractFloat64Frac( a );
   2552     aExp = extractFloat64Exp( a );
   2553     aSign = extractFloat64Sign( a );
   2554     if ( aExp == 0x7FF ) {
   2555         if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
   2556         return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   2557     }
   2558     if ( aExp == 0 ) {
   2559         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
   2560         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   2561     }
   2562     return
   2563         packFloatx80(
   2564             aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
   2565 
   2566 }
   2567 
   2568 #endif
   2569 
   2570 #ifdef FLOAT128
   2571 
   2572 /*
   2573 -------------------------------------------------------------------------------
   2574 Returns the result of converting the double-precision floating-point value
   2575 `a' to the quadruple-precision floating-point format.  The conversion is
   2576 performed according to the IEC/IEEE Standard for Binary Floating-Point
   2577 Arithmetic.
   2578 -------------------------------------------------------------------------------
   2579 */
   2580 float128 float64_to_float128( float64 a )
   2581 {
   2582     flag aSign;
   2583     int16 aExp;
   2584     bits64 aSig, zSig0, zSig1;
   2585 
   2586     aSig = extractFloat64Frac( a );
   2587     aExp = extractFloat64Exp( a );
   2588     aSign = extractFloat64Sign( a );
   2589     if ( aExp == 0x7FF ) {
   2590         if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) );
   2591         return packFloat128( aSign, 0x7FFF, 0, 0 );
   2592     }
   2593     if ( aExp == 0 ) {
   2594         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
   2595         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   2596         --aExp;
   2597     }
   2598     shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
   2599     return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
   2600 
   2601 }
   2602 
   2603 #endif
   2604 
   2605 #ifndef SOFTFLOAT_FOR_GCC
   2606 /*
   2607 -------------------------------------------------------------------------------
   2608 Rounds the double-precision floating-point value `a' to an integer, and
   2609 returns the result as a double-precision floating-point value.  The
   2610 operation is performed according to the IEC/IEEE Standard for Binary
   2611 Floating-Point Arithmetic.
   2612 -------------------------------------------------------------------------------
   2613 */
   2614 float64 float64_round_to_int( float64 a )
   2615 {
   2616     flag aSign;
   2617     int16 aExp;
   2618     bits64 lastBitMask, roundBitsMask;
   2619     int8 roundingMode;
   2620     float64 z;
   2621 
   2622     aExp = extractFloat64Exp( a );
   2623     if ( 0x433 <= aExp ) {
   2624         if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
   2625             return propagateFloat64NaN( a, a );
   2626         }
   2627         return a;
   2628     }
   2629     if ( aExp < 0x3FF ) {
   2630         if ( (bits64) ( a<<1 ) == 0 ) return a;
   2631         set_float_exception_inexact_flag();
   2632         aSign = extractFloat64Sign( a );
   2633         switch ( float_rounding_mode ) {
   2634          case float_round_nearest_even:
   2635             if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
   2636                 return packFloat64( aSign, 0x3FF, 0 );
   2637             }
   2638             break;
   2639 	 case float_round_to_zero:
   2640 	    break;
   2641          case float_round_down:
   2642             return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
   2643          case float_round_up:
   2644             return
   2645             aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
   2646         }
   2647         return packFloat64( aSign, 0, 0 );
   2648     }
   2649     lastBitMask = 1;
   2650     lastBitMask <<= 0x433 - aExp;
   2651     roundBitsMask = lastBitMask - 1;
   2652     z = a;
   2653     roundingMode = float_rounding_mode;
   2654     if ( roundingMode == float_round_nearest_even ) {
   2655         z += lastBitMask>>1;
   2656         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
   2657     }
   2658     else if ( roundingMode != float_round_to_zero ) {
   2659         if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
   2660             z += roundBitsMask;
   2661         }
   2662     }
   2663     z &= ~ roundBitsMask;
   2664     if ( z != a ) set_float_exception_inexact_flag();
   2665     return z;
   2666 
   2667 }
   2668 #endif
   2669 
   2670 /*
   2671 -------------------------------------------------------------------------------
   2672 Returns the result of adding the absolute values of the double-precision
   2673 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
   2674 before being returned.  `zSign' is ignored if the result is a NaN.
   2675 The addition is performed according to the IEC/IEEE Standard for Binary
   2676 Floating-Point Arithmetic.
   2677 -------------------------------------------------------------------------------
   2678 */
   2679 static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
   2680 {
   2681     int16 aExp, bExp, zExp;
   2682     bits64 aSig, bSig, zSig;
   2683     int16 expDiff;
   2684 
   2685     aSig = extractFloat64Frac( a );
   2686     aExp = extractFloat64Exp( a );
   2687     bSig = extractFloat64Frac( b );
   2688     bExp = extractFloat64Exp( b );
   2689     expDiff = aExp - bExp;
   2690     aSig <<= 9;
   2691     bSig <<= 9;
   2692     if ( 0 < expDiff ) {
   2693         if ( aExp == 0x7FF ) {
   2694             if ( aSig ) return propagateFloat64NaN( a, b );
   2695             return a;
   2696         }
   2697         if ( bExp == 0 ) {
   2698             --expDiff;
   2699         }
   2700         else {
   2701             bSig |= LIT64( 0x2000000000000000 );
   2702         }
   2703         shift64RightJamming( bSig, expDiff, &bSig );
   2704         zExp = aExp;
   2705     }
   2706     else if ( expDiff < 0 ) {
   2707         if ( bExp == 0x7FF ) {
   2708             if ( bSig ) return propagateFloat64NaN( a, b );
   2709             return packFloat64( zSign, 0x7FF, 0 );
   2710         }
   2711         if ( aExp == 0 ) {
   2712             ++expDiff;
   2713         }
   2714         else {
   2715             aSig |= LIT64( 0x2000000000000000 );
   2716         }
   2717         shift64RightJamming( aSig, - expDiff, &aSig );
   2718         zExp = bExp;
   2719     }
   2720     else {
   2721         if ( aExp == 0x7FF ) {
   2722             if ( aSig | bSig ) return propagateFloat64NaN( a, b );
   2723             return a;
   2724         }
   2725         if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
   2726         zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
   2727         zExp = aExp;
   2728         goto roundAndPack;
   2729     }
   2730     aSig |= LIT64( 0x2000000000000000 );
   2731     zSig = ( aSig + bSig )<<1;
   2732     --zExp;
   2733     if ( (sbits64) zSig < 0 ) {
   2734         zSig = aSig + bSig;
   2735         ++zExp;
   2736     }
   2737  roundAndPack:
   2738     return roundAndPackFloat64( zSign, zExp, zSig );
   2739 
   2740 }
   2741 
   2742 /*
   2743 -------------------------------------------------------------------------------
   2744 Returns the result of subtracting the absolute values of the double-
   2745 precision floating-point values `a' and `b'.  If `zSign' is 1, the
   2746 difference is negated before being returned.  `zSign' is ignored if the
   2747 result is a NaN.  The subtraction is performed according to the IEC/IEEE
   2748 Standard for Binary Floating-Point Arithmetic.
   2749 -------------------------------------------------------------------------------
   2750 */
   2751 static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
   2752 {
   2753     int16 aExp, bExp, zExp;
   2754     bits64 aSig, bSig, zSig;
   2755     int16 expDiff;
   2756 
   2757     aSig = extractFloat64Frac( a );
   2758     aExp = extractFloat64Exp( a );
   2759     bSig = extractFloat64Frac( b );
   2760     bExp = extractFloat64Exp( b );
   2761     expDiff = aExp - bExp;
   2762     aSig <<= 10;
   2763     bSig <<= 10;
   2764     if ( 0 < expDiff ) goto aExpBigger;
   2765     if ( expDiff < 0 ) goto bExpBigger;
   2766     if ( aExp == 0x7FF ) {
   2767         if ( aSig | bSig ) return propagateFloat64NaN( a, b );
   2768         float_raise( float_flag_invalid );
   2769         return float64_default_nan;
   2770     }
   2771     if ( aExp == 0 ) {
   2772         aExp = 1;
   2773         bExp = 1;
   2774     }
   2775     if ( bSig < aSig ) goto aBigger;
   2776     if ( aSig < bSig ) goto bBigger;
   2777     return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
   2778  bExpBigger:
   2779     if ( bExp == 0x7FF ) {
   2780         if ( bSig ) return propagateFloat64NaN( a, b );
   2781         return packFloat64( zSign ^ 1, 0x7FF, 0 );
   2782     }
   2783     if ( aExp == 0 ) {
   2784         ++expDiff;
   2785     }
   2786     else {
   2787         aSig |= LIT64( 0x4000000000000000 );
   2788     }
   2789     shift64RightJamming( aSig, - expDiff, &aSig );
   2790     bSig |= LIT64( 0x4000000000000000 );
   2791  bBigger:
   2792     zSig = bSig - aSig;
   2793     zExp = bExp;
   2794     zSign ^= 1;
   2795     goto normalizeRoundAndPack;
   2796  aExpBigger:
   2797     if ( aExp == 0x7FF ) {
   2798         if ( aSig ) return propagateFloat64NaN( a, b );
   2799         return a;
   2800     }
   2801     if ( bExp == 0 ) {
   2802         --expDiff;
   2803     }
   2804     else {
   2805         bSig |= LIT64( 0x4000000000000000 );
   2806     }
   2807     shift64RightJamming( bSig, expDiff, &bSig );
   2808     aSig |= LIT64( 0x4000000000000000 );
   2809  aBigger:
   2810     zSig = aSig - bSig;
   2811     zExp = aExp;
   2812  normalizeRoundAndPack:
   2813     --zExp;
   2814     return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
   2815 
   2816 }
   2817 
   2818 /*
   2819 -------------------------------------------------------------------------------
   2820 Returns the result of adding the double-precision floating-point values `a'
   2821 and `b'.  The operation is performed according to the IEC/IEEE Standard for
   2822 Binary Floating-Point Arithmetic.
   2823 -------------------------------------------------------------------------------
   2824 */
   2825 float64 float64_add( float64 a, float64 b )
   2826 {
   2827     flag aSign, bSign;
   2828 
   2829     aSign = extractFloat64Sign( a );
   2830     bSign = extractFloat64Sign( b );
   2831     if ( aSign == bSign ) {
   2832         return addFloat64Sigs( a, b, aSign );
   2833     }
   2834     else {
   2835         return subFloat64Sigs( a, b, aSign );
   2836     }
   2837 
   2838 }
   2839 
   2840 /*
   2841 -------------------------------------------------------------------------------
   2842 Returns the result of subtracting the double-precision floating-point values
   2843 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   2844 for Binary Floating-Point Arithmetic.
   2845 -------------------------------------------------------------------------------
   2846 */
   2847 float64 float64_sub( float64 a, float64 b )
   2848 {
   2849     flag aSign, bSign;
   2850 
   2851     aSign = extractFloat64Sign( a );
   2852     bSign = extractFloat64Sign( b );
   2853     if ( aSign == bSign ) {
   2854         return subFloat64Sigs( a, b, aSign );
   2855     }
   2856     else {
   2857         return addFloat64Sigs( a, b, aSign );
   2858     }
   2859 
   2860 }
   2861 
   2862 /*
   2863 -------------------------------------------------------------------------------
   2864 Returns the result of multiplying the double-precision floating-point values
   2865 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   2866 for Binary Floating-Point Arithmetic.
   2867 -------------------------------------------------------------------------------
   2868 */
   2869 float64 float64_mul( float64 a, float64 b )
   2870 {
   2871     flag aSign, bSign, zSign;
   2872     int16 aExp, bExp, zExp;
   2873     bits64 aSig, bSig, zSig0, zSig1;
   2874 
   2875     aSig = extractFloat64Frac( a );
   2876     aExp = extractFloat64Exp( a );
   2877     aSign = extractFloat64Sign( a );
   2878     bSig = extractFloat64Frac( b );
   2879     bExp = extractFloat64Exp( b );
   2880     bSign = extractFloat64Sign( b );
   2881     zSign = aSign ^ bSign;
   2882     if ( aExp == 0x7FF ) {
   2883         if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
   2884             return propagateFloat64NaN( a, b );
   2885         }
   2886         if ( ( bExp | bSig ) == 0 ) {
   2887             float_raise( float_flag_invalid );
   2888             return float64_default_nan;
   2889         }
   2890         return packFloat64( zSign, 0x7FF, 0 );
   2891     }
   2892     if ( bExp == 0x7FF ) {
   2893         if ( bSig ) return propagateFloat64NaN( a, b );
   2894         if ( ( aExp | aSig ) == 0 ) {
   2895             float_raise( float_flag_invalid );
   2896             return float64_default_nan;
   2897         }
   2898         return packFloat64( zSign, 0x7FF, 0 );
   2899     }
   2900     if ( aExp == 0 ) {
   2901         if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
   2902         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   2903     }
   2904     if ( bExp == 0 ) {
   2905         if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
   2906         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
   2907     }
   2908     zExp = aExp + bExp - 0x3FF;
   2909     aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
   2910     bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
   2911     mul64To128( aSig, bSig, &zSig0, &zSig1 );
   2912     zSig0 |= ( zSig1 != 0 );
   2913     if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
   2914         zSig0 <<= 1;
   2915         --zExp;
   2916     }
   2917     return roundAndPackFloat64( zSign, zExp, zSig0 );
   2918 
   2919 }
   2920 
   2921 /*
   2922 -------------------------------------------------------------------------------
   2923 Returns the result of dividing the double-precision floating-point value `a'
   2924 by the corresponding value `b'.  The operation is performed according to
   2925 the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2926 -------------------------------------------------------------------------------
   2927 */
   2928 float64 float64_div( float64 a, float64 b )
   2929 {
   2930     flag aSign, bSign, zSign;
   2931     int16 aExp, bExp, zExp;
   2932     bits64 aSig, bSig, zSig;
   2933     bits64 rem0, rem1;
   2934     bits64 term0, term1;
   2935 
   2936     aSig = extractFloat64Frac( a );
   2937     aExp = extractFloat64Exp( a );
   2938     aSign = extractFloat64Sign( a );
   2939     bSig = extractFloat64Frac( b );
   2940     bExp = extractFloat64Exp( b );
   2941     bSign = extractFloat64Sign( b );
   2942     zSign = aSign ^ bSign;
   2943     if ( aExp == 0x7FF ) {
   2944         if ( aSig ) return propagateFloat64NaN( a, b );
   2945         if ( bExp == 0x7FF ) {
   2946             if ( bSig ) return propagateFloat64NaN( a, b );
   2947             float_raise( float_flag_invalid );
   2948             return float64_default_nan;
   2949         }
   2950         return packFloat64( zSign, 0x7FF, 0 );
   2951     }
   2952     if ( bExp == 0x7FF ) {
   2953         if ( bSig ) return propagateFloat64NaN( a, b );
   2954         return packFloat64( zSign, 0, 0 );
   2955     }
   2956     if ( bExp == 0 ) {
   2957         if ( bSig == 0 ) {
   2958             if ( ( aExp | aSig ) == 0 ) {
   2959                 float_raise( float_flag_invalid );
   2960                 return float64_default_nan;
   2961             }
   2962             float_raise( float_flag_divbyzero );
   2963             return packFloat64( zSign, 0x7FF, 0 );
   2964         }
   2965         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
   2966     }
   2967     if ( aExp == 0 ) {
   2968         if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
   2969         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   2970     }
   2971     zExp = aExp - bExp + 0x3FD;
   2972     aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
   2973     bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
   2974     if ( bSig <= ( aSig + aSig ) ) {
   2975         aSig >>= 1;
   2976         ++zExp;
   2977     }
   2978     zSig = estimateDiv128To64( aSig, 0, bSig );
   2979     if ( ( zSig & 0x1FF ) <= 2 ) {
   2980         mul64To128( bSig, zSig, &term0, &term1 );
   2981         sub128( aSig, 0, term0, term1, &rem0, &rem1 );
   2982         while ( (sbits64) rem0 < 0 ) {
   2983             --zSig;
   2984             add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
   2985         }
   2986         zSig |= ( rem1 != 0 );
   2987     }
   2988     return roundAndPackFloat64( zSign, zExp, zSig );
   2989 
   2990 }
   2991 
   2992 #ifndef SOFTFLOAT_FOR_GCC
   2993 /*
   2994 -------------------------------------------------------------------------------
   2995 Returns the remainder of the double-precision floating-point value `a'
   2996 with respect to the corresponding value `b'.  The operation is performed
   2997 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2998 -------------------------------------------------------------------------------
   2999 */
   3000 float64 float64_rem( float64 a, float64 b )
   3001 {
   3002     flag aSign, bSign, zSign;
   3003     int16 aExp, bExp, expDiff;
   3004     bits64 aSig, bSig;
   3005     bits64 q, alternateASig;
   3006     sbits64 sigMean;
   3007 
   3008     aSig = extractFloat64Frac( a );
   3009     aExp = extractFloat64Exp( a );
   3010     aSign = extractFloat64Sign( a );
   3011     bSig = extractFloat64Frac( b );
   3012     bExp = extractFloat64Exp( b );
   3013     bSign = extractFloat64Sign( b );
   3014     if ( aExp == 0x7FF ) {
   3015         if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
   3016             return propagateFloat64NaN( a, b );
   3017         }
   3018         float_raise( float_flag_invalid );
   3019         return float64_default_nan;
   3020     }
   3021     if ( bExp == 0x7FF ) {
   3022         if ( bSig ) return propagateFloat64NaN( a, b );
   3023         return a;
   3024     }
   3025     if ( bExp == 0 ) {
   3026         if ( bSig == 0 ) {
   3027             float_raise( float_flag_invalid );
   3028             return float64_default_nan;
   3029         }
   3030         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
   3031     }
   3032     if ( aExp == 0 ) {
   3033         if ( aSig == 0 ) return a;
   3034         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   3035     }
   3036     expDiff = aExp - bExp;
   3037     aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
   3038     bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
   3039     if ( expDiff < 0 ) {
   3040         if ( expDiff < -1 ) return a;
   3041         aSig >>= 1;
   3042     }
   3043     q = ( bSig <= aSig );
   3044     if ( q ) aSig -= bSig;
   3045     expDiff -= 64;
   3046     while ( 0 < expDiff ) {
   3047         q = estimateDiv128To64( aSig, 0, bSig );
   3048         q = ( 2 < q ) ? q - 2 : 0;
   3049         aSig = - ( ( bSig>>2 ) * q );
   3050         expDiff -= 62;
   3051     }
   3052     expDiff += 64;
   3053     if ( 0 < expDiff ) {
   3054         q = estimateDiv128To64( aSig, 0, bSig );
   3055         q = ( 2 < q ) ? q - 2 : 0;
   3056         q >>= 64 - expDiff;
   3057         bSig >>= 2;
   3058         aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
   3059     }
   3060     else {
   3061         aSig >>= 2;
   3062         bSig >>= 2;
   3063     }
   3064     do {
   3065         alternateASig = aSig;
   3066         ++q;
   3067         aSig -= bSig;
   3068     } while ( 0 <= (sbits64) aSig );
   3069     sigMean = aSig + alternateASig;
   3070     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
   3071         aSig = alternateASig;
   3072     }
   3073     zSign = ( (sbits64) aSig < 0 );
   3074     if ( zSign ) aSig = - aSig;
   3075     return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
   3076 
   3077 }
   3078 
   3079 /*
   3080 -------------------------------------------------------------------------------
   3081 Returns the square root of the double-precision floating-point value `a'.
   3082 The operation is performed according to the IEC/IEEE Standard for Binary
   3083 Floating-Point Arithmetic.
   3084 -------------------------------------------------------------------------------
   3085 */
   3086 float64 float64_sqrt( float64 a )
   3087 {
   3088     flag aSign;
   3089     int16 aExp, zExp;
   3090     bits64 aSig, zSig, doubleZSig;
   3091     bits64 rem0, rem1, term0, term1;
   3092 
   3093     aSig = extractFloat64Frac( a );
   3094     aExp = extractFloat64Exp( a );
   3095     aSign = extractFloat64Sign( a );
   3096     if ( aExp == 0x7FF ) {
   3097         if ( aSig ) return propagateFloat64NaN( a, a );
   3098         if ( ! aSign ) return a;
   3099         float_raise( float_flag_invalid );
   3100         return float64_default_nan;
   3101     }
   3102     if ( aSign ) {
   3103         if ( ( aExp | aSig ) == 0 ) return a;
   3104         float_raise( float_flag_invalid );
   3105         return float64_default_nan;
   3106     }
   3107     if ( aExp == 0 ) {
   3108         if ( aSig == 0 ) return 0;
   3109         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   3110     }
   3111     zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
   3112     aSig |= LIT64( 0x0010000000000000 );
   3113     zSig = estimateSqrt32( aExp, aSig>>21 );
   3114     aSig <<= 9 - ( aExp & 1 );
   3115     zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
   3116     if ( ( zSig & 0x1FF ) <= 5 ) {
   3117         doubleZSig = zSig<<1;
   3118         mul64To128( zSig, zSig, &term0, &term1 );
   3119         sub128( aSig, 0, term0, term1, &rem0, &rem1 );
   3120         while ( (sbits64) rem0 < 0 ) {
   3121             --zSig;
   3122             doubleZSig -= 2;
   3123             add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
   3124         }
   3125         zSig |= ( ( rem0 | rem1 ) != 0 );
   3126     }
   3127     return roundAndPackFloat64( 0, zExp, zSig );
   3128 
   3129 }
   3130 #endif
   3131 
   3132 /*
   3133 -------------------------------------------------------------------------------
   3134 Returns 1 if the double-precision floating-point value `a' is equal to the
   3135 corresponding value `b', and 0 otherwise.  The comparison is performed
   3136 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3137 -------------------------------------------------------------------------------
   3138 */
   3139 flag float64_eq( float64 a, float64 b )
   3140 {
   3141 
   3142     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3143          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3144        ) {
   3145         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
   3146             float_raise( float_flag_invalid );
   3147         }
   3148         return 0;
   3149     }
   3150     return ( a == b ) ||
   3151 	( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
   3152 
   3153 }
   3154 
   3155 /*
   3156 -------------------------------------------------------------------------------
   3157 Returns 1 if the double-precision floating-point value `a' is less than or
   3158 equal to the corresponding value `b', and 0 otherwise.  The comparison is
   3159 performed according to the IEC/IEEE Standard for Binary Floating-Point
   3160 Arithmetic.
   3161 -------------------------------------------------------------------------------
   3162 */
   3163 flag float64_le( float64 a, float64 b )
   3164 {
   3165     flag aSign, bSign;
   3166 
   3167     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3168          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3169        ) {
   3170         float_raise( float_flag_invalid );
   3171         return 0;
   3172     }
   3173     aSign = extractFloat64Sign( a );
   3174     bSign = extractFloat64Sign( b );
   3175     if ( aSign != bSign )
   3176 	return aSign ||
   3177 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
   3178 	      0 );
   3179     return ( a == b ) ||
   3180 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
   3181 
   3182 }
   3183 
   3184 /*
   3185 -------------------------------------------------------------------------------
   3186 Returns 1 if the double-precision floating-point value `a' is less than
   3187 the corresponding value `b', and 0 otherwise.  The comparison is performed
   3188 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3189 -------------------------------------------------------------------------------
   3190 */
   3191 flag float64_lt( float64 a, float64 b )
   3192 {
   3193     flag aSign, bSign;
   3194 
   3195     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3196          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3197        ) {
   3198         float_raise( float_flag_invalid );
   3199         return 0;
   3200     }
   3201     aSign = extractFloat64Sign( a );
   3202     bSign = extractFloat64Sign( b );
   3203     if ( aSign != bSign )
   3204 	return aSign &&
   3205 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
   3206 	      0 );
   3207     return ( a != b ) &&
   3208 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
   3209 
   3210 }
   3211 
   3212 #ifndef SOFTFLOAT_FOR_GCC
   3213 /*
   3214 -------------------------------------------------------------------------------
   3215 Returns 1 if the double-precision floating-point value `a' is equal to the
   3216 corresponding value `b', and 0 otherwise.  The invalid exception is raised
   3217 if either operand is a NaN.  Otherwise, the comparison is performed
   3218 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3219 -------------------------------------------------------------------------------
   3220 */
   3221 flag float64_eq_signaling( float64 a, float64 b )
   3222 {
   3223 
   3224     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3225          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3226        ) {
   3227         float_raise( float_flag_invalid );
   3228         return 0;
   3229     }
   3230     return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
   3231 
   3232 }
   3233 
   3234 /*
   3235 -------------------------------------------------------------------------------
   3236 Returns 1 if the double-precision floating-point value `a' is less than or
   3237 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
   3238 cause an exception.  Otherwise, the comparison is performed according to the
   3239 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3240 -------------------------------------------------------------------------------
   3241 */
   3242 flag float64_le_quiet( float64 a, float64 b )
   3243 {
   3244     flag aSign, bSign;
   3245 
   3246     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3247          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3248        ) {
   3249         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
   3250             float_raise( float_flag_invalid );
   3251         }
   3252         return 0;
   3253     }
   3254     aSign = extractFloat64Sign( a );
   3255     bSign = extractFloat64Sign( b );
   3256     if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
   3257     return ( a == b ) || ( aSign ^ ( a < b ) );
   3258 
   3259 }
   3260 
   3261 /*
   3262 -------------------------------------------------------------------------------
   3263 Returns 1 if the double-precision floating-point value `a' is less than
   3264 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
   3265 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
   3266 Standard for Binary Floating-Point Arithmetic.
   3267 -------------------------------------------------------------------------------
   3268 */
   3269 flag float64_lt_quiet( float64 a, float64 b )
   3270 {
   3271     flag aSign, bSign;
   3272 
   3273     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3274          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3275        ) {
   3276         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
   3277             float_raise( float_flag_invalid );
   3278         }
   3279         return 0;
   3280     }
   3281     aSign = extractFloat64Sign( a );
   3282     bSign = extractFloat64Sign( b );
   3283     if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
   3284     return ( a != b ) && ( aSign ^ ( a < b ) );
   3285 
   3286 }
   3287 #endif
   3288 
   3289 #ifdef FLOATX80
   3290 
   3291 /*
   3292 -------------------------------------------------------------------------------
   3293 Returns the result of converting the extended double-precision floating-
   3294 point value `a' to the 32-bit two's complement integer format.  The
   3295 conversion is performed according to the IEC/IEEE Standard for Binary
   3296 Floating-Point Arithmetic---which means in particular that the conversion
   3297 is rounded according to the current rounding mode.  If `a' is a NaN, the
   3298 largest positive integer is returned.  Otherwise, if the conversion
   3299 overflows, the largest integer with the same sign as `a' is returned.
   3300 -------------------------------------------------------------------------------
   3301 */
   3302 int32 floatx80_to_int32( floatx80 a )
   3303 {
   3304     flag aSign;
   3305     int32 aExp, shiftCount;
   3306     bits64 aSig;
   3307 
   3308     aSig = extractFloatx80Frac( a );
   3309     aExp = extractFloatx80Exp( a );
   3310     aSign = extractFloatx80Sign( a );
   3311     if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
   3312     shiftCount = 0x4037 - aExp;
   3313     if ( shiftCount <= 0 ) shiftCount = 1;
   3314     shift64RightJamming( aSig, shiftCount, &aSig );
   3315     return roundAndPackInt32( aSign, aSig );
   3316 
   3317 }
   3318 
   3319 /*
   3320 -------------------------------------------------------------------------------
   3321 Returns the result of converting the extended double-precision floating-
   3322 point value `a' to the 32-bit two's complement integer format.  The
   3323 conversion is performed according to the IEC/IEEE Standard for Binary
   3324 Floating-Point Arithmetic, except that the conversion is always rounded
   3325 toward zero.  If `a' is a NaN, the largest positive integer is returned.
   3326 Otherwise, if the conversion overflows, the largest integer with the same
   3327 sign as `a' is returned.
   3328 -------------------------------------------------------------------------------
   3329 */
   3330 int32 floatx80_to_int32_round_to_zero( floatx80 a )
   3331 {
   3332     flag aSign;
   3333     int32 aExp, shiftCount;
   3334     bits64 aSig, savedASig;
   3335     int32 z;
   3336 
   3337     aSig = extractFloatx80Frac( a );
   3338     aExp = extractFloatx80Exp( a );
   3339     aSign = extractFloatx80Sign( a );
   3340     if ( 0x401E < aExp ) {
   3341         if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
   3342         goto invalid;
   3343     }
   3344     else if ( aExp < 0x3FFF ) {
   3345         if ( aExp || aSig ) set_float_exception_inexact_flag();
   3346         return 0;
   3347     }
   3348     shiftCount = 0x403E - aExp;
   3349     savedASig = aSig;
   3350     aSig >>= shiftCount;
   3351     z = aSig;
   3352     if ( aSign ) z = - z;
   3353     if ( ( z < 0 ) ^ aSign ) {
   3354  invalid:
   3355         float_raise( float_flag_invalid );
   3356         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
   3357     }
   3358     if ( ( aSig<<shiftCount ) != savedASig ) {
   3359         set_float_exception_inexact_flag();
   3360     }
   3361     return z;
   3362 
   3363 }
   3364 
   3365 /*
   3366 -------------------------------------------------------------------------------
   3367 Returns the result of converting the extended double-precision floating-
   3368 point value `a' to the 64-bit two's complement integer format.  The
   3369 conversion is performed according to the IEC/IEEE Standard for Binary
   3370 Floating-Point Arithmetic---which means in particular that the conversion
   3371 is rounded according to the current rounding mode.  If `a' is a NaN,
   3372 the largest positive integer is returned.  Otherwise, if the conversion
   3373 overflows, the largest integer with the same sign as `a' is returned.
   3374 -------------------------------------------------------------------------------
   3375 */
   3376 int64 floatx80_to_int64( floatx80 a )
   3377 {
   3378     flag aSign;
   3379     int32 aExp, shiftCount;
   3380     bits64 aSig, aSigExtra;
   3381 
   3382     aSig = extractFloatx80Frac( a );
   3383     aExp = extractFloatx80Exp( a );
   3384     aSign = extractFloatx80Sign( a );
   3385     shiftCount = 0x403E - aExp;
   3386     if ( shiftCount <= 0 ) {
   3387         if ( shiftCount ) {
   3388             float_raise( float_flag_invalid );
   3389             if (    ! aSign
   3390                  || (    ( aExp == 0x7FFF )
   3391                       && ( aSig != LIT64( 0x8000000000000000 ) ) )
   3392                ) {
   3393                 return LIT64( 0x7FFFFFFFFFFFFFFF );
   3394             }
   3395             return (sbits64) LIT64( 0x8000000000000000 );
   3396         }
   3397         aSigExtra = 0;
   3398     }
   3399     else {
   3400         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
   3401     }
   3402     return roundAndPackInt64( aSign, aSig, aSigExtra );
   3403 
   3404 }
   3405 
   3406 /*
   3407 -------------------------------------------------------------------------------
   3408 Returns the result of converting the extended double-precision floating-
   3409 point value `a' to the 64-bit two's complement integer format.  The
   3410 conversion is performed according to the IEC/IEEE Standard for Binary
   3411 Floating-Point Arithmetic, except that the conversion is always rounded
   3412 toward zero.  If `a' is a NaN, the largest positive integer is returned.
   3413 Otherwise, if the conversion overflows, the largest integer with the same
   3414 sign as `a' is returned.
   3415 -------------------------------------------------------------------------------
   3416 */
   3417 int64 floatx80_to_int64_round_to_zero( floatx80 a )
   3418 {
   3419     flag aSign;
   3420     int32 aExp, shiftCount;
   3421     bits64 aSig;
   3422     int64 z;
   3423 
   3424     aSig = extractFloatx80Frac( a );
   3425     aExp = extractFloatx80Exp( a );
   3426     aSign = extractFloatx80Sign( a );
   3427     shiftCount = aExp - 0x403E;
   3428     if ( 0 <= shiftCount ) {
   3429         aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
   3430         if ( ( (a.high>>X80SHIFT) != 0xC03E ) || aSig ) {
   3431             float_raise( float_flag_invalid );
   3432             if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
   3433                 return LIT64( 0x7FFFFFFFFFFFFFFF );
   3434             }
   3435         }
   3436         return (sbits64) LIT64( 0x8000000000000000 );
   3437     }
   3438     else if ( aExp < 0x3FFF ) {
   3439         if ( aExp | aSig ) set_float_exception_inexact_flag();
   3440         return 0;
   3441     }
   3442     z = aSig>>( - shiftCount );
   3443     if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
   3444         set_float_exception_inexact_flag();
   3445     }
   3446     if ( aSign ) z = - z;
   3447     return z;
   3448 
   3449 }
   3450 
   3451 /*
   3452 -------------------------------------------------------------------------------
   3453 Returns the result of converting the extended double-precision floating-
   3454 point value `a' to the single-precision floating-point format.  The
   3455 conversion is performed according to the IEC/IEEE Standard for Binary
   3456 Floating-Point Arithmetic.
   3457 -------------------------------------------------------------------------------
   3458 */
   3459 float32 floatx80_to_float32( floatx80 a )
   3460 {
   3461     flag aSign;
   3462     int32 aExp;
   3463     bits64 aSig;
   3464 
   3465     aSig = extractFloatx80Frac( a );
   3466     aExp = extractFloatx80Exp( a );
   3467     aSign = extractFloatx80Sign( a );
   3468     if ( aExp == 0x7FFF ) {
   3469         if ( (bits64) ( aSig<<1 ) ) {
   3470             return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
   3471         }
   3472         return packFloat32( aSign, 0xFF, 0 );
   3473     }
   3474     shift64RightJamming( aSig, 33, &aSig );
   3475     if ( aExp || aSig ) aExp -= 0x3F81;
   3476     return roundAndPackFloat32( aSign, aExp, aSig );
   3477 
   3478 }
   3479 
   3480 /*
   3481 -------------------------------------------------------------------------------
   3482 Returns the result of converting the extended double-precision floating-
   3483 point value `a' to the double-precision floating-point format.  The
   3484 conversion is performed according to the IEC/IEEE Standard for Binary
   3485 Floating-Point Arithmetic.
   3486 -------------------------------------------------------------------------------
   3487 */
   3488 float64 floatx80_to_float64( floatx80 a )
   3489 {
   3490     flag aSign;
   3491     int32 aExp;
   3492     bits64 aSig, zSig;
   3493 
   3494     aSig = extractFloatx80Frac( a );
   3495     aExp = extractFloatx80Exp( a );
   3496     aSign = extractFloatx80Sign( a );
   3497     if ( aExp == 0x7FFF ) {
   3498         if ( (bits64) ( aSig<<1 ) ) {
   3499             return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
   3500         }
   3501         return packFloat64( aSign, 0x7FF, 0 );
   3502     }
   3503     shift64RightJamming( aSig, 1, &zSig );
   3504     if ( aExp || aSig ) aExp -= 0x3C01;
   3505     return roundAndPackFloat64( aSign, aExp, zSig );
   3506 
   3507 }
   3508 
   3509 #ifdef FLOAT128
   3510 
   3511 /*
   3512 -------------------------------------------------------------------------------
   3513 Returns the result of converting the extended double-precision floating-
   3514 point value `a' to the quadruple-precision floating-point format.  The
   3515 conversion is performed according to the IEC/IEEE Standard for Binary
   3516 Floating-Point Arithmetic.
   3517 -------------------------------------------------------------------------------
   3518 */
   3519 float128 floatx80_to_float128( floatx80 a )
   3520 {
   3521     flag aSign;
   3522     int16 aExp;
   3523     bits64 aSig, zSig0, zSig1;
   3524 
   3525     aSig = extractFloatx80Frac( a );
   3526     aExp = extractFloatx80Exp( a );
   3527     aSign = extractFloatx80Sign( a );
   3528     if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
   3529         return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
   3530     }
   3531     shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
   3532     return packFloat128( aSign, aExp, zSig0, zSig1 );
   3533 
   3534 }
   3535 
   3536 #endif
   3537 
   3538 /*
   3539 -------------------------------------------------------------------------------
   3540 Rounds the extended double-precision floating-point value `a' to an integer,
   3541 and returns the result as an extended quadruple-precision floating-point
   3542 value.  The operation is performed according to the IEC/IEEE Standard for
   3543 Binary Floating-Point Arithmetic.
   3544 -------------------------------------------------------------------------------
   3545 */
   3546 floatx80 floatx80_round_to_int( floatx80 a )
   3547 {
   3548     flag aSign;
   3549     int32 aExp;
   3550     bits64 lastBitMask, roundBitsMask;
   3551     int8 roundingMode;
   3552     floatx80 z;
   3553 
   3554     aExp = extractFloatx80Exp( a );
   3555     if ( 0x403E <= aExp ) {
   3556         if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
   3557             return propagateFloatx80NaN( a, a );
   3558         }
   3559         return a;
   3560     }
   3561     if ( aExp < 0x3FFF ) {
   3562         if (    ( aExp == 0 )
   3563              && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
   3564             return a;
   3565         }
   3566         set_float_exception_inexact_flag();
   3567         aSign = extractFloatx80Sign( a );
   3568         switch ( float_rounding_mode ) {
   3569          case float_round_nearest_even:
   3570             if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
   3571                ) {
   3572                 return
   3573                     packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
   3574             }
   3575             break;
   3576 	 case float_round_to_zero:
   3577 	    break;
   3578          case float_round_down:
   3579             return
   3580                   aSign ?
   3581                       packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
   3582                 : packFloatx80( 0, 0, 0 );
   3583          case float_round_up:
   3584             return
   3585                   aSign ? packFloatx80( 1, 0, 0 )
   3586                 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
   3587         }
   3588         return packFloatx80( aSign, 0, 0 );
   3589     }
   3590     lastBitMask = 1;
   3591     lastBitMask <<= 0x403E - aExp;
   3592     roundBitsMask = lastBitMask - 1;
   3593     z = a;
   3594     roundingMode = float_rounding_mode;
   3595     if ( roundingMode == float_round_nearest_even ) {
   3596         z.low += lastBitMask>>1;
   3597         if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
   3598     }
   3599     else if ( roundingMode != float_round_to_zero ) {
   3600         if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
   3601             z.low += roundBitsMask;
   3602         }
   3603     }
   3604     z.low &= ~ roundBitsMask;
   3605     if ( z.low == 0 ) {
   3606         ++z.high;
   3607         z.low = LIT64( 0x8000000000000000 );
   3608     }
   3609     z.high <<= X80SHIFT;
   3610     if ( z.low != a.low ) set_float_exception_inexact_flag();
   3611     return z;
   3612 
   3613 }
   3614 
   3615 /*
   3616 -------------------------------------------------------------------------------
   3617 Returns the result of adding the absolute values of the extended double-
   3618 precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
   3619 negated before being returned.  `zSign' is ignored if the result is a NaN.
   3620 The addition is performed according to the IEC/IEEE Standard for Binary
   3621 Floating-Point Arithmetic.
   3622 -------------------------------------------------------------------------------
   3623 */
   3624 static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
   3625 {
   3626     int32 aExp, bExp, zExp;
   3627     bits64 aSig, bSig, zSig0, zSig1;
   3628     int32 expDiff;
   3629 
   3630     aSig = extractFloatx80Frac( a );
   3631     aExp = extractFloatx80Exp( a );
   3632     bSig = extractFloatx80Frac( b );
   3633     bExp = extractFloatx80Exp( b );
   3634     expDiff = aExp - bExp;
   3635     if ( 0 < expDiff ) {
   3636         if ( aExp == 0x7FFF ) {
   3637             if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3638             return a;
   3639         }
   3640         if ( bExp == 0 ) --expDiff;
   3641         shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
   3642         zExp = aExp;
   3643     }
   3644     else if ( expDiff < 0 ) {
   3645         if ( bExp == 0x7FFF ) {
   3646             if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3647             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3648         }
   3649         if ( aExp == 0 ) ++expDiff;
   3650         shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
   3651         zExp = bExp;
   3652     }
   3653     else {
   3654         if ( aExp == 0x7FFF ) {
   3655             if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
   3656                 return propagateFloatx80NaN( a, b );
   3657             }
   3658             return a;
   3659         }
   3660         zSig1 = 0;
   3661         zSig0 = aSig + bSig;
   3662         if ( aExp == 0 ) {
   3663             normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
   3664             goto roundAndPack;
   3665         }
   3666         zExp = aExp;
   3667         goto shiftRight1;
   3668     }
   3669     zSig0 = aSig + bSig;
   3670     if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
   3671  shiftRight1:
   3672     shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
   3673     zSig0 |= LIT64( 0x8000000000000000 );
   3674     ++zExp;
   3675  roundAndPack:
   3676     return
   3677         roundAndPackFloatx80(
   3678             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
   3679 
   3680 }
   3681 
   3682 /*
   3683 -------------------------------------------------------------------------------
   3684 Returns the result of subtracting the absolute values of the extended
   3685 double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
   3686 difference is negated before being returned.  `zSign' is ignored if the
   3687 result is a NaN.  The subtraction is performed according to the IEC/IEEE
   3688 Standard for Binary Floating-Point Arithmetic.
   3689 -------------------------------------------------------------------------------
   3690 */
   3691 static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
   3692 {
   3693     int32 aExp, bExp, zExp;
   3694     bits64 aSig, bSig, zSig0, zSig1;
   3695     int32 expDiff;
   3696     floatx80 z;
   3697 
   3698     aSig = extractFloatx80Frac( a );
   3699     aExp = extractFloatx80Exp( a );
   3700     bSig = extractFloatx80Frac( b );
   3701     bExp = extractFloatx80Exp( b );
   3702     expDiff = aExp - bExp;
   3703     if ( 0 < expDiff ) goto aExpBigger;
   3704     if ( expDiff < 0 ) goto bExpBigger;
   3705     if ( aExp == 0x7FFF ) {
   3706         if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
   3707             return propagateFloatx80NaN( a, b );
   3708         }
   3709         float_raise( float_flag_invalid );
   3710         z.low = floatx80_default_nan_low;
   3711         z.high = floatx80_default_nan_high<<X80SHIFT;
   3712         return z;
   3713     }
   3714     if ( aExp == 0 ) {
   3715         aExp = 1;
   3716         bExp = 1;
   3717     }
   3718     zSig1 = 0;
   3719     if ( bSig < aSig ) goto aBigger;
   3720     if ( aSig < bSig ) goto bBigger;
   3721     return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
   3722  bExpBigger:
   3723     if ( bExp == 0x7FFF ) {
   3724         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3725         return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3726     }
   3727     if ( aExp == 0 ) ++expDiff;
   3728     shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
   3729  bBigger:
   3730     sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
   3731     zExp = bExp;
   3732     zSign ^= 1;
   3733     goto normalizeRoundAndPack;
   3734  aExpBigger:
   3735     if ( aExp == 0x7FFF ) {
   3736         if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3737         return a;
   3738     }
   3739     if ( bExp == 0 ) --expDiff;
   3740     shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
   3741  aBigger:
   3742     sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
   3743     zExp = aExp;
   3744  normalizeRoundAndPack:
   3745     return
   3746         normalizeRoundAndPackFloatx80(
   3747             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
   3748 
   3749 }
   3750 
   3751 /*
   3752 -------------------------------------------------------------------------------
   3753 Returns the result of adding the extended double-precision floating-point
   3754 values `a' and `b'.  The operation is performed according to the IEC/IEEE
   3755 Standard for Binary Floating-Point Arithmetic.
   3756 -------------------------------------------------------------------------------
   3757 */
   3758 floatx80 floatx80_add( floatx80 a, floatx80 b )
   3759 {
   3760     flag aSign, bSign;
   3761 
   3762     aSign = extractFloatx80Sign( a );
   3763     bSign = extractFloatx80Sign( b );
   3764     if ( aSign == bSign ) {
   3765         return addFloatx80Sigs( a, b, aSign );
   3766     }
   3767     else {
   3768         return subFloatx80Sigs( a, b, aSign );
   3769     }
   3770 
   3771 }
   3772 
   3773 /*
   3774 -------------------------------------------------------------------------------
   3775 Returns the result of subtracting the extended double-precision floating-
   3776 point values `a' and `b'.  The operation is performed according to the
   3777 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3778 -------------------------------------------------------------------------------
   3779 */
   3780 floatx80 floatx80_sub( floatx80 a, floatx80 b )
   3781 {
   3782     flag aSign, bSign;
   3783 
   3784     aSign = extractFloatx80Sign( a );
   3785     bSign = extractFloatx80Sign( b );
   3786     if ( aSign == bSign ) {
   3787         return subFloatx80Sigs( a, b, aSign );
   3788     }
   3789     else {
   3790         return addFloatx80Sigs( a, b, aSign );
   3791     }
   3792 
   3793 }
   3794 
   3795 /*
   3796 -------------------------------------------------------------------------------
   3797 Returns the result of multiplying the extended double-precision floating-
   3798 point values `a' and `b'.  The operation is performed according to the
   3799 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3800 -------------------------------------------------------------------------------
   3801 */
   3802 floatx80 floatx80_mul( floatx80 a, floatx80 b )
   3803 {
   3804     flag aSign, bSign, zSign;
   3805     int32 aExp, bExp, zExp;
   3806     bits64 aSig, bSig, zSig0, zSig1;
   3807     floatx80 z;
   3808 
   3809     aSig = extractFloatx80Frac( a );
   3810     aExp = extractFloatx80Exp( a );
   3811     aSign = extractFloatx80Sign( a );
   3812     bSig = extractFloatx80Frac( b );
   3813     bExp = extractFloatx80Exp( b );
   3814     bSign = extractFloatx80Sign( b );
   3815     zSign = aSign ^ bSign;
   3816     if ( aExp == 0x7FFF ) {
   3817         if (    (bits64) ( aSig<<1 )
   3818              || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
   3819             return propagateFloatx80NaN( a, b );
   3820         }
   3821         if ( ( bExp | bSig ) == 0 ) goto invalid;
   3822         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3823     }
   3824     if ( bExp == 0x7FFF ) {
   3825         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3826         if ( ( aExp | aSig ) == 0 ) {
   3827  invalid:
   3828             float_raise( float_flag_invalid );
   3829             z.low = floatx80_default_nan_low;
   3830             z.high = floatx80_default_nan_high<<X80SHIFT;
   3831             return z;
   3832         }
   3833         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3834     }
   3835     if ( aExp == 0 ) {
   3836         if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
   3837         normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
   3838     }
   3839     if ( bExp == 0 ) {
   3840         if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
   3841         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
   3842     }
   3843     zExp = aExp + bExp - 0x3FFE;
   3844     mul64To128( aSig, bSig, &zSig0, &zSig1 );
   3845     if ( 0 < (sbits64) zSig0 ) {
   3846         shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
   3847         --zExp;
   3848     }
   3849     return
   3850         roundAndPackFloatx80(
   3851             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
   3852 
   3853 }
   3854 
   3855 /*
   3856 -------------------------------------------------------------------------------
   3857 Returns the result of dividing the extended double-precision floating-point
   3858 value `a' by the corresponding value `b'.  The operation is performed
   3859 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3860 -------------------------------------------------------------------------------
   3861 */
   3862 floatx80 floatx80_div( floatx80 a, floatx80 b )
   3863 {
   3864     flag aSign, bSign, zSign;
   3865     int32 aExp, bExp, zExp;
   3866     bits64 aSig, bSig, zSig0, zSig1;
   3867     bits64 rem0, rem1, rem2, term0, term1, term2;
   3868     floatx80 z;
   3869 
   3870     aSig = extractFloatx80Frac( a );
   3871     aExp = extractFloatx80Exp( a );
   3872     aSign = extractFloatx80Sign( a );
   3873     bSig = extractFloatx80Frac( b );
   3874     bExp = extractFloatx80Exp( b );
   3875     bSign = extractFloatx80Sign( b );
   3876     zSign = aSign ^ bSign;
   3877     if ( aExp == 0x7FFF ) {
   3878         if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3879         if ( bExp == 0x7FFF ) {
   3880             if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3881             goto invalid;
   3882         }
   3883         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3884     }
   3885     if ( bExp == 0x7FFF ) {
   3886         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3887         return packFloatx80( zSign, 0, 0 );
   3888     }
   3889     if ( bExp == 0 ) {
   3890         if ( bSig == 0 ) {
   3891             if ( ( aExp | aSig ) == 0 ) {
   3892  invalid:
   3893                 float_raise( float_flag_invalid );
   3894                 z.low = floatx80_default_nan_low;
   3895                 z.high = floatx80_default_nan_high<<X80SHIFT;
   3896                 return z;
   3897             }
   3898             float_raise( float_flag_divbyzero );
   3899             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3900         }
   3901         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
   3902     }
   3903     if ( aExp == 0 ) {
   3904         if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
   3905         normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
   3906     }
   3907     zExp = aExp - bExp + 0x3FFE;
   3908     rem1 = 0;
   3909     if ( bSig <= aSig ) {
   3910         shift128Right( aSig, 0, 1, &aSig, &rem1 );
   3911         ++zExp;
   3912     }
   3913     zSig0 = estimateDiv128To64( aSig, rem1, bSig );
   3914     mul64To128( bSig, zSig0, &term0, &term1 );
   3915     sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
   3916     while ( (sbits64) rem0 < 0 ) {
   3917         --zSig0;
   3918         add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
   3919     }
   3920     zSig1 = estimateDiv128To64( rem1, 0, bSig );
   3921     if ( (bits64) ( zSig1<<1 ) <= 8 ) {
   3922         mul64To128( bSig, zSig1, &term1, &term2 );
   3923         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
   3924         while ( (sbits64) rem1 < 0 ) {
   3925             --zSig1;
   3926             add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
   3927         }
   3928         zSig1 |= ( ( rem1 | rem2 ) != 0 );
   3929     }
   3930     return
   3931         roundAndPackFloatx80(
   3932             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
   3933 
   3934 }
   3935 
   3936 /*
   3937 -------------------------------------------------------------------------------
   3938 Returns the remainder of the extended double-precision floating-point value
   3939 `a' with respect to the corresponding value `b'.  The operation is performed
   3940 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3941 -------------------------------------------------------------------------------
   3942 */
   3943 floatx80 floatx80_rem( floatx80 a, floatx80 b )
   3944 {
   3945     flag aSign, zSign;
   3946     int32 aExp, bExp, expDiff;
   3947     bits64 aSig0, aSig1, bSig;
   3948     bits64 q, term0, term1, alternateASig0, alternateASig1;
   3949     floatx80 z;
   3950 
   3951     aSig0 = extractFloatx80Frac( a );
   3952     aExp = extractFloatx80Exp( a );
   3953     aSign = extractFloatx80Sign( a );
   3954     bSig = extractFloatx80Frac( b );
   3955     bExp = extractFloatx80Exp( b );
   3956 
   3957     if ( aExp == 0x7FFF ) {
   3958         if (    (bits64) ( aSig0<<1 )
   3959              || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
   3960             return propagateFloatx80NaN( a, b );
   3961         }
   3962         goto invalid;
   3963     }
   3964     if ( bExp == 0x7FFF ) {
   3965         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3966         return a;
   3967     }
   3968     if ( bExp == 0 ) {
   3969         if ( bSig == 0 ) {
   3970  invalid:
   3971             float_raise( float_flag_invalid );
   3972             z.low = floatx80_default_nan_low;
   3973             z.high = floatx80_default_nan_high<<X80SHIFT;
   3974             return z;
   3975         }
   3976         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
   3977     }
   3978     if ( aExp == 0 ) {
   3979         if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
   3980         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
   3981     }
   3982     bSig |= LIT64( 0x8000000000000000 );
   3983     zSign = aSign;
   3984     expDiff = aExp - bExp;
   3985     aSig1 = 0;
   3986     if ( expDiff < 0 ) {
   3987         if ( expDiff < -1 ) return a;
   3988         shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
   3989         expDiff = 0;
   3990     }
   3991     q = ( bSig <= aSig0 );
   3992     if ( q ) aSig0 -= bSig;
   3993     expDiff -= 64;
   3994     while ( 0 < expDiff ) {
   3995         q = estimateDiv128To64( aSig0, aSig1, bSig );
   3996         q = ( 2 < q ) ? q - 2 : 0;
   3997         mul64To128( bSig, q, &term0, &term1 );
   3998         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
   3999         shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
   4000         expDiff -= 62;
   4001     }
   4002     expDiff += 64;
   4003     if ( 0 < expDiff ) {
   4004         q = estimateDiv128To64( aSig0, aSig1, bSig );
   4005         q = ( 2 < q ) ? q - 2 : 0;
   4006         q >>= 64 - expDiff;
   4007         mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
   4008         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
   4009         shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
   4010         while ( le128( term0, term1, aSig0, aSig1 ) ) {
   4011             ++q;
   4012             sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
   4013         }
   4014     }
   4015     else {
   4016         term1 = 0;
   4017         term0 = bSig;
   4018     }
   4019     sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
   4020     if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
   4021          || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
   4022               && ( q & 1 ) )
   4023        ) {
   4024         aSig0 = alternateASig0;
   4025         aSig1 = alternateASig1;
   4026         zSign = ! zSign;
   4027     }
   4028     return
   4029         normalizeRoundAndPackFloatx80(
   4030             80, zSign, bExp + expDiff, aSig0, aSig1 );
   4031 
   4032 }
   4033 
   4034 /*
   4035 -------------------------------------------------------------------------------
   4036 Returns the square root of the extended double-precision floating-point
   4037 value `a'.  The operation is performed according to the IEC/IEEE Standard
   4038 for Binary Floating-Point Arithmetic.
   4039 -------------------------------------------------------------------------------
   4040 */
   4041 floatx80 floatx80_sqrt( floatx80 a )
   4042 {
   4043     flag aSign;
   4044     int32 aExp, zExp;
   4045     bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
   4046     bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
   4047     floatx80 z;
   4048 
   4049     aSig0 = extractFloatx80Frac( a );
   4050     aExp = extractFloatx80Exp( a );
   4051     aSign = extractFloatx80Sign( a );
   4052     if ( aExp == 0x7FFF ) {
   4053         if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
   4054         if ( ! aSign ) return a;
   4055         goto invalid;
   4056     }
   4057     if ( aSign ) {
   4058         if ( ( aExp | aSig0 ) == 0 ) return a;
   4059  invalid:
   4060         float_raise( float_flag_invalid );
   4061         z.low = floatx80_default_nan_low;
   4062         z.high = floatx80_default_nan_high<<X80SHIFT;
   4063         return z;
   4064     }
   4065     if ( aExp == 0 ) {
   4066         if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
   4067         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
   4068     }
   4069     zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
   4070     zSig0 = estimateSqrt32( aExp, aSig0>>32 );
   4071     shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
   4072     zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
   4073     doubleZSig0 = zSig0<<1;
   4074     mul64To128( zSig0, zSig0, &term0, &term1 );
   4075     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
   4076     while ( (sbits64) rem0 < 0 ) {
   4077         --zSig0;
   4078         doubleZSig0 -= 2;
   4079         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
   4080     }
   4081     zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
   4082     if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
   4083         if ( zSig1 == 0 ) zSig1 = 1;
   4084         mul64To128( doubleZSig0, zSig1, &term1, &term2 );
   4085         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
   4086         mul64To128( zSig1, zSig1, &term2, &term3 );
   4087         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
   4088         while ( (sbits64) rem1 < 0 ) {
   4089             --zSig1;
   4090             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
   4091             term3 |= 1;
   4092             term2 |= doubleZSig0;
   4093             add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
   4094         }
   4095         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
   4096     }
   4097     shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
   4098     zSig0 |= doubleZSig0;
   4099     return
   4100         roundAndPackFloatx80(
   4101             floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
   4102 
   4103 }
   4104 
   4105 /*
   4106 -------------------------------------------------------------------------------
   4107 Returns 1 if the extended double-precision floating-point value `a' is
   4108 equal to the corresponding value `b', and 0 otherwise.  The comparison is
   4109 performed according to the IEC/IEEE Standard for Binary Floating-Point
   4110 Arithmetic.
   4111 -------------------------------------------------------------------------------
   4112 */
   4113 flag floatx80_eq( floatx80 a, floatx80 b )
   4114 {
   4115 
   4116     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4117               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4118          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4119               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4120        ) {
   4121         if (    floatx80_is_signaling_nan( a )
   4122              || floatx80_is_signaling_nan( b ) ) {
   4123             float_raise( float_flag_invalid );
   4124         }
   4125         return 0;
   4126     }
   4127     return
   4128            ( a.low == b.low )
   4129         && (    ( a.high == b.high )
   4130              || (    ( a.low == 0 )
   4131                   && ( (bits16) ( ((bits32)( a.high | b.high )>>X80SHIFT)<<1 )
   4132 	   == 0 ) ) );
   4133 
   4134 }
   4135 
   4136 /*
   4137 -------------------------------------------------------------------------------
   4138 Returns 1 if the extended double-precision floating-point value `a' is
   4139 less than or equal to the corresponding value `b', and 0 otherwise.  The
   4140 comparison is performed according to the IEC/IEEE Standard for Binary
   4141 Floating-Point Arithmetic.
   4142 -------------------------------------------------------------------------------
   4143 */
   4144 flag floatx80_le( floatx80 a, floatx80 b )
   4145 {
   4146     flag aSign, bSign;
   4147 
   4148     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4149               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4150          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4151               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4152        ) {
   4153         float_raise( float_flag_invalid );
   4154         return 0;
   4155     }
   4156     aSign = extractFloatx80Sign( a );
   4157     bSign = extractFloatx80Sign( b );
   4158     if ( aSign != bSign ) {
   4159         return
   4160                aSign
   4161             || (    ( ( (bits16) ( ((bits32)( a.high | b.high )>>X80SHIFT)
   4162 	         <<1 ) ) | a.low | b.low ) == 0 );
   4163     }
   4164     return
   4165           aSign ? le128( b.high>>X80SHIFT, b.low, a.high>>X80SHIFT, a.low )
   4166         : le128( a.high>>X80SHIFT, a.low, b.high>>X80SHIFT, b.low );
   4167 
   4168 }
   4169 
   4170 /*
   4171 -------------------------------------------------------------------------------
   4172 Returns 1 if the extended double-precision floating-point value `a' is
   4173 less than the corresponding value `b', and 0 otherwise.  The comparison
   4174 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4175 Arithmetic.
   4176 -------------------------------------------------------------------------------
   4177 */
   4178 flag floatx80_lt( floatx80 a, floatx80 b )
   4179 {
   4180     flag aSign, bSign;
   4181 
   4182     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4183               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4184          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4185               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4186        ) {
   4187         float_raise( float_flag_invalid );
   4188         return 0;
   4189     }
   4190     aSign = extractFloatx80Sign( a );
   4191     bSign = extractFloatx80Sign( b );
   4192     if ( aSign != bSign ) {
   4193         return
   4194                aSign
   4195             && (    ( ( (bits16) ( ((bits32)( a.high | b.high )>>X80SHIFT)
   4196 		 <<1 ) ) | a.low | b.low ) != 0 );
   4197     }
   4198     return
   4199           aSign ? lt128( b.high>>X80SHIFT, b.low, a.high>>X80SHIFT, a.low )
   4200         : lt128( a.high>>X80SHIFT, a.low, b.high>>X80SHIFT, b.low );
   4201 
   4202 }
   4203 
   4204 /*
   4205 -------------------------------------------------------------------------------
   4206 Returns 1 if the extended double-precision floating-point value `a' is equal
   4207 to the corresponding value `b', and 0 otherwise.  The invalid exception is
   4208 raised if either operand is a NaN.  Otherwise, the comparison is performed
   4209 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   4210 -------------------------------------------------------------------------------
   4211 */
   4212 flag floatx80_eq_signaling( floatx80 a, floatx80 b )
   4213 {
   4214 
   4215     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4216               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4217          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4218               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4219        ) {
   4220         float_raise( float_flag_invalid );
   4221         return 0;
   4222     }
   4223     return
   4224            ( a.low == b.low )
   4225         && (    ( (a.high>>X80SHIFT) == (b.high>>X80SHIFT) )
   4226              || (    ( a.low == 0 )
   4227                   && ( (bits16) ( ((bits32)( a.high | b.high )>>X80SHIFT)<<1 )
   4228 	    == 0 ) ) );
   4229 
   4230 }
   4231 
   4232 /*
   4233 -------------------------------------------------------------------------------
   4234 Returns 1 if the extended double-precision floating-point value `a' is less
   4235 than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
   4236 do not cause an exception.  Otherwise, the comparison is performed according
   4237 to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   4238 -------------------------------------------------------------------------------
   4239 */
   4240 flag floatx80_le_quiet( floatx80 a, floatx80 b )
   4241 {
   4242     flag aSign, bSign;
   4243 
   4244     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4245               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4246          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4247               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4248        ) {
   4249         if (    floatx80_is_signaling_nan( a )
   4250              || floatx80_is_signaling_nan( b ) ) {
   4251             float_raise( float_flag_invalid );
   4252         }
   4253         return 0;
   4254     }
   4255     aSign = extractFloatx80Sign( a );
   4256     bSign = extractFloatx80Sign( b );
   4257     if ( aSign != bSign ) {
   4258         return
   4259                aSign
   4260             || (    ( ( (bits16) ( ((bits32)( a.high | b.high )>>X80SHIFT)<<1
   4261 		 ) ) | a.low | b.low ) == 0 );
   4262     }
   4263     return
   4264           aSign ? le128( b.high>>X80SHIFT, b.low, a.high>>X80SHIFT, a.low )
   4265         : le128( a.high>>X80SHIFT, a.low, b.high>>X80SHIFT, b.low );
   4266 
   4267 }
   4268 
   4269 /*
   4270 -------------------------------------------------------------------------------
   4271 Returns 1 if the extended double-precision floating-point value `a' is less
   4272 than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
   4273 an exception.  Otherwise, the comparison is performed according to the
   4274 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   4275 -------------------------------------------------------------------------------
   4276 */
   4277 flag floatx80_lt_quiet( floatx80 a, floatx80 b )
   4278 {
   4279     flag aSign, bSign;
   4280 
   4281     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4282               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4283          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4284               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4285        ) {
   4286         if (    floatx80_is_signaling_nan( a )
   4287              || floatx80_is_signaling_nan( b ) ) {
   4288             float_raise( float_flag_invalid );
   4289         }
   4290         return 0;
   4291     }
   4292     aSign = extractFloatx80Sign( a );
   4293     bSign = extractFloatx80Sign( b );
   4294     if ( aSign != bSign ) {
   4295         return
   4296                aSign
   4297             && (    ( ( (bits16) ( ((bits32)( a.high | b.high )>>X80SHIFT)<<1 )
   4298 		 ) | a.low | b.low ) != 0 );
   4299     }
   4300     return
   4301           aSign ? lt128( b.high>>X80SHIFT, b.low, a.high>>X80SHIFT, a.low )
   4302         : lt128( a.high>>X80SHIFT, a.low, b.high>>X80SHIFT, b.low );
   4303 
   4304 }
   4305 
   4306 #endif
   4307 
   4308 #ifdef FLOAT128
   4309 
   4310 /*
   4311 -------------------------------------------------------------------------------
   4312 Returns the result of converting the quadruple-precision floating-point
   4313 value `a' to the 32-bit two's complement integer format.  The conversion
   4314 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4315 Arithmetic---which means in particular that the conversion is rounded
   4316 according to the current rounding mode.  If `a' is a NaN, the largest
   4317 positive integer is returned.  Otherwise, if the conversion overflows, the
   4318 largest integer with the same sign as `a' is returned.
   4319 -------------------------------------------------------------------------------
   4320 */
   4321 int32 float128_to_int32( float128 a )
   4322 {
   4323     flag aSign;
   4324     int32 aExp, shiftCount;
   4325     bits64 aSig0, aSig1;
   4326 
   4327     aSig1 = extractFloat128Frac1( a );
   4328     aSig0 = extractFloat128Frac0( a );
   4329     aExp = extractFloat128Exp( a );
   4330     aSign = extractFloat128Sign( a );
   4331     if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
   4332     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
   4333     aSig0 |= ( aSig1 != 0 );
   4334     shiftCount = 0x4028 - aExp;
   4335     if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
   4336     return roundAndPackInt32( aSign, aSig0 );
   4337 
   4338 }
   4339 
   4340 /*
   4341 -------------------------------------------------------------------------------
   4342 Returns the result of converting the quadruple-precision floating-point
   4343 value `a' to the 32-bit two's complement integer format.  The conversion
   4344 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4345 Arithmetic, except that the conversion is always rounded toward zero.  If
   4346 `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
   4347 conversion overflows, the largest integer with the same sign as `a' is
   4348 returned.
   4349 -------------------------------------------------------------------------------
   4350 */
   4351 int32 float128_to_int32_round_to_zero( float128 a )
   4352 {
   4353     flag aSign;
   4354     int32 aExp, shiftCount;
   4355     bits64 aSig0, aSig1, savedASig;
   4356     int32 z;
   4357 
   4358     aSig1 = extractFloat128Frac1( a );
   4359     aSig0 = extractFloat128Frac0( a );
   4360     aExp = extractFloat128Exp( a );
   4361     aSign = extractFloat128Sign( a );
   4362     aSig0 |= ( aSig1 != 0 );
   4363     if ( 0x401E < aExp ) {
   4364         if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
   4365         goto invalid;
   4366     }
   4367     else if ( aExp < 0x3FFF ) {
   4368         if ( aExp || aSig0 ) set_float_exception_inexact_flag();
   4369         return 0;
   4370     }
   4371     aSig0 |= LIT64( 0x0001000000000000 );
   4372     shiftCount = 0x402F - aExp;
   4373     savedASig = aSig0;
   4374     aSig0 >>= shiftCount;
   4375     z = (int32)aSig0;
   4376     if ( aSign ) z = - z;
   4377     if ( ( z < 0 ) ^ aSign ) {
   4378  invalid:
   4379         float_raise( float_flag_invalid );
   4380         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
   4381     }
   4382     if ( ( aSig0<<shiftCount ) != savedASig ) {
   4383         set_float_exception_inexact_flag();
   4384     }
   4385     return z;
   4386 
   4387 }
   4388 
   4389 /*
   4390 -------------------------------------------------------------------------------
   4391 Returns the result of converting the quadruple-precision floating-point
   4392 value `a' to the 64-bit two's complement integer format.  The conversion
   4393 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4394 Arithmetic---which means in particular that the conversion is rounded
   4395 according to the current rounding mode.  If `a' is a NaN, the largest
   4396 positive integer is returned.  Otherwise, if the conversion overflows, the
   4397 largest integer with the same sign as `a' is returned.
   4398 -------------------------------------------------------------------------------
   4399 */
   4400 int64 float128_to_int64( float128 a )
   4401 {
   4402     flag aSign;
   4403     int32 aExp, shiftCount;
   4404     bits64 aSig0, aSig1;
   4405 
   4406     aSig1 = extractFloat128Frac1( a );
   4407     aSig0 = extractFloat128Frac0( a );
   4408     aExp = extractFloat128Exp( a );
   4409     aSign = extractFloat128Sign( a );
   4410     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
   4411     shiftCount = 0x402F - aExp;
   4412     if ( shiftCount <= 0 ) {
   4413         if ( 0x403E < aExp ) {
   4414             float_raise( float_flag_invalid );
   4415             if (    ! aSign
   4416                  || (    ( aExp == 0x7FFF )
   4417                       && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
   4418                     )
   4419                ) {
   4420                 return LIT64( 0x7FFFFFFFFFFFFFFF );
   4421             }
   4422             return (sbits64) LIT64( 0x8000000000000000 );
   4423         }
   4424         shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
   4425     }
   4426     else {
   4427         shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
   4428     }
   4429     return roundAndPackInt64( aSign, aSig0, aSig1 );
   4430 
   4431 }
   4432 
   4433 /*
   4434 -------------------------------------------------------------------------------
   4435 Returns the result of converting the quadruple-precision floating-point
   4436 value `a' to the 64-bit two's complement integer format.  The conversion
   4437 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4438 Arithmetic, except that the conversion is always rounded toward zero.
   4439 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
   4440 the conversion overflows, the largest integer with the same sign as `a' is
   4441 returned.
   4442 -------------------------------------------------------------------------------
   4443 */
   4444 int64 float128_to_int64_round_to_zero( float128 a )
   4445 {
   4446     flag aSign;
   4447     int32 aExp, shiftCount;
   4448     bits64 aSig0, aSig1;
   4449     int64 z;
   4450 
   4451     aSig1 = extractFloat128Frac1( a );
   4452     aSig0 = extractFloat128Frac0( a );
   4453     aExp = extractFloat128Exp( a );
   4454     aSign = extractFloat128Sign( a );
   4455     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
   4456     shiftCount = aExp - 0x402F;
   4457     if ( 0 < shiftCount ) {
   4458         if ( 0x403E <= aExp ) {
   4459             aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
   4460             if (    ( a.high == LIT64( 0xC03E000000000000 ) )
   4461                  && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
   4462                 if ( aSig1 ) set_float_exception_inexact_flag();
   4463             }
   4464             else {
   4465                 float_raise( float_flag_invalid );
   4466                 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
   4467                     return LIT64( 0x7FFFFFFFFFFFFFFF );
   4468                 }
   4469             }
   4470             return (sbits64) LIT64( 0x8000000000000000 );
   4471         }
   4472         z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
   4473         if ( (bits64) ( aSig1<<shiftCount ) ) {
   4474             set_float_exception_inexact_flag();
   4475         }
   4476     }
   4477     else {
   4478         if ( aExp < 0x3FFF ) {
   4479             if ( aExp | aSig0 | aSig1 ) {
   4480                 set_float_exception_inexact_flag();
   4481             }
   4482             return 0;
   4483         }
   4484         z = aSig0>>( - shiftCount );
   4485         if (    aSig1
   4486              || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
   4487             set_float_exception_inexact_flag();
   4488         }
   4489     }
   4490     if ( aSign ) z = - z;
   4491     return z;
   4492 
   4493 }
   4494 
   4495 #if (defined(SOFTFLOATSPARC64_FOR_GCC) || defined(SOFTFLOAT_FOR_GCC)) \
   4496     && defined(SOFTFLOAT_NEED_FIXUNS)
   4497 /*
   4498  * just like above - but do not care for overflow of signed results
   4499  */
   4500 uint64 float128_to_uint64_round_to_zero( float128 a )
   4501 {
   4502     flag aSign;
   4503     int32 aExp, shiftCount;
   4504     bits64 aSig0, aSig1;
   4505     uint64 z;
   4506 
   4507     aSig1 = extractFloat128Frac1( a );
   4508     aSig0 = extractFloat128Frac0( a );
   4509     aExp = extractFloat128Exp( a );
   4510     aSign = extractFloat128Sign( a );
   4511     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
   4512     shiftCount = aExp - 0x402F;
   4513     if ( 0 < shiftCount ) {
   4514         if ( 0x403F <= aExp ) {
   4515             aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
   4516             if (    ( a.high == LIT64( 0xC03E000000000000 ) )
   4517                  && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
   4518                 if ( aSig1 ) set_float_exception_inexact_flag();
   4519             }
   4520             else {
   4521                 float_raise( float_flag_invalid );
   4522             }
   4523             return LIT64( 0xFFFFFFFFFFFFFFFF );
   4524         }
   4525         z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
   4526         if ( (bits64) ( aSig1<<shiftCount ) ) {
   4527             set_float_exception_inexact_flag();
   4528         }
   4529     }
   4530     else {
   4531         if ( aExp < 0x3FFF ) {
   4532             if ( aExp | aSig0 | aSig1 ) {
   4533                 set_float_exception_inexact_flag();
   4534             }
   4535             return 0;
   4536         }
   4537         z = aSig0>>( - shiftCount );
   4538         if (aSig1 || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
   4539             set_float_exception_inexact_flag();
   4540         }
   4541     }
   4542     if ( aSign ) z = - z;
   4543     return z;
   4544 
   4545 }
   4546 #endif /* (SOFTFLOATSPARC64_FOR_GCC || SOFTFLOAT_FOR_GCC) && SOFTFLOAT_NEED_FIXUNS */
   4547 
   4548 /*
   4549 -------------------------------------------------------------------------------
   4550 Returns the result of converting the quadruple-precision floating-point
   4551 value `a' to the single-precision floating-point format.  The conversion
   4552 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4553 Arithmetic.
   4554 -------------------------------------------------------------------------------
   4555 */
   4556 float32 float128_to_float32( float128 a )
   4557 {
   4558     flag aSign;
   4559     int32 aExp;
   4560     bits64 aSig0, aSig1;
   4561     bits32 zSig;
   4562 
   4563     aSig1 = extractFloat128Frac1( a );
   4564     aSig0 = extractFloat128Frac0( a );
   4565     aExp = extractFloat128Exp( a );
   4566     aSign = extractFloat128Sign( a );
   4567     if ( aExp == 0x7FFF ) {
   4568         if ( aSig0 | aSig1 ) {
   4569             return commonNaNToFloat32( float128ToCommonNaN( a ) );
   4570         }
   4571         return packFloat32( aSign, 0xFF, 0 );
   4572     }
   4573     aSig0 |= ( aSig1 != 0 );
   4574     shift64RightJamming( aSig0, 18, &aSig0 );
   4575     zSig = (bits32)aSig0;
   4576     if ( aExp || zSig ) {
   4577         zSig |= 0x40000000;
   4578         aExp -= 0x3F81;
   4579     }
   4580     return roundAndPackFloat32( aSign, aExp, zSig );
   4581 
   4582 }
   4583 
   4584 /*
   4585 -------------------------------------------------------------------------------
   4586 Returns the result of converting the quadruple-precision floating-point
   4587 value `a' to the double-precision floating-point format.  The conversion
   4588 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4589 Arithmetic.
   4590 -------------------------------------------------------------------------------
   4591 */
   4592 float64 float128_to_float64( float128 a )
   4593 {
   4594     flag aSign;
   4595     int32 aExp;
   4596     bits64 aSig0, aSig1;
   4597 
   4598     aSig1 = extractFloat128Frac1( a );
   4599     aSig0 = extractFloat128Frac0( a );
   4600     aExp = extractFloat128Exp( a );
   4601     aSign = extractFloat128Sign( a );
   4602     if ( aExp == 0x7FFF ) {
   4603         if ( aSig0 | aSig1 ) {
   4604             return commonNaNToFloat64( float128ToCommonNaN( a ) );
   4605         }
   4606         return packFloat64( aSign, 0x7FF, 0 );
   4607     }
   4608     shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
   4609     aSig0 |= ( aSig1 != 0 );
   4610     if ( aExp || aSig0 ) {
   4611         aSig0 |= LIT64( 0x4000000000000000 );
   4612         aExp -= 0x3C01;
   4613     }
   4614     return roundAndPackFloat64( aSign, aExp, aSig0 );
   4615 
   4616 }
   4617 
   4618 #ifdef FLOATX80
   4619 
   4620 /*
   4621 -------------------------------------------------------------------------------
   4622 Returns the result of converting the quadruple-precision floating-point
   4623 value `a' to the extended double-precision floating-point format.  The
   4624 conversion is performed according to the IEC/IEEE Standard for Binary
   4625 Floating-Point Arithmetic.
   4626 -------------------------------------------------------------------------------
   4627 */
   4628 floatx80 float128_to_floatx80( float128 a )
   4629 {
   4630     flag aSign;
   4631     int32 aExp;
   4632     bits64 aSig0, aSig1;
   4633 
   4634     aSig1 = extractFloat128Frac1( a );
   4635     aSig0 = extractFloat128Frac0( a );
   4636     aExp = extractFloat128Exp( a );
   4637     aSign = extractFloat128Sign( a );
   4638     if ( aExp == 0x7FFF ) {
   4639         if ( aSig0 | aSig1 ) {
   4640             return commonNaNToFloatx80( float128ToCommonNaN( a ) );
   4641         }
   4642         return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   4643     }
   4644     if ( aExp == 0 ) {
   4645         if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
   4646         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   4647     }
   4648     else {
   4649         aSig0 |= LIT64( 0x0001000000000000 );
   4650     }
   4651     shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
   4652     return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
   4653 
   4654 }
   4655 
   4656 #endif
   4657 
   4658 /*
   4659 -------------------------------------------------------------------------------
   4660 Rounds the quadruple-precision floating-point value `a' to an integer, and
   4661 returns the result as a quadruple-precision floating-point value.  The
   4662 operation is performed according to the IEC/IEEE Standard for Binary
   4663 Floating-Point Arithmetic.
   4664 -------------------------------------------------------------------------------
   4665 */
   4666 float128 float128_round_to_int( float128 a )
   4667 {
   4668     flag aSign;
   4669     int32 aExp;
   4670     bits64 lastBitMask, roundBitsMask;
   4671     int8 roundingMode;
   4672     float128 z;
   4673 
   4674     aExp = extractFloat128Exp( a );
   4675     if ( 0x402F <= aExp ) {
   4676         if ( 0x406F <= aExp ) {
   4677             if (    ( aExp == 0x7FFF )
   4678                  && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
   4679                ) {
   4680                 return propagateFloat128NaN( a, a );
   4681             }
   4682             return a;
   4683         }
   4684         lastBitMask = 1;
   4685         lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
   4686         roundBitsMask = lastBitMask - 1;
   4687         z = a;
   4688         roundingMode = float_rounding_mode;
   4689         if ( roundingMode == float_round_nearest_even ) {
   4690             if ( lastBitMask ) {
   4691                 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
   4692                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
   4693             }
   4694             else {
   4695                 if ( (sbits64) z.low < 0 ) {
   4696                     ++z.high;
   4697                     if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
   4698                 }
   4699             }
   4700         }
   4701         else if ( roundingMode != float_round_to_zero ) {
   4702             if (   extractFloat128Sign( z )
   4703                  ^ ( roundingMode == float_round_up ) ) {
   4704                 add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
   4705             }
   4706         }
   4707         z.low &= ~ roundBitsMask;
   4708     }
   4709     else {
   4710         if ( aExp < 0x3FFF ) {
   4711             if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
   4712             set_float_exception_inexact_flag();
   4713             aSign = extractFloat128Sign( a );
   4714             switch ( float_rounding_mode ) {
   4715              case float_round_nearest_even:
   4716                 if (    ( aExp == 0x3FFE )
   4717                      && (   extractFloat128Frac0( a )
   4718                           | extractFloat128Frac1( a ) )
   4719                    ) {
   4720                     return packFloat128( aSign, 0x3FFF, 0, 0 );
   4721                 }
   4722                 break;
   4723 	     case float_round_to_zero:
   4724 		break;
   4725              case float_round_down:
   4726                 return
   4727                       aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
   4728                     : packFloat128( 0, 0, 0, 0 );
   4729              case float_round_up:
   4730                 return
   4731                       aSign ? packFloat128( 1, 0, 0, 0 )
   4732                     : packFloat128( 0, 0x3FFF, 0, 0 );
   4733             }
   4734             return packFloat128( aSign, 0, 0, 0 );
   4735         }
   4736         lastBitMask = 1;
   4737         lastBitMask <<= 0x402F - aExp;
   4738         roundBitsMask = lastBitMask - 1;
   4739         z.low = 0;
   4740         z.high = a.high;
   4741         roundingMode = float_rounding_mode;
   4742         if ( roundingMode == float_round_nearest_even ) {
   4743             z.high += lastBitMask>>1;
   4744             if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
   4745                 z.high &= ~ lastBitMask;
   4746             }
   4747         }
   4748         else if ( roundingMode != float_round_to_zero ) {
   4749             if (   extractFloat128Sign( z )
   4750                  ^ ( roundingMode == float_round_up ) ) {
   4751                 z.high |= ( a.low != 0 );
   4752                 z.high += roundBitsMask;
   4753             }
   4754         }
   4755         z.high &= ~ roundBitsMask;
   4756     }
   4757     if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
   4758         set_float_exception_inexact_flag();
   4759     }
   4760     return z;
   4761 
   4762 }
   4763 
   4764 /*
   4765 -------------------------------------------------------------------------------
   4766 Returns the result of adding the absolute values of the quadruple-precision
   4767 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
   4768 before being returned.  `zSign' is ignored if the result is a NaN.
   4769 The addition is performed according to the IEC/IEEE Standard for Binary
   4770 Floating-Point Arithmetic.
   4771 -------------------------------------------------------------------------------
   4772 */
   4773 static float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
   4774 {
   4775     int32 aExp, bExp, zExp;
   4776     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
   4777     int32 expDiff;
   4778 
   4779     aSig1 = extractFloat128Frac1( a );
   4780     aSig0 = extractFloat128Frac0( a );
   4781     aExp = extractFloat128Exp( a );
   4782     bSig1 = extractFloat128Frac1( b );
   4783     bSig0 = extractFloat128Frac0( b );
   4784     bExp = extractFloat128Exp( b );
   4785     expDiff = aExp - bExp;
   4786     if ( 0 < expDiff ) {
   4787         if ( aExp == 0x7FFF ) {
   4788             if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
   4789             return a;
   4790         }
   4791         if ( bExp == 0 ) {
   4792             --expDiff;
   4793         }
   4794         else {
   4795             bSig0 |= LIT64( 0x0001000000000000 );
   4796         }
   4797         shift128ExtraRightJamming(
   4798             bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
   4799         zExp = aExp;
   4800     }
   4801     else if ( expDiff < 0 ) {
   4802         if ( bExp == 0x7FFF ) {
   4803             if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   4804             return packFloat128( zSign, 0x7FFF, 0, 0 );
   4805         }
   4806         if ( aExp == 0 ) {
   4807             ++expDiff;
   4808         }
   4809         else {
   4810             aSig0 |= LIT64( 0x0001000000000000 );
   4811         }
   4812         shift128ExtraRightJamming(
   4813             aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
   4814         zExp = bExp;
   4815     }
   4816     else {
   4817         if ( aExp == 0x7FFF ) {
   4818             if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
   4819                 return propagateFloat128NaN( a, b );
   4820             }
   4821             return a;
   4822         }
   4823         add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
   4824         if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
   4825         zSig2 = 0;
   4826         zSig0 |= LIT64( 0x0002000000000000 );
   4827         zExp = aExp;
   4828         goto shiftRight1;
   4829     }
   4830     aSig0 |= LIT64( 0x0001000000000000 );
   4831     add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
   4832     --zExp;
   4833     if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
   4834     ++zExp;
   4835  shiftRight1:
   4836     shift128ExtraRightJamming(
   4837         zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
   4838  roundAndPack:
   4839     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
   4840 
   4841 }
   4842 
   4843 /*
   4844 -------------------------------------------------------------------------------
   4845 Returns the result of subtracting the absolute values of the quadruple-
   4846 precision floating-point values `a' and `b'.  If `zSign' is 1, the
   4847 difference is negated before being returned.  `zSign' is ignored if the
   4848 result is a NaN.  The subtraction is performed according to the IEC/IEEE
   4849 Standard for Binary Floating-Point Arithmetic.
   4850 -------------------------------------------------------------------------------
   4851 */
   4852 static float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
   4853 {
   4854     int32 aExp, bExp, zExp;
   4855     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
   4856     int32 expDiff;
   4857     float128 z;
   4858 
   4859     aSig1 = extractFloat128Frac1( a );
   4860     aSig0 = extractFloat128Frac0( a );
   4861     aExp = extractFloat128Exp( a );
   4862     bSig1 = extractFloat128Frac1( b );
   4863     bSig0 = extractFloat128Frac0( b );
   4864     bExp = extractFloat128Exp( b );
   4865     expDiff = aExp - bExp;
   4866     shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
   4867     shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
   4868     if ( 0 < expDiff ) goto aExpBigger;
   4869     if ( expDiff < 0 ) goto bExpBigger;
   4870     if ( aExp == 0x7FFF ) {
   4871         if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
   4872             return propagateFloat128NaN( a, b );
   4873         }
   4874         float_raise( float_flag_invalid );
   4875         z.low = float128_default_nan_low;
   4876         z.high = float128_default_nan_high;
   4877         return z;
   4878     }
   4879     if ( aExp == 0 ) {
   4880         aExp = 1;
   4881         bExp = 1;
   4882     }
   4883     if ( bSig0 < aSig0 ) goto aBigger;
   4884     if ( aSig0 < bSig0 ) goto bBigger;
   4885     if ( bSig1 < aSig1 ) goto aBigger;
   4886     if ( aSig1 < bSig1 ) goto bBigger;
   4887     return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 );
   4888  bExpBigger:
   4889     if ( bExp == 0x7FFF ) {
   4890         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   4891         return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
   4892     }
   4893     if ( aExp == 0 ) {
   4894         ++expDiff;
   4895     }
   4896     else {
   4897         aSig0 |= LIT64( 0x4000000000000000 );
   4898     }
   4899     shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
   4900     bSig0 |= LIT64( 0x4000000000000000 );
   4901  bBigger:
   4902     sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
   4903     zExp = bExp;
   4904     zSign ^= 1;
   4905     goto normalizeRoundAndPack;
   4906  aExpBigger:
   4907     if ( aExp == 0x7FFF ) {
   4908         if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
   4909         return a;
   4910     }
   4911     if ( bExp == 0 ) {
   4912         --expDiff;
   4913     }
   4914     else {
   4915         bSig0 |= LIT64( 0x4000000000000000 );
   4916     }
   4917     shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
   4918     aSig0 |= LIT64( 0x4000000000000000 );
   4919  aBigger:
   4920     sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
   4921     zExp = aExp;
   4922  normalizeRoundAndPack:
   4923     --zExp;
   4924     return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
   4925 
   4926 }
   4927 
   4928 /*
   4929 -------------------------------------------------------------------------------
   4930 Returns the result of adding the quadruple-precision floating-point values
   4931 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   4932 for Binary Floating-Point Arithmetic.
   4933 -------------------------------------------------------------------------------
   4934 */
   4935 float128 float128_add( float128 a, float128 b )
   4936 {
   4937     flag aSign, bSign;
   4938 
   4939     aSign = extractFloat128Sign( a );
   4940     bSign = extractFloat128Sign( b );
   4941     if ( aSign == bSign ) {
   4942         return addFloat128Sigs( a, b, aSign );
   4943     }
   4944     else {
   4945         return subFloat128Sigs( a, b, aSign );
   4946     }
   4947 
   4948 }
   4949 
   4950 /*
   4951 -------------------------------------------------------------------------------
   4952 Returns the result of subtracting the quadruple-precision floating-point
   4953 values `a' and `b'.  The operation is performed according to the IEC/IEEE
   4954 Standard for Binary Floating-Point Arithmetic.
   4955 -------------------------------------------------------------------------------
   4956 */
   4957 float128 float128_sub( float128 a, float128 b )
   4958 {
   4959     flag aSign, bSign;
   4960 
   4961     aSign = extractFloat128Sign( a );
   4962     bSign = extractFloat128Sign( b );
   4963     if ( aSign == bSign ) {
   4964         return subFloat128Sigs( a, b, aSign );
   4965     }
   4966     else {
   4967         return addFloat128Sigs( a, b, aSign );
   4968     }
   4969 
   4970 }
   4971 
   4972 /*
   4973 -------------------------------------------------------------------------------
   4974 Returns the result of multiplying the quadruple-precision floating-point
   4975 values `a' and `b'.  The operation is performed according to the IEC/IEEE
   4976 Standard for Binary Floating-Point Arithmetic.
   4977 -------------------------------------------------------------------------------
   4978 */
   4979 float128 float128_mul( float128 a, float128 b )
   4980 {
   4981     flag aSign, bSign, zSign;
   4982     int32 aExp, bExp, zExp;
   4983     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
   4984     float128 z;
   4985 
   4986     aSig1 = extractFloat128Frac1( a );
   4987     aSig0 = extractFloat128Frac0( a );
   4988     aExp = extractFloat128Exp( a );
   4989     aSign = extractFloat128Sign( a );
   4990     bSig1 = extractFloat128Frac1( b );
   4991     bSig0 = extractFloat128Frac0( b );
   4992     bExp = extractFloat128Exp( b );
   4993     bSign = extractFloat128Sign( b );
   4994     zSign = aSign ^ bSign;
   4995     if ( aExp == 0x7FFF ) {
   4996         if (    ( aSig0 | aSig1 )
   4997              || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
   4998             return propagateFloat128NaN( a, b );
   4999         }
   5000         if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
   5001         return packFloat128( zSign, 0x7FFF, 0, 0 );
   5002     }
   5003     if ( bExp == 0x7FFF ) {
   5004         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   5005         if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
   5006  invalid:
   5007             float_raise( float_flag_invalid );
   5008             z.low = float128_default_nan_low;
   5009             z.high = float128_default_nan_high;
   5010             return z;
   5011         }
   5012         return packFloat128( zSign, 0x7FFF, 0, 0 );
   5013     }
   5014     if ( aExp == 0 ) {
   5015         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
   5016         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   5017     }
   5018     if ( bExp == 0 ) {
   5019         if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
   5020         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
   5021     }
   5022     zExp = aExp + bExp - 0x4000;
   5023     aSig0 |= LIT64( 0x0001000000000000 );
   5024     shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
   5025     mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
   5026     add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
   5027     zSig2 |= ( zSig3 != 0 );
   5028     if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
   5029         shift128ExtraRightJamming(
   5030             zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
   5031         ++zExp;
   5032     }
   5033     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
   5034 
   5035 }
   5036 
   5037 /*
   5038 -------------------------------------------------------------------------------
   5039 Returns the result of dividing the quadruple-precision floating-point value
   5040 `a' by the corresponding value `b'.  The operation is performed according to
   5041 the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5042 -------------------------------------------------------------------------------
   5043 */
   5044 float128 float128_div( float128 a, float128 b )
   5045 {
   5046     flag aSign, bSign, zSign;
   5047     int32 aExp, bExp, zExp;
   5048     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
   5049     bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
   5050     float128 z;
   5051 
   5052     aSig1 = extractFloat128Frac1( a );
   5053     aSig0 = extractFloat128Frac0( a );
   5054     aExp = extractFloat128Exp( a );
   5055     aSign = extractFloat128Sign( a );
   5056     bSig1 = extractFloat128Frac1( b );
   5057     bSig0 = extractFloat128Frac0( b );
   5058     bExp = extractFloat128Exp( b );
   5059     bSign = extractFloat128Sign( b );
   5060     zSign = aSign ^ bSign;
   5061     if ( aExp == 0x7FFF ) {
   5062         if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
   5063         if ( bExp == 0x7FFF ) {
   5064             if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   5065             goto invalid;
   5066         }
   5067         return packFloat128( zSign, 0x7FFF, 0, 0 );
   5068     }
   5069     if ( bExp == 0x7FFF ) {
   5070         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   5071         return packFloat128( zSign, 0, 0, 0 );
   5072     }
   5073     if ( bExp == 0 ) {
   5074         if ( ( bSig0 | bSig1 ) == 0 ) {
   5075             if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
   5076  invalid:
   5077                 float_raise( float_flag_invalid );
   5078                 z.low = float128_default_nan_low;
   5079                 z.high = float128_default_nan_high;
   5080                 return z;
   5081             }
   5082             float_raise( float_flag_divbyzero );
   5083             return packFloat128( zSign, 0x7FFF, 0, 0 );
   5084         }
   5085         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
   5086     }
   5087     if ( aExp == 0 ) {
   5088         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
   5089         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   5090     }
   5091     zExp = aExp - bExp + 0x3FFD;
   5092     shortShift128Left(
   5093         aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
   5094     shortShift128Left(
   5095         bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
   5096     if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
   5097         shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
   5098         ++zExp;
   5099     }
   5100     zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
   5101     mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
   5102     sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
   5103     while ( (sbits64) rem0 < 0 ) {
   5104         --zSig0;
   5105         add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
   5106     }
   5107     zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
   5108     if ( ( zSig1 & 0x3FFF ) <= 4 ) {
   5109         mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
   5110         sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
   5111         while ( (sbits64) rem1 < 0 ) {
   5112             --zSig1;
   5113             add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
   5114         }
   5115         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
   5116     }
   5117     shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
   5118     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
   5119 
   5120 }
   5121 
   5122 /*
   5123 -------------------------------------------------------------------------------
   5124 Returns the remainder of the quadruple-precision floating-point value `a'
   5125 with respect to the corresponding value `b'.  The operation is performed
   5126 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5127 -------------------------------------------------------------------------------
   5128 */
   5129 float128 float128_rem( float128 a, float128 b )
   5130 {
   5131     flag aSign, zSign;
   5132     int32 aExp, bExp, expDiff;
   5133     bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
   5134     bits64 allZero, alternateASig0, alternateASig1, sigMean1;
   5135     sbits64 sigMean0;
   5136     float128 z;
   5137 
   5138     aSig1 = extractFloat128Frac1( a );
   5139     aSig0 = extractFloat128Frac0( a );
   5140     aExp = extractFloat128Exp( a );
   5141     aSign = extractFloat128Sign( a );
   5142     bSig1 = extractFloat128Frac1( b );
   5143     bSig0 = extractFloat128Frac0( b );
   5144     bExp = extractFloat128Exp( b );
   5145     if ( aExp == 0x7FFF ) {
   5146         if (    ( aSig0 | aSig1 )
   5147              || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
   5148             return propagateFloat128NaN( a, b );
   5149         }
   5150         goto invalid;
   5151     }
   5152     if ( bExp == 0x7FFF ) {
   5153         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   5154         return a;
   5155     }
   5156     if ( bExp == 0 ) {
   5157         if ( ( bSig0 | bSig1 ) == 0 ) {
   5158  invalid:
   5159             float_raise( float_flag_invalid );
   5160             z.low = float128_default_nan_low;
   5161             z.high = float128_default_nan_high;
   5162             return z;
   5163         }
   5164         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
   5165     }
   5166     if ( aExp == 0 ) {
   5167         if ( ( aSig0 | aSig1 ) == 0 ) return a;
   5168         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   5169     }
   5170     expDiff = aExp - bExp;
   5171     if ( expDiff < -1 ) return a;
   5172     shortShift128Left(
   5173         aSig0 | LIT64( 0x0001000000000000 ),
   5174         aSig1,
   5175         15 - ( expDiff < 0 ),
   5176         &aSig0,
   5177         &aSig1
   5178     );
   5179     shortShift128Left(
   5180         bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
   5181     q = le128( bSig0, bSig1, aSig0, aSig1 );
   5182     if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
   5183     expDiff -= 64;
   5184     while ( 0 < expDiff ) {
   5185         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
   5186         q = ( 4 < q ) ? q - 4 : 0;
   5187         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
   5188         shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
   5189         shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
   5190         sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
   5191         expDiff -= 61;
   5192     }
   5193     if ( -64 < expDiff ) {
   5194         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
   5195         q = ( 4 < q ) ? q - 4 : 0;
   5196         q >>= - expDiff;
   5197         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
   5198         expDiff += 52;
   5199         if ( expDiff < 0 ) {
   5200             shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
   5201         }
   5202         else {
   5203             shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
   5204         }
   5205         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
   5206         sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
   5207     }
   5208     else {
   5209         shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
   5210         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
   5211     }
   5212     do {
   5213         alternateASig0 = aSig0;
   5214         alternateASig1 = aSig1;
   5215         ++q;
   5216         sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
   5217     } while ( 0 <= (sbits64) aSig0 );
   5218     add128(
   5219         aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 );
   5220     if (    ( sigMean0 < 0 )
   5221          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
   5222         aSig0 = alternateASig0;
   5223         aSig1 = alternateASig1;
   5224     }
   5225     zSign = ( (sbits64) aSig0 < 0 );
   5226     if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
   5227     return
   5228         normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
   5229 
   5230 }
   5231 
   5232 /*
   5233 -------------------------------------------------------------------------------
   5234 Returns the square root of the quadruple-precision floating-point value `a'.
   5235 The operation is performed according to the IEC/IEEE Standard for Binary
   5236 Floating-Point Arithmetic.
   5237 -------------------------------------------------------------------------------
   5238 */
   5239 float128 float128_sqrt( float128 a )
   5240 {
   5241     flag aSign;
   5242     int32 aExp, zExp;
   5243     bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
   5244     bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
   5245     float128 z;
   5246 
   5247     aSig1 = extractFloat128Frac1( a );
   5248     aSig0 = extractFloat128Frac0( a );
   5249     aExp = extractFloat128Exp( a );
   5250     aSign = extractFloat128Sign( a );
   5251     if ( aExp == 0x7FFF ) {
   5252         if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a );
   5253         if ( ! aSign ) return a;
   5254         goto invalid;
   5255     }
   5256     if ( aSign ) {
   5257         if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
   5258  invalid:
   5259         float_raise( float_flag_invalid );
   5260         z.low = float128_default_nan_low;
   5261         z.high = float128_default_nan_high;
   5262         return z;
   5263     }
   5264     if ( aExp == 0 ) {
   5265         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
   5266         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   5267     }
   5268     zExp = (int32) ( (bits32)(aExp - 0x3FFF) >> 1) + 0x3FFE;
   5269     aSig0 |= LIT64( 0x0001000000000000 );
   5270     zSig0 = estimateSqrt32((int16)aExp, (bits32)(aSig0>>17));
   5271     shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
   5272     zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
   5273     doubleZSig0 = zSig0<<1;
   5274     mul64To128( zSig0, zSig0, &term0, &term1 );
   5275     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
   5276     while ( (sbits64) rem0 < 0 ) {
   5277         --zSig0;
   5278         doubleZSig0 -= 2;
   5279         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
   5280     }
   5281     zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
   5282     if ( ( zSig1 & 0x1FFF ) <= 5 ) {
   5283         if ( zSig1 == 0 ) zSig1 = 1;
   5284         mul64To128( doubleZSig0, zSig1, &term1, &term2 );
   5285         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
   5286         mul64To128( zSig1, zSig1, &term2, &term3 );
   5287         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
   5288         while ( (sbits64) rem1 < 0 ) {
   5289             --zSig1;
   5290             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
   5291             term3 |= 1;
   5292             term2 |= doubleZSig0;
   5293             add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
   5294         }
   5295         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
   5296     }
   5297     shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
   5298     return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 );
   5299 
   5300 }
   5301 
   5302 /*
   5303 -------------------------------------------------------------------------------
   5304 Returns 1 if the quadruple-precision floating-point value `a' is equal to
   5305 the corresponding value `b', and 0 otherwise.  The comparison is performed
   5306 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5307 -------------------------------------------------------------------------------
   5308 */
   5309 flag float128_eq( float128 a, float128 b )
   5310 {
   5311 
   5312     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5313               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5314          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5315               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5316        ) {
   5317         if (    float128_is_signaling_nan( a )
   5318              || float128_is_signaling_nan( b ) ) {
   5319             float_raise( float_flag_invalid );
   5320         }
   5321         return 0;
   5322     }
   5323     return
   5324            ( a.low == b.low )
   5325         && (    ( a.high == b.high )
   5326              || (    ( a.low == 0 )
   5327                   && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
   5328            );
   5329 
   5330 }
   5331 
   5332 /*
   5333 -------------------------------------------------------------------------------
   5334 Returns 1 if the quadruple-precision floating-point value `a' is less than
   5335 or equal to the corresponding value `b', and 0 otherwise.  The comparison
   5336 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   5337 Arithmetic.
   5338 -------------------------------------------------------------------------------
   5339 */
   5340 flag float128_le( float128 a, float128 b )
   5341 {
   5342     flag aSign, bSign;
   5343 
   5344     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5345               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5346          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5347               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5348        ) {
   5349         float_raise( float_flag_invalid );
   5350         return 0;
   5351     }
   5352     aSign = extractFloat128Sign( a );
   5353     bSign = extractFloat128Sign( b );
   5354     if ( aSign != bSign ) {
   5355         return
   5356                aSign
   5357             || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   5358                  == 0 );
   5359     }
   5360     return
   5361           aSign ? le128( b.high, b.low, a.high, a.low )
   5362         : le128( a.high, a.low, b.high, b.low );
   5363 
   5364 }
   5365 
   5366 /*
   5367 -------------------------------------------------------------------------------
   5368 Returns 1 if the quadruple-precision floating-point value `a' is less than
   5369 the corresponding value `b', and 0 otherwise.  The comparison is performed
   5370 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5371 -------------------------------------------------------------------------------
   5372 */
   5373 flag float128_lt( float128 a, float128 b )
   5374 {
   5375     flag aSign, bSign;
   5376 
   5377     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5378               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5379          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5380               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5381        ) {
   5382         float_raise( float_flag_invalid );
   5383         return 0;
   5384     }
   5385     aSign = extractFloat128Sign( a );
   5386     bSign = extractFloat128Sign( b );
   5387     if ( aSign != bSign ) {
   5388         return
   5389                aSign
   5390             && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   5391                  != 0 );
   5392     }
   5393     return
   5394           aSign ? lt128( b.high, b.low, a.high, a.low )
   5395         : lt128( a.high, a.low, b.high, b.low );
   5396 
   5397 }
   5398 
   5399 /*
   5400 -------------------------------------------------------------------------------
   5401 Returns 1 if the quadruple-precision floating-point value `a' is equal to
   5402 the corresponding value `b', and 0 otherwise.  The invalid exception is
   5403 raised if either operand is a NaN.  Otherwise, the comparison is performed
   5404 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5405 -------------------------------------------------------------------------------
   5406 */
   5407 flag float128_eq_signaling( float128 a, float128 b )
   5408 {
   5409 
   5410     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5411               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5412          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5413               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5414        ) {
   5415         float_raise( float_flag_invalid );
   5416         return 0;
   5417     }
   5418     return
   5419            ( a.low == b.low )
   5420         && (    ( a.high == b.high )
   5421              || (    ( a.low == 0 )
   5422                   && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
   5423            );
   5424 
   5425 }
   5426 
   5427 /*
   5428 -------------------------------------------------------------------------------
   5429 Returns 1 if the quadruple-precision floating-point value `a' is less than
   5430 or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
   5431 cause an exception.  Otherwise, the comparison is performed according to the
   5432 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5433 -------------------------------------------------------------------------------
   5434 */
   5435 flag float128_le_quiet( float128 a, float128 b )
   5436 {
   5437     flag aSign, bSign;
   5438 
   5439     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5440               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5441          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5442               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5443        ) {
   5444         if (    float128_is_signaling_nan( a )
   5445              || float128_is_signaling_nan( b ) ) {
   5446             float_raise( float_flag_invalid );
   5447         }
   5448         return 0;
   5449     }
   5450     aSign = extractFloat128Sign( a );
   5451     bSign = extractFloat128Sign( b );
   5452     if ( aSign != bSign ) {
   5453         return
   5454                aSign
   5455             || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   5456                  == 0 );
   5457     }
   5458     return
   5459           aSign ? le128( b.high, b.low, a.high, a.low )
   5460         : le128( a.high, a.low, b.high, b.low );
   5461 
   5462 }
   5463 
   5464 /*
   5465 -------------------------------------------------------------------------------
   5466 Returns 1 if the quadruple-precision floating-point value `a' is less than
   5467 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
   5468 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
   5469 Standard for Binary Floating-Point Arithmetic.
   5470 -------------------------------------------------------------------------------
   5471 */
   5472 flag float128_lt_quiet( float128 a, float128 b )
   5473 {
   5474     flag aSign, bSign;
   5475 
   5476     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5477               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5478          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5479               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5480        ) {
   5481         if (    float128_is_signaling_nan( a )
   5482              || float128_is_signaling_nan( b ) ) {
   5483             float_raise( float_flag_invalid );
   5484         }
   5485         return 0;
   5486     }
   5487     aSign = extractFloat128Sign( a );
   5488     bSign = extractFloat128Sign( b );
   5489     if ( aSign != bSign ) {
   5490         return
   5491                aSign
   5492             && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   5493                  != 0 );
   5494     }
   5495     return
   5496           aSign ? lt128( b.high, b.low, a.high, a.low )
   5497         : lt128( a.high, a.low, b.high, b.low );
   5498 
   5499 }
   5500 
   5501 #endif
   5502 
   5503 
   5504 #if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS)
   5505 
   5506 /*
   5507  * These two routines are not part of the original softfloat distribution.
   5508  *
   5509  * They are based on the corresponding conversions to integer but return
   5510  * unsigned numbers instead since these functions are required by GCC.
   5511  *
   5512  * Added by Mark Brinicombe <mark (at) NetBSD.org>	27/09/97
   5513  *
   5514  * float64 version overhauled for SoftFloat 2a [bjh21 2000-07-15]
   5515  */
   5516 
   5517 /*
   5518 -------------------------------------------------------------------------------
   5519 Returns the result of converting the double-precision floating-point value
   5520 `a' to the 32-bit unsigned integer format.  The conversion is
   5521 performed according to the IEC/IEEE Standard for Binary Floating-point
   5522 Arithmetic, except that the conversion is always rounded toward zero.  If
   5523 `a' is a NaN, the largest positive integer is returned.  If the conversion
   5524 overflows, the largest integer positive is returned.
   5525 -------------------------------------------------------------------------------
   5526 */
   5527 uint32 float64_to_uint32_round_to_zero( float64 a )
   5528 {
   5529     flag aSign;
   5530     int16 aExp, shiftCount;
   5531     bits64 aSig, savedASig;
   5532     uint32 z;
   5533 
   5534     aSig = extractFloat64Frac( a );
   5535     aExp = extractFloat64Exp( a );
   5536     aSign = extractFloat64Sign( a );
   5537 
   5538     if (aSign) {
   5539         float_raise( float_flag_invalid );
   5540     	return(0);
   5541     }
   5542 
   5543     if ( 0x41E < aExp ) {
   5544         float_raise( float_flag_invalid );
   5545         return 0xffffffff;
   5546     }
   5547     else if ( aExp < 0x3FF ) {
   5548         if ( aExp || aSig ) set_float_exception_inexact_flag();
   5549         return 0;
   5550     }
   5551     aSig |= LIT64( 0x0010000000000000 );
   5552     shiftCount = 0x433 - aExp;
   5553     savedASig = aSig;
   5554     aSig >>= shiftCount;
   5555     z = (uint32)aSig;
   5556     if ( ( aSig<<shiftCount ) != savedASig ) {
   5557         set_float_exception_inexact_flag();
   5558     }
   5559     return z;
   5560 
   5561 }
   5562 
   5563 /*
   5564 -------------------------------------------------------------------------------
   5565 Returns the result of converting the single-precision floating-point value
   5566 `a' to the 32-bit unsigned integer format.  The conversion is
   5567 performed according to the IEC/IEEE Standard for Binary Floating-point
   5568 Arithmetic, except that the conversion is always rounded toward zero.  If
   5569 `a' is a NaN, the largest positive integer is returned.  If the conversion
   5570 overflows, the largest positive integer is returned.
   5571 -------------------------------------------------------------------------------
   5572 */
   5573 uint32 float32_to_uint32_round_to_zero( float32 a )
   5574 {
   5575     flag aSign;
   5576     int16 aExp, shiftCount;
   5577     bits32 aSig;
   5578     uint32 z;
   5579 
   5580     aSig = extractFloat32Frac( a );
   5581     aExp = extractFloat32Exp( a );
   5582     aSign = extractFloat32Sign( a );
   5583     shiftCount = aExp - 0x9E;
   5584 
   5585     if (aSign) {
   5586         float_raise( float_flag_invalid );
   5587     	return(0);
   5588     }
   5589     if ( 0 < shiftCount ) {
   5590         float_raise( float_flag_invalid );
   5591         return 0xFFFFFFFF;
   5592     }
   5593     else if ( aExp <= 0x7E ) {
   5594         if ( aExp | aSig ) set_float_exception_inexact_flag();
   5595         return 0;
   5596     }
   5597     aSig = ( aSig | 0x800000 )<<8;
   5598     z = aSig>>( - shiftCount );
   5599     if ( aSig<<( shiftCount & 31 ) ) {
   5600         set_float_exception_inexact_flag();
   5601     }
   5602     return z;
   5603 
   5604 }
   5605 
   5606 #endif
   5607