Home | History | Annotate | Line # | Download | only in bits32
softfloat.c revision 1.3
      1  1.3      matt /* $NetBSD: softfloat.c,v 1.3 2013/01/10 08:16:11 matt Exp $ */
      2  1.1     bjh21 
      3  1.1     bjh21 /*
      4  1.1     bjh21  * This version hacked for use with gcc -msoft-float by bjh21.
      5  1.1     bjh21  * (Mostly a case of #ifdefing out things GCC doesn't need or provides
      6  1.1     bjh21  *  itself).
      7  1.1     bjh21  */
      8  1.1     bjh21 
      9  1.1     bjh21 /*
     10  1.1     bjh21  * Things you may want to define:
     11  1.1     bjh21  *
     12  1.1     bjh21  * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
     13  1.1     bjh21  *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
     14  1.1     bjh21  *   properly renamed.
     15  1.1     bjh21  */
     16  1.1     bjh21 
     17  1.1     bjh21 /*
     18  1.1     bjh21  * This differs from the standard bits32/softfloat.c in that float64
     19  1.1     bjh21  * is defined to be a 64-bit integer rather than a structure.  The
     20  1.1     bjh21  * structure is float64s, with translation between the two going via
     21  1.1     bjh21  * float64u.
     22  1.1     bjh21  */
     23  1.1     bjh21 
     24  1.1     bjh21 /*
     25  1.1     bjh21 ===============================================================================
     26  1.1     bjh21 
     27  1.1     bjh21 This C source file is part of the SoftFloat IEC/IEEE Floating-Point
     28  1.1     bjh21 Arithmetic Package, Release 2a.
     29  1.1     bjh21 
     30  1.1     bjh21 Written by John R. Hauser.  This work was made possible in part by the
     31  1.1     bjh21 International Computer Science Institute, located at Suite 600, 1947 Center
     32  1.1     bjh21 Street, Berkeley, California 94704.  Funding was partially provided by the
     33  1.1     bjh21 National Science Foundation under grant MIP-9311980.  The original version
     34  1.1     bjh21 of this code was written as part of a project to build a fixed-point vector
     35  1.1     bjh21 processor in collaboration with the University of California at Berkeley,
     36  1.1     bjh21 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
     37  1.1     bjh21 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
     38  1.1     bjh21 arithmetic/SoftFloat.html'.
     39  1.1     bjh21 
     40  1.1     bjh21 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
     41  1.1     bjh21 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
     42  1.1     bjh21 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
     43  1.1     bjh21 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
     44  1.1     bjh21 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
     45  1.1     bjh21 
     46  1.1     bjh21 Derivative works are acceptable, even for commercial purposes, so long as
     47  1.1     bjh21 (1) they include prominent notice that the work is derivative, and (2) they
     48  1.1     bjh21 include prominent notice akin to these four paragraphs for those parts of
     49  1.1     bjh21 this code that are retained.
     50  1.1     bjh21 
     51  1.1     bjh21 ===============================================================================
     52  1.1     bjh21 */
     53  1.1     bjh21 
     54  1.1     bjh21 #include <sys/cdefs.h>
     55  1.1     bjh21 #if defined(LIBC_SCCS) && !defined(lint)
     56  1.3      matt __RCSID("$NetBSD: softfloat.c,v 1.3 2013/01/10 08:16:11 matt Exp $");
     57  1.1     bjh21 #endif /* LIBC_SCCS and not lint */
     58  1.1     bjh21 
     59  1.1     bjh21 #ifdef SOFTFLOAT_FOR_GCC
     60  1.1     bjh21 #include "softfloat-for-gcc.h"
     61  1.1     bjh21 #endif
     62  1.1     bjh21 
     63  1.1     bjh21 #include "milieu.h"
     64  1.1     bjh21 #include "softfloat.h"
     65  1.1     bjh21 
     66  1.1     bjh21 /*
     67  1.1     bjh21  * Conversions between floats as stored in memory and floats as
     68  1.1     bjh21  * SoftFloat uses them
     69  1.1     bjh21  */
     70  1.1     bjh21 #ifndef FLOAT64_DEMANGLE
     71  1.1     bjh21 #define FLOAT64_DEMANGLE(a)	(a)
     72  1.1     bjh21 #endif
     73  1.1     bjh21 #ifndef FLOAT64_MANGLE
     74  1.1     bjh21 #define FLOAT64_MANGLE(a)	(a)
     75  1.1     bjh21 #endif
     76  1.1     bjh21 
     77  1.1     bjh21 /*
     78  1.1     bjh21 -------------------------------------------------------------------------------
     79  1.1     bjh21 Floating-point rounding mode and exception flags.
     80  1.1     bjh21 -------------------------------------------------------------------------------
     81  1.1     bjh21 */
     82  1.3      matt #ifndef set_float_rounding_mode
     83  1.1     bjh21 fp_rnd float_rounding_mode = float_round_nearest_even;
     84  1.1     bjh21 fp_except float_exception_flags = 0;
     85  1.3      matt #endif
     86  1.3      matt #ifndef set_float_exception_inexact_flag
     87  1.3      matt #define set_float_exception_inexact_flag() \
     88  1.3      matt 	((void)(float_exception_flags |= float_flag_inexact))
     89  1.3      matt #endif
     90  1.1     bjh21 
     91  1.1     bjh21 /*
     92  1.1     bjh21 -------------------------------------------------------------------------------
     93  1.1     bjh21 Primitive arithmetic functions, including multi-word arithmetic, and
     94  1.1     bjh21 division and square root approximations.  (Can be specialized to target if
     95  1.1     bjh21 desired.)
     96  1.1     bjh21 -------------------------------------------------------------------------------
     97  1.1     bjh21 */
     98  1.1     bjh21 #include "softfloat-macros"
     99  1.1     bjh21 
    100  1.1     bjh21 /*
    101  1.1     bjh21 -------------------------------------------------------------------------------
    102  1.1     bjh21 Functions and definitions to determine:  (1) whether tininess for underflow
    103  1.1     bjh21 is detected before or after rounding by default, (2) what (if anything)
    104  1.1     bjh21 happens when exceptions are raised, (3) how signaling NaNs are distinguished
    105  1.1     bjh21 from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs
    106  1.1     bjh21 are propagated from function inputs to output.  These details are target-
    107  1.1     bjh21 specific.
    108  1.1     bjh21 -------------------------------------------------------------------------------
    109  1.1     bjh21 */
    110  1.1     bjh21 #include "softfloat-specialize"
    111  1.1     bjh21 
    112  1.1     bjh21 /*
    113  1.1     bjh21 -------------------------------------------------------------------------------
    114  1.1     bjh21 Returns the fraction bits of the single-precision floating-point value `a'.
    115  1.1     bjh21 -------------------------------------------------------------------------------
    116  1.1     bjh21 */
    117  1.1     bjh21 INLINE bits32 extractFloat32Frac( float32 a )
    118  1.1     bjh21 {
    119  1.1     bjh21 
    120  1.1     bjh21     return a & 0x007FFFFF;
    121  1.1     bjh21 
    122  1.1     bjh21 }
    123  1.1     bjh21 
    124  1.1     bjh21 /*
    125  1.1     bjh21 -------------------------------------------------------------------------------
    126  1.1     bjh21 Returns the exponent bits of the single-precision floating-point value `a'.
    127  1.1     bjh21 -------------------------------------------------------------------------------
    128  1.1     bjh21 */
    129  1.1     bjh21 INLINE int16 extractFloat32Exp( float32 a )
    130  1.1     bjh21 {
    131  1.1     bjh21 
    132  1.1     bjh21     return ( a>>23 ) & 0xFF;
    133  1.1     bjh21 
    134  1.1     bjh21 }
    135  1.1     bjh21 
    136  1.1     bjh21 /*
    137  1.1     bjh21 -------------------------------------------------------------------------------
    138  1.1     bjh21 Returns the sign bit of the single-precision floating-point value `a'.
    139  1.1     bjh21 -------------------------------------------------------------------------------
    140  1.1     bjh21 */
    141  1.1     bjh21 INLINE flag extractFloat32Sign( float32 a )
    142  1.1     bjh21 {
    143  1.1     bjh21 
    144  1.1     bjh21     return a>>31;
    145  1.1     bjh21 
    146  1.1     bjh21 }
    147  1.1     bjh21 
    148  1.1     bjh21 /*
    149  1.1     bjh21 -------------------------------------------------------------------------------
    150  1.1     bjh21 Normalizes the subnormal single-precision floating-point value represented
    151  1.1     bjh21 by the denormalized significand `aSig'.  The normalized exponent and
    152  1.1     bjh21 significand are stored at the locations pointed to by `zExpPtr' and
    153  1.1     bjh21 `zSigPtr', respectively.
    154  1.1     bjh21 -------------------------------------------------------------------------------
    155  1.1     bjh21 */
    156  1.1     bjh21 static void
    157  1.1     bjh21  normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
    158  1.1     bjh21 {
    159  1.1     bjh21     int8 shiftCount;
    160  1.1     bjh21 
    161  1.1     bjh21     shiftCount = countLeadingZeros32( aSig ) - 8;
    162  1.1     bjh21     *zSigPtr = aSig<<shiftCount;
    163  1.1     bjh21     *zExpPtr = 1 - shiftCount;
    164  1.1     bjh21 
    165  1.1     bjh21 }
    166  1.1     bjh21 
    167  1.1     bjh21 /*
    168  1.1     bjh21 -------------------------------------------------------------------------------
    169  1.1     bjh21 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
    170  1.1     bjh21 single-precision floating-point value, returning the result.  After being
    171  1.1     bjh21 shifted into the proper positions, the three fields are simply added
    172  1.1     bjh21 together to form the result.  This means that any integer portion of `zSig'
    173  1.1     bjh21 will be added into the exponent.  Since a properly normalized significand
    174  1.1     bjh21 will have an integer portion equal to 1, the `zExp' input should be 1 less
    175  1.1     bjh21 than the desired result exponent whenever `zSig' is a complete, normalized
    176  1.1     bjh21 significand.
    177  1.1     bjh21 -------------------------------------------------------------------------------
    178  1.1     bjh21 */
    179  1.1     bjh21 INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
    180  1.1     bjh21 {
    181  1.1     bjh21 
    182  1.1     bjh21     return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
    183  1.1     bjh21 
    184  1.1     bjh21 }
    185  1.1     bjh21 
    186  1.1     bjh21 /*
    187  1.1     bjh21 -------------------------------------------------------------------------------
    188  1.1     bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    189  1.1     bjh21 and significand `zSig', and returns the proper single-precision floating-
    190  1.1     bjh21 point value corresponding to the abstract input.  Ordinarily, the abstract
    191  1.1     bjh21 value is simply rounded and packed into the single-precision format, with
    192  1.1     bjh21 the inexact exception raised if the abstract input cannot be represented
    193  1.1     bjh21 exactly.  However, if the abstract value is too large, the overflow and
    194  1.1     bjh21 inexact exceptions are raised and an infinity or maximal finite value is
    195  1.1     bjh21 returned.  If the abstract value is too small, the input value is rounded to
    196  1.1     bjh21 a subnormal number, and the underflow and inexact exceptions are raised if
    197  1.1     bjh21 the abstract input cannot be represented exactly as a subnormal single-
    198  1.1     bjh21 precision floating-point number.
    199  1.1     bjh21     The input significand `zSig' has its binary point between bits 30
    200  1.1     bjh21 and 29, which is 7 bits to the left of the usual location.  This shifted
    201  1.1     bjh21 significand must be normalized or smaller.  If `zSig' is not normalized,
    202  1.1     bjh21 `zExp' must be 0; in that case, the result returned is a subnormal number,
    203  1.1     bjh21 and it must not require rounding.  In the usual case that `zSig' is
    204  1.1     bjh21 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
    205  1.1     bjh21 The handling of underflow and overflow follows the IEC/IEEE Standard for
    206  1.1     bjh21 Binary Floating-Point Arithmetic.
    207  1.1     bjh21 -------------------------------------------------------------------------------
    208  1.1     bjh21 */
    209  1.1     bjh21 static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
    210  1.1     bjh21 {
    211  1.1     bjh21     int8 roundingMode;
    212  1.1     bjh21     flag roundNearestEven;
    213  1.1     bjh21     int8 roundIncrement, roundBits;
    214  1.1     bjh21     flag isTiny;
    215  1.1     bjh21 
    216  1.1     bjh21     roundingMode = float_rounding_mode;
    217  1.1     bjh21     roundNearestEven = roundingMode == float_round_nearest_even;
    218  1.1     bjh21     roundIncrement = 0x40;
    219  1.1     bjh21     if ( ! roundNearestEven ) {
    220  1.1     bjh21         if ( roundingMode == float_round_to_zero ) {
    221  1.1     bjh21             roundIncrement = 0;
    222  1.1     bjh21         }
    223  1.1     bjh21         else {
    224  1.1     bjh21             roundIncrement = 0x7F;
    225  1.1     bjh21             if ( zSign ) {
    226  1.1     bjh21                 if ( roundingMode == float_round_up ) roundIncrement = 0;
    227  1.1     bjh21             }
    228  1.1     bjh21             else {
    229  1.1     bjh21                 if ( roundingMode == float_round_down ) roundIncrement = 0;
    230  1.1     bjh21             }
    231  1.1     bjh21         }
    232  1.1     bjh21     }
    233  1.1     bjh21     roundBits = zSig & 0x7F;
    234  1.1     bjh21     if ( 0xFD <= (bits16) zExp ) {
    235  1.1     bjh21         if (    ( 0xFD < zExp )
    236  1.1     bjh21              || (    ( zExp == 0xFD )
    237  1.1     bjh21                   && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
    238  1.1     bjh21            ) {
    239  1.1     bjh21             float_raise( float_flag_overflow | float_flag_inexact );
    240  1.1     bjh21             return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
    241  1.1     bjh21         }
    242  1.1     bjh21         if ( zExp < 0 ) {
    243  1.1     bjh21             isTiny =
    244  1.1     bjh21                    ( float_detect_tininess == float_tininess_before_rounding )
    245  1.1     bjh21                 || ( zExp < -1 )
    246  1.2  christos                 || ( zSig + roundIncrement < (uint32)0x80000000 );
    247  1.1     bjh21             shift32RightJamming( zSig, - zExp, &zSig );
    248  1.1     bjh21             zExp = 0;
    249  1.1     bjh21             roundBits = zSig & 0x7F;
    250  1.1     bjh21             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
    251  1.1     bjh21         }
    252  1.1     bjh21     }
    253  1.3      matt     if ( roundBits ) set_float_exception_inexact_flag();
    254  1.1     bjh21     zSig = ( zSig + roundIncrement )>>7;
    255  1.1     bjh21     zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
    256  1.1     bjh21     if ( zSig == 0 ) zExp = 0;
    257  1.1     bjh21     return packFloat32( zSign, zExp, zSig );
    258  1.1     bjh21 
    259  1.1     bjh21 }
    260  1.1     bjh21 
    261  1.1     bjh21 /*
    262  1.1     bjh21 -------------------------------------------------------------------------------
    263  1.1     bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    264  1.1     bjh21 and significand `zSig', and returns the proper single-precision floating-
    265  1.1     bjh21 point value corresponding to the abstract input.  This routine is just like
    266  1.1     bjh21 `roundAndPackFloat32' except that `zSig' does not have to be normalized.
    267  1.1     bjh21 Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
    268  1.1     bjh21 floating-point exponent.
    269  1.1     bjh21 -------------------------------------------------------------------------------
    270  1.1     bjh21 */
    271  1.1     bjh21 static float32
    272  1.1     bjh21  normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
    273  1.1     bjh21 {
    274  1.1     bjh21     int8 shiftCount;
    275  1.1     bjh21 
    276  1.1     bjh21     shiftCount = countLeadingZeros32( zSig ) - 1;
    277  1.1     bjh21     return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
    278  1.1     bjh21 
    279  1.1     bjh21 }
    280  1.1     bjh21 
    281  1.1     bjh21 /*
    282  1.1     bjh21 -------------------------------------------------------------------------------
    283  1.1     bjh21 Returns the least-significant 32 fraction bits of the double-precision
    284  1.1     bjh21 floating-point value `a'.
    285  1.1     bjh21 -------------------------------------------------------------------------------
    286  1.1     bjh21 */
    287  1.1     bjh21 INLINE bits32 extractFloat64Frac1( float64 a )
    288  1.1     bjh21 {
    289  1.1     bjh21 
    290  1.2  christos     return (bits32)(FLOAT64_DEMANGLE(a) & LIT64(0x00000000FFFFFFFF));
    291  1.1     bjh21 
    292  1.1     bjh21 }
    293  1.1     bjh21 
    294  1.1     bjh21 /*
    295  1.1     bjh21 -------------------------------------------------------------------------------
    296  1.1     bjh21 Returns the most-significant 20 fraction bits of the double-precision
    297  1.1     bjh21 floating-point value `a'.
    298  1.1     bjh21 -------------------------------------------------------------------------------
    299  1.1     bjh21 */
    300  1.1     bjh21 INLINE bits32 extractFloat64Frac0( float64 a )
    301  1.1     bjh21 {
    302  1.1     bjh21 
    303  1.2  christos     return (bits32)((FLOAT64_DEMANGLE(a) >> 32) & 0x000FFFFF);
    304  1.1     bjh21 
    305  1.1     bjh21 }
    306  1.1     bjh21 
    307  1.1     bjh21 /*
    308  1.1     bjh21 -------------------------------------------------------------------------------
    309  1.1     bjh21 Returns the exponent bits of the double-precision floating-point value `a'.
    310  1.1     bjh21 -------------------------------------------------------------------------------
    311  1.1     bjh21 */
    312  1.1     bjh21 INLINE int16 extractFloat64Exp( float64 a )
    313  1.1     bjh21 {
    314  1.1     bjh21 
    315  1.2  christos     return (int16)((FLOAT64_DEMANGLE(a) >> 52) & 0x7FF);
    316  1.1     bjh21 
    317  1.1     bjh21 }
    318  1.1     bjh21 
    319  1.1     bjh21 /*
    320  1.1     bjh21 -------------------------------------------------------------------------------
    321  1.1     bjh21 Returns the sign bit of the double-precision floating-point value `a'.
    322  1.1     bjh21 -------------------------------------------------------------------------------
    323  1.1     bjh21 */
    324  1.1     bjh21 INLINE flag extractFloat64Sign( float64 a )
    325  1.1     bjh21 {
    326  1.1     bjh21 
    327  1.2  christos     return (flag)(FLOAT64_DEMANGLE(a) >> 63);
    328  1.1     bjh21 
    329  1.1     bjh21 }
    330  1.1     bjh21 
    331  1.1     bjh21 /*
    332  1.1     bjh21 -------------------------------------------------------------------------------
    333  1.1     bjh21 Normalizes the subnormal double-precision floating-point value represented
    334  1.1     bjh21 by the denormalized significand formed by the concatenation of `aSig0' and
    335  1.1     bjh21 `aSig1'.  The normalized exponent is stored at the location pointed to by
    336  1.1     bjh21 `zExpPtr'.  The most significant 21 bits of the normalized significand are
    337  1.1     bjh21 stored at the location pointed to by `zSig0Ptr', and the least significant
    338  1.1     bjh21 32 bits of the normalized significand are stored at the location pointed to
    339  1.1     bjh21 by `zSig1Ptr'.
    340  1.1     bjh21 -------------------------------------------------------------------------------
    341  1.1     bjh21 */
    342  1.1     bjh21 static void
    343  1.1     bjh21  normalizeFloat64Subnormal(
    344  1.1     bjh21      bits32 aSig0,
    345  1.1     bjh21      bits32 aSig1,
    346  1.1     bjh21      int16 *zExpPtr,
    347  1.1     bjh21      bits32 *zSig0Ptr,
    348  1.1     bjh21      bits32 *zSig1Ptr
    349  1.1     bjh21  )
    350  1.1     bjh21 {
    351  1.1     bjh21     int8 shiftCount;
    352  1.1     bjh21 
    353  1.1     bjh21     if ( aSig0 == 0 ) {
    354  1.1     bjh21         shiftCount = countLeadingZeros32( aSig1 ) - 11;
    355  1.1     bjh21         if ( shiftCount < 0 ) {
    356  1.1     bjh21             *zSig0Ptr = aSig1>>( - shiftCount );
    357  1.1     bjh21             *zSig1Ptr = aSig1<<( shiftCount & 31 );
    358  1.1     bjh21         }
    359  1.1     bjh21         else {
    360  1.1     bjh21             *zSig0Ptr = aSig1<<shiftCount;
    361  1.1     bjh21             *zSig1Ptr = 0;
    362  1.1     bjh21         }
    363  1.1     bjh21         *zExpPtr = - shiftCount - 31;
    364  1.1     bjh21     }
    365  1.1     bjh21     else {
    366  1.1     bjh21         shiftCount = countLeadingZeros32( aSig0 ) - 11;
    367  1.1     bjh21         shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
    368  1.1     bjh21         *zExpPtr = 1 - shiftCount;
    369  1.1     bjh21     }
    370  1.1     bjh21 
    371  1.1     bjh21 }
    372  1.1     bjh21 
    373  1.1     bjh21 /*
    374  1.1     bjh21 -------------------------------------------------------------------------------
    375  1.1     bjh21 Packs the sign `zSign', the exponent `zExp', and the significand formed by
    376  1.1     bjh21 the concatenation of `zSig0' and `zSig1' into a double-precision floating-
    377  1.1     bjh21 point value, returning the result.  After being shifted into the proper
    378  1.1     bjh21 positions, the three fields `zSign', `zExp', and `zSig0' are simply added
    379  1.1     bjh21 together to form the most significant 32 bits of the result.  This means
    380  1.1     bjh21 that any integer portion of `zSig0' will be added into the exponent.  Since
    381  1.1     bjh21 a properly normalized significand will have an integer portion equal to 1,
    382  1.1     bjh21 the `zExp' input should be 1 less than the desired result exponent whenever
    383  1.1     bjh21 `zSig0' and `zSig1' concatenated form a complete, normalized significand.
    384  1.1     bjh21 -------------------------------------------------------------------------------
    385  1.1     bjh21 */
    386  1.1     bjh21 INLINE float64
    387  1.1     bjh21  packFloat64( flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
    388  1.1     bjh21 {
    389  1.1     bjh21 
    390  1.1     bjh21     return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
    391  1.1     bjh21 			   ( ( (bits64) zExp )<<52 ) +
    392  1.1     bjh21 			   ( ( (bits64) zSig0 )<<32 ) + zSig1 );
    393  1.1     bjh21 
    394  1.1     bjh21 
    395  1.1     bjh21 }
    396  1.1     bjh21 
    397  1.1     bjh21 /*
    398  1.1     bjh21 -------------------------------------------------------------------------------
    399  1.1     bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    400  1.1     bjh21 and extended significand formed by the concatenation of `zSig0', `zSig1',
    401  1.1     bjh21 and `zSig2', and returns the proper double-precision floating-point value
    402  1.1     bjh21 corresponding to the abstract input.  Ordinarily, the abstract value is
    403  1.1     bjh21 simply rounded and packed into the double-precision format, with the inexact
    404  1.1     bjh21 exception raised if the abstract input cannot be represented exactly.
    405  1.1     bjh21 However, if the abstract value is too large, the overflow and inexact
    406  1.1     bjh21 exceptions are raised and an infinity or maximal finite value is returned.
    407  1.1     bjh21 If the abstract value is too small, the input value is rounded to a
    408  1.1     bjh21 subnormal number, and the underflow and inexact exceptions are raised if the
    409  1.1     bjh21 abstract input cannot be represented exactly as a subnormal double-precision
    410  1.1     bjh21 floating-point number.
    411  1.1     bjh21     The input significand must be normalized or smaller.  If the input
    412  1.1     bjh21 significand is not normalized, `zExp' must be 0; in that case, the result
    413  1.1     bjh21 returned is a subnormal number, and it must not require rounding.  In the
    414  1.1     bjh21 usual case that the input significand is normalized, `zExp' must be 1 less
    415  1.1     bjh21 than the ``true'' floating-point exponent.  The handling of underflow and
    416  1.1     bjh21 overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
    417  1.1     bjh21 -------------------------------------------------------------------------------
    418  1.1     bjh21 */
    419  1.1     bjh21 static float64
    420  1.1     bjh21  roundAndPackFloat64(
    421  1.1     bjh21      flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 )
    422  1.1     bjh21 {
    423  1.1     bjh21     int8 roundingMode;
    424  1.1     bjh21     flag roundNearestEven, increment, isTiny;
    425  1.1     bjh21 
    426  1.1     bjh21     roundingMode = float_rounding_mode;
    427  1.1     bjh21     roundNearestEven = ( roundingMode == float_round_nearest_even );
    428  1.1     bjh21     increment = ( (sbits32) zSig2 < 0 );
    429  1.1     bjh21     if ( ! roundNearestEven ) {
    430  1.1     bjh21         if ( roundingMode == float_round_to_zero ) {
    431  1.1     bjh21             increment = 0;
    432  1.1     bjh21         }
    433  1.1     bjh21         else {
    434  1.1     bjh21             if ( zSign ) {
    435  1.1     bjh21                 increment = ( roundingMode == float_round_down ) && zSig2;
    436  1.1     bjh21             }
    437  1.1     bjh21             else {
    438  1.1     bjh21                 increment = ( roundingMode == float_round_up ) && zSig2;
    439  1.1     bjh21             }
    440  1.1     bjh21         }
    441  1.1     bjh21     }
    442  1.1     bjh21     if ( 0x7FD <= (bits16) zExp ) {
    443  1.1     bjh21         if (    ( 0x7FD < zExp )
    444  1.1     bjh21              || (    ( zExp == 0x7FD )
    445  1.1     bjh21                   && eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 )
    446  1.1     bjh21                   && increment
    447  1.1     bjh21                 )
    448  1.1     bjh21            ) {
    449  1.1     bjh21             float_raise( float_flag_overflow | float_flag_inexact );
    450  1.1     bjh21             if (    ( roundingMode == float_round_to_zero )
    451  1.1     bjh21                  || ( zSign && ( roundingMode == float_round_up ) )
    452  1.1     bjh21                  || ( ! zSign && ( roundingMode == float_round_down ) )
    453  1.1     bjh21                ) {
    454  1.1     bjh21                 return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF );
    455  1.1     bjh21             }
    456  1.1     bjh21             return packFloat64( zSign, 0x7FF, 0, 0 );
    457  1.1     bjh21         }
    458  1.1     bjh21         if ( zExp < 0 ) {
    459  1.1     bjh21             isTiny =
    460  1.1     bjh21                    ( float_detect_tininess == float_tininess_before_rounding )
    461  1.1     bjh21                 || ( zExp < -1 )
    462  1.1     bjh21                 || ! increment
    463  1.1     bjh21                 || lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF );
    464  1.1     bjh21             shift64ExtraRightJamming(
    465  1.1     bjh21                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
    466  1.1     bjh21             zExp = 0;
    467  1.1     bjh21             if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
    468  1.1     bjh21             if ( roundNearestEven ) {
    469  1.1     bjh21                 increment = ( (sbits32) zSig2 < 0 );
    470  1.1     bjh21             }
    471  1.1     bjh21             else {
    472  1.1     bjh21                 if ( zSign ) {
    473  1.1     bjh21                     increment = ( roundingMode == float_round_down ) && zSig2;
    474  1.1     bjh21                 }
    475  1.1     bjh21                 else {
    476  1.1     bjh21                     increment = ( roundingMode == float_round_up ) && zSig2;
    477  1.1     bjh21                 }
    478  1.1     bjh21             }
    479  1.1     bjh21         }
    480  1.1     bjh21     }
    481  1.3      matt     if ( zSig2 ) set_float_exception_inexact_flag();
    482  1.1     bjh21     if ( increment ) {
    483  1.1     bjh21         add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
    484  1.1     bjh21         zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
    485  1.1     bjh21     }
    486  1.1     bjh21     else {
    487  1.1     bjh21         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
    488  1.1     bjh21     }
    489  1.1     bjh21     return packFloat64( zSign, zExp, zSig0, zSig1 );
    490  1.1     bjh21 
    491  1.1     bjh21 }
    492  1.1     bjh21 
    493  1.1     bjh21 /*
    494  1.1     bjh21 -------------------------------------------------------------------------------
    495  1.1     bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    496  1.1     bjh21 and significand formed by the concatenation of `zSig0' and `zSig1', and
    497  1.1     bjh21 returns the proper double-precision floating-point value corresponding
    498  1.1     bjh21 to the abstract input.  This routine is just like `roundAndPackFloat64'
    499  1.1     bjh21 except that the input significand has fewer bits and does not have to be
    500  1.1     bjh21 normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
    501  1.1     bjh21 point exponent.
    502  1.1     bjh21 -------------------------------------------------------------------------------
    503  1.1     bjh21 */
    504  1.1     bjh21 static float64
    505  1.1     bjh21  normalizeRoundAndPackFloat64(
    506  1.1     bjh21      flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
    507  1.1     bjh21 {
    508  1.1     bjh21     int8 shiftCount;
    509  1.1     bjh21     bits32 zSig2;
    510  1.1     bjh21 
    511  1.1     bjh21     if ( zSig0 == 0 ) {
    512  1.1     bjh21         zSig0 = zSig1;
    513  1.1     bjh21         zSig1 = 0;
    514  1.1     bjh21         zExp -= 32;
    515  1.1     bjh21     }
    516  1.1     bjh21     shiftCount = countLeadingZeros32( zSig0 ) - 11;
    517  1.1     bjh21     if ( 0 <= shiftCount ) {
    518  1.1     bjh21         zSig2 = 0;
    519  1.1     bjh21         shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    520  1.1     bjh21     }
    521  1.1     bjh21     else {
    522  1.1     bjh21         shift64ExtraRightJamming(
    523  1.1     bjh21             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
    524  1.1     bjh21     }
    525  1.1     bjh21     zExp -= shiftCount;
    526  1.1     bjh21     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
    527  1.1     bjh21 
    528  1.1     bjh21 }
    529  1.1     bjh21 
    530  1.1     bjh21 /*
    531  1.1     bjh21 -------------------------------------------------------------------------------
    532  1.1     bjh21 Returns the result of converting the 32-bit two's complement integer `a' to
    533  1.1     bjh21 the single-precision floating-point format.  The conversion is performed
    534  1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
    535  1.1     bjh21 -------------------------------------------------------------------------------
    536  1.1     bjh21 */
    537  1.1     bjh21 float32 int32_to_float32( int32 a )
    538  1.1     bjh21 {
    539  1.1     bjh21     flag zSign;
    540  1.1     bjh21 
    541  1.1     bjh21     if ( a == 0 ) return 0;
    542  1.1     bjh21     if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
    543  1.1     bjh21     zSign = ( a < 0 );
    544  1.2  christos     return normalizeRoundAndPackFloat32(zSign, 0x9C, (uint32)(zSign ? - a : a));
    545  1.1     bjh21 
    546  1.1     bjh21 }
    547  1.1     bjh21 
    548  1.1     bjh21 /*
    549  1.1     bjh21 -------------------------------------------------------------------------------
    550  1.1     bjh21 Returns the result of converting the 32-bit two's complement integer `a' to
    551  1.1     bjh21 the double-precision floating-point format.  The conversion is performed
    552  1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
    553  1.1     bjh21 -------------------------------------------------------------------------------
    554  1.1     bjh21 */
    555  1.1     bjh21 float64 int32_to_float64( int32 a )
    556  1.1     bjh21 {
    557  1.1     bjh21     flag zSign;
    558  1.1     bjh21     bits32 absA;
    559  1.1     bjh21     int8 shiftCount;
    560  1.1     bjh21     bits32 zSig0, zSig1;
    561  1.1     bjh21 
    562  1.1     bjh21     if ( a == 0 ) return packFloat64( 0, 0, 0, 0 );
    563  1.1     bjh21     zSign = ( a < 0 );
    564  1.1     bjh21     absA = zSign ? - a : a;
    565  1.1     bjh21     shiftCount = countLeadingZeros32( absA ) - 11;
    566  1.1     bjh21     if ( 0 <= shiftCount ) {
    567  1.1     bjh21         zSig0 = absA<<shiftCount;
    568  1.1     bjh21         zSig1 = 0;
    569  1.1     bjh21     }
    570  1.1     bjh21     else {
    571  1.1     bjh21         shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 );
    572  1.1     bjh21     }
    573  1.1     bjh21     return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 );
    574  1.1     bjh21 
    575  1.1     bjh21 }
    576  1.1     bjh21 
    577  1.1     bjh21 #ifndef SOFTFLOAT_FOR_GCC
    578  1.1     bjh21 /*
    579  1.1     bjh21 -------------------------------------------------------------------------------
    580  1.1     bjh21 Returns the result of converting the single-precision floating-point value
    581  1.1     bjh21 `a' to the 32-bit two's complement integer format.  The conversion is
    582  1.1     bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
    583  1.1     bjh21 Arithmetic---which means in particular that the conversion is rounded
    584  1.1     bjh21 according to the current rounding mode.  If `a' is a NaN, the largest
    585  1.1     bjh21 positive integer is returned.  Otherwise, if the conversion overflows, the
    586  1.1     bjh21 largest integer with the same sign as `a' is returned.
    587  1.1     bjh21 -------------------------------------------------------------------------------
    588  1.1     bjh21 */
    589  1.1     bjh21 int32 float32_to_int32( float32 a )
    590  1.1     bjh21 {
    591  1.1     bjh21     flag aSign;
    592  1.1     bjh21     int16 aExp, shiftCount;
    593  1.1     bjh21     bits32 aSig, aSigExtra;
    594  1.1     bjh21     int32 z;
    595  1.1     bjh21     int8 roundingMode;
    596  1.1     bjh21 
    597  1.1     bjh21     aSig = extractFloat32Frac( a );
    598  1.1     bjh21     aExp = extractFloat32Exp( a );
    599  1.1     bjh21     aSign = extractFloat32Sign( a );
    600  1.1     bjh21     shiftCount = aExp - 0x96;
    601  1.1     bjh21     if ( 0 <= shiftCount ) {
    602  1.1     bjh21         if ( 0x9E <= aExp ) {
    603  1.1     bjh21             if ( a != 0xCF000000 ) {
    604  1.1     bjh21                 float_raise( float_flag_invalid );
    605  1.1     bjh21                 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
    606  1.1     bjh21                     return 0x7FFFFFFF;
    607  1.1     bjh21                 }
    608  1.1     bjh21             }
    609  1.1     bjh21             return (sbits32) 0x80000000;
    610  1.1     bjh21         }
    611  1.1     bjh21         z = ( aSig | 0x00800000 )<<shiftCount;
    612  1.1     bjh21         if ( aSign ) z = - z;
    613  1.1     bjh21     }
    614  1.1     bjh21     else {
    615  1.1     bjh21         if ( aExp < 0x7E ) {
    616  1.1     bjh21             aSigExtra = aExp | aSig;
    617  1.1     bjh21             z = 0;
    618  1.1     bjh21         }
    619  1.1     bjh21         else {
    620  1.1     bjh21             aSig |= 0x00800000;
    621  1.1     bjh21             aSigExtra = aSig<<( shiftCount & 31 );
    622  1.1     bjh21             z = aSig>>( - shiftCount );
    623  1.1     bjh21         }
    624  1.3      matt         if ( aSigExtra ) set_float_exception_inexact_flag();
    625  1.1     bjh21         roundingMode = float_rounding_mode;
    626  1.1     bjh21         if ( roundingMode == float_round_nearest_even ) {
    627  1.1     bjh21             if ( (sbits32) aSigExtra < 0 ) {
    628  1.1     bjh21                 ++z;
    629  1.1     bjh21                 if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1;
    630  1.1     bjh21             }
    631  1.1     bjh21             if ( aSign ) z = - z;
    632  1.1     bjh21         }
    633  1.1     bjh21         else {
    634  1.1     bjh21             aSigExtra = ( aSigExtra != 0 );
    635  1.1     bjh21             if ( aSign ) {
    636  1.1     bjh21                 z += ( roundingMode == float_round_down ) & aSigExtra;
    637  1.1     bjh21                 z = - z;
    638  1.1     bjh21             }
    639  1.1     bjh21             else {
    640  1.1     bjh21                 z += ( roundingMode == float_round_up ) & aSigExtra;
    641  1.1     bjh21             }
    642  1.1     bjh21         }
    643  1.1     bjh21     }
    644  1.1     bjh21     return z;
    645  1.1     bjh21 
    646  1.1     bjh21 }
    647  1.1     bjh21 #endif
    648  1.1     bjh21 
    649  1.1     bjh21 /*
    650  1.1     bjh21 -------------------------------------------------------------------------------
    651  1.1     bjh21 Returns the result of converting the single-precision floating-point value
    652  1.1     bjh21 `a' to the 32-bit two's complement integer format.  The conversion is
    653  1.1     bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
    654  1.1     bjh21 Arithmetic, except that the conversion is always rounded toward zero.
    655  1.1     bjh21 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
    656  1.1     bjh21 the conversion overflows, the largest integer with the same sign as `a' is
    657  1.1     bjh21 returned.
    658  1.1     bjh21 -------------------------------------------------------------------------------
    659  1.1     bjh21 */
    660  1.1     bjh21 int32 float32_to_int32_round_to_zero( float32 a )
    661  1.1     bjh21 {
    662  1.1     bjh21     flag aSign;
    663  1.1     bjh21     int16 aExp, shiftCount;
    664  1.1     bjh21     bits32 aSig;
    665  1.1     bjh21     int32 z;
    666  1.1     bjh21 
    667  1.1     bjh21     aSig = extractFloat32Frac( a );
    668  1.1     bjh21     aExp = extractFloat32Exp( a );
    669  1.1     bjh21     aSign = extractFloat32Sign( a );
    670  1.1     bjh21     shiftCount = aExp - 0x9E;
    671  1.1     bjh21     if ( 0 <= shiftCount ) {
    672  1.1     bjh21         if ( a != 0xCF000000 ) {
    673  1.1     bjh21             float_raise( float_flag_invalid );
    674  1.1     bjh21             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
    675  1.1     bjh21         }
    676  1.1     bjh21         return (sbits32) 0x80000000;
    677  1.1     bjh21     }
    678  1.1     bjh21     else if ( aExp <= 0x7E ) {
    679  1.3      matt         if ( aExp | aSig ) set_float_exception_inexact_flag();
    680  1.1     bjh21         return 0;
    681  1.1     bjh21     }
    682  1.1     bjh21     aSig = ( aSig | 0x00800000 )<<8;
    683  1.1     bjh21     z = aSig>>( - shiftCount );
    684  1.1     bjh21     if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
    685  1.3      matt         set_float_exception_inexact_flag();
    686  1.1     bjh21     }
    687  1.1     bjh21     if ( aSign ) z = - z;
    688  1.1     bjh21     return z;
    689  1.1     bjh21 
    690  1.1     bjh21 }
    691  1.1     bjh21 
    692  1.1     bjh21 /*
    693  1.1     bjh21 -------------------------------------------------------------------------------
    694  1.1     bjh21 Returns the result of converting the single-precision floating-point value
    695  1.1     bjh21 `a' to the double-precision floating-point format.  The conversion is
    696  1.1     bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
    697  1.1     bjh21 Arithmetic.
    698  1.1     bjh21 -------------------------------------------------------------------------------
    699  1.1     bjh21 */
    700  1.1     bjh21 float64 float32_to_float64( float32 a )
    701  1.1     bjh21 {
    702  1.1     bjh21     flag aSign;
    703  1.1     bjh21     int16 aExp;
    704  1.1     bjh21     bits32 aSig, zSig0, zSig1;
    705  1.1     bjh21 
    706  1.1     bjh21     aSig = extractFloat32Frac( a );
    707  1.1     bjh21     aExp = extractFloat32Exp( a );
    708  1.1     bjh21     aSign = extractFloat32Sign( a );
    709  1.1     bjh21     if ( aExp == 0xFF ) {
    710  1.1     bjh21         if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
    711  1.1     bjh21         return packFloat64( aSign, 0x7FF, 0, 0 );
    712  1.1     bjh21     }
    713  1.1     bjh21     if ( aExp == 0 ) {
    714  1.1     bjh21         if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 );
    715  1.1     bjh21         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    716  1.1     bjh21         --aExp;
    717  1.1     bjh21     }
    718  1.1     bjh21     shift64Right( aSig, 0, 3, &zSig0, &zSig1 );
    719  1.1     bjh21     return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 );
    720  1.1     bjh21 
    721  1.1     bjh21 }
    722  1.1     bjh21 
    723  1.1     bjh21 #ifndef SOFTFLOAT_FOR_GCC
    724  1.1     bjh21 /*
    725  1.1     bjh21 -------------------------------------------------------------------------------
    726  1.1     bjh21 Rounds the single-precision floating-point value `a' to an integer,
    727  1.1     bjh21 and returns the result as a single-precision floating-point value.  The
    728  1.1     bjh21 operation is performed according to the IEC/IEEE Standard for Binary
    729  1.1     bjh21 Floating-Point Arithmetic.
    730  1.1     bjh21 -------------------------------------------------------------------------------
    731  1.1     bjh21 */
    732  1.1     bjh21 float32 float32_round_to_int( float32 a )
    733  1.1     bjh21 {
    734  1.1     bjh21     flag aSign;
    735  1.1     bjh21     int16 aExp;
    736  1.1     bjh21     bits32 lastBitMask, roundBitsMask;
    737  1.1     bjh21     int8 roundingMode;
    738  1.1     bjh21     float32 z;
    739  1.1     bjh21 
    740  1.1     bjh21     aExp = extractFloat32Exp( a );
    741  1.1     bjh21     if ( 0x96 <= aExp ) {
    742  1.1     bjh21         if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
    743  1.1     bjh21             return propagateFloat32NaN( a, a );
    744  1.1     bjh21         }
    745  1.1     bjh21         return a;
    746  1.1     bjh21     }
    747  1.1     bjh21     if ( aExp <= 0x7E ) {
    748  1.1     bjh21         if ( (bits32) ( a<<1 ) == 0 ) return a;
    749  1.3      matt         set_float_exception_inexact_flag();
    750  1.1     bjh21         aSign = extractFloat32Sign( a );
    751  1.1     bjh21         switch ( float_rounding_mode ) {
    752  1.1     bjh21          case float_round_nearest_even:
    753  1.1     bjh21             if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
    754  1.1     bjh21                 return packFloat32( aSign, 0x7F, 0 );
    755  1.1     bjh21             }
    756  1.1     bjh21             break;
    757  1.1     bjh21 	 case float_round_to_zero:
    758  1.1     bjh21 	    break;
    759  1.1     bjh21          case float_round_down:
    760  1.1     bjh21             return aSign ? 0xBF800000 : 0;
    761  1.1     bjh21          case float_round_up:
    762  1.1     bjh21             return aSign ? 0x80000000 : 0x3F800000;
    763  1.1     bjh21         }
    764  1.1     bjh21         return packFloat32( aSign, 0, 0 );
    765  1.1     bjh21     }
    766  1.1     bjh21     lastBitMask = 1;
    767  1.1     bjh21     lastBitMask <<= 0x96 - aExp;
    768  1.1     bjh21     roundBitsMask = lastBitMask - 1;
    769  1.1     bjh21     z = a;
    770  1.1     bjh21     roundingMode = float_rounding_mode;
    771  1.1     bjh21     if ( roundingMode == float_round_nearest_even ) {
    772  1.1     bjh21         z += lastBitMask>>1;
    773  1.1     bjh21         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
    774  1.1     bjh21     }
    775  1.1     bjh21     else if ( roundingMode != float_round_to_zero ) {
    776  1.1     bjh21         if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
    777  1.1     bjh21             z += roundBitsMask;
    778  1.1     bjh21         }
    779  1.1     bjh21     }
    780  1.1     bjh21     z &= ~ roundBitsMask;
    781  1.3      matt     if ( z != a ) set_float_exception_inexact_flag();
    782  1.1     bjh21     return z;
    783  1.1     bjh21 
    784  1.1     bjh21 }
    785  1.1     bjh21 #endif
    786  1.1     bjh21 
    787  1.1     bjh21 /*
    788  1.1     bjh21 -------------------------------------------------------------------------------
    789  1.1     bjh21 Returns the result of adding the absolute values of the single-precision
    790  1.1     bjh21 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
    791  1.1     bjh21 before being returned.  `zSign' is ignored if the result is a NaN.
    792  1.1     bjh21 The addition is performed according to the IEC/IEEE Standard for Binary
    793  1.1     bjh21 Floating-Point Arithmetic.
    794  1.1     bjh21 -------------------------------------------------------------------------------
    795  1.1     bjh21 */
    796  1.1     bjh21 static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
    797  1.1     bjh21 {
    798  1.1     bjh21     int16 aExp, bExp, zExp;
    799  1.1     bjh21     bits32 aSig, bSig, zSig;
    800  1.1     bjh21     int16 expDiff;
    801  1.1     bjh21 
    802  1.1     bjh21     aSig = extractFloat32Frac( a );
    803  1.1     bjh21     aExp = extractFloat32Exp( a );
    804  1.1     bjh21     bSig = extractFloat32Frac( b );
    805  1.1     bjh21     bExp = extractFloat32Exp( b );
    806  1.1     bjh21     expDiff = aExp - bExp;
    807  1.1     bjh21     aSig <<= 6;
    808  1.1     bjh21     bSig <<= 6;
    809  1.1     bjh21     if ( 0 < expDiff ) {
    810  1.1     bjh21         if ( aExp == 0xFF ) {
    811  1.1     bjh21             if ( aSig ) return propagateFloat32NaN( a, b );
    812  1.1     bjh21             return a;
    813  1.1     bjh21         }
    814  1.1     bjh21         if ( bExp == 0 ) {
    815  1.1     bjh21             --expDiff;
    816  1.1     bjh21         }
    817  1.1     bjh21         else {
    818  1.1     bjh21             bSig |= 0x20000000;
    819  1.1     bjh21         }
    820  1.1     bjh21         shift32RightJamming( bSig, expDiff, &bSig );
    821  1.1     bjh21         zExp = aExp;
    822  1.1     bjh21     }
    823  1.1     bjh21     else if ( expDiff < 0 ) {
    824  1.1     bjh21         if ( bExp == 0xFF ) {
    825  1.1     bjh21             if ( bSig ) return propagateFloat32NaN( a, b );
    826  1.1     bjh21             return packFloat32( zSign, 0xFF, 0 );
    827  1.1     bjh21         }
    828  1.1     bjh21         if ( aExp == 0 ) {
    829  1.1     bjh21             ++expDiff;
    830  1.1     bjh21         }
    831  1.1     bjh21         else {
    832  1.1     bjh21             aSig |= 0x20000000;
    833  1.1     bjh21         }
    834  1.1     bjh21         shift32RightJamming( aSig, - expDiff, &aSig );
    835  1.1     bjh21         zExp = bExp;
    836  1.1     bjh21     }
    837  1.1     bjh21     else {
    838  1.1     bjh21         if ( aExp == 0xFF ) {
    839  1.1     bjh21             if ( aSig | bSig ) return propagateFloat32NaN( a, b );
    840  1.1     bjh21             return a;
    841  1.1     bjh21         }
    842  1.1     bjh21         if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
    843  1.1     bjh21         zSig = 0x40000000 + aSig + bSig;
    844  1.1     bjh21         zExp = aExp;
    845  1.1     bjh21         goto roundAndPack;
    846  1.1     bjh21     }
    847  1.1     bjh21     aSig |= 0x20000000;
    848  1.1     bjh21     zSig = ( aSig + bSig )<<1;
    849  1.1     bjh21     --zExp;
    850  1.1     bjh21     if ( (sbits32) zSig < 0 ) {
    851  1.1     bjh21         zSig = aSig + bSig;
    852  1.1     bjh21         ++zExp;
    853  1.1     bjh21     }
    854  1.1     bjh21  roundAndPack:
    855  1.1     bjh21     return roundAndPackFloat32( zSign, zExp, zSig );
    856  1.1     bjh21 
    857  1.1     bjh21 }
    858  1.1     bjh21 
    859  1.1     bjh21 /*
    860  1.1     bjh21 -------------------------------------------------------------------------------
    861  1.1     bjh21 Returns the result of subtracting the absolute values of the single-
    862  1.1     bjh21 precision floating-point values `a' and `b'.  If `zSign' is 1, the
    863  1.1     bjh21 difference is negated before being returned.  `zSign' is ignored if the
    864  1.1     bjh21 result is a NaN.  The subtraction is performed according to the IEC/IEEE
    865  1.1     bjh21 Standard for Binary Floating-Point Arithmetic.
    866  1.1     bjh21 -------------------------------------------------------------------------------
    867  1.1     bjh21 */
    868  1.1     bjh21 static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
    869  1.1     bjh21 {
    870  1.1     bjh21     int16 aExp, bExp, zExp;
    871  1.1     bjh21     bits32 aSig, bSig, zSig;
    872  1.1     bjh21     int16 expDiff;
    873  1.1     bjh21 
    874  1.1     bjh21     aSig = extractFloat32Frac( a );
    875  1.1     bjh21     aExp = extractFloat32Exp( a );
    876  1.1     bjh21     bSig = extractFloat32Frac( b );
    877  1.1     bjh21     bExp = extractFloat32Exp( b );
    878  1.1     bjh21     expDiff = aExp - bExp;
    879  1.1     bjh21     aSig <<= 7;
    880  1.1     bjh21     bSig <<= 7;
    881  1.1     bjh21     if ( 0 < expDiff ) goto aExpBigger;
    882  1.1     bjh21     if ( expDiff < 0 ) goto bExpBigger;
    883  1.1     bjh21     if ( aExp == 0xFF ) {
    884  1.1     bjh21         if ( aSig | bSig ) return propagateFloat32NaN( a, b );
    885  1.1     bjh21         float_raise( float_flag_invalid );
    886  1.1     bjh21         return float32_default_nan;
    887  1.1     bjh21     }
    888  1.1     bjh21     if ( aExp == 0 ) {
    889  1.1     bjh21         aExp = 1;
    890  1.1     bjh21         bExp = 1;
    891  1.1     bjh21     }
    892  1.1     bjh21     if ( bSig < aSig ) goto aBigger;
    893  1.1     bjh21     if ( aSig < bSig ) goto bBigger;
    894  1.1     bjh21     return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
    895  1.1     bjh21  bExpBigger:
    896  1.1     bjh21     if ( bExp == 0xFF ) {
    897  1.1     bjh21         if ( bSig ) return propagateFloat32NaN( a, b );
    898  1.1     bjh21         return packFloat32( zSign ^ 1, 0xFF, 0 );
    899  1.1     bjh21     }
    900  1.1     bjh21     if ( aExp == 0 ) {
    901  1.1     bjh21         ++expDiff;
    902  1.1     bjh21     }
    903  1.1     bjh21     else {
    904  1.1     bjh21         aSig |= 0x40000000;
    905  1.1     bjh21     }
    906  1.1     bjh21     shift32RightJamming( aSig, - expDiff, &aSig );
    907  1.1     bjh21     bSig |= 0x40000000;
    908  1.1     bjh21  bBigger:
    909  1.1     bjh21     zSig = bSig - aSig;
    910  1.1     bjh21     zExp = bExp;
    911  1.1     bjh21     zSign ^= 1;
    912  1.1     bjh21     goto normalizeRoundAndPack;
    913  1.1     bjh21  aExpBigger:
    914  1.1     bjh21     if ( aExp == 0xFF ) {
    915  1.1     bjh21         if ( aSig ) return propagateFloat32NaN( a, b );
    916  1.1     bjh21         return a;
    917  1.1     bjh21     }
    918  1.1     bjh21     if ( bExp == 0 ) {
    919  1.1     bjh21         --expDiff;
    920  1.1     bjh21     }
    921  1.1     bjh21     else {
    922  1.1     bjh21         bSig |= 0x40000000;
    923  1.1     bjh21     }
    924  1.1     bjh21     shift32RightJamming( bSig, expDiff, &bSig );
    925  1.1     bjh21     aSig |= 0x40000000;
    926  1.1     bjh21  aBigger:
    927  1.1     bjh21     zSig = aSig - bSig;
    928  1.1     bjh21     zExp = aExp;
    929  1.1     bjh21  normalizeRoundAndPack:
    930  1.1     bjh21     --zExp;
    931  1.1     bjh21     return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
    932  1.1     bjh21 
    933  1.1     bjh21 }
    934  1.1     bjh21 
    935  1.1     bjh21 /*
    936  1.1     bjh21 -------------------------------------------------------------------------------
    937  1.1     bjh21 Returns the result of adding the single-precision floating-point values `a'
    938  1.1     bjh21 and `b'.  The operation is performed according to the IEC/IEEE Standard for
    939  1.1     bjh21 Binary Floating-Point Arithmetic.
    940  1.1     bjh21 -------------------------------------------------------------------------------
    941  1.1     bjh21 */
    942  1.1     bjh21 float32 float32_add( float32 a, float32 b )
    943  1.1     bjh21 {
    944  1.1     bjh21     flag aSign, bSign;
    945  1.1     bjh21 
    946  1.1     bjh21     aSign = extractFloat32Sign( a );
    947  1.1     bjh21     bSign = extractFloat32Sign( b );
    948  1.1     bjh21     if ( aSign == bSign ) {
    949  1.1     bjh21         return addFloat32Sigs( a, b, aSign );
    950  1.1     bjh21     }
    951  1.1     bjh21     else {
    952  1.1     bjh21         return subFloat32Sigs( a, b, aSign );
    953  1.1     bjh21     }
    954  1.1     bjh21 
    955  1.1     bjh21 }
    956  1.1     bjh21 
    957  1.1     bjh21 /*
    958  1.1     bjh21 -------------------------------------------------------------------------------
    959  1.1     bjh21 Returns the result of subtracting the single-precision floating-point values
    960  1.1     bjh21 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
    961  1.1     bjh21 for Binary Floating-Point Arithmetic.
    962  1.1     bjh21 -------------------------------------------------------------------------------
    963  1.1     bjh21 */
    964  1.1     bjh21 float32 float32_sub( float32 a, float32 b )
    965  1.1     bjh21 {
    966  1.1     bjh21     flag aSign, bSign;
    967  1.1     bjh21 
    968  1.1     bjh21     aSign = extractFloat32Sign( a );
    969  1.1     bjh21     bSign = extractFloat32Sign( b );
    970  1.1     bjh21     if ( aSign == bSign ) {
    971  1.1     bjh21         return subFloat32Sigs( a, b, aSign );
    972  1.1     bjh21     }
    973  1.1     bjh21     else {
    974  1.1     bjh21         return addFloat32Sigs( a, b, aSign );
    975  1.1     bjh21     }
    976  1.1     bjh21 
    977  1.1     bjh21 }
    978  1.1     bjh21 
    979  1.1     bjh21 /*
    980  1.1     bjh21 -------------------------------------------------------------------------------
    981  1.1     bjh21 Returns the result of multiplying the single-precision floating-point values
    982  1.1     bjh21 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
    983  1.1     bjh21 for Binary Floating-Point Arithmetic.
    984  1.1     bjh21 -------------------------------------------------------------------------------
    985  1.1     bjh21 */
    986  1.1     bjh21 float32 float32_mul( float32 a, float32 b )
    987  1.1     bjh21 {
    988  1.1     bjh21     flag aSign, bSign, zSign;
    989  1.1     bjh21     int16 aExp, bExp, zExp;
    990  1.1     bjh21     bits32 aSig, bSig, zSig0, zSig1;
    991  1.1     bjh21 
    992  1.1     bjh21     aSig = extractFloat32Frac( a );
    993  1.1     bjh21     aExp = extractFloat32Exp( a );
    994  1.1     bjh21     aSign = extractFloat32Sign( a );
    995  1.1     bjh21     bSig = extractFloat32Frac( b );
    996  1.1     bjh21     bExp = extractFloat32Exp( b );
    997  1.1     bjh21     bSign = extractFloat32Sign( b );
    998  1.1     bjh21     zSign = aSign ^ bSign;
    999  1.1     bjh21     if ( aExp == 0xFF ) {
   1000  1.1     bjh21         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
   1001  1.1     bjh21             return propagateFloat32NaN( a, b );
   1002  1.1     bjh21         }
   1003  1.1     bjh21         if ( ( bExp | bSig ) == 0 ) {
   1004  1.1     bjh21             float_raise( float_flag_invalid );
   1005  1.1     bjh21             return float32_default_nan;
   1006  1.1     bjh21         }
   1007  1.1     bjh21         return packFloat32( zSign, 0xFF, 0 );
   1008  1.1     bjh21     }
   1009  1.1     bjh21     if ( bExp == 0xFF ) {
   1010  1.1     bjh21         if ( bSig ) return propagateFloat32NaN( a, b );
   1011  1.1     bjh21         if ( ( aExp | aSig ) == 0 ) {
   1012  1.1     bjh21             float_raise( float_flag_invalid );
   1013  1.1     bjh21             return float32_default_nan;
   1014  1.1     bjh21         }
   1015  1.1     bjh21         return packFloat32( zSign, 0xFF, 0 );
   1016  1.1     bjh21     }
   1017  1.1     bjh21     if ( aExp == 0 ) {
   1018  1.1     bjh21         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
   1019  1.1     bjh21         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1020  1.1     bjh21     }
   1021  1.1     bjh21     if ( bExp == 0 ) {
   1022  1.1     bjh21         if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
   1023  1.1     bjh21         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
   1024  1.1     bjh21     }
   1025  1.1     bjh21     zExp = aExp + bExp - 0x7F;
   1026  1.1     bjh21     aSig = ( aSig | 0x00800000 )<<7;
   1027  1.1     bjh21     bSig = ( bSig | 0x00800000 )<<8;
   1028  1.1     bjh21     mul32To64( aSig, bSig, &zSig0, &zSig1 );
   1029  1.1     bjh21     zSig0 |= ( zSig1 != 0 );
   1030  1.1     bjh21     if ( 0 <= (sbits32) ( zSig0<<1 ) ) {
   1031  1.1     bjh21         zSig0 <<= 1;
   1032  1.1     bjh21         --zExp;
   1033  1.1     bjh21     }
   1034  1.1     bjh21     return roundAndPackFloat32( zSign, zExp, zSig0 );
   1035  1.1     bjh21 
   1036  1.1     bjh21 }
   1037  1.1     bjh21 
   1038  1.1     bjh21 /*
   1039  1.1     bjh21 -------------------------------------------------------------------------------
   1040  1.1     bjh21 Returns the result of dividing the single-precision floating-point value `a'
   1041  1.1     bjh21 by the corresponding value `b'.  The operation is performed according to the
   1042  1.1     bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1043  1.1     bjh21 -------------------------------------------------------------------------------
   1044  1.1     bjh21 */
   1045  1.1     bjh21 float32 float32_div( float32 a, float32 b )
   1046  1.1     bjh21 {
   1047  1.1     bjh21     flag aSign, bSign, zSign;
   1048  1.1     bjh21     int16 aExp, bExp, zExp;
   1049  1.1     bjh21     bits32 aSig, bSig, zSig, rem0, rem1, term0, term1;
   1050  1.1     bjh21 
   1051  1.1     bjh21     aSig = extractFloat32Frac( a );
   1052  1.1     bjh21     aExp = extractFloat32Exp( a );
   1053  1.1     bjh21     aSign = extractFloat32Sign( a );
   1054  1.1     bjh21     bSig = extractFloat32Frac( b );
   1055  1.1     bjh21     bExp = extractFloat32Exp( b );
   1056  1.1     bjh21     bSign = extractFloat32Sign( b );
   1057  1.1     bjh21     zSign = aSign ^ bSign;
   1058  1.1     bjh21     if ( aExp == 0xFF ) {
   1059  1.1     bjh21         if ( aSig ) return propagateFloat32NaN( a, b );
   1060  1.1     bjh21         if ( bExp == 0xFF ) {
   1061  1.1     bjh21             if ( bSig ) return propagateFloat32NaN( a, b );
   1062  1.1     bjh21             float_raise( float_flag_invalid );
   1063  1.1     bjh21             return float32_default_nan;
   1064  1.1     bjh21         }
   1065  1.1     bjh21         return packFloat32( zSign, 0xFF, 0 );
   1066  1.1     bjh21     }
   1067  1.1     bjh21     if ( bExp == 0xFF ) {
   1068  1.1     bjh21         if ( bSig ) return propagateFloat32NaN( a, b );
   1069  1.1     bjh21         return packFloat32( zSign, 0, 0 );
   1070  1.1     bjh21     }
   1071  1.1     bjh21     if ( bExp == 0 ) {
   1072  1.1     bjh21         if ( bSig == 0 ) {
   1073  1.1     bjh21             if ( ( aExp | aSig ) == 0 ) {
   1074  1.1     bjh21                 float_raise( float_flag_invalid );
   1075  1.1     bjh21                 return float32_default_nan;
   1076  1.1     bjh21             }
   1077  1.1     bjh21             float_raise( float_flag_divbyzero );
   1078  1.1     bjh21             return packFloat32( zSign, 0xFF, 0 );
   1079  1.1     bjh21         }
   1080  1.1     bjh21         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
   1081  1.1     bjh21     }
   1082  1.1     bjh21     if ( aExp == 0 ) {
   1083  1.1     bjh21         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
   1084  1.1     bjh21         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1085  1.1     bjh21     }
   1086  1.1     bjh21     zExp = aExp - bExp + 0x7D;
   1087  1.1     bjh21     aSig = ( aSig | 0x00800000 )<<7;
   1088  1.1     bjh21     bSig = ( bSig | 0x00800000 )<<8;
   1089  1.1     bjh21     if ( bSig <= ( aSig + aSig ) ) {
   1090  1.1     bjh21         aSig >>= 1;
   1091  1.1     bjh21         ++zExp;
   1092  1.1     bjh21     }
   1093  1.1     bjh21     zSig = estimateDiv64To32( aSig, 0, bSig );
   1094  1.1     bjh21     if ( ( zSig & 0x3F ) <= 2 ) {
   1095  1.1     bjh21         mul32To64( bSig, zSig, &term0, &term1 );
   1096  1.1     bjh21         sub64( aSig, 0, term0, term1, &rem0, &rem1 );
   1097  1.1     bjh21         while ( (sbits32) rem0 < 0 ) {
   1098  1.1     bjh21             --zSig;
   1099  1.1     bjh21             add64( rem0, rem1, 0, bSig, &rem0, &rem1 );
   1100  1.1     bjh21         }
   1101  1.1     bjh21         zSig |= ( rem1 != 0 );
   1102  1.1     bjh21     }
   1103  1.1     bjh21     return roundAndPackFloat32( zSign, zExp, zSig );
   1104  1.1     bjh21 
   1105  1.1     bjh21 }
   1106  1.1     bjh21 
   1107  1.1     bjh21 #ifndef SOFTFLOAT_FOR_GCC
   1108  1.1     bjh21 /*
   1109  1.1     bjh21 -------------------------------------------------------------------------------
   1110  1.1     bjh21 Returns the remainder of the single-precision floating-point value `a'
   1111  1.1     bjh21 with respect to the corresponding value `b'.  The operation is performed
   1112  1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1113  1.1     bjh21 -------------------------------------------------------------------------------
   1114  1.1     bjh21 */
   1115  1.1     bjh21 float32 float32_rem( float32 a, float32 b )
   1116  1.1     bjh21 {
   1117  1.1     bjh21     flag aSign, bSign, zSign;
   1118  1.1     bjh21     int16 aExp, bExp, expDiff;
   1119  1.1     bjh21     bits32 aSig, bSig, q, allZero, alternateASig;
   1120  1.1     bjh21     sbits32 sigMean;
   1121  1.1     bjh21 
   1122  1.1     bjh21     aSig = extractFloat32Frac( a );
   1123  1.1     bjh21     aExp = extractFloat32Exp( a );
   1124  1.1     bjh21     aSign = extractFloat32Sign( a );
   1125  1.1     bjh21     bSig = extractFloat32Frac( b );
   1126  1.1     bjh21     bExp = extractFloat32Exp( b );
   1127  1.1     bjh21     bSign = extractFloat32Sign( b );
   1128  1.1     bjh21     if ( aExp == 0xFF ) {
   1129  1.1     bjh21         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
   1130  1.1     bjh21             return propagateFloat32NaN( a, b );
   1131  1.1     bjh21         }
   1132  1.1     bjh21         float_raise( float_flag_invalid );
   1133  1.1     bjh21         return float32_default_nan;
   1134  1.1     bjh21     }
   1135  1.1     bjh21     if ( bExp == 0xFF ) {
   1136  1.1     bjh21         if ( bSig ) return propagateFloat32NaN( a, b );
   1137  1.1     bjh21         return a;
   1138  1.1     bjh21     }
   1139  1.1     bjh21     if ( bExp == 0 ) {
   1140  1.1     bjh21         if ( bSig == 0 ) {
   1141  1.1     bjh21             float_raise( float_flag_invalid );
   1142  1.1     bjh21             return float32_default_nan;
   1143  1.1     bjh21         }
   1144  1.1     bjh21         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
   1145  1.1     bjh21     }
   1146  1.1     bjh21     if ( aExp == 0 ) {
   1147  1.1     bjh21         if ( aSig == 0 ) return a;
   1148  1.1     bjh21         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1149  1.1     bjh21     }
   1150  1.1     bjh21     expDiff = aExp - bExp;
   1151  1.1     bjh21     aSig = ( aSig | 0x00800000 )<<8;
   1152  1.1     bjh21     bSig = ( bSig | 0x00800000 )<<8;
   1153  1.1     bjh21     if ( expDiff < 0 ) {
   1154  1.1     bjh21         if ( expDiff < -1 ) return a;
   1155  1.1     bjh21         aSig >>= 1;
   1156  1.1     bjh21     }
   1157  1.1     bjh21     q = ( bSig <= aSig );
   1158  1.1     bjh21     if ( q ) aSig -= bSig;
   1159  1.1     bjh21     expDiff -= 32;
   1160  1.1     bjh21     while ( 0 < expDiff ) {
   1161  1.1     bjh21         q = estimateDiv64To32( aSig, 0, bSig );
   1162  1.1     bjh21         q = ( 2 < q ) ? q - 2 : 0;
   1163  1.1     bjh21         aSig = - ( ( bSig>>2 ) * q );
   1164  1.1     bjh21         expDiff -= 30;
   1165  1.1     bjh21     }
   1166  1.1     bjh21     expDiff += 32;
   1167  1.1     bjh21     if ( 0 < expDiff ) {
   1168  1.1     bjh21         q = estimateDiv64To32( aSig, 0, bSig );
   1169  1.1     bjh21         q = ( 2 < q ) ? q - 2 : 0;
   1170  1.1     bjh21         q >>= 32 - expDiff;
   1171  1.1     bjh21         bSig >>= 2;
   1172  1.1     bjh21         aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
   1173  1.1     bjh21     }
   1174  1.1     bjh21     else {
   1175  1.1     bjh21         aSig >>= 2;
   1176  1.1     bjh21         bSig >>= 2;
   1177  1.1     bjh21     }
   1178  1.1     bjh21     do {
   1179  1.1     bjh21         alternateASig = aSig;
   1180  1.1     bjh21         ++q;
   1181  1.1     bjh21         aSig -= bSig;
   1182  1.1     bjh21     } while ( 0 <= (sbits32) aSig );
   1183  1.1     bjh21     sigMean = aSig + alternateASig;
   1184  1.1     bjh21     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
   1185  1.1     bjh21         aSig = alternateASig;
   1186  1.1     bjh21     }
   1187  1.1     bjh21     zSign = ( (sbits32) aSig < 0 );
   1188  1.1     bjh21     if ( zSign ) aSig = - aSig;
   1189  1.1     bjh21     return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
   1190  1.1     bjh21 
   1191  1.1     bjh21 }
   1192  1.1     bjh21 #endif
   1193  1.1     bjh21 
   1194  1.1     bjh21 #ifndef SOFTFLOAT_FOR_GCC
   1195  1.1     bjh21 /*
   1196  1.1     bjh21 -------------------------------------------------------------------------------
   1197  1.1     bjh21 Returns the square root of the single-precision floating-point value `a'.
   1198  1.1     bjh21 The operation is performed according to the IEC/IEEE Standard for Binary
   1199  1.1     bjh21 Floating-Point Arithmetic.
   1200  1.1     bjh21 -------------------------------------------------------------------------------
   1201  1.1     bjh21 */
   1202  1.1     bjh21 float32 float32_sqrt( float32 a )
   1203  1.1     bjh21 {
   1204  1.1     bjh21     flag aSign;
   1205  1.1     bjh21     int16 aExp, zExp;
   1206  1.1     bjh21     bits32 aSig, zSig, rem0, rem1, term0, term1;
   1207  1.1     bjh21 
   1208  1.1     bjh21     aSig = extractFloat32Frac( a );
   1209  1.1     bjh21     aExp = extractFloat32Exp( a );
   1210  1.1     bjh21     aSign = extractFloat32Sign( a );
   1211  1.1     bjh21     if ( aExp == 0xFF ) {
   1212  1.1     bjh21         if ( aSig ) return propagateFloat32NaN( a, 0 );
   1213  1.1     bjh21         if ( ! aSign ) return a;
   1214  1.1     bjh21         float_raise( float_flag_invalid );
   1215  1.1     bjh21         return float32_default_nan;
   1216  1.1     bjh21     }
   1217  1.1     bjh21     if ( aSign ) {
   1218  1.1     bjh21         if ( ( aExp | aSig ) == 0 ) return a;
   1219  1.1     bjh21         float_raise( float_flag_invalid );
   1220  1.1     bjh21         return float32_default_nan;
   1221  1.1     bjh21     }
   1222  1.1     bjh21     if ( aExp == 0 ) {
   1223  1.1     bjh21         if ( aSig == 0 ) return 0;
   1224  1.1     bjh21         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1225  1.1     bjh21     }
   1226  1.1     bjh21     zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
   1227  1.1     bjh21     aSig = ( aSig | 0x00800000 )<<8;
   1228  1.1     bjh21     zSig = estimateSqrt32( aExp, aSig ) + 2;
   1229  1.1     bjh21     if ( ( zSig & 0x7F ) <= 5 ) {
   1230  1.1     bjh21         if ( zSig < 2 ) {
   1231  1.1     bjh21             zSig = 0x7FFFFFFF;
   1232  1.1     bjh21             goto roundAndPack;
   1233  1.1     bjh21         }
   1234  1.1     bjh21         else {
   1235  1.1     bjh21             aSig >>= aExp & 1;
   1236  1.1     bjh21             mul32To64( zSig, zSig, &term0, &term1 );
   1237  1.1     bjh21             sub64( aSig, 0, term0, term1, &rem0, &rem1 );
   1238  1.1     bjh21             while ( (sbits32) rem0 < 0 ) {
   1239  1.1     bjh21                 --zSig;
   1240  1.1     bjh21                 shortShift64Left( 0, zSig, 1, &term0, &term1 );
   1241  1.1     bjh21                 term1 |= 1;
   1242  1.1     bjh21                 add64( rem0, rem1, term0, term1, &rem0, &rem1 );
   1243  1.1     bjh21             }
   1244  1.1     bjh21             zSig |= ( ( rem0 | rem1 ) != 0 );
   1245  1.1     bjh21         }
   1246  1.1     bjh21     }
   1247  1.1     bjh21     shift32RightJamming( zSig, 1, &zSig );
   1248  1.1     bjh21  roundAndPack:
   1249  1.1     bjh21     return roundAndPackFloat32( 0, zExp, zSig );
   1250  1.1     bjh21 
   1251  1.1     bjh21 }
   1252  1.1     bjh21 #endif
   1253  1.1     bjh21 
   1254  1.1     bjh21 /*
   1255  1.1     bjh21 -------------------------------------------------------------------------------
   1256  1.1     bjh21 Returns 1 if the single-precision floating-point value `a' is equal to
   1257  1.1     bjh21 the corresponding value `b', and 0 otherwise.  The comparison is performed
   1258  1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1259  1.1     bjh21 -------------------------------------------------------------------------------
   1260  1.1     bjh21 */
   1261  1.1     bjh21 flag float32_eq( float32 a, float32 b )
   1262  1.1     bjh21 {
   1263  1.1     bjh21 
   1264  1.1     bjh21     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   1265  1.1     bjh21          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   1266  1.1     bjh21        ) {
   1267  1.1     bjh21         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
   1268  1.1     bjh21             float_raise( float_flag_invalid );
   1269  1.1     bjh21         }
   1270  1.1     bjh21         return 0;
   1271  1.1     bjh21     }
   1272  1.1     bjh21     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
   1273  1.1     bjh21 
   1274  1.1     bjh21 }
   1275  1.1     bjh21 
   1276  1.1     bjh21 /*
   1277  1.1     bjh21 -------------------------------------------------------------------------------
   1278  1.1     bjh21 Returns 1 if the single-precision floating-point value `a' is less than
   1279  1.1     bjh21 or equal to the corresponding value `b', and 0 otherwise.  The comparison
   1280  1.1     bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   1281  1.1     bjh21 Arithmetic.
   1282  1.1     bjh21 -------------------------------------------------------------------------------
   1283  1.1     bjh21 */
   1284  1.1     bjh21 flag float32_le( float32 a, float32 b )
   1285  1.1     bjh21 {
   1286  1.1     bjh21     flag aSign, bSign;
   1287  1.1     bjh21 
   1288  1.1     bjh21     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   1289  1.1     bjh21          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   1290  1.1     bjh21        ) {
   1291  1.1     bjh21         float_raise( float_flag_invalid );
   1292  1.1     bjh21         return 0;
   1293  1.1     bjh21     }
   1294  1.1     bjh21     aSign = extractFloat32Sign( a );
   1295  1.1     bjh21     bSign = extractFloat32Sign( b );
   1296  1.1     bjh21     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
   1297  1.1     bjh21     return ( a == b ) || ( aSign ^ ( a < b ) );
   1298  1.1     bjh21 
   1299  1.1     bjh21 }
   1300  1.1     bjh21 
   1301  1.1     bjh21 /*
   1302  1.1     bjh21 -------------------------------------------------------------------------------
   1303  1.1     bjh21 Returns 1 if the single-precision floating-point value `a' is less than
   1304  1.1     bjh21 the corresponding value `b', and 0 otherwise.  The comparison is performed
   1305  1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1306  1.1     bjh21 -------------------------------------------------------------------------------
   1307  1.1     bjh21 */
   1308  1.1     bjh21 flag float32_lt( float32 a, float32 b )
   1309  1.1     bjh21 {
   1310  1.1     bjh21     flag aSign, bSign;
   1311  1.1     bjh21 
   1312  1.1     bjh21     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   1313  1.1     bjh21          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   1314  1.1     bjh21        ) {
   1315  1.1     bjh21         float_raise( float_flag_invalid );
   1316  1.1     bjh21         return 0;
   1317  1.1     bjh21     }
   1318  1.1     bjh21     aSign = extractFloat32Sign( a );
   1319  1.1     bjh21     bSign = extractFloat32Sign( b );
   1320  1.1     bjh21     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
   1321  1.1     bjh21     return ( a != b ) && ( aSign ^ ( a < b ) );
   1322  1.1     bjh21 
   1323  1.1     bjh21 }
   1324  1.1     bjh21 
   1325  1.1     bjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   1326  1.1     bjh21 /*
   1327  1.1     bjh21 -------------------------------------------------------------------------------
   1328  1.1     bjh21 Returns 1 if the single-precision floating-point value `a' is equal to
   1329  1.1     bjh21 the corresponding value `b', and 0 otherwise.  The invalid exception is
   1330  1.1     bjh21 raised if either operand is a NaN.  Otherwise, the comparison is performed
   1331  1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1332  1.1     bjh21 -------------------------------------------------------------------------------
   1333  1.1     bjh21 */
   1334  1.1     bjh21 flag float32_eq_signaling( float32 a, float32 b )
   1335  1.1     bjh21 {
   1336  1.1     bjh21 
   1337  1.1     bjh21     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   1338  1.1     bjh21          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   1339  1.1     bjh21        ) {
   1340  1.1     bjh21         float_raise( float_flag_invalid );
   1341  1.1     bjh21         return 0;
   1342  1.1     bjh21     }
   1343  1.1     bjh21     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
   1344  1.1     bjh21 
   1345  1.1     bjh21 }
   1346  1.1     bjh21 
   1347  1.1     bjh21 /*
   1348  1.1     bjh21 -------------------------------------------------------------------------------
   1349  1.1     bjh21 Returns 1 if the single-precision floating-point value `a' is less than or
   1350  1.1     bjh21 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
   1351  1.1     bjh21 cause an exception.  Otherwise, the comparison is performed according to the
   1352  1.1     bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1353  1.1     bjh21 -------------------------------------------------------------------------------
   1354  1.1     bjh21 */
   1355  1.1     bjh21 flag float32_le_quiet( float32 a, float32 b )
   1356  1.1     bjh21 {
   1357  1.1     bjh21     flag aSign, bSign;
   1358  1.1     bjh21     int16 aExp, bExp;
   1359  1.1     bjh21 
   1360  1.1     bjh21     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   1361  1.1     bjh21          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   1362  1.1     bjh21        ) {
   1363  1.1     bjh21         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
   1364  1.1     bjh21             float_raise( float_flag_invalid );
   1365  1.1     bjh21         }
   1366  1.1     bjh21         return 0;
   1367  1.1     bjh21     }
   1368  1.1     bjh21     aSign = extractFloat32Sign( a );
   1369  1.1     bjh21     bSign = extractFloat32Sign( b );
   1370  1.1     bjh21     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
   1371  1.1     bjh21     return ( a == b ) || ( aSign ^ ( a < b ) );
   1372  1.1     bjh21 
   1373  1.1     bjh21 }
   1374  1.1     bjh21 
   1375  1.1     bjh21 /*
   1376  1.1     bjh21 -------------------------------------------------------------------------------
   1377  1.1     bjh21 Returns 1 if the single-precision floating-point value `a' is less than
   1378  1.1     bjh21 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
   1379  1.1     bjh21 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
   1380  1.1     bjh21 Standard for Binary Floating-Point Arithmetic.
   1381  1.1     bjh21 -------------------------------------------------------------------------------
   1382  1.1     bjh21 */
   1383  1.1     bjh21 flag float32_lt_quiet( float32 a, float32 b )
   1384  1.1     bjh21 {
   1385  1.1     bjh21     flag aSign, bSign;
   1386  1.1     bjh21 
   1387  1.1     bjh21     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   1388  1.1     bjh21          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   1389  1.1     bjh21        ) {
   1390  1.1     bjh21         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
   1391  1.1     bjh21             float_raise( float_flag_invalid );
   1392  1.1     bjh21         }
   1393  1.1     bjh21         return 0;
   1394  1.1     bjh21     }
   1395  1.1     bjh21     aSign = extractFloat32Sign( a );
   1396  1.1     bjh21     bSign = extractFloat32Sign( b );
   1397  1.1     bjh21     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
   1398  1.1     bjh21     return ( a != b ) && ( aSign ^ ( a < b ) );
   1399  1.1     bjh21 
   1400  1.1     bjh21 }
   1401  1.1     bjh21 #endif /* !SOFTFLOAT_FOR_GCC */
   1402  1.1     bjh21 
   1403  1.1     bjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   1404  1.1     bjh21 /*
   1405  1.1     bjh21 -------------------------------------------------------------------------------
   1406  1.1     bjh21 Returns the result of converting the double-precision floating-point value
   1407  1.1     bjh21 `a' to the 32-bit two's complement integer format.  The conversion is
   1408  1.1     bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1409  1.1     bjh21 Arithmetic---which means in particular that the conversion is rounded
   1410  1.1     bjh21 according to the current rounding mode.  If `a' is a NaN, the largest
   1411  1.1     bjh21 positive integer is returned.  Otherwise, if the conversion overflows, the
   1412  1.1     bjh21 largest integer with the same sign as `a' is returned.
   1413  1.1     bjh21 -------------------------------------------------------------------------------
   1414  1.1     bjh21 */
   1415  1.1     bjh21 int32 float64_to_int32( float64 a )
   1416  1.1     bjh21 {
   1417  1.1     bjh21     flag aSign;
   1418  1.1     bjh21     int16 aExp, shiftCount;
   1419  1.1     bjh21     bits32 aSig0, aSig1, absZ, aSigExtra;
   1420  1.1     bjh21     int32 z;
   1421  1.1     bjh21     int8 roundingMode;
   1422  1.1     bjh21 
   1423  1.1     bjh21     aSig1 = extractFloat64Frac1( a );
   1424  1.1     bjh21     aSig0 = extractFloat64Frac0( a );
   1425  1.1     bjh21     aExp = extractFloat64Exp( a );
   1426  1.1     bjh21     aSign = extractFloat64Sign( a );
   1427  1.1     bjh21     shiftCount = aExp - 0x413;
   1428  1.1     bjh21     if ( 0 <= shiftCount ) {
   1429  1.1     bjh21         if ( 0x41E < aExp ) {
   1430  1.1     bjh21             if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
   1431  1.1     bjh21             goto invalid;
   1432  1.1     bjh21         }
   1433  1.1     bjh21         shortShift64Left(
   1434  1.1     bjh21             aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
   1435  1.1     bjh21         if ( 0x80000000 < absZ ) goto invalid;
   1436  1.1     bjh21     }
   1437  1.1     bjh21     else {
   1438  1.1     bjh21         aSig1 = ( aSig1 != 0 );
   1439  1.1     bjh21         if ( aExp < 0x3FE ) {
   1440  1.1     bjh21             aSigExtra = aExp | aSig0 | aSig1;
   1441  1.1     bjh21             absZ = 0;
   1442  1.1     bjh21         }
   1443  1.1     bjh21         else {
   1444  1.1     bjh21             aSig0 |= 0x00100000;
   1445  1.1     bjh21             aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
   1446  1.1     bjh21             absZ = aSig0>>( - shiftCount );
   1447  1.1     bjh21         }
   1448  1.1     bjh21     }
   1449  1.1     bjh21     roundingMode = float_rounding_mode;
   1450  1.1     bjh21     if ( roundingMode == float_round_nearest_even ) {
   1451  1.1     bjh21         if ( (sbits32) aSigExtra < 0 ) {
   1452  1.1     bjh21             ++absZ;
   1453  1.1     bjh21             if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1;
   1454  1.1     bjh21         }
   1455  1.1     bjh21         z = aSign ? - absZ : absZ;
   1456  1.1     bjh21     }
   1457  1.1     bjh21     else {
   1458  1.1     bjh21         aSigExtra = ( aSigExtra != 0 );
   1459  1.1     bjh21         if ( aSign ) {
   1460  1.1     bjh21             z = - (   absZ
   1461  1.1     bjh21                     + ( ( roundingMode == float_round_down ) & aSigExtra ) );
   1462  1.1     bjh21         }
   1463  1.1     bjh21         else {
   1464  1.1     bjh21             z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra );
   1465  1.1     bjh21         }
   1466  1.1     bjh21     }
   1467  1.1     bjh21     if ( ( aSign ^ ( z < 0 ) ) && z ) {
   1468  1.1     bjh21  invalid:
   1469  1.1     bjh21         float_raise( float_flag_invalid );
   1470  1.1     bjh21         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
   1471  1.1     bjh21     }
   1472  1.3      matt     if ( aSigExtra ) set_float_exception_inexact_flag();
   1473  1.1     bjh21     return z;
   1474  1.1     bjh21 
   1475  1.1     bjh21 }
   1476  1.1     bjh21 #endif /* !SOFTFLOAT_FOR_GCC */
   1477  1.1     bjh21 
   1478  1.1     bjh21 /*
   1479  1.1     bjh21 -------------------------------------------------------------------------------
   1480  1.1     bjh21 Returns the result of converting the double-precision floating-point value
   1481  1.1     bjh21 `a' to the 32-bit two's complement integer format.  The conversion is
   1482  1.1     bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1483  1.1     bjh21 Arithmetic, except that the conversion is always rounded toward zero.
   1484  1.1     bjh21 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
   1485  1.1     bjh21 the conversion overflows, the largest integer with the same sign as `a' is
   1486  1.1     bjh21 returned.
   1487  1.1     bjh21 -------------------------------------------------------------------------------
   1488  1.1     bjh21 */
   1489  1.1     bjh21 int32 float64_to_int32_round_to_zero( float64 a )
   1490  1.1     bjh21 {
   1491  1.1     bjh21     flag aSign;
   1492  1.1     bjh21     int16 aExp, shiftCount;
   1493  1.1     bjh21     bits32 aSig0, aSig1, absZ, aSigExtra;
   1494  1.1     bjh21     int32 z;
   1495  1.1     bjh21 
   1496  1.1     bjh21     aSig1 = extractFloat64Frac1( a );
   1497  1.1     bjh21     aSig0 = extractFloat64Frac0( a );
   1498  1.1     bjh21     aExp = extractFloat64Exp( a );
   1499  1.1     bjh21     aSign = extractFloat64Sign( a );
   1500  1.1     bjh21     shiftCount = aExp - 0x413;
   1501  1.1     bjh21     if ( 0 <= shiftCount ) {
   1502  1.1     bjh21         if ( 0x41E < aExp ) {
   1503  1.1     bjh21             if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
   1504  1.1     bjh21             goto invalid;
   1505  1.1     bjh21         }
   1506  1.1     bjh21         shortShift64Left(
   1507  1.1     bjh21             aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
   1508  1.1     bjh21     }
   1509  1.1     bjh21     else {
   1510  1.1     bjh21         if ( aExp < 0x3FF ) {
   1511  1.1     bjh21             if ( aExp | aSig0 | aSig1 ) {
   1512  1.3      matt                 set_float_exception_inexact_flag();
   1513  1.1     bjh21             }
   1514  1.1     bjh21             return 0;
   1515  1.1     bjh21         }
   1516  1.1     bjh21         aSig0 |= 0x00100000;
   1517  1.1     bjh21         aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
   1518  1.1     bjh21         absZ = aSig0>>( - shiftCount );
   1519  1.1     bjh21     }
   1520  1.1     bjh21     z = aSign ? - absZ : absZ;
   1521  1.1     bjh21     if ( ( aSign ^ ( z < 0 ) ) && z ) {
   1522  1.1     bjh21  invalid:
   1523  1.1     bjh21         float_raise( float_flag_invalid );
   1524  1.1     bjh21         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
   1525  1.1     bjh21     }
   1526  1.3      matt     if ( aSigExtra ) set_float_exception_inexact_flag();
   1527  1.1     bjh21     return z;
   1528  1.1     bjh21 
   1529  1.1     bjh21 }
   1530  1.1     bjh21 
   1531  1.1     bjh21 /*
   1532  1.1     bjh21 -------------------------------------------------------------------------------
   1533  1.1     bjh21 Returns the result of converting the double-precision floating-point value
   1534  1.1     bjh21 `a' to the single-precision floating-point format.  The conversion is
   1535  1.1     bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1536  1.1     bjh21 Arithmetic.
   1537  1.1     bjh21 -------------------------------------------------------------------------------
   1538  1.1     bjh21 */
   1539  1.1     bjh21 float32 float64_to_float32( float64 a )
   1540  1.1     bjh21 {
   1541  1.1     bjh21     flag aSign;
   1542  1.1     bjh21     int16 aExp;
   1543  1.1     bjh21     bits32 aSig0, aSig1, zSig;
   1544  1.1     bjh21     bits32 allZero;
   1545  1.1     bjh21 
   1546  1.1     bjh21     aSig1 = extractFloat64Frac1( a );
   1547  1.1     bjh21     aSig0 = extractFloat64Frac0( a );
   1548  1.1     bjh21     aExp = extractFloat64Exp( a );
   1549  1.1     bjh21     aSign = extractFloat64Sign( a );
   1550  1.1     bjh21     if ( aExp == 0x7FF ) {
   1551  1.1     bjh21         if ( aSig0 | aSig1 ) {
   1552  1.1     bjh21             return commonNaNToFloat32( float64ToCommonNaN( a ) );
   1553  1.1     bjh21         }
   1554  1.1     bjh21         return packFloat32( aSign, 0xFF, 0 );
   1555  1.1     bjh21     }
   1556  1.1     bjh21     shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig );
   1557  1.1     bjh21     if ( aExp ) zSig |= 0x40000000;
   1558  1.1     bjh21     return roundAndPackFloat32( aSign, aExp - 0x381, zSig );
   1559  1.1     bjh21 
   1560  1.1     bjh21 }
   1561  1.1     bjh21 
   1562  1.1     bjh21 #ifndef SOFTFLOAT_FOR_GCC
   1563  1.1     bjh21 /*
   1564  1.1     bjh21 -------------------------------------------------------------------------------
   1565  1.1     bjh21 Rounds the double-precision floating-point value `a' to an integer,
   1566  1.1     bjh21 and returns the result as a double-precision floating-point value.  The
   1567  1.1     bjh21 operation is performed according to the IEC/IEEE Standard for Binary
   1568  1.1     bjh21 Floating-Point Arithmetic.
   1569  1.1     bjh21 -------------------------------------------------------------------------------
   1570  1.1     bjh21 */
   1571  1.1     bjh21 float64 float64_round_to_int( float64 a )
   1572  1.1     bjh21 {
   1573  1.1     bjh21     flag aSign;
   1574  1.1     bjh21     int16 aExp;
   1575  1.1     bjh21     bits32 lastBitMask, roundBitsMask;
   1576  1.1     bjh21     int8 roundingMode;
   1577  1.1     bjh21     float64 z;
   1578  1.1     bjh21 
   1579  1.1     bjh21     aExp = extractFloat64Exp( a );
   1580  1.1     bjh21     if ( 0x413 <= aExp ) {
   1581  1.1     bjh21         if ( 0x433 <= aExp ) {
   1582  1.1     bjh21             if (    ( aExp == 0x7FF )
   1583  1.1     bjh21                  && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) {
   1584  1.1     bjh21                 return propagateFloat64NaN( a, a );
   1585  1.1     bjh21             }
   1586  1.1     bjh21             return a;
   1587  1.1     bjh21         }
   1588  1.1     bjh21         lastBitMask = 1;
   1589  1.1     bjh21         lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1;
   1590  1.1     bjh21         roundBitsMask = lastBitMask - 1;
   1591  1.1     bjh21         z = a;
   1592  1.1     bjh21         roundingMode = float_rounding_mode;
   1593  1.1     bjh21         if ( roundingMode == float_round_nearest_even ) {
   1594  1.1     bjh21             if ( lastBitMask ) {
   1595  1.1     bjh21                 add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
   1596  1.1     bjh21                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
   1597  1.1     bjh21             }
   1598  1.1     bjh21             else {
   1599  1.1     bjh21                 if ( (sbits32) z.low < 0 ) {
   1600  1.1     bjh21                     ++z.high;
   1601  1.1     bjh21                     if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1;
   1602  1.1     bjh21                 }
   1603  1.1     bjh21             }
   1604  1.1     bjh21         }
   1605  1.1     bjh21         else if ( roundingMode != float_round_to_zero ) {
   1606  1.1     bjh21             if (   extractFloat64Sign( z )
   1607  1.1     bjh21                  ^ ( roundingMode == float_round_up ) ) {
   1608  1.1     bjh21                 add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
   1609  1.1     bjh21             }
   1610  1.1     bjh21         }
   1611  1.1     bjh21         z.low &= ~ roundBitsMask;
   1612  1.1     bjh21     }
   1613  1.1     bjh21     else {
   1614  1.1     bjh21         if ( aExp <= 0x3FE ) {
   1615  1.1     bjh21             if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
   1616  1.3      matt             set_float_exception_inexact_flag();
   1617  1.1     bjh21             aSign = extractFloat64Sign( a );
   1618  1.1     bjh21             switch ( float_rounding_mode ) {
   1619  1.1     bjh21              case float_round_nearest_even:
   1620  1.1     bjh21                 if (    ( aExp == 0x3FE )
   1621  1.1     bjh21                      && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) )
   1622  1.1     bjh21                    ) {
   1623  1.1     bjh21                     return packFloat64( aSign, 0x3FF, 0, 0 );
   1624  1.1     bjh21                 }
   1625  1.1     bjh21                 break;
   1626  1.1     bjh21              case float_round_down:
   1627  1.1     bjh21                 return
   1628  1.1     bjh21                       aSign ? packFloat64( 1, 0x3FF, 0, 0 )
   1629  1.1     bjh21                     : packFloat64( 0, 0, 0, 0 );
   1630  1.1     bjh21              case float_round_up:
   1631  1.1     bjh21                 return
   1632  1.1     bjh21                       aSign ? packFloat64( 1, 0, 0, 0 )
   1633  1.1     bjh21                     : packFloat64( 0, 0x3FF, 0, 0 );
   1634  1.1     bjh21             }
   1635  1.1     bjh21             return packFloat64( aSign, 0, 0, 0 );
   1636  1.1     bjh21         }
   1637  1.1     bjh21         lastBitMask = 1;
   1638  1.1     bjh21         lastBitMask <<= 0x413 - aExp;
   1639  1.1     bjh21         roundBitsMask = lastBitMask - 1;
   1640  1.1     bjh21         z.low = 0;
   1641  1.1     bjh21         z.high = a.high;
   1642  1.1     bjh21         roundingMode = float_rounding_mode;
   1643  1.1     bjh21         if ( roundingMode == float_round_nearest_even ) {
   1644  1.1     bjh21             z.high += lastBitMask>>1;
   1645  1.1     bjh21             if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
   1646  1.1     bjh21                 z.high &= ~ lastBitMask;
   1647  1.1     bjh21             }
   1648  1.1     bjh21         }
   1649  1.1     bjh21         else if ( roundingMode != float_round_to_zero ) {
   1650  1.1     bjh21             if (   extractFloat64Sign( z )
   1651  1.1     bjh21                  ^ ( roundingMode == float_round_up ) ) {
   1652  1.1     bjh21                 z.high |= ( a.low != 0 );
   1653  1.1     bjh21                 z.high += roundBitsMask;
   1654  1.1     bjh21             }
   1655  1.1     bjh21         }
   1656  1.1     bjh21         z.high &= ~ roundBitsMask;
   1657  1.1     bjh21     }
   1658  1.1     bjh21     if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
   1659  1.3      matt         set_float_exception_inexact_flag();
   1660  1.1     bjh21     }
   1661  1.1     bjh21     return z;
   1662  1.1     bjh21 
   1663  1.1     bjh21 }
   1664  1.1     bjh21 #endif
   1665  1.1     bjh21 
   1666  1.1     bjh21 /*
   1667  1.1     bjh21 -------------------------------------------------------------------------------
   1668  1.1     bjh21 Returns the result of adding the absolute values of the double-precision
   1669  1.1     bjh21 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
   1670  1.1     bjh21 before being returned.  `zSign' is ignored if the result is a NaN.
   1671  1.1     bjh21 The addition is performed according to the IEC/IEEE Standard for Binary
   1672  1.1     bjh21 Floating-Point Arithmetic.
   1673  1.1     bjh21 -------------------------------------------------------------------------------
   1674  1.1     bjh21 */
   1675  1.1     bjh21 static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
   1676  1.1     bjh21 {
   1677  1.1     bjh21     int16 aExp, bExp, zExp;
   1678  1.1     bjh21     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
   1679  1.1     bjh21     int16 expDiff;
   1680  1.1     bjh21 
   1681  1.1     bjh21     aSig1 = extractFloat64Frac1( a );
   1682  1.1     bjh21     aSig0 = extractFloat64Frac0( a );
   1683  1.1     bjh21     aExp = extractFloat64Exp( a );
   1684  1.1     bjh21     bSig1 = extractFloat64Frac1( b );
   1685  1.1     bjh21     bSig0 = extractFloat64Frac0( b );
   1686  1.1     bjh21     bExp = extractFloat64Exp( b );
   1687  1.1     bjh21     expDiff = aExp - bExp;
   1688  1.1     bjh21     if ( 0 < expDiff ) {
   1689  1.1     bjh21         if ( aExp == 0x7FF ) {
   1690  1.1     bjh21             if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
   1691  1.1     bjh21             return a;
   1692  1.1     bjh21         }
   1693  1.1     bjh21         if ( bExp == 0 ) {
   1694  1.1     bjh21             --expDiff;
   1695  1.1     bjh21         }
   1696  1.1     bjh21         else {
   1697  1.1     bjh21             bSig0 |= 0x00100000;
   1698  1.1     bjh21         }
   1699  1.1     bjh21         shift64ExtraRightJamming(
   1700  1.1     bjh21             bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
   1701  1.1     bjh21         zExp = aExp;
   1702  1.1     bjh21     }
   1703  1.1     bjh21     else if ( expDiff < 0 ) {
   1704  1.1     bjh21         if ( bExp == 0x7FF ) {
   1705  1.1     bjh21             if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
   1706  1.1     bjh21             return packFloat64( zSign, 0x7FF, 0, 0 );
   1707  1.1     bjh21         }
   1708  1.1     bjh21         if ( aExp == 0 ) {
   1709  1.1     bjh21             ++expDiff;
   1710  1.1     bjh21         }
   1711  1.1     bjh21         else {
   1712  1.1     bjh21             aSig0 |= 0x00100000;
   1713  1.1     bjh21         }
   1714  1.1     bjh21         shift64ExtraRightJamming(
   1715  1.1     bjh21             aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
   1716  1.1     bjh21         zExp = bExp;
   1717  1.1     bjh21     }
   1718  1.1     bjh21     else {
   1719  1.1     bjh21         if ( aExp == 0x7FF ) {
   1720  1.1     bjh21             if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
   1721  1.1     bjh21                 return propagateFloat64NaN( a, b );
   1722  1.1     bjh21             }
   1723  1.1     bjh21             return a;
   1724  1.1     bjh21         }
   1725  1.1     bjh21         add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
   1726  1.1     bjh21         if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 );
   1727  1.1     bjh21         zSig2 = 0;
   1728  1.1     bjh21         zSig0 |= 0x00200000;
   1729  1.1     bjh21         zExp = aExp;
   1730  1.1     bjh21         goto shiftRight1;
   1731  1.1     bjh21     }
   1732  1.1     bjh21     aSig0 |= 0x00100000;
   1733  1.1     bjh21     add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
   1734  1.1     bjh21     --zExp;
   1735  1.1     bjh21     if ( zSig0 < 0x00200000 ) goto roundAndPack;
   1736  1.1     bjh21     ++zExp;
   1737  1.1     bjh21  shiftRight1:
   1738  1.1     bjh21     shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
   1739  1.1     bjh21  roundAndPack:
   1740  1.1     bjh21     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
   1741  1.1     bjh21 
   1742  1.1     bjh21 }
   1743  1.1     bjh21 
   1744  1.1     bjh21 /*
   1745  1.1     bjh21 -------------------------------------------------------------------------------
   1746  1.1     bjh21 Returns the result of subtracting the absolute values of the double-
   1747  1.1     bjh21 precision floating-point values `a' and `b'.  If `zSign' is 1, the
   1748  1.1     bjh21 difference is negated before being returned.  `zSign' is ignored if the
   1749  1.1     bjh21 result is a NaN.  The subtraction is performed according to the IEC/IEEE
   1750  1.1     bjh21 Standard for Binary Floating-Point Arithmetic.
   1751  1.1     bjh21 -------------------------------------------------------------------------------
   1752  1.1     bjh21 */
   1753  1.1     bjh21 static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
   1754  1.1     bjh21 {
   1755  1.1     bjh21     int16 aExp, bExp, zExp;
   1756  1.1     bjh21     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
   1757  1.1     bjh21     int16 expDiff;
   1758  1.1     bjh21 
   1759  1.1     bjh21     aSig1 = extractFloat64Frac1( a );
   1760  1.1     bjh21     aSig0 = extractFloat64Frac0( a );
   1761  1.1     bjh21     aExp = extractFloat64Exp( a );
   1762  1.1     bjh21     bSig1 = extractFloat64Frac1( b );
   1763  1.1     bjh21     bSig0 = extractFloat64Frac0( b );
   1764  1.1     bjh21     bExp = extractFloat64Exp( b );
   1765  1.1     bjh21     expDiff = aExp - bExp;
   1766  1.1     bjh21     shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 );
   1767  1.1     bjh21     shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 );
   1768  1.1     bjh21     if ( 0 < expDiff ) goto aExpBigger;
   1769  1.1     bjh21     if ( expDiff < 0 ) goto bExpBigger;
   1770  1.1     bjh21     if ( aExp == 0x7FF ) {
   1771  1.1     bjh21         if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
   1772  1.1     bjh21             return propagateFloat64NaN( a, b );
   1773  1.1     bjh21         }
   1774  1.1     bjh21         float_raise( float_flag_invalid );
   1775  1.1     bjh21         return float64_default_nan;
   1776  1.1     bjh21     }
   1777  1.1     bjh21     if ( aExp == 0 ) {
   1778  1.1     bjh21         aExp = 1;
   1779  1.1     bjh21         bExp = 1;
   1780  1.1     bjh21     }
   1781  1.1     bjh21     if ( bSig0 < aSig0 ) goto aBigger;
   1782  1.1     bjh21     if ( aSig0 < bSig0 ) goto bBigger;
   1783  1.1     bjh21     if ( bSig1 < aSig1 ) goto aBigger;
   1784  1.1     bjh21     if ( aSig1 < bSig1 ) goto bBigger;
   1785  1.1     bjh21     return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 );
   1786  1.1     bjh21  bExpBigger:
   1787  1.1     bjh21     if ( bExp == 0x7FF ) {
   1788  1.1     bjh21         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
   1789  1.1     bjh21         return packFloat64( zSign ^ 1, 0x7FF, 0, 0 );
   1790  1.1     bjh21     }
   1791  1.1     bjh21     if ( aExp == 0 ) {
   1792  1.1     bjh21         ++expDiff;
   1793  1.1     bjh21     }
   1794  1.1     bjh21     else {
   1795  1.1     bjh21         aSig0 |= 0x40000000;
   1796  1.1     bjh21     }
   1797  1.1     bjh21     shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
   1798  1.1     bjh21     bSig0 |= 0x40000000;
   1799  1.1     bjh21  bBigger:
   1800  1.1     bjh21     sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
   1801  1.1     bjh21     zExp = bExp;
   1802  1.1     bjh21     zSign ^= 1;
   1803  1.1     bjh21     goto normalizeRoundAndPack;
   1804  1.1     bjh21  aExpBigger:
   1805  1.1     bjh21     if ( aExp == 0x7FF ) {
   1806  1.1     bjh21         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
   1807  1.1     bjh21         return a;
   1808  1.1     bjh21     }
   1809  1.1     bjh21     if ( bExp == 0 ) {
   1810  1.1     bjh21         --expDiff;
   1811  1.1     bjh21     }
   1812  1.1     bjh21     else {
   1813  1.1     bjh21         bSig0 |= 0x40000000;
   1814  1.1     bjh21     }
   1815  1.1     bjh21     shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
   1816  1.1     bjh21     aSig0 |= 0x40000000;
   1817  1.1     bjh21  aBigger:
   1818  1.1     bjh21     sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
   1819  1.1     bjh21     zExp = aExp;
   1820  1.1     bjh21  normalizeRoundAndPack:
   1821  1.1     bjh21     --zExp;
   1822  1.1     bjh21     return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 );
   1823  1.1     bjh21 
   1824  1.1     bjh21 }
   1825  1.1     bjh21 
   1826  1.1     bjh21 /*
   1827  1.1     bjh21 -------------------------------------------------------------------------------
   1828  1.1     bjh21 Returns the result of adding the double-precision floating-point values `a'
   1829  1.1     bjh21 and `b'.  The operation is performed according to the IEC/IEEE Standard for
   1830  1.1     bjh21 Binary Floating-Point Arithmetic.
   1831  1.1     bjh21 -------------------------------------------------------------------------------
   1832  1.1     bjh21 */
   1833  1.1     bjh21 float64 float64_add( float64 a, float64 b )
   1834  1.1     bjh21 {
   1835  1.1     bjh21     flag aSign, bSign;
   1836  1.1     bjh21 
   1837  1.1     bjh21     aSign = extractFloat64Sign( a );
   1838  1.1     bjh21     bSign = extractFloat64Sign( b );
   1839  1.1     bjh21     if ( aSign == bSign ) {
   1840  1.1     bjh21         return addFloat64Sigs( a, b, aSign );
   1841  1.1     bjh21     }
   1842  1.1     bjh21     else {
   1843  1.1     bjh21         return subFloat64Sigs( a, b, aSign );
   1844  1.1     bjh21     }
   1845  1.1     bjh21 
   1846  1.1     bjh21 }
   1847  1.1     bjh21 
   1848  1.1     bjh21 /*
   1849  1.1     bjh21 -------------------------------------------------------------------------------
   1850  1.1     bjh21 Returns the result of subtracting the double-precision floating-point values
   1851  1.1     bjh21 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   1852  1.1     bjh21 for Binary Floating-Point Arithmetic.
   1853  1.1     bjh21 -------------------------------------------------------------------------------
   1854  1.1     bjh21 */
   1855  1.1     bjh21 float64 float64_sub( float64 a, float64 b )
   1856  1.1     bjh21 {
   1857  1.1     bjh21     flag aSign, bSign;
   1858  1.1     bjh21 
   1859  1.1     bjh21     aSign = extractFloat64Sign( a );
   1860  1.1     bjh21     bSign = extractFloat64Sign( b );
   1861  1.1     bjh21     if ( aSign == bSign ) {
   1862  1.1     bjh21         return subFloat64Sigs( a, b, aSign );
   1863  1.1     bjh21     }
   1864  1.1     bjh21     else {
   1865  1.1     bjh21         return addFloat64Sigs( a, b, aSign );
   1866  1.1     bjh21     }
   1867  1.1     bjh21 
   1868  1.1     bjh21 }
   1869  1.1     bjh21 
   1870  1.1     bjh21 /*
   1871  1.1     bjh21 -------------------------------------------------------------------------------
   1872  1.1     bjh21 Returns the result of multiplying the double-precision floating-point values
   1873  1.1     bjh21 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   1874  1.1     bjh21 for Binary Floating-Point Arithmetic.
   1875  1.1     bjh21 -------------------------------------------------------------------------------
   1876  1.1     bjh21 */
   1877  1.1     bjh21 float64 float64_mul( float64 a, float64 b )
   1878  1.1     bjh21 {
   1879  1.1     bjh21     flag aSign, bSign, zSign;
   1880  1.1     bjh21     int16 aExp, bExp, zExp;
   1881  1.1     bjh21     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
   1882  1.1     bjh21 
   1883  1.1     bjh21     aSig1 = extractFloat64Frac1( a );
   1884  1.1     bjh21     aSig0 = extractFloat64Frac0( a );
   1885  1.1     bjh21     aExp = extractFloat64Exp( a );
   1886  1.1     bjh21     aSign = extractFloat64Sign( a );
   1887  1.1     bjh21     bSig1 = extractFloat64Frac1( b );
   1888  1.1     bjh21     bSig0 = extractFloat64Frac0( b );
   1889  1.1     bjh21     bExp = extractFloat64Exp( b );
   1890  1.1     bjh21     bSign = extractFloat64Sign( b );
   1891  1.1     bjh21     zSign = aSign ^ bSign;
   1892  1.1     bjh21     if ( aExp == 0x7FF ) {
   1893  1.1     bjh21         if (    ( aSig0 | aSig1 )
   1894  1.1     bjh21              || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
   1895  1.1     bjh21             return propagateFloat64NaN( a, b );
   1896  1.1     bjh21         }
   1897  1.1     bjh21         if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
   1898  1.1     bjh21         return packFloat64( zSign, 0x7FF, 0, 0 );
   1899  1.1     bjh21     }
   1900  1.1     bjh21     if ( bExp == 0x7FF ) {
   1901  1.1     bjh21         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
   1902  1.1     bjh21         if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
   1903  1.1     bjh21  invalid:
   1904  1.1     bjh21             float_raise( float_flag_invalid );
   1905  1.1     bjh21             return float64_default_nan;
   1906  1.1     bjh21         }
   1907  1.1     bjh21         return packFloat64( zSign, 0x7FF, 0, 0 );
   1908  1.1     bjh21     }
   1909  1.1     bjh21     if ( aExp == 0 ) {
   1910  1.1     bjh21         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
   1911  1.1     bjh21         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   1912  1.1     bjh21     }
   1913  1.1     bjh21     if ( bExp == 0 ) {
   1914  1.1     bjh21         if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
   1915  1.1     bjh21         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
   1916  1.1     bjh21     }
   1917  1.1     bjh21     zExp = aExp + bExp - 0x400;
   1918  1.1     bjh21     aSig0 |= 0x00100000;
   1919  1.1     bjh21     shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 );
   1920  1.1     bjh21     mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
   1921  1.1     bjh21     add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
   1922  1.1     bjh21     zSig2 |= ( zSig3 != 0 );
   1923  1.1     bjh21     if ( 0x00200000 <= zSig0 ) {
   1924  1.1     bjh21         shift64ExtraRightJamming(
   1925  1.1     bjh21             zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
   1926  1.1     bjh21         ++zExp;
   1927  1.1     bjh21     }
   1928  1.1     bjh21     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
   1929  1.1     bjh21 
   1930  1.1     bjh21 }
   1931  1.1     bjh21 
   1932  1.1     bjh21 /*
   1933  1.1     bjh21 -------------------------------------------------------------------------------
   1934  1.1     bjh21 Returns the result of dividing the double-precision floating-point value `a'
   1935  1.1     bjh21 by the corresponding value `b'.  The operation is performed according to the
   1936  1.1     bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1937  1.1     bjh21 -------------------------------------------------------------------------------
   1938  1.1     bjh21 */
   1939  1.1     bjh21 float64 float64_div( float64 a, float64 b )
   1940  1.1     bjh21 {
   1941  1.1     bjh21     flag aSign, bSign, zSign;
   1942  1.1     bjh21     int16 aExp, bExp, zExp;
   1943  1.1     bjh21     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
   1944  1.1     bjh21     bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
   1945  1.1     bjh21 
   1946  1.1     bjh21     aSig1 = extractFloat64Frac1( a );
   1947  1.1     bjh21     aSig0 = extractFloat64Frac0( a );
   1948  1.1     bjh21     aExp = extractFloat64Exp( a );
   1949  1.1     bjh21     aSign = extractFloat64Sign( a );
   1950  1.1     bjh21     bSig1 = extractFloat64Frac1( b );
   1951  1.1     bjh21     bSig0 = extractFloat64Frac0( b );
   1952  1.1     bjh21     bExp = extractFloat64Exp( b );
   1953  1.1     bjh21     bSign = extractFloat64Sign( b );
   1954  1.1     bjh21     zSign = aSign ^ bSign;
   1955  1.1     bjh21     if ( aExp == 0x7FF ) {
   1956  1.1     bjh21         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
   1957  1.1     bjh21         if ( bExp == 0x7FF ) {
   1958  1.1     bjh21             if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
   1959  1.1     bjh21             goto invalid;
   1960  1.1     bjh21         }
   1961  1.1     bjh21         return packFloat64( zSign, 0x7FF, 0, 0 );
   1962  1.1     bjh21     }
   1963  1.1     bjh21     if ( bExp == 0x7FF ) {
   1964  1.1     bjh21         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
   1965  1.1     bjh21         return packFloat64( zSign, 0, 0, 0 );
   1966  1.1     bjh21     }
   1967  1.1     bjh21     if ( bExp == 0 ) {
   1968  1.1     bjh21         if ( ( bSig0 | bSig1 ) == 0 ) {
   1969  1.1     bjh21             if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
   1970  1.1     bjh21  invalid:
   1971  1.1     bjh21                 float_raise( float_flag_invalid );
   1972  1.1     bjh21                 return float64_default_nan;
   1973  1.1     bjh21             }
   1974  1.1     bjh21             float_raise( float_flag_divbyzero );
   1975  1.1     bjh21             return packFloat64( zSign, 0x7FF, 0, 0 );
   1976  1.1     bjh21         }
   1977  1.1     bjh21         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
   1978  1.1     bjh21     }
   1979  1.1     bjh21     if ( aExp == 0 ) {
   1980  1.1     bjh21         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
   1981  1.1     bjh21         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   1982  1.1     bjh21     }
   1983  1.1     bjh21     zExp = aExp - bExp + 0x3FD;
   1984  1.1     bjh21     shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 );
   1985  1.1     bjh21     shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
   1986  1.1     bjh21     if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) {
   1987  1.1     bjh21         shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
   1988  1.1     bjh21         ++zExp;
   1989  1.1     bjh21     }
   1990  1.1     bjh21     zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 );
   1991  1.1     bjh21     mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
   1992  1.1     bjh21     sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
   1993  1.1     bjh21     while ( (sbits32) rem0 < 0 ) {
   1994  1.1     bjh21         --zSig0;
   1995  1.1     bjh21         add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
   1996  1.1     bjh21     }
   1997  1.1     bjh21     zSig1 = estimateDiv64To32( rem1, rem2, bSig0 );
   1998  1.1     bjh21     if ( ( zSig1 & 0x3FF ) <= 4 ) {
   1999  1.1     bjh21         mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
   2000  1.1     bjh21         sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
   2001  1.1     bjh21         while ( (sbits32) rem1 < 0 ) {
   2002  1.1     bjh21             --zSig1;
   2003  1.1     bjh21             add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
   2004  1.1     bjh21         }
   2005  1.1     bjh21         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
   2006  1.1     bjh21     }
   2007  1.1     bjh21     shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 );
   2008  1.1     bjh21     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
   2009  1.1     bjh21 
   2010  1.1     bjh21 }
   2011  1.1     bjh21 
   2012  1.1     bjh21 #ifndef SOFTFLOAT_FOR_GCC
   2013  1.1     bjh21 /*
   2014  1.1     bjh21 -------------------------------------------------------------------------------
   2015  1.1     bjh21 Returns the remainder of the double-precision floating-point value `a'
   2016  1.1     bjh21 with respect to the corresponding value `b'.  The operation is performed
   2017  1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2018  1.1     bjh21 -------------------------------------------------------------------------------
   2019  1.1     bjh21 */
   2020  1.1     bjh21 float64 float64_rem( float64 a, float64 b )
   2021  1.1     bjh21 {
   2022  1.1     bjh21     flag aSign, bSign, zSign;
   2023  1.1     bjh21     int16 aExp, bExp, expDiff;
   2024  1.1     bjh21     bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
   2025  1.1     bjh21     bits32 allZero, alternateASig0, alternateASig1, sigMean1;
   2026  1.1     bjh21     sbits32 sigMean0;
   2027  1.1     bjh21     float64 z;
   2028  1.1     bjh21 
   2029  1.1     bjh21     aSig1 = extractFloat64Frac1( a );
   2030  1.1     bjh21     aSig0 = extractFloat64Frac0( a );
   2031  1.1     bjh21     aExp = extractFloat64Exp( a );
   2032  1.1     bjh21     aSign = extractFloat64Sign( a );
   2033  1.1     bjh21     bSig1 = extractFloat64Frac1( b );
   2034  1.1     bjh21     bSig0 = extractFloat64Frac0( b );
   2035  1.1     bjh21     bExp = extractFloat64Exp( b );
   2036  1.1     bjh21     bSign = extractFloat64Sign( b );
   2037  1.1     bjh21     if ( aExp == 0x7FF ) {
   2038  1.1     bjh21         if (    ( aSig0 | aSig1 )
   2039  1.1     bjh21              || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
   2040  1.1     bjh21             return propagateFloat64NaN( a, b );
   2041  1.1     bjh21         }
   2042  1.1     bjh21         goto invalid;
   2043  1.1     bjh21     }
   2044  1.1     bjh21     if ( bExp == 0x7FF ) {
   2045  1.1     bjh21         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
   2046  1.1     bjh21         return a;
   2047  1.1     bjh21     }
   2048  1.1     bjh21     if ( bExp == 0 ) {
   2049  1.1     bjh21         if ( ( bSig0 | bSig1 ) == 0 ) {
   2050  1.1     bjh21  invalid:
   2051  1.1     bjh21             float_raise( float_flag_invalid );
   2052  1.1     bjh21             return float64_default_nan;
   2053  1.1     bjh21         }
   2054  1.1     bjh21         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
   2055  1.1     bjh21     }
   2056  1.1     bjh21     if ( aExp == 0 ) {
   2057  1.1     bjh21         if ( ( aSig0 | aSig1 ) == 0 ) return a;
   2058  1.1     bjh21         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   2059  1.1     bjh21     }
   2060  1.1     bjh21     expDiff = aExp - bExp;
   2061  1.1     bjh21     if ( expDiff < -1 ) return a;
   2062  1.1     bjh21     shortShift64Left(
   2063  1.1     bjh21         aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 );
   2064  1.1     bjh21     shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
   2065  1.1     bjh21     q = le64( bSig0, bSig1, aSig0, aSig1 );
   2066  1.1     bjh21     if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
   2067  1.1     bjh21     expDiff -= 32;
   2068  1.1     bjh21     while ( 0 < expDiff ) {
   2069  1.1     bjh21         q = estimateDiv64To32( aSig0, aSig1, bSig0 );
   2070  1.1     bjh21         q = ( 4 < q ) ? q - 4 : 0;
   2071  1.1     bjh21         mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
   2072  1.1     bjh21         shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero );
   2073  1.1     bjh21         shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero );
   2074  1.1     bjh21         sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 );
   2075  1.1     bjh21         expDiff -= 29;
   2076  1.1     bjh21     }
   2077  1.1     bjh21     if ( -32 < expDiff ) {
   2078  1.1     bjh21         q = estimateDiv64To32( aSig0, aSig1, bSig0 );
   2079  1.1     bjh21         q = ( 4 < q ) ? q - 4 : 0;
   2080  1.1     bjh21         q >>= - expDiff;
   2081  1.1     bjh21         shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
   2082  1.1     bjh21         expDiff += 24;
   2083  1.1     bjh21         if ( expDiff < 0 ) {
   2084  1.1     bjh21             shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
   2085  1.1     bjh21         }
   2086  1.1     bjh21         else {
   2087  1.1     bjh21             shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
   2088  1.1     bjh21         }
   2089  1.1     bjh21         mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
   2090  1.1     bjh21         sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
   2091  1.1     bjh21     }
   2092  1.1     bjh21     else {
   2093  1.1     bjh21         shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 );
   2094  1.1     bjh21         shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
   2095  1.1     bjh21     }
   2096  1.1     bjh21     do {
   2097  1.1     bjh21         alternateASig0 = aSig0;
   2098  1.1     bjh21         alternateASig1 = aSig1;
   2099  1.1     bjh21         ++q;
   2100  1.1     bjh21         sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
   2101  1.1     bjh21     } while ( 0 <= (sbits32) aSig0 );
   2102  1.1     bjh21     add64(
   2103  1.1     bjh21         aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
   2104  1.1     bjh21     if (    ( sigMean0 < 0 )
   2105  1.1     bjh21          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
   2106  1.1     bjh21         aSig0 = alternateASig0;
   2107  1.1     bjh21         aSig1 = alternateASig1;
   2108  1.1     bjh21     }
   2109  1.1     bjh21     zSign = ( (sbits32) aSig0 < 0 );
   2110  1.1     bjh21     if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
   2111  1.1     bjh21     return
   2112  1.1     bjh21         normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
   2113  1.1     bjh21 
   2114  1.1     bjh21 }
   2115  1.1     bjh21 #endif
   2116  1.1     bjh21 
   2117  1.1     bjh21 #ifndef SOFTFLOAT_FOR_GCC
   2118  1.1     bjh21 /*
   2119  1.1     bjh21 -------------------------------------------------------------------------------
   2120  1.1     bjh21 Returns the square root of the double-precision floating-point value `a'.
   2121  1.1     bjh21 The operation is performed according to the IEC/IEEE Standard for Binary
   2122  1.1     bjh21 Floating-Point Arithmetic.
   2123  1.1     bjh21 -------------------------------------------------------------------------------
   2124  1.1     bjh21 */
   2125  1.1     bjh21 float64 float64_sqrt( float64 a )
   2126  1.1     bjh21 {
   2127  1.1     bjh21     flag aSign;
   2128  1.1     bjh21     int16 aExp, zExp;
   2129  1.1     bjh21     bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
   2130  1.1     bjh21     bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
   2131  1.1     bjh21     float64 z;
   2132  1.1     bjh21 
   2133  1.1     bjh21     aSig1 = extractFloat64Frac1( a );
   2134  1.1     bjh21     aSig0 = extractFloat64Frac0( a );
   2135  1.1     bjh21     aExp = extractFloat64Exp( a );
   2136  1.1     bjh21     aSign = extractFloat64Sign( a );
   2137  1.1     bjh21     if ( aExp == 0x7FF ) {
   2138  1.1     bjh21         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a );
   2139  1.1     bjh21         if ( ! aSign ) return a;
   2140  1.1     bjh21         goto invalid;
   2141  1.1     bjh21     }
   2142  1.1     bjh21     if ( aSign ) {
   2143  1.1     bjh21         if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
   2144  1.1     bjh21  invalid:
   2145  1.1     bjh21         float_raise( float_flag_invalid );
   2146  1.1     bjh21         return float64_default_nan;
   2147  1.1     bjh21     }
   2148  1.1     bjh21     if ( aExp == 0 ) {
   2149  1.1     bjh21         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 );
   2150  1.1     bjh21         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   2151  1.1     bjh21     }
   2152  1.1     bjh21     zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
   2153  1.1     bjh21     aSig0 |= 0x00100000;
   2154  1.1     bjh21     shortShift64Left( aSig0, aSig1, 11, &term0, &term1 );
   2155  1.1     bjh21     zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1;
   2156  1.1     bjh21     if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF;
   2157  1.1     bjh21     doubleZSig0 = zSig0 + zSig0;
   2158  1.1     bjh21     shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 );
   2159  1.1     bjh21     mul32To64( zSig0, zSig0, &term0, &term1 );
   2160  1.1     bjh21     sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 );
   2161  1.1     bjh21     while ( (sbits32) rem0 < 0 ) {
   2162  1.1     bjh21         --zSig0;
   2163  1.1     bjh21         doubleZSig0 -= 2;
   2164  1.1     bjh21         add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 );
   2165  1.1     bjh21     }
   2166  1.1     bjh21     zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 );
   2167  1.1     bjh21     if ( ( zSig1 & 0x1FF ) <= 5 ) {
   2168  1.1     bjh21         if ( zSig1 == 0 ) zSig1 = 1;
   2169  1.1     bjh21         mul32To64( doubleZSig0, zSig1, &term1, &term2 );
   2170  1.1     bjh21         sub64( rem1, 0, term1, term2, &rem1, &rem2 );
   2171  1.1     bjh21         mul32To64( zSig1, zSig1, &term2, &term3 );
   2172  1.1     bjh21         sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
   2173  1.1     bjh21         while ( (sbits32) rem1 < 0 ) {
   2174  1.1     bjh21             --zSig1;
   2175  1.1     bjh21             shortShift64Left( 0, zSig1, 1, &term2, &term3 );
   2176  1.1     bjh21             term3 |= 1;
   2177  1.1     bjh21             term2 |= doubleZSig0;
   2178  1.1     bjh21             add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
   2179  1.1     bjh21         }
   2180  1.1     bjh21         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
   2181  1.1     bjh21     }
   2182  1.1     bjh21     shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 );
   2183  1.1     bjh21     return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 );
   2184  1.1     bjh21 
   2185  1.1     bjh21 }
   2186  1.1     bjh21 #endif
   2187  1.1     bjh21 
   2188  1.1     bjh21 /*
   2189  1.1     bjh21 -------------------------------------------------------------------------------
   2190  1.1     bjh21 Returns 1 if the double-precision floating-point value `a' is equal to
   2191  1.1     bjh21 the corresponding value `b', and 0 otherwise.  The comparison is performed
   2192  1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2193  1.1     bjh21 -------------------------------------------------------------------------------
   2194  1.1     bjh21 */
   2195  1.1     bjh21 flag float64_eq( float64 a, float64 b )
   2196  1.1     bjh21 {
   2197  1.1     bjh21 
   2198  1.1     bjh21     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
   2199  1.1     bjh21               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
   2200  1.1     bjh21          || (    ( extractFloat64Exp( b ) == 0x7FF )
   2201  1.1     bjh21               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
   2202  1.1     bjh21        ) {
   2203  1.1     bjh21         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
   2204  1.1     bjh21             float_raise( float_flag_invalid );
   2205  1.1     bjh21         }
   2206  1.1     bjh21         return 0;
   2207  1.1     bjh21     }
   2208  1.1     bjh21     return ( a == b ) ||
   2209  1.1     bjh21 	( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
   2210  1.1     bjh21 
   2211  1.1     bjh21 }
   2212  1.1     bjh21 
   2213  1.1     bjh21 /*
   2214  1.1     bjh21 -------------------------------------------------------------------------------
   2215  1.1     bjh21 Returns 1 if the double-precision floating-point value `a' is less than
   2216  1.1     bjh21 or equal to the corresponding value `b', and 0 otherwise.  The comparison
   2217  1.1     bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   2218  1.1     bjh21 Arithmetic.
   2219  1.1     bjh21 -------------------------------------------------------------------------------
   2220  1.1     bjh21 */
   2221  1.1     bjh21 flag float64_le( float64 a, float64 b )
   2222  1.1     bjh21 {
   2223  1.1     bjh21     flag aSign, bSign;
   2224  1.1     bjh21 
   2225  1.1     bjh21     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
   2226  1.1     bjh21               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
   2227  1.1     bjh21          || (    ( extractFloat64Exp( b ) == 0x7FF )
   2228  1.1     bjh21               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
   2229  1.1     bjh21        ) {
   2230  1.1     bjh21         float_raise( float_flag_invalid );
   2231  1.1     bjh21         return 0;
   2232  1.1     bjh21     }
   2233  1.1     bjh21     aSign = extractFloat64Sign( a );
   2234  1.1     bjh21     bSign = extractFloat64Sign( b );
   2235  1.1     bjh21     if ( aSign != bSign )
   2236  1.1     bjh21 	return aSign ||
   2237  1.1     bjh21 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
   2238  1.1     bjh21 	      0 );
   2239  1.1     bjh21     return ( a == b ) ||
   2240  1.1     bjh21 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
   2241  1.1     bjh21 }
   2242  1.1     bjh21 
   2243  1.1     bjh21 /*
   2244  1.1     bjh21 -------------------------------------------------------------------------------
   2245  1.1     bjh21 Returns 1 if the double-precision floating-point value `a' is less than
   2246  1.1     bjh21 the corresponding value `b', and 0 otherwise.  The comparison is performed
   2247  1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2248  1.1     bjh21 -------------------------------------------------------------------------------
   2249  1.1     bjh21 */
   2250  1.1     bjh21 flag float64_lt( float64 a, float64 b )
   2251  1.1     bjh21 {
   2252  1.1     bjh21     flag aSign, bSign;
   2253  1.1     bjh21 
   2254  1.1     bjh21     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
   2255  1.1     bjh21               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
   2256  1.1     bjh21          || (    ( extractFloat64Exp( b ) == 0x7FF )
   2257  1.1     bjh21               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
   2258  1.1     bjh21        ) {
   2259  1.1     bjh21         float_raise( float_flag_invalid );
   2260  1.1     bjh21         return 0;
   2261  1.1     bjh21     }
   2262  1.1     bjh21     aSign = extractFloat64Sign( a );
   2263  1.1     bjh21     bSign = extractFloat64Sign( b );
   2264  1.1     bjh21     if ( aSign != bSign )
   2265  1.1     bjh21 	return aSign &&
   2266  1.1     bjh21 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
   2267  1.1     bjh21 	      0 );
   2268  1.1     bjh21     return ( a != b ) &&
   2269  1.1     bjh21 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
   2270  1.1     bjh21 
   2271  1.1     bjh21 }
   2272  1.1     bjh21 
   2273  1.1     bjh21 #ifndef SOFTFLOAT_FOR_GCC
   2274  1.1     bjh21 /*
   2275  1.1     bjh21 -------------------------------------------------------------------------------
   2276  1.1     bjh21 Returns 1 if the double-precision floating-point value `a' is equal to
   2277  1.1     bjh21 the corresponding value `b', and 0 otherwise.  The invalid exception is
   2278  1.1     bjh21 raised if either operand is a NaN.  Otherwise, the comparison is performed
   2279  1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2280  1.1     bjh21 -------------------------------------------------------------------------------
   2281  1.1     bjh21 */
   2282  1.1     bjh21 flag float64_eq_signaling( float64 a, float64 b )
   2283  1.1     bjh21 {
   2284  1.1     bjh21 
   2285  1.1     bjh21     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
   2286  1.1     bjh21               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
   2287  1.1     bjh21          || (    ( extractFloat64Exp( b ) == 0x7FF )
   2288  1.1     bjh21               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
   2289  1.1     bjh21        ) {
   2290  1.1     bjh21         float_raise( float_flag_invalid );
   2291  1.1     bjh21         return 0;
   2292  1.1     bjh21     }
   2293  1.1     bjh21     return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
   2294  1.1     bjh21 
   2295  1.1     bjh21 }
   2296  1.1     bjh21 
   2297  1.1     bjh21 /*
   2298  1.1     bjh21 -------------------------------------------------------------------------------
   2299  1.1     bjh21 Returns 1 if the double-precision floating-point value `a' is less than or
   2300  1.1     bjh21 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
   2301  1.1     bjh21 cause an exception.  Otherwise, the comparison is performed according to the
   2302  1.1     bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2303  1.1     bjh21 -------------------------------------------------------------------------------
   2304  1.1     bjh21 */
   2305  1.1     bjh21 flag float64_le_quiet( float64 a, float64 b )
   2306  1.1     bjh21 {
   2307  1.1     bjh21     flag aSign, bSign;
   2308  1.1     bjh21 
   2309  1.1     bjh21     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
   2310  1.1     bjh21               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
   2311  1.1     bjh21          || (    ( extractFloat64Exp( b ) == 0x7FF )
   2312  1.1     bjh21               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
   2313  1.1     bjh21        ) {
   2314  1.1     bjh21         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
   2315  1.1     bjh21             float_raise( float_flag_invalid );
   2316  1.1     bjh21         }
   2317  1.1     bjh21         return 0;
   2318  1.1     bjh21     }
   2319  1.1     bjh21     aSign = extractFloat64Sign( a );
   2320  1.1     bjh21     bSign = extractFloat64Sign( b );
   2321  1.1     bjh21     if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
   2322  1.1     bjh21     return ( a == b ) || ( aSign ^ ( a < b ) );
   2323  1.1     bjh21 
   2324  1.1     bjh21 }
   2325  1.1     bjh21 
   2326  1.1     bjh21 /*
   2327  1.1     bjh21 -------------------------------------------------------------------------------
   2328  1.1     bjh21 Returns 1 if the double-precision floating-point value `a' is less than
   2329  1.1     bjh21 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
   2330  1.1     bjh21 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
   2331  1.1     bjh21 Standard for Binary Floating-Point Arithmetic.
   2332  1.1     bjh21 -------------------------------------------------------------------------------
   2333  1.1     bjh21 */
   2334  1.1     bjh21 flag float64_lt_quiet( float64 a, float64 b )
   2335  1.1     bjh21 {
   2336  1.1     bjh21     flag aSign, bSign;
   2337  1.1     bjh21 
   2338  1.1     bjh21     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
   2339  1.1     bjh21               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
   2340  1.1     bjh21          || (    ( extractFloat64Exp( b ) == 0x7FF )
   2341  1.1     bjh21               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
   2342  1.1     bjh21        ) {
   2343  1.1     bjh21         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
   2344  1.1     bjh21             float_raise( float_flag_invalid );
   2345  1.1     bjh21         }
   2346  1.1     bjh21         return 0;
   2347  1.1     bjh21     }
   2348  1.1     bjh21     aSign = extractFloat64Sign( a );
   2349  1.1     bjh21     bSign = extractFloat64Sign( b );
   2350  1.1     bjh21     if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
   2351  1.1     bjh21     return ( a != b ) && ( aSign ^ ( a < b ) );
   2352  1.1     bjh21 
   2353  1.1     bjh21 }
   2354  1.1     bjh21 
   2355  1.1     bjh21 #endif
   2356