Home | History | Annotate | Line # | Download | only in libkern
softfloat.c revision 1.2
      1  1.2  thorpej /* $NetBSD: softfloat.c,v 1.2 2002/12/05 17:12:06 thorpej Exp $ */
      2  1.1     ross 
      3  1.1     ross /*
      4  1.1     ross  * This version hacked for use with gcc -msoft-float by bjh21.
      5  1.1     ross  * (Mostly a case of #ifdefing out things GCC doesn't need or provides
      6  1.1     ross  *  itself).
      7  1.1     ross  */
      8  1.1     ross 
      9  1.1     ross /*
     10  1.1     ross  * Things you may want to define:
     11  1.1     ross  *
     12  1.1     ross  * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
     13  1.1     ross  *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
     14  1.1     ross  *   properly renamed.
     15  1.1     ross  */
     16  1.1     ross 
     17  1.1     ross /*
     18  1.1     ross ===============================================================================
     19  1.1     ross 
     20  1.1     ross This C source file is part of the SoftFloat IEC/IEEE Floating-point
     21  1.1     ross Arithmetic Package, Release 2a.
     22  1.1     ross 
     23  1.1     ross Written by John R. Hauser.  This work was made possible in part by the
     24  1.1     ross International Computer Science Institute, located at Suite 600, 1947 Center
     25  1.1     ross Street, Berkeley, California 94704.  Funding was partially provided by the
     26  1.1     ross National Science Foundation under grant MIP-9311980.  The original version
     27  1.1     ross of this code was written as part of a project to build a fixed-point vector
     28  1.1     ross processor in collaboration with the University of California at Berkeley,
     29  1.1     ross overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
     30  1.1     ross is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
     31  1.1     ross arithmetic/SoftFloat.html'.
     32  1.1     ross 
     33  1.1     ross THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
     34  1.1     ross has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
     35  1.1     ross TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
     36  1.1     ross PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
     37  1.1     ross AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
     38  1.1     ross 
     39  1.1     ross Derivative works are acceptable, even for commercial purposes, so long as
     40  1.1     ross (1) they include prominent notice that the work is derivative, and (2) they
     41  1.1     ross include prominent notice akin to these four paragraphs for those parts of
     42  1.1     ross this code that are retained.
     43  1.1     ross 
     44  1.1     ross ===============================================================================
     45  1.1     ross */
     46  1.1     ross 
     47  1.2  thorpej /* If you need this in a boot program, you have bigger problems... */
     48  1.2  thorpej #ifndef _STANDALONE
     49  1.2  thorpej 
     50  1.1     ross #include <sys/cdefs.h>
     51  1.1     ross #if defined(LIBC_SCCS) && !defined(lint)
     52  1.2  thorpej __RCSID("$NetBSD: softfloat.c,v 1.2 2002/12/05 17:12:06 thorpej Exp $");
     53  1.1     ross #endif /* LIBC_SCCS and not lint */
     54  1.1     ross 
     55  1.1     ross #ifdef SOFTFLOAT_FOR_GCC
     56  1.1     ross #include "softfloat-for-gcc.h"
     57  1.1     ross #endif
     58  1.1     ross 
     59  1.1     ross #include "milieu.h"
     60  1.1     ross #include "softfloat.h"
     61  1.1     ross 
     62  1.1     ross /*
     63  1.1     ross  * Conversions between floats as stored in memory and floats as
     64  1.1     ross  * SoftFloat uses them
     65  1.1     ross  */
     66  1.1     ross #ifndef FLOAT64_DEMANGLE
     67  1.1     ross #define FLOAT64_DEMANGLE(a)	(a)
     68  1.1     ross #endif
     69  1.1     ross #ifndef FLOAT64_MANGLE
     70  1.1     ross #define FLOAT64_MANGLE(a)	(a)
     71  1.1     ross #endif
     72  1.1     ross 
     73  1.1     ross /*
     74  1.1     ross -------------------------------------------------------------------------------
     75  1.1     ross Floating-point rounding mode, extended double-precision rounding precision,
     76  1.1     ross and exception flags.
     77  1.1     ross -------------------------------------------------------------------------------
     78  1.1     ross */
     79  1.1     ross 
     80  1.1     ross /*
     81  1.1     ross  * XXX: This may cause options-MULTIPROCESSOR or thread problems someday.
     82  1.1     ross  * 	Right now, it does not.  I've removed all other dynamic global
     83  1.1     ross  * 	variables. [ross]
     84  1.1     ross  */
     85  1.1     ross #ifdef FLOATX80
     86  1.1     ross int8 floatx80_rounding_precision = 80;
     87  1.1     ross #endif
     88  1.1     ross 
     89  1.1     ross /*
     90  1.1     ross -------------------------------------------------------------------------------
     91  1.1     ross Primitive arithmetic functions, including multi-word arithmetic, and
     92  1.1     ross division and square root approximations.  (Can be specialized to target if
     93  1.1     ross desired.)
     94  1.1     ross -------------------------------------------------------------------------------
     95  1.1     ross */
     96  1.1     ross #include "softfloat-macros.h"
     97  1.1     ross 
     98  1.1     ross /*
     99  1.1     ross -------------------------------------------------------------------------------
    100  1.1     ross Functions and definitions to determine:  (1) whether tininess for underflow
    101  1.1     ross is detected before or after rounding by default, (2) what (if anything)
    102  1.1     ross happens when exceptions are raised, (3) how signaling NaNs are distinguished
    103  1.1     ross from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
    104  1.1     ross are propagated from function inputs to output.  These details are target-
    105  1.1     ross specific.
    106  1.1     ross -------------------------------------------------------------------------------
    107  1.1     ross */
    108  1.1     ross #include "softfloat-specialize.h"
    109  1.1     ross 
    110  1.1     ross #ifndef SOFTFLOAT_FOR_GCC /* Not used */
    111  1.1     ross /*
    112  1.1     ross -------------------------------------------------------------------------------
    113  1.1     ross Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
    114  1.1     ross and 7, and returns the properly rounded 32-bit integer corresponding to the
    115  1.1     ross input.  If `zSign' is 1, the input is negated before being converted to an
    116  1.1     ross integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
    117  1.1     ross is simply rounded to an integer, with the inexact exception raised if the
    118  1.1     ross input cannot be represented exactly as an integer.  However, if the fixed-
    119  1.1     ross point input is too large, the invalid exception is raised and the largest
    120  1.1     ross positive or negative integer is returned.
    121  1.1     ross -------------------------------------------------------------------------------
    122  1.1     ross */
    123  1.1     ross static int32 roundAndPackInt32( flag zSign, bits64 absZ )
    124  1.1     ross {
    125  1.1     ross     int8 roundingMode;
    126  1.1     ross     flag roundNearestEven;
    127  1.1     ross     int8 roundIncrement, roundBits;
    128  1.1     ross     int32 z;
    129  1.1     ross 
    130  1.1     ross     roundingMode = float_rounding_mode();
    131  1.1     ross     roundNearestEven = ( roundingMode == float_round_nearest_even );
    132  1.1     ross     roundIncrement = 0x40;
    133  1.1     ross     if ( ! roundNearestEven ) {
    134  1.1     ross         if ( roundingMode == float_round_to_zero ) {
    135  1.1     ross             roundIncrement = 0;
    136  1.1     ross         }
    137  1.1     ross         else {
    138  1.1     ross             roundIncrement = 0x7F;
    139  1.1     ross             if ( zSign ) {
    140  1.1     ross                 if ( roundingMode == float_round_up ) roundIncrement = 0;
    141  1.1     ross             }
    142  1.1     ross             else {
    143  1.1     ross                 if ( roundingMode == float_round_down ) roundIncrement = 0;
    144  1.1     ross             }
    145  1.1     ross         }
    146  1.1     ross     }
    147  1.1     ross     roundBits = absZ & 0x7F;
    148  1.1     ross     absZ = ( absZ + roundIncrement )>>7;
    149  1.1     ross     absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
    150  1.1     ross     z = absZ;
    151  1.1     ross     if ( zSign ) z = - z;
    152  1.1     ross     if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
    153  1.1     ross         float_raise( float_flag_invalid );
    154  1.1     ross         return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
    155  1.1     ross     }
    156  1.1     ross     if ( roundBits ) float_set_inexact();
    157  1.1     ross     return z;
    158  1.1     ross 
    159  1.1     ross }
    160  1.1     ross 
    161  1.1     ross /*
    162  1.1     ross -------------------------------------------------------------------------------
    163  1.1     ross Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
    164  1.1     ross `absZ1', with binary point between bits 63 and 64 (between the input words),
    165  1.1     ross and returns the properly rounded 64-bit integer corresponding to the input.
    166  1.1     ross If `zSign' is 1, the input is negated before being converted to an integer.
    167  1.1     ross Ordinarily, the fixed-point input is simply rounded to an integer, with
    168  1.1     ross the inexact exception raised if the input cannot be represented exactly as
    169  1.1     ross an integer.  However, if the fixed-point input is too large, the invalid
    170  1.1     ross exception is raised and the largest positive or negative integer is
    171  1.1     ross returned.
    172  1.1     ross -------------------------------------------------------------------------------
    173  1.1     ross */
    174  1.1     ross static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 )
    175  1.1     ross {
    176  1.1     ross     int8 roundingMode;
    177  1.1     ross     flag roundNearestEven, increment;
    178  1.1     ross     int64 z;
    179  1.1     ross 
    180  1.1     ross     roundingMode = float_rounding_mode();
    181  1.1     ross     roundNearestEven = ( roundingMode == float_round_nearest_even );
    182  1.1     ross     increment = ( (sbits64) absZ1 < 0 );
    183  1.1     ross     if ( ! roundNearestEven ) {
    184  1.1     ross         if ( roundingMode == float_round_to_zero ) {
    185  1.1     ross             increment = 0;
    186  1.1     ross         }
    187  1.1     ross         else {
    188  1.1     ross             if ( zSign ) {
    189  1.1     ross                 increment = ( roundingMode == float_round_down ) && absZ1;
    190  1.1     ross             }
    191  1.1     ross             else {
    192  1.1     ross                 increment = ( roundingMode == float_round_up ) && absZ1;
    193  1.1     ross             }
    194  1.1     ross         }
    195  1.1     ross     }
    196  1.1     ross     if ( increment ) {
    197  1.1     ross         ++absZ0;
    198  1.1     ross         if ( absZ0 == 0 ) goto overflow;
    199  1.1     ross         absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
    200  1.1     ross     }
    201  1.1     ross     z = absZ0;
    202  1.1     ross     if ( zSign ) z = - z;
    203  1.1     ross     if ( z && ( ( z < 0 ) ^ zSign ) ) {
    204  1.1     ross  overflow:
    205  1.1     ross         float_raise( float_flag_invalid );
    206  1.1     ross         return
    207  1.1     ross               zSign ? (sbits64) LIT64( 0x8000000000000000 )
    208  1.1     ross             : LIT64( 0x7FFFFFFFFFFFFFFF );
    209  1.1     ross     }
    210  1.1     ross     if ( absZ1 ) float_set_inexact();
    211  1.1     ross     return z;
    212  1.1     ross 
    213  1.1     ross }
    214  1.1     ross #endif
    215  1.1     ross 
    216  1.1     ross /*
    217  1.1     ross -------------------------------------------------------------------------------
    218  1.1     ross Returns the fraction bits of the single-precision floating-point value `a'.
    219  1.1     ross -------------------------------------------------------------------------------
    220  1.1     ross */
    221  1.1     ross INLINE bits32 extractFloat32Frac( float32 a )
    222  1.1     ross {
    223  1.1     ross 
    224  1.1     ross     return a & 0x007FFFFF;
    225  1.1     ross 
    226  1.1     ross }
    227  1.1     ross 
    228  1.1     ross /*
    229  1.1     ross -------------------------------------------------------------------------------
    230  1.1     ross Returns the exponent bits of the single-precision floating-point value `a'.
    231  1.1     ross -------------------------------------------------------------------------------
    232  1.1     ross */
    233  1.1     ross INLINE int16 extractFloat32Exp( float32 a )
    234  1.1     ross {
    235  1.1     ross 
    236  1.1     ross     return ( a>>23 ) & 0xFF;
    237  1.1     ross 
    238  1.1     ross }
    239  1.1     ross 
    240  1.1     ross /*
    241  1.1     ross -------------------------------------------------------------------------------
    242  1.1     ross Returns the sign bit of the single-precision floating-point value `a'.
    243  1.1     ross -------------------------------------------------------------------------------
    244  1.1     ross */
    245  1.1     ross INLINE flag extractFloat32Sign( float32 a )
    246  1.1     ross {
    247  1.1     ross 
    248  1.1     ross     return a>>31;
    249  1.1     ross 
    250  1.1     ross }
    251  1.1     ross 
    252  1.1     ross /*
    253  1.1     ross -------------------------------------------------------------------------------
    254  1.1     ross Normalizes the subnormal single-precision floating-point value represented
    255  1.1     ross by the denormalized significand `aSig'.  The normalized exponent and
    256  1.1     ross significand are stored at the locations pointed to by `zExpPtr' and
    257  1.1     ross `zSigPtr', respectively.
    258  1.1     ross -------------------------------------------------------------------------------
    259  1.1     ross */
    260  1.1     ross static void
    261  1.1     ross  normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
    262  1.1     ross {
    263  1.1     ross     int8 shiftCount;
    264  1.1     ross 
    265  1.1     ross     shiftCount = countLeadingZeros32( aSig ) - 8;
    266  1.1     ross     *zSigPtr = aSig<<shiftCount;
    267  1.1     ross     *zExpPtr = 1 - shiftCount;
    268  1.1     ross 
    269  1.1     ross }
    270  1.1     ross 
    271  1.1     ross /*
    272  1.1     ross -------------------------------------------------------------------------------
    273  1.1     ross Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
    274  1.1     ross single-precision floating-point value, returning the result.  After being
    275  1.1     ross shifted into the proper positions, the three fields are simply added
    276  1.1     ross together to form the result.  This means that any integer portion of `zSig'
    277  1.1     ross will be added into the exponent.  Since a properly normalized significand
    278  1.1     ross will have an integer portion equal to 1, the `zExp' input should be 1 less
    279  1.1     ross than the desired result exponent whenever `zSig' is a complete, normalized
    280  1.1     ross significand.
    281  1.1     ross -------------------------------------------------------------------------------
    282  1.1     ross */
    283  1.1     ross INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
    284  1.1     ross {
    285  1.1     ross 
    286  1.1     ross     return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
    287  1.1     ross 
    288  1.1     ross }
    289  1.1     ross 
    290  1.1     ross /*
    291  1.1     ross -------------------------------------------------------------------------------
    292  1.1     ross Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    293  1.1     ross and significand `zSig', and returns the proper single-precision floating-
    294  1.1     ross point value corresponding to the abstract input.  Ordinarily, the abstract
    295  1.1     ross value is simply rounded and packed into the single-precision format, with
    296  1.1     ross the inexact exception raised if the abstract input cannot be represented
    297  1.1     ross exactly.  However, if the abstract value is too large, the overflow and
    298  1.1     ross inexact exceptions are raised and an infinity or maximal finite value is
    299  1.1     ross returned.  If the abstract value is too small, the input value is rounded to
    300  1.1     ross a subnormal number, and the underflow and inexact exceptions are raised if
    301  1.1     ross the abstract input cannot be represented exactly as a subnormal single-
    302  1.1     ross precision floating-point number.
    303  1.1     ross     The input significand `zSig' has its binary point between bits 30
    304  1.1     ross and 29, which is 7 bits to the left of the usual location.  This shifted
    305  1.1     ross significand must be normalized or smaller.  If `zSig' is not normalized,
    306  1.1     ross `zExp' must be 0; in that case, the result returned is a subnormal number,
    307  1.1     ross and it must not require rounding.  In the usual case that `zSig' is
    308  1.1     ross normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
    309  1.1     ross The handling of underflow and overflow follows the IEC/IEEE Standard for
    310  1.1     ross Binary Floating-Point Arithmetic.
    311  1.1     ross -------------------------------------------------------------------------------
    312  1.1     ross */
    313  1.1     ross static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
    314  1.1     ross {
    315  1.1     ross     int8 roundingMode;
    316  1.1     ross     flag roundNearestEven;
    317  1.1     ross     int8 roundIncrement, roundBits;
    318  1.1     ross     flag isTiny;
    319  1.1     ross 
    320  1.1     ross     roundingMode = float_rounding_mode();
    321  1.1     ross     roundNearestEven = ( roundingMode == float_round_nearest_even );
    322  1.1     ross     roundIncrement = 0x40;
    323  1.1     ross     if ( ! roundNearestEven ) {
    324  1.1     ross         if ( roundingMode == float_round_to_zero ) {
    325  1.1     ross             roundIncrement = 0;
    326  1.1     ross         }
    327  1.1     ross         else {
    328  1.1     ross             roundIncrement = 0x7F;
    329  1.1     ross             if ( zSign ) {
    330  1.1     ross                 if ( roundingMode == float_round_up ) roundIncrement = 0;
    331  1.1     ross             }
    332  1.1     ross             else {
    333  1.1     ross                 if ( roundingMode == float_round_down ) roundIncrement = 0;
    334  1.1     ross             }
    335  1.1     ross         }
    336  1.1     ross     }
    337  1.1     ross     roundBits = zSig & 0x7F;
    338  1.1     ross     if ( 0xFD <= (bits16) zExp ) {
    339  1.1     ross         if (    ( 0xFD < zExp )
    340  1.1     ross              || (    ( zExp == 0xFD )
    341  1.1     ross                   && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
    342  1.1     ross            ) {
    343  1.1     ross             float_raise( float_flag_overflow | float_flag_inexact );
    344  1.1     ross             return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
    345  1.1     ross         }
    346  1.1     ross         if ( zExp < 0 ) {
    347  1.1     ross             isTiny =
    348  1.1     ross                    ( float_detect_tininess == float_tininess_before_rounding )
    349  1.1     ross                 || ( zExp < -1 )
    350  1.1     ross                 || ( zSig + roundIncrement < 0x80000000 );
    351  1.1     ross             shift32RightJamming( zSig, - zExp, &zSig );
    352  1.1     ross             zExp = 0;
    353  1.1     ross             roundBits = zSig & 0x7F;
    354  1.1     ross             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
    355  1.1     ross         }
    356  1.1     ross     }
    357  1.1     ross     if ( roundBits ) float_set_inexact();
    358  1.1     ross     zSig = ( zSig + roundIncrement )>>7;
    359  1.1     ross     zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
    360  1.1     ross     if ( zSig == 0 ) zExp = 0;
    361  1.1     ross     return packFloat32( zSign, zExp, zSig );
    362  1.1     ross 
    363  1.1     ross }
    364  1.1     ross 
    365  1.1     ross /*
    366  1.1     ross -------------------------------------------------------------------------------
    367  1.1     ross Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    368  1.1     ross and significand `zSig', and returns the proper single-precision floating-
    369  1.1     ross point value corresponding to the abstract input.  This routine is just like
    370  1.1     ross `roundAndPackFloat32' except that `zSig' does not have to be normalized.
    371  1.1     ross Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
    372  1.1     ross floating-point exponent.
    373  1.1     ross -------------------------------------------------------------------------------
    374  1.1     ross */
    375  1.1     ross static float32
    376  1.1     ross  normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
    377  1.1     ross {
    378  1.1     ross     int8 shiftCount;
    379  1.1     ross 
    380  1.1     ross     shiftCount = countLeadingZeros32( zSig ) - 1;
    381  1.1     ross     return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
    382  1.1     ross 
    383  1.1     ross }
    384  1.1     ross 
    385  1.1     ross /*
    386  1.1     ross -------------------------------------------------------------------------------
    387  1.1     ross Returns the fraction bits of the double-precision floating-point value `a'.
    388  1.1     ross -------------------------------------------------------------------------------
    389  1.1     ross */
    390  1.1     ross INLINE bits64 extractFloat64Frac( float64 a )
    391  1.1     ross {
    392  1.1     ross 
    393  1.1     ross     return FLOAT64_DEMANGLE(a) & LIT64( 0x000FFFFFFFFFFFFF );
    394  1.1     ross 
    395  1.1     ross }
    396  1.1     ross 
    397  1.1     ross /*
    398  1.1     ross -------------------------------------------------------------------------------
    399  1.1     ross Returns the exponent bits of the double-precision floating-point value `a'.
    400  1.1     ross -------------------------------------------------------------------------------
    401  1.1     ross */
    402  1.1     ross INLINE int16 extractFloat64Exp( float64 a )
    403  1.1     ross {
    404  1.1     ross 
    405  1.1     ross     return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF;
    406  1.1     ross 
    407  1.1     ross }
    408  1.1     ross 
    409  1.1     ross /*
    410  1.1     ross -------------------------------------------------------------------------------
    411  1.1     ross Returns the sign bit of the double-precision floating-point value `a'.
    412  1.1     ross -------------------------------------------------------------------------------
    413  1.1     ross */
    414  1.1     ross INLINE flag extractFloat64Sign( float64 a )
    415  1.1     ross {
    416  1.1     ross 
    417  1.1     ross     return FLOAT64_DEMANGLE(a)>>63;
    418  1.1     ross 
    419  1.1     ross }
    420  1.1     ross 
    421  1.1     ross /*
    422  1.1     ross -------------------------------------------------------------------------------
    423  1.1     ross Normalizes the subnormal double-precision floating-point value represented
    424  1.1     ross by the denormalized significand `aSig'.  The normalized exponent and
    425  1.1     ross significand are stored at the locations pointed to by `zExpPtr' and
    426  1.1     ross `zSigPtr', respectively.
    427  1.1     ross -------------------------------------------------------------------------------
    428  1.1     ross */
    429  1.1     ross static void
    430  1.1     ross  normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
    431  1.1     ross {
    432  1.1     ross     int8 shiftCount;
    433  1.1     ross 
    434  1.1     ross     shiftCount = countLeadingZeros64( aSig ) - 11;
    435  1.1     ross     *zSigPtr = aSig<<shiftCount;
    436  1.1     ross     *zExpPtr = 1 - shiftCount;
    437  1.1     ross 
    438  1.1     ross }
    439  1.1     ross 
    440  1.1     ross /*
    441  1.1     ross -------------------------------------------------------------------------------
    442  1.1     ross Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
    443  1.1     ross double-precision floating-point value, returning the result.  After being
    444  1.1     ross shifted into the proper positions, the three fields are simply added
    445  1.1     ross together to form the result.  This means that any integer portion of `zSig'
    446  1.1     ross will be added into the exponent.  Since a properly normalized significand
    447  1.1     ross will have an integer portion equal to 1, the `zExp' input should be 1 less
    448  1.1     ross than the desired result exponent whenever `zSig' is a complete, normalized
    449  1.1     ross significand.
    450  1.1     ross -------------------------------------------------------------------------------
    451  1.1     ross */
    452  1.1     ross INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
    453  1.1     ross {
    454  1.1     ross 
    455  1.1     ross     return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
    456  1.1     ross 			   ( ( (bits64) zExp )<<52 ) + zSig );
    457  1.1     ross 
    458  1.1     ross }
    459  1.1     ross 
    460  1.1     ross /*
    461  1.1     ross -------------------------------------------------------------------------------
    462  1.1     ross Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    463  1.1     ross and significand `zSig', and returns the proper double-precision floating-
    464  1.1     ross point value corresponding to the abstract input.  Ordinarily, the abstract
    465  1.1     ross value is simply rounded and packed into the double-precision format, with
    466  1.1     ross the inexact exception raised if the abstract input cannot be represented
    467  1.1     ross exactly.  However, if the abstract value is too large, the overflow and
    468  1.1     ross inexact exceptions are raised and an infinity or maximal finite value is
    469  1.1     ross returned.  If the abstract value is too small, the input value is rounded to
    470  1.1     ross a subnormal number, and the underflow and inexact exceptions are raised if
    471  1.1     ross the abstract input cannot be represented exactly as a subnormal double-
    472  1.1     ross precision floating-point number.
    473  1.1     ross     The input significand `zSig' has its binary point between bits 62
    474  1.1     ross and 61, which is 10 bits to the left of the usual location.  This shifted
    475  1.1     ross significand must be normalized or smaller.  If `zSig' is not normalized,
    476  1.1     ross `zExp' must be 0; in that case, the result returned is a subnormal number,
    477  1.1     ross and it must not require rounding.  In the usual case that `zSig' is
    478  1.1     ross normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
    479  1.1     ross The handling of underflow and overflow follows the IEC/IEEE Standard for
    480  1.1     ross Binary Floating-Point Arithmetic.
    481  1.1     ross -------------------------------------------------------------------------------
    482  1.1     ross */
    483  1.1     ross static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
    484  1.1     ross {
    485  1.1     ross     int8 roundingMode;
    486  1.1     ross     flag roundNearestEven;
    487  1.1     ross     int16 roundIncrement, roundBits;
    488  1.1     ross     flag isTiny;
    489  1.1     ross 
    490  1.1     ross     roundingMode = float_rounding_mode();
    491  1.1     ross     roundNearestEven = ( roundingMode == float_round_nearest_even );
    492  1.1     ross     roundIncrement = 0x200;
    493  1.1     ross     if ( ! roundNearestEven ) {
    494  1.1     ross         if ( roundingMode == float_round_to_zero ) {
    495  1.1     ross             roundIncrement = 0;
    496  1.1     ross         }
    497  1.1     ross         else {
    498  1.1     ross             roundIncrement = 0x3FF;
    499  1.1     ross             if ( zSign ) {
    500  1.1     ross                 if ( roundingMode == float_round_up ) roundIncrement = 0;
    501  1.1     ross             }
    502  1.1     ross             else {
    503  1.1     ross                 if ( roundingMode == float_round_down ) roundIncrement = 0;
    504  1.1     ross             }
    505  1.1     ross         }
    506  1.1     ross     }
    507  1.1     ross     roundBits = zSig & 0x3FF;
    508  1.1     ross     if ( 0x7FD <= (bits16) zExp ) {
    509  1.1     ross         if (    ( 0x7FD < zExp )
    510  1.1     ross              || (    ( zExp == 0x7FD )
    511  1.1     ross                   && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
    512  1.1     ross            ) {
    513  1.1     ross             float_raise( float_flag_overflow | float_flag_inexact );
    514  1.1     ross             return FLOAT64_MANGLE(
    515  1.1     ross 		FLOAT64_DEMANGLE(packFloat64( zSign, 0x7FF, 0 )) -
    516  1.1     ross 		( roundIncrement == 0 ));
    517  1.1     ross         }
    518  1.1     ross         if ( zExp < 0 ) {
    519  1.1     ross             isTiny =
    520  1.1     ross                    ( float_detect_tininess == float_tininess_before_rounding )
    521  1.1     ross                 || ( zExp < -1 )
    522  1.1     ross                 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
    523  1.1     ross             shift64RightJamming( zSig, - zExp, &zSig );
    524  1.1     ross             zExp = 0;
    525  1.1     ross             roundBits = zSig & 0x3FF;
    526  1.1     ross             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
    527  1.1     ross         }
    528  1.1     ross     }
    529  1.1     ross     if ( roundBits ) float_set_inexact();
    530  1.1     ross     zSig = ( zSig + roundIncrement )>>10;
    531  1.1     ross     zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
    532  1.1     ross     if ( zSig == 0 ) zExp = 0;
    533  1.1     ross     return packFloat64( zSign, zExp, zSig );
    534  1.1     ross 
    535  1.1     ross }
    536  1.1     ross 
    537  1.1     ross /*
    538  1.1     ross -------------------------------------------------------------------------------
    539  1.1     ross Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    540  1.1     ross and significand `zSig', and returns the proper double-precision floating-
    541  1.1     ross point value corresponding to the abstract input.  This routine is just like
    542  1.1     ross `roundAndPackFloat64' except that `zSig' does not have to be normalized.
    543  1.1     ross Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
    544  1.1     ross floating-point exponent.
    545  1.1     ross -------------------------------------------------------------------------------
    546  1.1     ross */
    547  1.1     ross static float64
    548  1.1     ross  normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
    549  1.1     ross {
    550  1.1     ross     int8 shiftCount;
    551  1.1     ross     /* Normalize so one zero bit remains above the leading 1; zExp drops to match. */
    552  1.1     ross     shiftCount = countLeadingZeros64( zSig ) - 1;
    553  1.1     ross     return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
    554  1.1     ross 
    555  1.1     ross }
    556  1.1     ross 
    557  1.1     ross #ifdef FLOATX80
    558  1.1     ross 
    559  1.1     ross /*
    560  1.1     ross -------------------------------------------------------------------------------
    561  1.1     ross Returns the fraction bits of the extended double-precision floating-point
    562  1.1     ross value `a'.
    563  1.1     ross -------------------------------------------------------------------------------
    564  1.1     ross */
    565  1.1     ross INLINE bits64 extractFloatx80Frac( floatx80 a )
    566  1.1     ross {
    567  1.1     ross     /* The significand is returned unmasked: it is the whole `low' member. */
    568  1.1     ross     return a.low;
    569  1.1     ross 
    570  1.1     ross }
    571  1.1     ross 
    572  1.1     ross /*
    573  1.1     ross -------------------------------------------------------------------------------
    574  1.1     ross Returns the exponent bits of the extended double-precision floating-point
    575  1.1     ross value `a'.
    576  1.1     ross -------------------------------------------------------------------------------
    577  1.1     ross */
    578  1.1     ross INLINE int32 extractFloatx80Exp( floatx80 a )
    579  1.1     ross {
    580  1.1     ross     /* Keep the low 15 bits of `high'; bit 15 (the sign) is masked away. */
    581  1.1     ross     return a.high & 0x7FFF;
    582  1.1     ross 
    583  1.1     ross }
    584  1.1     ross 
    585  1.1     ross /*
    586  1.1     ross -------------------------------------------------------------------------------
    587  1.1     ross Returns the sign bit of the extended double-precision floating-point value
    588  1.1     ross `a'.
    589  1.1     ross -------------------------------------------------------------------------------
    590  1.1     ross */
    591  1.1     ross INLINE flag extractFloatx80Sign( floatx80 a )
    592  1.1     ross {
    593  1.1     ross     /* The sign is bit 15 of `high', just above the 15 exponent bits. */
    594  1.1     ross     return a.high>>15;
    595  1.1     ross 
    596  1.1     ross }
    597  1.1     ross 
    598  1.1     ross /*
    599  1.1     ross -------------------------------------------------------------------------------
    600  1.1     ross Normalizes the subnormal extended double-precision floating-point value
    601  1.1     ross represented by the denormalized significand `aSig'.  The normalized exponent
    602  1.1     ross and significand are stored at the locations pointed to by `zExpPtr' and
    603  1.1     ross `zSigPtr', respectively.
    604  1.1     ross -------------------------------------------------------------------------------
    605  1.1     ross */
    606  1.1     ross static void
    607  1.1     ross  normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
    608  1.1     ross {
    609  1.1     ross     int8 shiftCount;
    610  1.1     ross     /* Left-justify aSig; the exponent starts at 1 and drops by the shift. */
    611  1.1     ross     shiftCount = countLeadingZeros64( aSig );
    612  1.1     ross     *zSigPtr = aSig<<shiftCount;
    613  1.1     ross     *zExpPtr = 1 - shiftCount;
    614  1.1     ross 
    615  1.1     ross }
    616  1.1     ross 
    617  1.1     ross /*
    618  1.1     ross -------------------------------------------------------------------------------
    619  1.1     ross Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
    620  1.1     ross extended double-precision floating-point value, returning the result.
    621  1.1     ross -------------------------------------------------------------------------------
    622  1.1     ross */
    623  1.1     ross INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
    624  1.1     ross {
    625  1.1     ross     floatx80 z;
    626  1.1     ross     /* `low' takes zSig as-is; `high' is the sign in bit 15 plus zExp (added, not masked). */
    627  1.1     ross     z.low = zSig;
    628  1.1     ross     z.high = ( ( (bits16) zSign )<<15 ) + zExp;
    629  1.1     ross     return z;
    630  1.1     ross 
    631  1.1     ross }
    632  1.1     ross 
    633  1.1     ross /*
    634  1.1     ross -------------------------------------------------------------------------------
    635  1.1     ross Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    636  1.1     ross and extended significand formed by the concatenation of `zSig0' and `zSig1',
    637  1.1     ross and returns the proper extended double-precision floating-point value
    638  1.1     ross corresponding to the abstract input.  Ordinarily, the abstract value is
    639  1.1     ross rounded and packed into the extended double-precision format, with the
    640  1.1     ross inexact exception raised if the abstract input cannot be represented
    641  1.1     ross exactly.  However, if the abstract value is too large, the overflow and
    642  1.1     ross inexact exceptions are raised and an infinity or maximal finite value is
    643  1.1     ross returned.  If the abstract value is too small, the input value is rounded to
    644  1.1     ross a subnormal number, and the underflow and inexact exceptions are raised if
    645  1.1     ross the abstract input cannot be represented exactly as a subnormal extended
    646  1.1     ross double-precision floating-point number.
    647  1.1     ross     If `roundingPrecision' is 32 or 64, the result is rounded to the same
    648  1.1     ross number of bits as single or double precision, respectively.  Otherwise, the
    649  1.1     ross result is rounded to the full precision of the extended double-precision
    650  1.1     ross format.
    651  1.1     ross     The input significand must be normalized or smaller.  If the input
    652  1.1     ross significand is not normalized, `zExp' must be 0; in that case, the result
    653  1.1     ross returned is a subnormal number, and it must not require rounding.  The
    654  1.1     ross handling of underflow and overflow follows the IEC/IEEE Standard for Binary
    655  1.1     ross Floating-Point Arithmetic.
    656  1.1     ross -------------------------------------------------------------------------------
    657  1.1     ross */
    658  1.1     ross static floatx80
    659  1.1     ross  roundAndPackFloatx80(
    660  1.1     ross      int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
    661  1.1     ross  )
    662  1.1     ross {
    663  1.1     ross     int8 roundingMode;
    664  1.1     ross     flag roundNearestEven, increment, isTiny;
    665  1.1     ross     int64 roundIncrement, roundMask, roundBits;
    666  1.1     ross     /* Precisions 64 and 32 round inside zSig0; any other value takes the precision80 path. */
    667  1.1     ross     roundingMode = float_rounding_mode();
    668  1.1     ross     roundNearestEven = ( roundingMode == float_round_nearest_even );
    669  1.1     ross     if ( roundingPrecision == 80 ) goto precision80;
    670  1.1     ross     if ( roundingPrecision == 64 ) {
    671  1.1     ross         roundIncrement = LIT64( 0x0000000000000400 );  /* half of the discarded range */
    672  1.1     ross         roundMask = LIT64( 0x00000000000007FF );  /* low 11 bits are discarded */
    673  1.1     ross     }
    674  1.1     ross     else if ( roundingPrecision == 32 ) {
    675  1.1     ross         roundIncrement = LIT64( 0x0000008000000000 );  /* half of the discarded range */
    676  1.1     ross         roundMask = LIT64( 0x000000FFFFFFFFFF );  /* low 40 bits are discarded */
    677  1.1     ross     }
    678  1.1     ross     else {
    679  1.1     ross         goto precision80;
    680  1.1     ross     }
    681  1.1     ross     zSig0 |= ( zSig1 != 0 );  /* fold any nonzero zSig1 into bit 0 as a sticky bit */
    682  1.1     ross     if ( ! roundNearestEven ) {
    683  1.1     ross         if ( roundingMode == float_round_to_zero ) {
    684  1.1     ross             roundIncrement = 0;  /* truncate */
    685  1.1     ross         }
    686  1.1     ross         else {
    687  1.1     ross             roundIncrement = roundMask;  /* any discarded bit carries into the kept part */
    688  1.1     ross             if ( zSign ) {
    689  1.1     ross                 if ( roundingMode == float_round_up ) roundIncrement = 0;
    690  1.1     ross             }
    691  1.1     ross             else {
    692  1.1     ross                 if ( roundingMode == float_round_down ) roundIncrement = 0;
    693  1.1     ross             }
    694  1.1     ross         }
    695  1.1     ross     }
    696  1.1     ross     roundBits = zSig0 & roundMask;
    697  1.1     ross     if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {  /* zExp <= 0 or zExp >= 0x7FFE (if bits32 is unsigned) */
    698  1.1     ross         if (    ( 0x7FFE < zExp )
    699  1.1     ross              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
    700  1.1     ross            ) {
    701  1.1     ross             goto overflow;  /* label lives in the precision80 code below; roundMask is kept */
    702  1.1     ross         }
    703  1.1     ross         if ( zExp <= 0 ) {
    704  1.1     ross             isTiny =  /* decided before the significand is shifted */
    705  1.1     ross                    ( float_detect_tininess == float_tininess_before_rounding )
    706  1.1     ross                 || ( zExp < 0 )
    707  1.1     ross                 || ( zSig0 <= zSig0 + roundIncrement );
    708  1.1     ross             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
    709  1.1     ross             zExp = 0;
    710  1.1     ross             roundBits = zSig0 & roundMask;
    711  1.1     ross             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
    712  1.1     ross             if ( roundBits ) float_set_inexact();
    713  1.1     ross             zSig0 += roundIncrement;
    714  1.1     ross             if ( (sbits64) zSig0 < 0 ) zExp = 1;  /* rounding set bit 63 */
    715  1.1     ross             roundIncrement = roundMask + 1;  /* now the lowest kept bit */
    716  1.1     ross             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
    717  1.1     ross                 roundMask |= roundIncrement;  /* exact tie: also clear the lowest kept bit */
    718  1.1     ross             }
    719  1.1     ross             zSig0 &= ~ roundMask;
    720  1.1     ross             return packFloatx80( zSign, zExp, zSig0 );
    721  1.1     ross         }
    722  1.1     ross     }
    723  1.1     ross     if ( roundBits ) float_set_inexact();
    724  1.1     ross     zSig0 += roundIncrement;
    725  1.1     ross     if ( zSig0 < roundIncrement ) {  /* the addition wrapped past bit 63 */
    726  1.1     ross         ++zExp;
    727  1.1     ross         zSig0 = LIT64( 0x8000000000000000 );
    728  1.1     ross     }
    729  1.1     ross     roundIncrement = roundMask + 1;  /* now the lowest kept bit */
    730  1.1     ross     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
    731  1.1     ross         roundMask |= roundIncrement;  /* exact tie: also clear the lowest kept bit */
    732  1.1     ross     }
    733  1.1     ross     zSig0 &= ~ roundMask;
    734  1.1     ross     if ( zSig0 == 0 ) zExp = 0;
    735  1.1     ross     return packFloatx80( zSign, zExp, zSig0 );
    736  1.1     ross  precision80:
    737  1.1     ross     increment = ( (sbits64) zSig1 < 0 );  /* top bit of zSig1 is set */
    738  1.1     ross     if ( ! roundNearestEven ) {
    739  1.1     ross         if ( roundingMode == float_round_to_zero ) {
    740  1.1     ross             increment = 0;
    741  1.1     ross         }
    742  1.1     ross         else {
    743  1.1     ross             if ( zSign ) {
    744  1.1     ross                 increment = ( roundingMode == float_round_down ) && zSig1;
    745  1.1     ross             }
    746  1.1     ross             else {
    747  1.1     ross                 increment = ( roundingMode == float_round_up ) && zSig1;
    748  1.1     ross             }
    749  1.1     ross         }
    750  1.1     ross     }
    751  1.1     ross     if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {  /* zExp <= 0 or zExp >= 0x7FFE (if bits32 is unsigned) */
    752  1.1     ross         if (    ( 0x7FFE < zExp )
    753  1.1     ross              || (    ( zExp == 0x7FFE )
    754  1.1     ross                   && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
    755  1.1     ross                   && increment
    756  1.1     ross                 )
    757  1.1     ross            ) {
    758  1.1     ross             roundMask = 0;  /* so that ~roundMask below is all ones */
    759  1.1     ross  overflow:
    760  1.1     ross             float_raise( float_flag_overflow | float_flag_inexact );
    761  1.1     ross             if (    ( roundingMode == float_round_to_zero )
    762  1.1     ross                  || ( zSign && ( roundingMode == float_round_up ) )
    763  1.1     ross                  || ( ! zSign && ( roundingMode == float_round_down ) )
    764  1.1     ross                ) {
    765  1.1     ross                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );  /* exponent 0x7FFE, every kept bit set */
    766  1.1     ross             }
    767  1.1     ross             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );  /* exponent 0x7FFF, only bit 63 set */
    768  1.1     ross         }
    769  1.1     ross         if ( zExp <= 0 ) {
    770  1.1     ross             isTiny =  /* decided before the significand is shifted */
    771  1.1     ross                    ( float_detect_tininess == float_tininess_before_rounding )
    772  1.1     ross                 || ( zExp < 0 )
    773  1.1     ross                 || ! increment
    774  1.1     ross                 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
    775  1.1     ross             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
    776  1.1     ross             zExp = 0;
    777  1.1     ross             if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
    778  1.1     ross             if ( zSig1 ) float_set_inexact();
    779  1.1     ross             if ( roundNearestEven ) {  /* recompute the increment from the shifted zSig1 */
    780  1.1     ross                 increment = ( (sbits64) zSig1 < 0 );
    781  1.1     ross             }
    782  1.1     ross             else {
    783  1.1     ross                 if ( zSign ) {
    784  1.1     ross                     increment = ( roundingMode == float_round_down ) && zSig1;
    785  1.1     ross                 }
    786  1.1     ross                 else {
    787  1.1     ross                     increment = ( roundingMode == float_round_up ) && zSig1;
    788  1.1     ross                 }
    789  1.1     ross             }
    790  1.1     ross             if ( increment ) {
    791  1.1     ross                 ++zSig0;
    792  1.1     ross                 zSig0 &=  /* exact tie under nearest-even: clear bit 0 */
    793  1.1     ross                     ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
    794  1.1     ross                 if ( (sbits64) zSig0 < 0 ) zExp = 1;  /* rounding set bit 63 */
    795  1.1     ross             }
    796  1.1     ross             return packFloatx80( zSign, zExp, zSig0 );
    797  1.1     ross         }
    798  1.1     ross     }
    799  1.1     ross     if ( zSig1 ) float_set_inexact();
    800  1.1     ross     if ( increment ) {
    801  1.1     ross         ++zSig0;
    802  1.1     ross         if ( zSig0 == 0 ) {  /* wrapped: carry into the exponent */
    803  1.1     ross             ++zExp;
    804  1.1     ross             zSig0 = LIT64( 0x8000000000000000 );
    805  1.1     ross         }
    806  1.1     ross         else {
    807  1.1     ross             zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );  /* exact tie: clear bit 0 */
    808  1.1     ross         }
    809  1.1     ross     }
    810  1.1     ross     else {
    811  1.1     ross         if ( zSig0 == 0 ) zExp = 0;
    812  1.1     ross     }
    813  1.1     ross     return packFloatx80( zSign, zExp, zSig0 );
    814  1.1     ross 
    815  1.1     ross }
    816  1.1     ross 
    817  1.1     ross /*
    818  1.1     ross -------------------------------------------------------------------------------
    819  1.1     ross Takes an abstract floating-point value having sign `zSign', exponent
    820  1.1     ross `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
    821  1.1     ross and returns the proper extended double-precision floating-point value
    822  1.1     ross corresponding to the abstract input.  This routine is just like
    823  1.1     ross `roundAndPackFloatx80' except that the input significand does not have to be
    824  1.1     ross normalized.
    825  1.1     ross -------------------------------------------------------------------------------
    826  1.1     ross */
    827  1.1     ross static floatx80
    828  1.1     ross  normalizeRoundAndPackFloatx80(
    829  1.1     ross      int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
    830  1.1     ross  )
    831  1.1     ross {
    832  1.1     ross     int8 shiftCount;
    833  1.1     ross     /* An empty upper word is replaced by the lower one, costing 64 in the exponent. */
    834  1.1     ross     if ( zSig0 == 0 ) {
    835  1.1     ross         zSig0 = zSig1;
    836  1.1     ross         zSig1 = 0;
    837  1.1     ross         zExp -= 64;
    838  1.1     ross     }
    839  1.1     ross     shiftCount = countLeadingZeros64( zSig0 );
    840  1.1     ross     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );  /* left-justify the pair */
    841  1.1     ross     zExp -= shiftCount;
    842  1.1     ross     return
    843  1.1     ross         roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
    844  1.1     ross 
    845  1.1     ross }
    846  1.1     ross 
    847  1.1     ross #endif
    848  1.1     ross 
    849  1.1     ross #ifdef FLOAT128
    850  1.1     ross 
    851  1.1     ross /*
    852  1.1     ross -------------------------------------------------------------------------------
    853  1.1     ross Returns the least-significant 64 fraction bits of the quadruple-precision
    854  1.1     ross floating-point value `a'.
    855  1.1     ross -------------------------------------------------------------------------------
    856  1.1     ross */
    857  1.1     ross INLINE bits64 extractFloat128Frac1( float128 a )
    858  1.1     ross {
    859  1.1     ross     /* The low fraction word is the whole `low' member, unmasked. */
    860  1.1     ross     return a.low;
    861  1.1     ross 
    862  1.1     ross }
    863  1.1     ross 
    864  1.1     ross /*
    865  1.1     ross -------------------------------------------------------------------------------
    866  1.1     ross Returns the most-significant 48 fraction bits of the quadruple-precision
    867  1.1     ross floating-point value `a'.
    868  1.1     ross -------------------------------------------------------------------------------
    869  1.1     ross */
    870  1.1     ross INLINE bits64 extractFloat128Frac0( float128 a )
    871  1.1     ross {
    872  1.1     ross     /* Keep the low 48 bits of `high'; the 16 bits above them are masked away. */
    873  1.1     ross     return a.high & LIT64( 0x0000FFFFFFFFFFFF );
    874  1.1     ross 
    875  1.1     ross }
    876  1.1     ross 
    877  1.1     ross /*
    878  1.1     ross -------------------------------------------------------------------------------
    879  1.1     ross Returns the exponent bits of the quadruple-precision floating-point value
    880  1.1     ross `a'.
    881  1.1     ross -------------------------------------------------------------------------------
    882  1.1     ross */
    883  1.1     ross INLINE int32 extractFloat128Exp( float128 a )
    884  1.1     ross {
    885  1.1     ross     /* The exponent is the 15-bit field at bits 48..62 of `high'. */
    886  1.1     ross     return ( a.high>>48 ) & 0x7FFF;
    887  1.1     ross 
    888  1.1     ross }
    889  1.1     ross 
    890  1.1     ross /*
    891  1.1     ross -------------------------------------------------------------------------------
    892  1.1     ross Returns the sign bit of the quadruple-precision floating-point value `a'.
    893  1.1     ross -------------------------------------------------------------------------------
    894  1.1     ross */
    895  1.1     ross INLINE flag extractFloat128Sign( float128 a )
    896  1.1     ross {
    897  1.1     ross     /* The sign is bit 63 of `high'. */
    898  1.1     ross     return a.high>>63;
    899  1.1     ross 
    900  1.1     ross }
    901  1.1     ross 
    902  1.1     ross /*
    903  1.1     ross -------------------------------------------------------------------------------
    904  1.1     ross Normalizes the subnormal quadruple-precision floating-point value
    905  1.1     ross represented by the denormalized significand formed by the concatenation of
    906  1.1     ross `aSig0' and `aSig1'.  The normalized exponent is stored at the location
    907  1.1     ross pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
    908  1.1     ross significand are stored at the location pointed to by `zSig0Ptr', and the
    909  1.1     ross least significant 64 bits of the normalized significand are stored at the
    910  1.1     ross location pointed to by `zSig1Ptr'.
    911  1.1     ross -------------------------------------------------------------------------------
    912  1.1     ross */
    913  1.1     ross static void
    914  1.1     ross  normalizeFloat128Subnormal(
    915  1.1     ross      bits64 aSig0,
    916  1.1     ross      bits64 aSig1,
    917  1.1     ross      int32 *zExpPtr,
    918  1.1     ross      bits64 *zSig0Ptr,
    919  1.1     ross      bits64 *zSig1Ptr
    920  1.1     ross  )
    921  1.1     ross {
    922  1.1     ross     int8 shiftCount;
    923  1.1     ross     /* Goal in both branches: leave exactly 15 leading zeros in the high output word. */
    924  1.1     ross     if ( aSig0 == 0 ) {
    925  1.1     ross         shiftCount = countLeadingZeros64( aSig1 ) - 15;
    926  1.1     ross         if ( shiftCount < 0 ) {  /* fewer than 15 leading zeros: split aSig1 across both words */
    927  1.1     ross             *zSig0Ptr = aSig1>>( - shiftCount );
    928  1.1     ross             *zSig1Ptr = aSig1<<( shiftCount & 63 );  /* shiftCount & 63 is 64 + shiftCount here */
    929  1.1     ross         }
    930  1.1     ross         else {
    931  1.1     ross             *zSig0Ptr = aSig1<<shiftCount;
    932  1.1     ross             *zSig1Ptr = 0;
    933  1.1     ross         }
    934  1.1     ross         *zExpPtr = - shiftCount - 63;  /* i.e. 1 - ( shiftCount + 64 ): the word move counts as 64 */
    935  1.1     ross     }
    936  1.1     ross     else {
    937  1.1     ross         shiftCount = countLeadingZeros64( aSig0 ) - 15;
    938  1.1     ross         shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
    939  1.1     ross         *zExpPtr = 1 - shiftCount;
    940  1.1     ross     }
    941  1.1     ross 
    942  1.1     ross }
    943  1.1     ross 
    944  1.1     ross /*
    945  1.1     ross -------------------------------------------------------------------------------
    946  1.1     ross Packs the sign `zSign', the exponent `zExp', and the significand formed
    947  1.1     ross by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
    948  1.1     ross floating-point value, returning the result.  After being shifted into the
    949  1.1     ross proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
    950  1.1     ross added together to form the most significant 64 bits of the result.  This
    951  1.1     ross means that any integer portion of `zSig0' will be added into the exponent.
    952  1.1     ross Since a properly normalized significand will have an integer portion equal
    953  1.1     ross to 1, the `zExp' input should be 1 less than the desired result exponent
    954  1.1     ross whenever `zSig0' and `zSig1' concatenated form a complete, normalized
    955  1.1     ross significand.
    956  1.1     ross -------------------------------------------------------------------------------
    957  1.1     ross */
    958  1.1     ross INLINE float128
    959  1.1     ross  packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
    960  1.1     ross {
    961  1.1     ross     float128 z;
    962  1.1     ross     /* Fields are summed, not OR-ed: zSig0 bits at or above bit 48 carry into the exponent. */
    963  1.1     ross     z.low = zSig1;
    964  1.1     ross     z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
    965  1.1     ross     return z;
    966  1.1     ross 
    967  1.1     ross }
    968  1.1     ross 
    969  1.1     ross /*
    970  1.1     ross -------------------------------------------------------------------------------
    971  1.1     ross Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    972  1.1     ross and extended significand formed by the concatenation of `zSig0', `zSig1',
    973  1.1     ross and `zSig2', and returns the proper quadruple-precision floating-point value
    974  1.1     ross corresponding to the abstract input.  Ordinarily, the abstract value is
    975  1.1     ross simply rounded and packed into the quadruple-precision format, with the
    976  1.1     ross inexact exception raised if the abstract input cannot be represented
    977  1.1     ross exactly.  However, if the abstract value is too large, the overflow and
    978  1.1     ross inexact exceptions are raised and an infinity or maximal finite value is
    979  1.1     ross returned.  If the abstract value is too small, the input value is rounded to
    980  1.1     ross a subnormal number, and the underflow and inexact exceptions are raised if
    981  1.1     ross the abstract input cannot be represented exactly as a subnormal quadruple-
    982  1.1     ross precision floating-point number.
    983  1.1     ross     The input significand must be normalized or smaller.  If the input
    984  1.1     ross significand is not normalized, `zExp' must be 0; in that case, the result
    985  1.1     ross returned is a subnormal number, and it must not require rounding.  In the
    986  1.1     ross usual case that the input significand is normalized, `zExp' must be 1 less
    987  1.1     ross than the ``true'' floating-point exponent.  The handling of underflow and
    988  1.1     ross overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
    989  1.1     ross -------------------------------------------------------------------------------
    990  1.1     ross */
    991  1.1     ross static float128
    992  1.1     ross  roundAndPackFloat128(
    993  1.1     ross      flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
    994  1.1     ross {
    995  1.1     ross     int8 roundingMode;
    996  1.1     ross     flag roundNearestEven, increment, isTiny;
    997  1.1     ross     /* zSig2 holds the bits below the result; `increment' says whether to add 1 to zSig0:zSig1. */
    998  1.1     ross     roundingMode = float_rounding_mode();
    999  1.1     ross     roundNearestEven = ( roundingMode == float_round_nearest_even );
   1000  1.1     ross     increment = ( (sbits64) zSig2 < 0 );  /* top bit of zSig2 is set */
   1001  1.1     ross     if ( ! roundNearestEven ) {
   1002  1.1     ross         if ( roundingMode == float_round_to_zero ) {
   1003  1.1     ross             increment = 0;
   1004  1.1     ross         }
   1005  1.1     ross         else {
   1006  1.1     ross             if ( zSign ) {
   1007  1.1     ross                 increment = ( roundingMode == float_round_down ) && zSig2;
   1008  1.1     ross             }
   1009  1.1     ross             else {
   1010  1.1     ross                 increment = ( roundingMode == float_round_up ) && zSig2;
   1011  1.1     ross             }
   1012  1.1     ross         }
   1013  1.1     ross     }
   1014  1.1     ross     if ( 0x7FFD <= (bits32) zExp ) {  /* zExp < 0 or zExp >= 0x7FFD (if bits32 is unsigned) */
   1015  1.1     ross         if (    ( 0x7FFD < zExp )
   1016  1.1     ross              || (    ( zExp == 0x7FFD )
   1017  1.1     ross                   && eq128(
   1018  1.1     ross                          LIT64( 0x0001FFFFFFFFFFFF ),
   1019  1.1     ross                          LIT64( 0xFFFFFFFFFFFFFFFF ),
   1020  1.1     ross                          zSig0,
   1021  1.1     ross                          zSig1
   1022  1.1     ross                      )
   1023  1.1     ross                   && increment
   1024  1.1     ross                 )
   1025  1.1     ross            ) {
   1026  1.1     ross             float_raise( float_flag_overflow | float_flag_inexact );
   1027  1.1     ross             if (    ( roundingMode == float_round_to_zero )
   1028  1.1     ross                  || ( zSign && ( roundingMode == float_round_up ) )
   1029  1.1     ross                  || ( ! zSign && ( roundingMode == float_round_down ) )
   1030  1.1     ross                ) {
   1031  1.1     ross                 return  /* exponent 0x7FFE with every fraction bit set */
   1032  1.1     ross                     packFloat128(
   1033  1.1     ross                         zSign,
   1034  1.1     ross                         0x7FFE,
   1035  1.1     ross                         LIT64( 0x0000FFFFFFFFFFFF ),
   1036  1.1     ross                         LIT64( 0xFFFFFFFFFFFFFFFF )
   1037  1.1     ross                     );
   1038  1.1     ross             }
   1039  1.1     ross             return packFloat128( zSign, 0x7FFF, 0, 0 );  /* exponent 0x7FFF with a zero fraction */
   1040  1.1     ross         }
   1041  1.1     ross         if ( zExp < 0 ) {
   1042  1.1     ross             isTiny =  /* decided before the significand is shifted */
   1043  1.1     ross                    ( float_detect_tininess == float_tininess_before_rounding )
   1044  1.1     ross                 || ( zExp < -1 )
   1045  1.1     ross                 || ! increment
   1046  1.1     ross                 || lt128(
   1047  1.1     ross                        zSig0,
   1048  1.1     ross                        zSig1,
   1049  1.1     ross                        LIT64( 0x0001FFFFFFFFFFFF ),
   1050  1.1     ross                        LIT64( 0xFFFFFFFFFFFFFFFF )
   1051  1.1     ross                    );
   1052  1.1     ross             shift128ExtraRightJamming(
   1053  1.1     ross                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
   1054  1.1     ross             zExp = 0;
   1055  1.1     ross             if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
   1056  1.1     ross             if ( roundNearestEven ) {  /* recompute the increment from the shifted zSig2 */
   1057  1.1     ross                 increment = ( (sbits64) zSig2 < 0 );
   1058  1.1     ross             }
   1059  1.1     ross             else {
   1060  1.1     ross                 if ( zSign ) {
   1061  1.1     ross                     increment = ( roundingMode == float_round_down ) && zSig2;
   1062  1.1     ross                 }
   1063  1.1     ross                 else {
   1064  1.1     ross                     increment = ( roundingMode == float_round_up ) && zSig2;
   1065  1.1     ross                 }
   1066  1.1     ross             }
   1067  1.1     ross         }
   1068  1.1     ross     }
   1069  1.1     ross     if ( zSig2 ) float_set_inexact();
   1070  1.1     ross     if ( increment ) {
   1071  1.1     ross         add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
   1072  1.1     ross         zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );  /* exact tie under nearest-even: clear bit 0 */
   1073  1.1     ross     }
   1074  1.1     ross     else {
   1075  1.1     ross         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
   1076  1.1     ross     }
   1077  1.1     ross     return packFloat128( zSign, zExp, zSig0, zSig1 );
   1078  1.1     ross 
   1079  1.1     ross }
   1080  1.1     ross 
   1081  1.1     ross /*
   1082  1.1     ross -------------------------------------------------------------------------------
   1083  1.1     ross Takes an abstract floating-point value having sign `zSign', exponent `zExp',
   1084  1.1     ross and significand formed by the concatenation of `zSig0' and `zSig1', and
   1085  1.1     ross returns the proper quadruple-precision floating-point value corresponding
   1086  1.1     ross to the abstract input.  This routine is just like `roundAndPackFloat128'
   1087  1.1     ross except that the input significand has fewer bits and does not have to be
   1088  1.1     ross normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
   1089  1.1     ross point exponent.
   1090  1.1     ross -------------------------------------------------------------------------------
   1091  1.1     ross */
   1092  1.1     ross static float128
   1093  1.1     ross  normalizeRoundAndPackFloat128(
   1094  1.1     ross      flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
   1095  1.1     ross {
   1096  1.1     ross     int8 shiftCount;
   1097  1.1     ross     bits64 zSig2;
   1098  1.1     ross     /* An empty upper word is replaced by the lower one, costing 64 in the exponent. */
   1099  1.1     ross     if ( zSig0 == 0 ) {
   1100  1.1     ross         zSig0 = zSig1;
   1101  1.1     ross         zSig1 = 0;
   1102  1.1     ross         zExp -= 64;
   1103  1.1     ross     }
   1104  1.1     ross     shiftCount = countLeadingZeros64( zSig0 ) - 15;  /* aim for 15 leading zeros in zSig0 */
   1105  1.1     ross     if ( 0 <= shiftCount ) {
   1106  1.1     ross         zSig2 = 0;  /* a left shift discards nothing */
   1107  1.1     ross         shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
   1108  1.1     ross     }
   1109  1.1     ross     else {  /* too few leading zeros: shift right by - shiftCount, with zSig2 as the third output word */
   1110  1.1     ross         shift128ExtraRightJamming(
   1111  1.1     ross             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
   1112  1.1     ross     }
   1113  1.1     ross     zExp -= shiftCount;
   1114  1.1     ross     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
   1115  1.1     ross 
   1116  1.1     ross }
   1117  1.1     ross 
   1118  1.1     ross #endif
   1119  1.1     ross 
   1120  1.1     ross /*
   1121  1.1     ross -------------------------------------------------------------------------------
   1122  1.1     ross Returns the result of converting the 32-bit two's complement integer `a'
   1123  1.1     ross to the single-precision floating-point format.  The conversion is performed
   1124  1.1     ross according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1125  1.1     ross -------------------------------------------------------------------------------
   1126  1.1     ross */
   1127  1.1     ross float32 int32_to_float32( int32 a )
   1128  1.1     ross {
   1129  1.1     ross     flag zSign;
   1130  1.1     ross 
   1131  1.1     ross     if ( a == 0 ) return 0;
   1132  1.1     ross     if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
   1133  1.1     ross     zSign = ( a < 0 );
   1134  1.1     ross     return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
   1135  1.1     ross 
   1136  1.1     ross }
   1137  1.1     ross 
   1138  1.1     ross /*
   1139  1.1     ross -------------------------------------------------------------------------------
   1140  1.1     ross Returns the result of converting the 32-bit two's complement integer `a'
   1141  1.1     ross to the double-precision floating-point format.  The conversion is performed
   1142  1.1     ross according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1143  1.1     ross -------------------------------------------------------------------------------
   1144  1.1     ross */
   1145  1.1     ross float64 int32_to_float64( int32 a )
   1146  1.1     ross {
   1147  1.1     ross     flag zSign;
   1148  1.1     ross     uint32 absA;
   1149  1.1     ross     int8 shiftCount;
   1150  1.1     ross     bits64 zSig;
   1151  1.1     ross 
   1152  1.1     ross     if ( a == 0 ) return 0;
   1153  1.1     ross     zSign = ( a < 0 );
   1154  1.1     ross     absA = zSign ? - a : a;
   1155  1.1     ross     shiftCount = countLeadingZeros32( absA ) + 21;
   1156  1.1     ross     zSig = absA;
   1157  1.1     ross     return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
   1158  1.1     ross 
   1159  1.1     ross }
   1160  1.1     ross 
   1161  1.1     ross #ifdef FLOATX80
   1162  1.1     ross 
   1163  1.1     ross /*
   1164  1.1     ross -------------------------------------------------------------------------------
   1165  1.1     ross Returns the result of converting the 32-bit two's complement integer `a'
   1166  1.1     ross to the extended double-precision floating-point format.  The conversion
   1167  1.1     ross is performed according to the IEC/IEEE Standard for Binary Floating-Point
   1168  1.1     ross Arithmetic.
   1169  1.1     ross -------------------------------------------------------------------------------
   1170  1.1     ross */
   1171  1.1     ross floatx80 int32_to_floatx80( int32 a )
   1172  1.1     ross {
   1173  1.1     ross     flag zSign;
   1174  1.1     ross     uint32 absA;
   1175  1.1     ross     int8 shiftCount;
   1176  1.1     ross     bits64 zSig;
   1177  1.1     ross 
   1178  1.1     ross     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
   1179  1.1     ross     zSign = ( a < 0 );
   1180  1.1     ross     absA = zSign ? - a : a;
   1181  1.1     ross     shiftCount = countLeadingZeros32( absA ) + 32;
   1182  1.1     ross     zSig = absA;
   1183  1.1     ross     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
   1184  1.1     ross 
   1185  1.1     ross }
   1186  1.1     ross 
   1187  1.1     ross #endif
   1188  1.1     ross 
   1189  1.1     ross #ifdef FLOAT128
   1190  1.1     ross 
   1191  1.1     ross /*
   1192  1.1     ross -------------------------------------------------------------------------------
   1193  1.1     ross Returns the result of converting the 32-bit two's complement integer `a' to
   1194  1.1     ross the quadruple-precision floating-point format.  The conversion is performed
   1195  1.1     ross according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1196  1.1     ross -------------------------------------------------------------------------------
   1197  1.1     ross */
   1198  1.1     ross float128 int32_to_float128( int32 a )
   1199  1.1     ross {
   1200  1.1     ross     flag zSign;
   1201  1.1     ross     uint32 absA;
   1202  1.1     ross     int8 shiftCount;
   1203  1.1     ross     bits64 zSig0;
   1204  1.1     ross 
   1205  1.1     ross     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
   1206  1.1     ross     zSign = ( a < 0 );
   1207  1.1     ross     absA = zSign ? - a : a;
   1208  1.1     ross     shiftCount = countLeadingZeros32( absA ) + 17;
   1209  1.1     ross     zSig0 = absA;
   1210  1.1     ross     return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
   1211  1.1     ross 
   1212  1.1     ross }
   1213  1.1     ross 
   1214  1.1     ross #endif
   1215  1.1     ross 
   1216  1.1     ross #ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */
   1217  1.1     ross /*
   1218  1.1     ross -------------------------------------------------------------------------------
   1219  1.1     ross Returns the result of converting the 64-bit two's complement integer `a'
   1220  1.1     ross to the single-precision floating-point format.  The conversion is performed
   1221  1.1     ross according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1222  1.1     ross -------------------------------------------------------------------------------
   1223  1.1     ross */
   1224  1.1     ross float32 int64_to_float32( int64 a )
   1225  1.1     ross {
   1226  1.1     ross     flag zSign;
   1227  1.1     ross     uint64 absA;
   1228  1.1     ross     int8 shiftCount;
   1229  1.1     ross 
   1230  1.1     ross     if ( a == 0 ) return 0;
   1231  1.1     ross     zSign = ( a < 0 );
   1232  1.1     ross     absA = zSign ? - a : a;
   1233  1.1     ross     shiftCount = countLeadingZeros64( absA ) - 40;
   1234  1.1     ross     if ( 0 <= shiftCount ) {
   1235  1.1     ross         return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
   1236  1.1     ross     }
   1237  1.1     ross     else {
   1238  1.1     ross         shiftCount += 7;
   1239  1.1     ross         if ( shiftCount < 0 ) {
   1240  1.1     ross             shift64RightJamming( absA, - shiftCount, &absA );
   1241  1.1     ross         }
   1242  1.1     ross         else {
   1243  1.1     ross             absA <<= shiftCount;
   1244  1.1     ross         }
   1245  1.1     ross         return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA );
   1246  1.1     ross     }
   1247  1.1     ross 
   1248  1.1     ross }
   1249  1.1     ross 
   1250  1.1     ross /*
   1251  1.1     ross -------------------------------------------------------------------------------
   1252  1.1     ross Returns the result of converting the 64-bit two's complement integer `a'
   1253  1.1     ross to the double-precision floating-point format.  The conversion is performed
   1254  1.1     ross according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1255  1.1     ross -------------------------------------------------------------------------------
   1256  1.1     ross */
   1257  1.1     ross float64 int64_to_float64( int64 a )
   1258  1.1     ross {
   1259  1.1     ross     flag zSign;
   1260  1.1     ross 
   1261  1.1     ross     if ( a == 0 ) return 0;
   1262  1.1     ross     if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
   1263  1.1     ross         return packFloat64( 1, 0x43E, 0 );
   1264  1.1     ross     }
   1265  1.1     ross     zSign = ( a < 0 );
   1266  1.1     ross     return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a );
   1267  1.1     ross 
   1268  1.1     ross }
   1269  1.1     ross 
   1270  1.1     ross #ifdef FLOATX80
   1271  1.1     ross 
   1272  1.1     ross /*
   1273  1.1     ross -------------------------------------------------------------------------------
   1274  1.1     ross Returns the result of converting the 64-bit two's complement integer `a'
   1275  1.1     ross to the extended double-precision floating-point format.  The conversion
   1276  1.1     ross is performed according to the IEC/IEEE Standard for Binary Floating-Point
   1277  1.1     ross Arithmetic.
   1278  1.1     ross -------------------------------------------------------------------------------
   1279  1.1     ross */
   1280  1.1     ross floatx80 int64_to_floatx80( int64 a )
   1281  1.1     ross {
   1282  1.1     ross     flag zSign;
   1283  1.1     ross     uint64 absA;
   1284  1.1     ross     int8 shiftCount;
   1285  1.1     ross 
   1286  1.1     ross     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
   1287  1.1     ross     zSign = ( a < 0 );
   1288  1.1     ross     absA = zSign ? - a : a;
   1289  1.1     ross     shiftCount = countLeadingZeros64( absA );
   1290  1.1     ross     return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
   1291  1.1     ross 
   1292  1.1     ross }
   1293  1.1     ross 
   1294  1.1     ross #endif
   1295  1.1     ross 
   1296  1.1     ross #ifdef FLOAT128
   1297  1.1     ross 
   1298  1.1     ross /*
   1299  1.1     ross -------------------------------------------------------------------------------
   1300  1.1     ross Returns the result of converting the 64-bit two's complement integer `a' to
   1301  1.1     ross the quadruple-precision floating-point format.  The conversion is performed
   1302  1.1     ross according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1303  1.1     ross -------------------------------------------------------------------------------
   1304  1.1     ross */
   1305  1.1     ross float128 int64_to_float128( int64 a )
   1306  1.1     ross {
   1307  1.1     ross     flag zSign;
   1308  1.1     ross     uint64 absA;
   1309  1.1     ross     int8 shiftCount;
   1310  1.1     ross     int32 zExp;
   1311  1.1     ross     bits64 zSig0, zSig1;
   1312  1.1     ross 
   1313  1.1     ross     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
   1314  1.1     ross     zSign = ( a < 0 );
   1315  1.1     ross     absA = zSign ? - a : a;
   1316  1.1     ross     shiftCount = countLeadingZeros64( absA ) + 49;
   1317  1.1     ross     zExp = 0x406E - shiftCount;
   1318  1.1     ross     if ( 64 <= shiftCount ) {
   1319  1.1     ross         zSig1 = 0;
   1320  1.1     ross         zSig0 = absA;
   1321  1.1     ross         shiftCount -= 64;
   1322  1.1     ross     }
   1323  1.1     ross     else {
   1324  1.1     ross         zSig1 = absA;
   1325  1.1     ross         zSig0 = 0;
   1326  1.1     ross     }
   1327  1.1     ross     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
   1328  1.1     ross     return packFloat128( zSign, zExp, zSig0, zSig1 );
   1329  1.1     ross 
   1330  1.1     ross }
   1331  1.1     ross 
   1332  1.1     ross #endif
   1333  1.1     ross #endif /* !SOFTFLOAT_FOR_GCC */
   1334  1.1     ross 
   1335  1.1     ross #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   1336  1.1     ross /*
   1337  1.1     ross -------------------------------------------------------------------------------
   1338  1.1     ross Returns the result of converting the single-precision floating-point value
   1339  1.1     ross `a' to the 32-bit two's complement integer format.  The conversion is
   1340  1.1     ross performed according to the IEC/IEEE Standard for Binary Floating-Point
   1341  1.1     ross Arithmetic---which means in particular that the conversion is rounded
   1342  1.1     ross according to the current rounding mode.  If `a' is a NaN, the largest
   1343  1.1     ross positive integer is returned.  Otherwise, if the conversion overflows, the
   1344  1.1     ross largest integer with the same sign as `a' is returned.
   1345  1.1     ross -------------------------------------------------------------------------------
   1346  1.1     ross */
   1347  1.1     ross int32 float32_to_int32( float32 a )
   1348  1.1     ross {
   1349  1.1     ross     flag aSign;
   1350  1.1     ross     int16 aExp, shiftCount;
   1351  1.1     ross     bits32 aSig;
   1352  1.1     ross     bits64 aSig64;
   1353  1.1     ross 
   1354  1.1     ross     aSig = extractFloat32Frac( a );
   1355  1.1     ross     aExp = extractFloat32Exp( a );
   1356  1.1     ross     aSign = extractFloat32Sign( a );
   1357  1.1     ross     if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
   1358  1.1     ross     if ( aExp ) aSig |= 0x00800000;
   1359  1.1     ross     shiftCount = 0xAF - aExp;
   1360  1.1     ross     aSig64 = aSig;
   1361  1.1     ross     aSig64 <<= 32;
   1362  1.1     ross     if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
   1363  1.1     ross     return roundAndPackInt32( aSign, aSig64 );
   1364  1.1     ross 
   1365  1.1     ross }
   1366  1.1     ross #endif /* !SOFTFLOAT_FOR_GCC */
   1367  1.1     ross 
   1368  1.1     ross /*
   1369  1.1     ross -------------------------------------------------------------------------------
   1370  1.1     ross Returns the result of converting the single-precision floating-point value
   1371  1.1     ross `a' to the 32-bit two's complement integer format.  The conversion is
   1372  1.1     ross performed according to the IEC/IEEE Standard for Binary Floating-Point
   1373  1.1     ross Arithmetic, except that the conversion is always rounded toward zero.
   1374  1.1     ross If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
   1375  1.1     ross the conversion overflows, the largest integer with the same sign as `a' is
   1376  1.1     ross returned.
   1377  1.1     ross -------------------------------------------------------------------------------
   1378  1.1     ross */
   1379  1.1     ross int32 float32_to_int32_round_to_zero( float32 a )
   1380  1.1     ross {
   1381  1.1     ross     flag aSign;
   1382  1.1     ross     int16 aExp, shiftCount;
   1383  1.1     ross     bits32 aSig;
   1384  1.1     ross     int32 z;
   1385  1.1     ross 
   1386  1.1     ross     aSig = extractFloat32Frac( a );
   1387  1.1     ross     aExp = extractFloat32Exp( a );
   1388  1.1     ross     aSign = extractFloat32Sign( a );
   1389  1.1     ross     shiftCount = aExp - 0x9E;
   1390  1.1     ross     if ( 0 <= shiftCount ) {
   1391  1.1     ross         if ( a != 0xCF000000 ) {
   1392  1.1     ross             float_raise( float_flag_invalid );
   1393  1.1     ross             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
   1394  1.1     ross         }
   1395  1.1     ross         return (sbits32) 0x80000000;
   1396  1.1     ross     }
   1397  1.1     ross     else if ( aExp <= 0x7E ) {
   1398  1.1     ross         if ( aExp | aSig ) float_set_inexact();
   1399  1.1     ross         return 0;
   1400  1.1     ross     }
   1401  1.1     ross     aSig = ( aSig | 0x00800000 )<<8;
   1402  1.1     ross     z = aSig>>( - shiftCount );
   1403  1.1     ross     if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
   1404  1.1     ross         float_set_inexact();
   1405  1.1     ross     }
   1406  1.1     ross     if ( aSign ) z = - z;
   1407  1.1     ross     return z;
   1408  1.1     ross 
   1409  1.1     ross }
   1410  1.1     ross 
   1411  1.1     ross #ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */
   1412  1.1     ross /*
   1413  1.1     ross -------------------------------------------------------------------------------
   1414  1.1     ross Returns the result of converting the single-precision floating-point value
   1415  1.1     ross `a' to the 64-bit two's complement integer format.  The conversion is
   1416  1.1     ross performed according to the IEC/IEEE Standard for Binary Floating-Point
   1417  1.1     ross Arithmetic---which means in particular that the conversion is rounded
   1418  1.1     ross according to the current rounding mode.  If `a' is a NaN, the largest
   1419  1.1     ross positive integer is returned.  Otherwise, if the conversion overflows, the
   1420  1.1     ross largest integer with the same sign as `a' is returned.
   1421  1.1     ross -------------------------------------------------------------------------------
   1422  1.1     ross */
   1423  1.1     ross int64 float32_to_int64( float32 a )
   1424  1.1     ross {
   1425  1.1     ross     flag aSign;
   1426  1.1     ross     int16 aExp, shiftCount;
   1427  1.1     ross     bits32 aSig;
   1428  1.1     ross     bits64 aSig64, aSigExtra;
   1429  1.1     ross 
   1430  1.1     ross     aSig = extractFloat32Frac( a );
   1431  1.1     ross     aExp = extractFloat32Exp( a );
   1432  1.1     ross     aSign = extractFloat32Sign( a );
   1433  1.1     ross     shiftCount = 0xBE - aExp;
   1434  1.1     ross     if ( shiftCount < 0 ) {
   1435  1.1     ross         float_raise( float_flag_invalid );
   1436  1.1     ross         if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
   1437  1.1     ross             return LIT64( 0x7FFFFFFFFFFFFFFF );
   1438  1.1     ross         }
   1439  1.1     ross         return (sbits64) LIT64( 0x8000000000000000 );
   1440  1.1     ross     }
   1441  1.1     ross     if ( aExp ) aSig |= 0x00800000;
   1442  1.1     ross     aSig64 = aSig;
   1443  1.1     ross     aSig64 <<= 40;
   1444  1.1     ross     shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
   1445  1.1     ross     return roundAndPackInt64( aSign, aSig64, aSigExtra );
   1446  1.1     ross 
   1447  1.1     ross }
   1448  1.1     ross 
   1449  1.1     ross /*
   1450  1.1     ross -------------------------------------------------------------------------------
   1451  1.1     ross Returns the result of converting the single-precision floating-point value
   1452  1.1     ross `a' to the 64-bit two's complement integer format.  The conversion is
   1453  1.1     ross performed according to the IEC/IEEE Standard for Binary Floating-Point
   1454  1.1     ross Arithmetic, except that the conversion is always rounded toward zero.  If
   1455  1.1     ross `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
   1456  1.1     ross conversion overflows, the largest integer with the same sign as `a' is
   1457  1.1     ross returned.
   1458  1.1     ross -------------------------------------------------------------------------------
   1459  1.1     ross */
   1460  1.1     ross int64 float32_to_int64_round_to_zero( float32 a )
   1461  1.1     ross {
   1462  1.1     ross     flag aSign;
   1463  1.1     ross     int16 aExp, shiftCount;
   1464  1.1     ross     bits32 aSig;
   1465  1.1     ross     bits64 aSig64;
   1466  1.1     ross     int64 z;
   1467  1.1     ross 
   1468  1.1     ross     aSig = extractFloat32Frac( a );
   1469  1.1     ross     aExp = extractFloat32Exp( a );
   1470  1.1     ross     aSign = extractFloat32Sign( a );
   1471  1.1     ross     shiftCount = aExp - 0xBE;
   1472  1.1     ross     if ( 0 <= shiftCount ) {
   1473  1.1     ross         if ( a != 0xDF000000 ) {
   1474  1.1     ross             float_raise( float_flag_invalid );
   1475  1.1     ross             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
   1476  1.1     ross                 return LIT64( 0x7FFFFFFFFFFFFFFF );
   1477  1.1     ross             }
   1478  1.1     ross         }
   1479  1.1     ross         return (sbits64) LIT64( 0x8000000000000000 );
   1480  1.1     ross     }
   1481  1.1     ross     else if ( aExp <= 0x7E ) {
   1482  1.1     ross         if ( aExp | aSig ) float_set_inexact();
   1483  1.1     ross         return 0;
   1484  1.1     ross     }
   1485  1.1     ross     aSig64 = aSig | 0x00800000;
   1486  1.1     ross     aSig64 <<= 40;
   1487  1.1     ross     z = aSig64>>( - shiftCount );
   1488  1.1     ross     if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
   1489  1.1     ross         float_set_inexact();
   1490  1.1     ross     }
   1491  1.1     ross     if ( aSign ) z = - z;
   1492  1.1     ross     return z;
   1493  1.1     ross 
   1494  1.1     ross }
   1495  1.1     ross #endif /* !SOFTFLOAT_FOR_GCC */
   1496  1.1     ross 
   1497  1.1     ross /*
   1498  1.1     ross -------------------------------------------------------------------------------
   1499  1.1     ross Returns the result of converting the single-precision floating-point value
   1500  1.1     ross `a' to the double-precision floating-point format.  The conversion is
   1501  1.1     ross performed according to the IEC/IEEE Standard for Binary Floating-Point
   1502  1.1     ross Arithmetic.
   1503  1.1     ross -------------------------------------------------------------------------------
   1504  1.1     ross */
   1505  1.1     ross float64 float32_to_float64( float32 a )
   1506  1.1     ross {
   1507  1.1     ross     flag aSign;
   1508  1.1     ross     int16 aExp;
   1509  1.1     ross     bits32 aSig;
   1510  1.1     ross 
   1511  1.1     ross     aSig = extractFloat32Frac( a );
   1512  1.1     ross     aExp = extractFloat32Exp( a );
   1513  1.1     ross     aSign = extractFloat32Sign( a );
   1514  1.1     ross     if ( aExp == 0xFF ) {
   1515  1.1     ross         if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
   1516  1.1     ross         return packFloat64( aSign, 0x7FF, 0 );
   1517  1.1     ross     }
   1518  1.1     ross     if ( aExp == 0 ) {
   1519  1.1     ross         if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
   1520  1.1     ross         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1521  1.1     ross         --aExp;
   1522  1.1     ross     }
   1523  1.1     ross     return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
   1524  1.1     ross 
   1525  1.1     ross }
   1526  1.1     ross 
   1527  1.1     ross #ifdef FLOATX80
   1528  1.1     ross 
   1529  1.1     ross /*
   1530  1.1     ross -------------------------------------------------------------------------------
   1531  1.1     ross Returns the result of converting the single-precision floating-point value
   1532  1.1     ross `a' to the extended double-precision floating-point format.  The conversion
   1533  1.1     ross is performed according to the IEC/IEEE Standard for Binary Floating-Point
   1534  1.1     ross Arithmetic.
   1535  1.1     ross -------------------------------------------------------------------------------
   1536  1.1     ross */
   1537  1.1     ross floatx80 float32_to_floatx80( float32 a )
   1538  1.1     ross {
   1539  1.1     ross     flag aSign;
   1540  1.1     ross     int16 aExp;
   1541  1.1     ross     bits32 aSig;
   1542  1.1     ross 
   1543  1.1     ross     aSig = extractFloat32Frac( a );
   1544  1.1     ross     aExp = extractFloat32Exp( a );
   1545  1.1     ross     aSign = extractFloat32Sign( a );
   1546  1.1     ross     if ( aExp == 0xFF ) {
   1547  1.1     ross         if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
   1548  1.1     ross         return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   1549  1.1     ross     }
   1550  1.1     ross     if ( aExp == 0 ) {
   1551  1.1     ross         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
   1552  1.1     ross         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1553  1.1     ross     }
   1554  1.1     ross     aSig |= 0x00800000;
   1555  1.1     ross     return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
   1556  1.1     ross 
   1557  1.1     ross }
   1558  1.1     ross 
   1559  1.1     ross #endif
   1560  1.1     ross 
   1561  1.1     ross #ifdef FLOAT128
   1562  1.1     ross 
   1563  1.1     ross /*
   1564  1.1     ross -------------------------------------------------------------------------------
   1565  1.1     ross Returns the result of converting the single-precision floating-point value
   1566  1.1     ross `a' to the double-precision floating-point format.  The conversion is
   1567  1.1     ross performed according to the IEC/IEEE Standard for Binary Floating-Point
   1568  1.1     ross Arithmetic.
   1569  1.1     ross -------------------------------------------------------------------------------
   1570  1.1     ross */
   1571  1.1     ross float128 float32_to_float128( float32 a )
   1572  1.1     ross {
   1573  1.1     ross     flag aSign;
   1574  1.1     ross     int16 aExp;
   1575  1.1     ross     bits32 aSig;
   1576  1.1     ross 
   1577  1.1     ross     aSig = extractFloat32Frac( a );
   1578  1.1     ross     aExp = extractFloat32Exp( a );
   1579  1.1     ross     aSign = extractFloat32Sign( a );
   1580  1.1     ross     if ( aExp == 0xFF ) {
   1581  1.1     ross         if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) );
   1582  1.1     ross         return packFloat128( aSign, 0x7FFF, 0, 0 );
   1583  1.1     ross     }
   1584  1.1     ross     if ( aExp == 0 ) {
   1585  1.1     ross         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
   1586  1.1     ross         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1587  1.1     ross         --aExp;
   1588  1.1     ross     }
   1589  1.1     ross     return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
   1590  1.1     ross 
   1591  1.1     ross }
   1592  1.1     ross 
   1593  1.1     ross #endif
   1594  1.1     ross 
   1595  1.1     ross #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   1596  1.1     ross /*
   1597  1.1     ross -------------------------------------------------------------------------------
   1598  1.1     ross Rounds the single-precision floating-point value `a' to an integer, and
   1599  1.1     ross returns the result as a single-precision floating-point value.  The
   1600  1.1     ross operation is performed according to the IEC/IEEE Standard for Binary
   1601  1.1     ross Floating-Point Arithmetic.
   1602  1.1     ross -------------------------------------------------------------------------------
   1603  1.1     ross */
   1604  1.1     ross float32 float32_round_to_int( float32 a )
   1605  1.1     ross {
   1606  1.1     ross     flag aSign;
   1607  1.1     ross     int16 aExp;
   1608  1.1     ross     bits32 lastBitMask, roundBitsMask;
   1609  1.1     ross     int8 roundingMode;
   1610  1.1     ross     float32 z;
   1611  1.1     ross 
   1612  1.1     ross     aExp = extractFloat32Exp( a );
   1613  1.1     ross     if ( 0x96 <= aExp ) {
   1614  1.1     ross         if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
   1615  1.1     ross             return propagateFloat32NaN( a, a );
   1616  1.1     ross         }
   1617  1.1     ross         return a;
   1618  1.1     ross     }
   1619  1.1     ross     if ( aExp <= 0x7E ) {
   1620  1.1     ross         if ( (bits32) ( a<<1 ) == 0 ) return a;
   1621  1.1     ross         float_set_inexact();
   1622  1.1     ross         aSign = extractFloat32Sign( a );
   1623  1.1     ross         switch ( float_rounding_mode() ) {
   1624  1.1     ross          case float_round_nearest_even:
   1625  1.1     ross             if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
   1626  1.1     ross                 return packFloat32( aSign, 0x7F, 0 );
   1627  1.1     ross             }
   1628  1.1     ross             break;
   1629  1.1     ross          case float_round_down:
   1630  1.1     ross             return aSign ? 0xBF800000 : 0;
   1631  1.1     ross          case float_round_up:
   1632  1.1     ross             return aSign ? 0x80000000 : 0x3F800000;
   1633  1.1     ross         }
   1634  1.1     ross         return packFloat32( aSign, 0, 0 );
   1635  1.1     ross     }
   1636  1.1     ross     lastBitMask = 1;
   1637  1.1     ross     lastBitMask <<= 0x96 - aExp;
   1638  1.1     ross     roundBitsMask = lastBitMask - 1;
   1639  1.1     ross     z = a;
   1640  1.1     ross     roundingMode = float_rounding_mode();
   1641  1.1     ross     if ( roundingMode == float_round_nearest_even ) {
   1642  1.1     ross         z += lastBitMask>>1;
   1643  1.1     ross         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
   1644  1.1     ross     }
   1645  1.1     ross     else if ( roundingMode != float_round_to_zero ) {
   1646  1.1     ross         if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
   1647  1.1     ross             z += roundBitsMask;
   1648  1.1     ross         }
   1649  1.1     ross     }
   1650  1.1     ross     z &= ~ roundBitsMask;
   1651  1.1     ross     if ( z != a ) float_set_inexact();
   1652  1.1     ross     return z;
   1653  1.1     ross 
   1654  1.1     ross }
   1655  1.1     ross #endif /* !SOFTFLOAT_FOR_GCC */
   1656  1.1     ross 
   1657  1.1     ross /*
   1658  1.1     ross -------------------------------------------------------------------------------
   1659  1.1     ross Returns the result of adding the absolute values of the single-precision
   1660  1.1     ross floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
   1661  1.1     ross before being returned.  `zSign' is ignored if the result is a NaN.
   1662  1.1     ross The addition is performed according to the IEC/IEEE Standard for Binary
   1663  1.1     ross Floating-Point Arithmetic.
   1664  1.1     ross -------------------------------------------------------------------------------
   1665  1.1     ross */
   1666  1.1     ross static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
   1667  1.1     ross {
   1668  1.1     ross     int16 aExp, bExp, zExp;
   1669  1.1     ross     bits32 aSig, bSig, zSig;
   1670  1.1     ross     int16 expDiff;
   1671  1.1     ross 
   1672  1.1     ross     aSig = extractFloat32Frac( a );
   1673  1.1     ross     aExp = extractFloat32Exp( a );
   1674  1.1     ross     bSig = extractFloat32Frac( b );
   1675  1.1     ross     bExp = extractFloat32Exp( b );
   1676  1.1     ross     expDiff = aExp - bExp;
   1677  1.1     ross     aSig <<= 6;
   1678  1.1     ross     bSig <<= 6;
   1679  1.1     ross     if ( 0 < expDiff ) {
   1680  1.1     ross         if ( aExp == 0xFF ) {
   1681  1.1     ross             if ( aSig ) return propagateFloat32NaN( a, b );
   1682  1.1     ross             return a;
   1683  1.1     ross         }
   1684  1.1     ross         if ( bExp == 0 ) {
   1685  1.1     ross             --expDiff;
   1686  1.1     ross         }
   1687  1.1     ross         else {
   1688  1.1     ross             bSig |= 0x20000000;
   1689  1.1     ross         }
   1690  1.1     ross         shift32RightJamming( bSig, expDiff, &bSig );
   1691  1.1     ross         zExp = aExp;
   1692  1.1     ross     }
   1693  1.1     ross     else if ( expDiff < 0 ) {
   1694  1.1     ross         if ( bExp == 0xFF ) {
   1695  1.1     ross             if ( bSig ) return propagateFloat32NaN( a, b );
   1696  1.1     ross             return packFloat32( zSign, 0xFF, 0 );
   1697  1.1     ross         }
   1698  1.1     ross         if ( aExp == 0 ) {
   1699  1.1     ross             ++expDiff;
   1700  1.1     ross         }
   1701  1.1     ross         else {
   1702  1.1     ross             aSig |= 0x20000000;
   1703  1.1     ross         }
   1704  1.1     ross         shift32RightJamming( aSig, - expDiff, &aSig );
   1705  1.1     ross         zExp = bExp;
   1706  1.1     ross     }
   1707  1.1     ross     else {
   1708  1.1     ross         if ( aExp == 0xFF ) {
   1709  1.1     ross             if ( aSig | bSig ) return propagateFloat32NaN( a, b );
   1710  1.1     ross             return a;
   1711  1.1     ross         }
   1712  1.1     ross         if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
   1713  1.1     ross         zSig = 0x40000000 + aSig + bSig;
   1714  1.1     ross         zExp = aExp;
   1715  1.1     ross         goto roundAndPack;
   1716  1.1     ross     }
   1717  1.1     ross     aSig |= 0x20000000;
   1718  1.1     ross     zSig = ( aSig + bSig )<<1;
   1719  1.1     ross     --zExp;
   1720  1.1     ross     if ( (sbits32) zSig < 0 ) {
   1721  1.1     ross         zSig = aSig + bSig;
   1722  1.1     ross         ++zExp;
   1723  1.1     ross     }
   1724  1.1     ross  roundAndPack:
   1725  1.1     ross     return roundAndPackFloat32( zSign, zExp, zSig );
   1726  1.1     ross 
   1727  1.1     ross }
   1728  1.1     ross 
   1729  1.1     ross /*
   1730  1.1     ross -------------------------------------------------------------------------------
   1731  1.1     ross Returns the result of subtracting the absolute values of the single-
   1732  1.1     ross precision floating-point values `a' and `b'.  If `zSign' is 1, the
   1733  1.1     ross difference is negated before being returned.  `zSign' is ignored if the
   1734  1.1     ross result is a NaN.  The subtraction is performed according to the IEC/IEEE
   1735  1.1     ross Standard for Binary Floating-Point Arithmetic.
   1736  1.1     ross -------------------------------------------------------------------------------
   1737  1.1     ross */
   1738  1.1     ross static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
   1739  1.1     ross {
   1740  1.1     ross     int16 aExp, bExp, zExp;
   1741  1.1     ross     bits32 aSig, bSig, zSig;
   1742  1.1     ross     int16 expDiff;
   1743  1.1     ross     /* Unpack fraction and exponent fields of both operands. */
   1744  1.1     ross     aSig = extractFloat32Frac( a );
   1745  1.1     ross     aExp = extractFloat32Exp( a );
   1746  1.1     ross     bSig = extractFloat32Frac( b );
   1747  1.1     ross     bExp = extractFloat32Exp( b );
   1748  1.1     ross     expDiff = aExp - bExp; /* its sign tells which operand has the larger exponent */
   1749  1.1     ross     aSig <<= 7; /* after this shift the implicit leading bit sits at 0x40000000 */
   1750  1.1     ross     bSig <<= 7;
   1751  1.1     ross     if ( 0 < expDiff ) goto aExpBigger;
   1752  1.1     ross     if ( expDiff < 0 ) goto bExpBigger;
   1753  1.1     ross     if ( aExp == 0xFF ) { /* equal exponents, both 0xFF: each operand is Inf or NaN */
   1754  1.1     ross         if ( aSig | bSig ) return propagateFloat32NaN( a, b ); /* at least one NaN */
   1755  1.1     ross         float_raise( float_flag_invalid ); /* Inf - Inf */
   1756  1.1     ross         return float32_default_nan;
   1757  1.1     ross     }
   1758  1.1     ross     if ( aExp == 0 ) { /* both exponent fields are 0: use 1 as the working exponent */
   1759  1.1     ross         aExp = 1;
   1760  1.1     ross         bExp = 1;
   1761  1.1     ross     }
   1762  1.1     ross     if ( bSig < aSig ) goto aBigger; /* equal exponents: the fractions decide the larger magnitude */
   1763  1.1     ross     if ( aSig < bSig ) goto bBigger;
   1764  1.1     ross     return packFloat32( float_rounding_mode() == float_round_down, 0, 0 ); /* exact cancellation: zero, negative only in round-down mode */
   1765  1.1     ross  bExpBigger:
   1766  1.1     ross     if ( bExp == 0xFF ) {
   1767  1.1     ross         if ( bSig ) return propagateFloat32NaN( a, b );
   1768  1.1     ross         return packFloat32( zSign ^ 1, 0xFF, 0 ); /* b is infinite: infinity with the opposite of zSign */
   1769  1.1     ross     }
   1770  1.1     ross     if ( aExp == 0 ) {
   1771  1.1     ross         ++expDiff; /* expDiff is negative here, so this shortens the alignment shift by one */
   1772  1.1     ross     }
   1773  1.1     ross     else {
   1774  1.1     ross         aSig |= 0x40000000; /* a has a nonzero exponent field: add its implicit leading bit */
   1775  1.1     ross     }
   1776  1.1     ross     shift32RightJamming( aSig, - expDiff, &aSig ); /* align a to b; helper is defined elsewhere (presumably keeps a sticky bit -- confirm) */
   1777  1.1     ross     bSig |= 0x40000000;
   1778  1.1     ross  bBigger: /* reached by goto or by falling through from bExpBigger */
   1779  1.1     ross     zSig = bSig - aSig;
   1780  1.1     ross     zExp = bExp;
   1781  1.1     ross     zSign ^= 1; /* |b| > |a|, so the difference takes the opposite sign */
   1782  1.1     ross     goto normalizeRoundAndPack;
   1783  1.1     ross  aExpBigger:
   1784  1.1     ross     if ( aExp == 0xFF ) {
   1785  1.1     ross         if ( aSig ) return propagateFloat32NaN( a, b );
   1786  1.1     ross         return a; /* a is infinite: returned unchanged */
   1787  1.1     ross     }
   1788  1.1     ross     if ( bExp == 0 ) {
   1789  1.1     ross         --expDiff; /* shortens the alignment shift by one when b's exponent field is 0 */
   1790  1.1     ross     }
   1791  1.1     ross     else {
   1792  1.1     ross         bSig |= 0x40000000; /* b has a nonzero exponent field: add its implicit leading bit */
   1793  1.1     ross     }
   1794  1.1     ross     shift32RightJamming( bSig, expDiff, &bSig ); /* align b to a */
   1795  1.1     ross     aSig |= 0x40000000;
   1796  1.1     ross  aBigger: /* reached by goto or by falling through from aExpBigger */
   1797  1.1     ross     zSig = aSig - bSig;
   1798  1.1     ross     zExp = aExp;
   1799  1.1     ross  normalizeRoundAndPack:
   1800  1.1     ross     --zExp; /* exponent is lowered by one before handing off to the normalizing packer */
   1801  1.1     ross     return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
   1802  1.1     ross 
   1803  1.1     ross }
   1804  1.1     ross 
   1805  1.1     ross /*
   1806  1.1     ross -------------------------------------------------------------------------------
   1807  1.1     ross Returns the result of adding the single-precision floating-point values `a'
   1808  1.1     ross and `b'.  The operation is performed according to the IEC/IEEE Standard for
   1809  1.1     ross Binary Floating-Point Arithmetic.
   1810  1.1     ross -------------------------------------------------------------------------------
   1811  1.1     ross */
   1812  1.1     ross float32 float32_add( float32 a, float32 b )
   1813  1.1     ross {
   1814  1.1     ross     flag aSign, bSign;
   1815  1.1     ross 
   1816  1.1     ross     aSign = extractFloat32Sign( a );
   1817  1.1     ross     bSign = extractFloat32Sign( b );
   1818  1.1     ross     if ( aSign == bSign ) {
   1819  1.1     ross         return addFloat32Sigs( a, b, aSign );
   1820  1.1     ross     }
   1821  1.1     ross     else {
   1822  1.1     ross         return subFloat32Sigs( a, b, aSign );
   1823  1.1     ross     }
   1824  1.1     ross 
   1825  1.1     ross }
   1826  1.1     ross 
   1827  1.1     ross /*
   1828  1.1     ross -------------------------------------------------------------------------------
   1829  1.1     ross Returns the result of subtracting the single-precision floating-point values
   1830  1.1     ross `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   1831  1.1     ross for Binary Floating-Point Arithmetic.
   1832  1.1     ross -------------------------------------------------------------------------------
   1833  1.1     ross */
   1834  1.1     ross float32 float32_sub( float32 a, float32 b )
   1835  1.1     ross {
   1836  1.1     ross     flag aSign, bSign;
   1837  1.1     ross 
   1838  1.1     ross     aSign = extractFloat32Sign( a );
   1839  1.1     ross     bSign = extractFloat32Sign( b );
   1840  1.1     ross     if ( aSign == bSign ) {
   1841  1.1     ross         return subFloat32Sigs( a, b, aSign );
   1842  1.1     ross     }
   1843  1.1     ross     else {
   1844  1.1     ross         return addFloat32Sigs( a, b, aSign );
   1845  1.1     ross     }
   1846  1.1     ross 
   1847  1.1     ross }
   1848  1.1     ross 
   1849  1.1     ross /*
   1850  1.1     ross -------------------------------------------------------------------------------
   1851  1.1     ross Returns the result of multiplying the single-precision floating-point values
   1852  1.1     ross `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   1853  1.1     ross for Binary Floating-Point Arithmetic.
   1854  1.1     ross -------------------------------------------------------------------------------
   1855  1.1     ross */
   1856  1.1     ross float32 float32_mul( float32 a, float32 b )
   1857  1.1     ross {
   1858  1.1     ross     flag aSign, bSign, zSign;
   1859  1.1     ross     int16 aExp, bExp, zExp;
   1860  1.1     ross     bits32 aSig, bSig;
   1861  1.1     ross     bits64 zSig64;
   1862  1.1     ross     bits32 zSig;
   1863  1.1     ross 
   1864  1.1     ross     aSig = extractFloat32Frac( a );
   1865  1.1     ross     aExp = extractFloat32Exp( a );
   1866  1.1     ross     aSign = extractFloat32Sign( a );
   1867  1.1     ross     bSig = extractFloat32Frac( b );
   1868  1.1     ross     bExp = extractFloat32Exp( b );
   1869  1.1     ross     bSign = extractFloat32Sign( b );
   1870  1.1     ross     zSign = aSign ^ bSign;
   1871  1.1     ross     if ( aExp == 0xFF ) {
   1872  1.1     ross         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
   1873  1.1     ross             return propagateFloat32NaN( a, b );
   1874  1.1     ross         }
   1875  1.1     ross         if ( ( bExp | bSig ) == 0 ) {
   1876  1.1     ross             float_raise( float_flag_invalid );
   1877  1.1     ross             return float32_default_nan;
   1878  1.1     ross         }
   1879  1.1     ross         return packFloat32( zSign, 0xFF, 0 );
   1880  1.1     ross     }
   1881  1.1     ross     if ( bExp == 0xFF ) {
   1882  1.1     ross         if ( bSig ) return propagateFloat32NaN( a, b );
   1883  1.1     ross         if ( ( aExp | aSig ) == 0 ) {
   1884  1.1     ross             float_raise( float_flag_invalid );
   1885  1.1     ross             return float32_default_nan;
   1886  1.1     ross         }
   1887  1.1     ross         return packFloat32( zSign, 0xFF, 0 );
   1888  1.1     ross     }
   1889  1.1     ross     if ( aExp == 0 ) {
   1890  1.1     ross         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
   1891  1.1     ross         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1892  1.1     ross     }
   1893  1.1     ross     if ( bExp == 0 ) {
   1894  1.1     ross         if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
   1895  1.1     ross         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
   1896  1.1     ross     }
   1897  1.1     ross     zExp = aExp + bExp - 0x7F;
   1898  1.1     ross     aSig = ( aSig | 0x00800000 )<<7;
   1899  1.1     ross     bSig = ( bSig | 0x00800000 )<<8;
   1900  1.1     ross     shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
   1901  1.1     ross     zSig = zSig64;
   1902  1.1     ross     if ( 0 <= (sbits32) ( zSig<<1 ) ) {
   1903  1.1     ross         zSig <<= 1;
   1904  1.1     ross         --zExp;
   1905  1.1     ross     }
   1906  1.1     ross     return roundAndPackFloat32( zSign, zExp, zSig );
   1907  1.1     ross 
   1908  1.1     ross }
   1909  1.1     ross 
   1910  1.1     ross /*
   1911  1.1     ross -------------------------------------------------------------------------------
   1912  1.1     ross Returns the result of dividing the single-precision floating-point value `a'
   1913  1.1     ross by the corresponding value `b'.  The operation is performed according to the
   1914  1.1     ross IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1915  1.1     ross -------------------------------------------------------------------------------
   1916  1.1     ross */
   1917  1.1     ross float32 float32_div( float32 a, float32 b )
   1918  1.1     ross {
   1919  1.1     ross     flag aSign, bSign, zSign;
   1920  1.1     ross     int16 aExp, bExp, zExp;
   1921  1.1     ross     bits32 aSig, bSig, zSig;
   1922  1.1     ross 
   1923  1.1     ross     aSig = extractFloat32Frac( a );
   1924  1.1     ross     aExp = extractFloat32Exp( a );
   1925  1.1     ross     aSign = extractFloat32Sign( a );
   1926  1.1     ross     bSig = extractFloat32Frac( b );
   1927  1.1     ross     bExp = extractFloat32Exp( b );
   1928  1.1     ross     bSign = extractFloat32Sign( b );
   1929  1.1     ross     zSign = aSign ^ bSign;
   1930  1.1     ross     if ( aExp == 0xFF ) {
   1931  1.1     ross         if ( aSig ) return propagateFloat32NaN( a, b );
   1932  1.1     ross         if ( bExp == 0xFF ) {
   1933  1.1     ross             if ( bSig ) return propagateFloat32NaN( a, b );
   1934  1.1     ross             float_raise( float_flag_invalid );
   1935  1.1     ross             return float32_default_nan;
   1936  1.1     ross         }
   1937  1.1     ross         return packFloat32( zSign, 0xFF, 0 );
   1938  1.1     ross     }
   1939  1.1     ross     if ( bExp == 0xFF ) {
   1940  1.1     ross         if ( bSig ) return propagateFloat32NaN( a, b );
   1941  1.1     ross         return packFloat32( zSign, 0, 0 );
   1942  1.1     ross     }
   1943  1.1     ross     if ( bExp == 0 ) {
   1944  1.1     ross         if ( bSig == 0 ) {
   1945  1.1     ross             if ( ( aExp | aSig ) == 0 ) {
   1946  1.1     ross                 float_raise( float_flag_invalid );
   1947  1.1     ross                 return float32_default_nan;
   1948  1.1     ross             }
   1949  1.1     ross             float_raise( float_flag_divbyzero );
   1950  1.1     ross             return packFloat32( zSign, 0xFF, 0 );
   1951  1.1     ross         }
   1952  1.1     ross         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
   1953  1.1     ross     }
   1954  1.1     ross     if ( aExp == 0 ) {
   1955  1.1     ross         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
   1956  1.1     ross         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1957  1.1     ross     }
   1958  1.1     ross     zExp = aExp - bExp + 0x7D;
   1959  1.1     ross     aSig = ( aSig | 0x00800000 )<<7;
   1960  1.1     ross     bSig = ( bSig | 0x00800000 )<<8;
   1961  1.1     ross     if ( bSig <= ( aSig + aSig ) ) {
   1962  1.1     ross         aSig >>= 1;
   1963  1.1     ross         ++zExp;
   1964  1.1     ross     }
   1965  1.1     ross     zSig = ( ( (bits64) aSig )<<32 ) / bSig;
   1966  1.1     ross     if ( ( zSig & 0x3F ) == 0 ) {
   1967  1.1     ross         zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
   1968  1.1     ross     }
   1969  1.1     ross     return roundAndPackFloat32( zSign, zExp, zSig );
   1970  1.1     ross 
   1971  1.1     ross }
   1972  1.1     ross 
   1973  1.1     ross #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   1974  1.1     ross /*
   1975  1.1     ross -------------------------------------------------------------------------------
   1976  1.1     ross Returns the remainder of the single-precision floating-point value `a'
   1977  1.1     ross with respect to the corresponding value `b'.  The operation is performed
   1978  1.1     ross according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1979  1.1     ross -------------------------------------------------------------------------------
   1980  1.1     ross */
   1981  1.1     ross float32 float32_rem( float32 a, float32 b )
   1982  1.1     ross {
   1983  1.1     ross     flag aSign, bSign, zSign;
   1984  1.1     ross     int16 aExp, bExp, expDiff;
   1985  1.1     ross     bits32 aSig, bSig;
   1986  1.1     ross     bits32 q; /* low-order quotient bits; only its lowest bit is consulted at the end */
   1987  1.1     ross     bits64 aSig64, bSig64, q64;
   1988  1.1     ross     bits32 alternateASig; /* last non-negative partial remainder seen in the final loop */
   1989  1.1     ross     sbits32 sigMean;
   1990  1.1     ross     /* Unpack both operands. */
   1991  1.1     ross     aSig = extractFloat32Frac( a );
   1992  1.1     ross     aExp = extractFloat32Exp( a );
   1993  1.1     ross     aSign = extractFloat32Sign( a );
   1994  1.1     ross     bSig = extractFloat32Frac( b );
   1995  1.1     ross     bExp = extractFloat32Exp( b );
   1996  1.1     ross     bSign = extractFloat32Sign( b ); /* assigned but not read again in this function */
   1997  1.1     ross     if ( aExp == 0xFF ) { /* a is Inf or NaN */
   1998  1.1     ross         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
   1999  1.1     ross             return propagateFloat32NaN( a, b );
   2000  1.1     ross         }
   2001  1.1     ross         float_raise( float_flag_invalid ); /* a is infinite and b is not a NaN */
   2002  1.1     ross         return float32_default_nan;
   2003  1.1     ross     }
   2004  1.1     ross     if ( bExp == 0xFF ) { /* b is Inf or NaN, a is finite */
   2005  1.1     ross         if ( bSig ) return propagateFloat32NaN( a, b );
   2006  1.1     ross         return a; /* b is infinite: a is returned unchanged */
   2007  1.1     ross     }
   2008  1.1     ross     if ( bExp == 0 ) {
   2009  1.1     ross         if ( bSig == 0 ) { /* b is zero */
   2010  1.1     ross             float_raise( float_flag_invalid );
   2011  1.1     ross             return float32_default_nan;
   2012  1.1     ross         }
   2013  1.1     ross         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
   2014  1.1     ross     }
   2015  1.1     ross     if ( aExp == 0 ) {
   2016  1.1     ross         if ( aSig == 0 ) return a; /* a is zero: returned unchanged */
   2017  1.1     ross         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   2018  1.1     ross     }
   2019  1.1     ross     expDiff = aExp - bExp;
   2020  1.1     ross     aSig |= 0x00800000; /* add the implicit leading bit to both significands */
   2021  1.1     ross     bSig |= 0x00800000;
   2022  1.1     ross     if ( expDiff < 32 ) { /* small exponent gap: 32-bit significands and one 64-by-32 division suffice */
   2023  1.1     ross         aSig <<= 8;
   2024  1.1     ross         bSig <<= 8;
   2025  1.1     ross         if ( expDiff < 0 ) {
   2026  1.1     ross             if ( expDiff < -1 ) return a; /* a's exponent is more than one below b's: a is returned unchanged */
   2027  1.1     ross             aSig >>= 1; /* expDiff == -1 */
   2028  1.1     ross         }
   2029  1.1     ross         q = ( bSig <= aSig ); /* first quotient bit */
   2030  1.1     ross         if ( q ) aSig -= bSig;
   2031  1.1     ross         if ( 0 < expDiff ) {
   2032  1.1     ross             q = ( ( (bits64) aSig )<<32 ) / bSig;
   2033  1.1     ross             q >>= 32 - expDiff; /* keep only the top expDiff bits of the 32-bit quotient */
   2034  1.1     ross             bSig >>= 2;
   2035  1.1     ross             aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; /* partial remainder after removing q multiples of b */
   2036  1.1     ross         }
   2037  1.1     ross         else {
   2038  1.1     ross             aSig >>= 2;
   2039  1.1     ross             bSig >>= 2;
   2040  1.1     ross         }
   2041  1.1     ross     }
   2042  1.1     ross     else { /* expDiff >= 32: reduce in several steps using 64-bit significands */
   2043  1.1     ross         if ( bSig <= aSig ) aSig -= bSig;
   2044  1.1     ross         aSig64 = ( (bits64) aSig )<<40;
   2045  1.1     ross         bSig64 = ( (bits64) bSig )<<40;
   2046  1.1     ross         expDiff -= 64;
   2047  1.1     ross         while ( 0 < expDiff ) { /* each pass lowers expDiff by 62 */
   2048  1.1     ross             q64 = estimateDiv128To64( aSig64, 0, bSig64 ); /* helper defined elsewhere in this file */
   2049  1.1     ross             q64 = ( 2 < q64 ) ? q64 - 2 : 0; /* estimate minus 2, clamped at 0 (presumably so it never exceeds the true quotient -- confirm) */
   2050  1.1     ross             aSig64 = - ( ( bSig * q64 )<<38 );
   2051  1.1     ross             expDiff -= 62;
   2052  1.1     ross         }
   2053  1.1     ross         expDiff += 64;
   2054  1.1     ross         q64 = estimateDiv128To64( aSig64, 0, bSig64 );
   2055  1.1     ross         q64 = ( 2 < q64 ) ? q64 - 2 : 0;
   2056  1.1     ross         q = q64>>( 64 - expDiff );
   2057  1.1     ross         bSig <<= 6;
   2058  1.1     ross         aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
   2059  1.1     ross     }
   2060  1.1     ross     do { /* subtract bSig until aSig, read as signed, goes negative; runs at least once */
   2061  1.1     ross         alternateASig = aSig;
   2062  1.1     ross         ++q;
   2063  1.1     ross         aSig -= bSig;
   2064  1.1     ross     } while ( 0 <= (sbits32) aSig );
   2065  1.1     ross     sigMean = aSig + alternateASig; /* sign shows which of the two candidates has the smaller magnitude */
   2066  1.1     ross     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { /* on a tie, the low bit of q decides */
   2067  1.1     ross         aSig = alternateASig;
   2068  1.1     ross     }
   2069  1.1     ross     zSign = ( (sbits32) aSig < 0 );
   2070  1.1     ross     if ( zSign ) aSig = - aSig; /* take the magnitude; the sign goes into the result below */
   2071  1.1     ross     return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
   2072  1.1     ross 
   2073  1.1     ross }
   2074  1.1     ross #endif /* !SOFTFLOAT_FOR_GCC */
   2075  1.1     ross 
   2076  1.1     ross #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   2077  1.1     ross /*
   2078  1.1     ross -------------------------------------------------------------------------------
   2079  1.1     ross Returns the square root of the single-precision floating-point value `a'.
   2080  1.1     ross The operation is performed according to the IEC/IEEE Standard for Binary
   2081  1.1     ross Floating-Point Arithmetic.
   2082  1.1     ross -------------------------------------------------------------------------------
   2083  1.1     ross */
   2084  1.1     ross float32 float32_sqrt( float32 a )
   2085  1.1     ross {
   2086  1.1     ross     flag aSign;
   2087  1.1     ross     int16 aExp, zExp;
   2088  1.1     ross     bits32 aSig, zSig;
   2089  1.1     ross     bits64 rem, term;
   2090  1.1     ross     /* Unpack the operand. */
   2091  1.1     ross     aSig = extractFloat32Frac( a );
   2092  1.1     ross     aExp = extractFloat32Exp( a );
   2093  1.1     ross     aSign = extractFloat32Sign( a );
   2094  1.1     ross     if ( aExp == 0xFF ) { /* a is Inf or NaN */
   2095  1.1     ross         if ( aSig ) return propagateFloat32NaN( a, 0 ); /* NaN input; 0 is passed as the second operand */
   2096  1.1     ross         if ( ! aSign ) return a; /* +Inf is returned unchanged */
   2097  1.1     ross         float_raise( float_flag_invalid ); /* -Inf */
   2098  1.1     ross         return float32_default_nan;
   2099  1.1     ross     }
   2100  1.1     ross     if ( aSign ) {
   2101  1.1     ross         if ( ( aExp | aSig ) == 0 ) return a; /* negative zero is returned unchanged */
   2102  1.1     ross         float_raise( float_flag_invalid ); /* any other negative input */
   2103  1.1     ross         return float32_default_nan;
   2104  1.1     ross     }
   2105  1.1     ross     if ( aExp == 0 ) {
   2106  1.1     ross         if ( aSig == 0 ) return 0; /* positive zero */
   2107  1.1     ross         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   2108  1.1     ross     }
   2109  1.1     ross     zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E; /* halve the unbiased exponent, then re-bias */
   2110  1.1     ross     aSig = ( aSig | 0x00800000 )<<8; /* add the implicit bit and move it to bit 31 */
   2111  1.1     ross     zSig = estimateSqrt32( aExp, aSig ) + 2; /* estimate from a helper defined elsewhere, offset by 2 */
   2112  1.1     ross     if ( ( zSig & 0x7F ) <= 5 ) { /* low 7 bits are small: refine the estimate using the exact remainder */
   2113  1.1     ross         if ( zSig < 2 ) { /* NOTE(review): presumably the +2 above wrapped around -- confirm */
   2114  1.1     ross             zSig = 0x7FFFFFFF;
   2115  1.1     ross             goto roundAndPack; /* skips the final one-bit right shift */
   2116  1.1     ross         }
   2117  1.1     ross         aSig >>= aExp & 1;
   2118  1.1     ross         term = ( (bits64) zSig ) * zSig;
   2119  1.1     ross         rem = ( ( (bits64) aSig )<<32 ) - term;
   2120  1.1     ross         while ( (sbits64) rem < 0 ) { /* estimate too large: step zSig down until the remainder is non-negative */
   2121  1.1     ross             --zSig;
   2122  1.1     ross             rem += ( ( (bits64) zSig )<<1 ) | 1; /* adds 2*zSig + 1, the difference between consecutive squares */
   2123  1.1     ross         }
   2124  1.1     ross         zSig |= ( rem != 0 ); /* set the lowest bit when the root is inexact */
   2125  1.1     ross     }
   2126  1.1     ross     shift32RightJamming( zSig, 1, &zSig );
   2127  1.1     ross  roundAndPack:
   2128  1.1     ross     return roundAndPackFloat32( 0, zExp, zSig ); /* sign argument is always 0 here */
   2129  1.1     ross 
   2130  1.1     ross }
   2131  1.1     ross #endif /* !SOFTFLOAT_FOR_GCC */
   2132  1.1     ross 
   2133  1.1     ross /*
   2134  1.1     ross -------------------------------------------------------------------------------
   2135  1.1     ross Returns 1 if the single-precision floating-point value `a' is equal to
   2136  1.1     ross the corresponding value `b', and 0 otherwise.  The comparison is performed
   2137  1.1     ross according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2138  1.1     ross -------------------------------------------------------------------------------
   2139  1.1     ross */
   2140  1.1     ross flag float32_eq( float32 a, float32 b )
   2141  1.1     ross {
   2142  1.1     ross 
   2143  1.1     ross     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2144  1.1     ross          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2145  1.1     ross        ) {
   2146  1.1     ross         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
   2147  1.1     ross             float_raise( float_flag_invalid );
   2148  1.1     ross         }
   2149  1.1     ross         return 0;
   2150  1.1     ross     }
   2151  1.1     ross     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
   2152  1.1     ross 
   2153  1.1     ross }
   2154  1.1     ross 
   2155  1.1     ross /*
   2156  1.1     ross -------------------------------------------------------------------------------
   2157  1.1     ross Returns 1 if the single-precision floating-point value `a' is less than
   2158  1.1     ross or equal to the corresponding value `b', and 0 otherwise.  The comparison
   2159  1.1     ross is performed according to the IEC/IEEE Standard for Binary Floating-Point
   2160  1.1     ross Arithmetic.
   2161  1.1     ross -------------------------------------------------------------------------------
   2162  1.1     ross */
   2163  1.1     ross flag float32_le( float32 a, float32 b )
   2164  1.1     ross {
   2165  1.1     ross     flag aSign, bSign;
   2166  1.1     ross 
   2167  1.1     ross     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2168  1.1     ross          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2169  1.1     ross        ) {
   2170  1.1     ross         float_raise( float_flag_invalid );
   2171  1.1     ross         return 0;
   2172  1.1     ross     }
   2173  1.1     ross     aSign = extractFloat32Sign( a );
   2174  1.1     ross     bSign = extractFloat32Sign( b );
   2175  1.1     ross     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
   2176  1.1     ross     return ( a == b ) || ( aSign ^ ( a < b ) );
   2177  1.1     ross 
   2178  1.1     ross }
   2179  1.1     ross 
   2180  1.1     ross /*
   2181  1.1     ross -------------------------------------------------------------------------------
   2182  1.1     ross Returns 1 if the single-precision floating-point value `a' is less than
   2183  1.1     ross the corresponding value `b', and 0 otherwise.  The comparison is performed
   2184  1.1     ross according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2185  1.1     ross -------------------------------------------------------------------------------
   2186  1.1     ross */
   2187  1.1     ross flag float32_lt( float32 a, float32 b )
   2188  1.1     ross {
   2189  1.1     ross     flag aSign, bSign;
   2190  1.1     ross 
   2191  1.1     ross     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2192  1.1     ross          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2193  1.1     ross        ) {
   2194  1.1     ross         float_raise( float_flag_invalid );
   2195  1.1     ross         return 0;
   2196  1.1     ross     }
   2197  1.1     ross     aSign = extractFloat32Sign( a );
   2198  1.1     ross     bSign = extractFloat32Sign( b );
   2199  1.1     ross     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
   2200  1.1     ross     return ( a != b ) && ( aSign ^ ( a < b ) );
   2201  1.1     ross 
   2202  1.1     ross }
   2203  1.1     ross 
   2204  1.1     ross #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   2205  1.1     ross /*
   2206  1.1     ross -------------------------------------------------------------------------------
   2207  1.1     ross Returns 1 if the single-precision floating-point value `a' is equal to
   2208  1.1     ross the corresponding value `b', and 0 otherwise.  The invalid exception is
   2209  1.1     ross raised if either operand is a NaN.  Otherwise, the comparison is performed
   2210  1.1     ross according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2211  1.1     ross -------------------------------------------------------------------------------
   2212  1.1     ross */
   2213  1.1     ross flag float32_eq_signaling( float32 a, float32 b )
   2214  1.1     ross {
   2215  1.1     ross 
   2216  1.1     ross     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2217  1.1     ross          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2218  1.1     ross        ) {
   2219  1.1     ross         float_raise( float_flag_invalid );
   2220  1.1     ross         return 0;
   2221  1.1     ross     }
   2222  1.1     ross     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
   2223  1.1     ross 
   2224  1.1     ross }
   2225  1.1     ross 
   2226  1.1     ross /*
   2227  1.1     ross -------------------------------------------------------------------------------
   2228  1.1     ross Returns 1 if the single-precision floating-point value `a' is less than or
   2229  1.1     ross equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
   2230  1.1     ross cause an exception.  Otherwise, the comparison is performed according to the
   2231  1.1     ross IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2232  1.1     ross -------------------------------------------------------------------------------
   2233  1.1     ross */
   2234  1.1     ross flag float32_le_quiet( float32 a, float32 b )
   2235  1.1     ross {
   2236  1.1     ross     flag aSign, bSign;
   2237  1.1     ross 
   2238  1.1     ross     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2239  1.1     ross          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2240  1.1     ross        ) {
   2241  1.1     ross         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
   2242  1.1     ross             float_raise( float_flag_invalid );
   2243  1.1     ross         }
   2244  1.1     ross         return 0;
   2245  1.1     ross     }
   2246  1.1     ross     aSign = extractFloat32Sign( a );
   2247  1.1     ross     bSign = extractFloat32Sign( b );
   2248  1.1     ross     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
   2249  1.1     ross     return ( a == b ) || ( aSign ^ ( a < b ) );
   2250  1.1     ross 
   2251  1.1     ross }
   2252  1.1     ross 
   2253  1.1     ross /*
   2254  1.1     ross -------------------------------------------------------------------------------
   2255  1.1     ross Returns 1 if the single-precision floating-point value `a' is less than
   2256  1.1     ross the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
   2257  1.1     ross exception.  Otherwise, the comparison is performed according to the IEC/IEEE
   2258  1.1     ross Standard for Binary Floating-Point Arithmetic.
   2259  1.1     ross -------------------------------------------------------------------------------
   2260  1.1     ross */
   2261  1.1     ross flag float32_lt_quiet( float32 a, float32 b )
   2262  1.1     ross {
   2263  1.1     ross     flag aSign, bSign;
   2264  1.1     ross 
   2265  1.1     ross     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2266  1.1     ross          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2267  1.1     ross        ) {
   2268  1.1     ross         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
   2269  1.1     ross             float_raise( float_flag_invalid );
   2270  1.1     ross         }
   2271  1.1     ross         return 0;
   2272  1.1     ross     }
   2273  1.1     ross     aSign = extractFloat32Sign( a );
   2274  1.1     ross     bSign = extractFloat32Sign( b );
   2275  1.1     ross     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
   2276  1.1     ross     return ( a != b ) && ( aSign ^ ( a < b ) );
   2277  1.1     ross 
   2278  1.1     ross }
   2279  1.1     ross #endif /* !SOFTFLOAT_FOR_GCC */
   2280  1.1     ross 
   2281  1.1     ross #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   2282  1.1     ross /*
   2283  1.1     ross -------------------------------------------------------------------------------
   2284  1.1     ross Returns the result of converting the double-precision floating-point value
   2285  1.1     ross `a' to the 32-bit two's complement integer format.  The conversion is
   2286  1.1     ross performed according to the IEC/IEEE Standard for Binary Floating-Point
   2287  1.1     ross Arithmetic---which means in particular that the conversion is rounded
   2288  1.1     ross according to the current rounding mode.  If `a' is a NaN, the largest
   2289  1.1     ross positive integer is returned.  Otherwise, if the conversion overflows, the
   2290  1.1     ross largest integer with the same sign as `a' is returned.
   2291  1.1     ross -------------------------------------------------------------------------------
   2292  1.1     ross */
   2293  1.1     ross int32 float64_to_int32( float64 a )
   2294  1.1     ross {
   2295  1.1     ross     flag aSign;
   2296  1.1     ross     int16 aExp, shiftCount;
   2297  1.1     ross     bits64 aSig;
   2298  1.1     ross 
   2299  1.1     ross     aSig = extractFloat64Frac( a );
   2300  1.1     ross     aExp = extractFloat64Exp( a );
   2301  1.1     ross     aSign = extractFloat64Sign( a );
   2302  1.1     ross     if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
   2303  1.1     ross     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
   2304  1.1     ross     shiftCount = 0x42C - aExp;
   2305  1.1     ross     if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
   2306  1.1     ross     return roundAndPackInt32( aSign, aSig );
   2307  1.1     ross 
   2308  1.1     ross }
   2309  1.1     ross #endif /* !SOFTFLOAT_FOR_GCC */
   2310  1.1     ross 
   2311  1.1     ross /*
   2312  1.1     ross -------------------------------------------------------------------------------
   2313  1.1     ross Returns the result of converting the double-precision floating-point value
   2314  1.1     ross `a' to the 32-bit two's complement integer format.  The conversion is
   2315  1.1     ross performed according to the IEC/IEEE Standard for Binary Floating-Point
   2316  1.1     ross Arithmetic, except that the conversion is always rounded toward zero.
   2317  1.1     ross If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
   2318  1.1     ross the conversion overflows, the largest integer with the same sign as `a' is
   2319  1.1     ross returned.
   2320  1.1     ross -------------------------------------------------------------------------------
   2321  1.1     ross */
   2322  1.1     ross int32 float64_to_int32_round_to_zero( float64 a )
   2323  1.1     ross {
   2324  1.1     ross     flag aSign;
   2325  1.1     ross     int16 aExp, shiftCount;
   2326  1.1     ross     bits64 aSig, savedASig;
   2327  1.1     ross     int32 z;
   2328  1.1     ross 
   2329  1.1     ross     aSig = extractFloat64Frac( a );
   2330  1.1     ross     aExp = extractFloat64Exp( a );
   2331  1.1     ross     aSign = extractFloat64Sign( a );
   2332  1.1     ross     if ( 0x41E < aExp ) {
   2333  1.1     ross         if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
   2334  1.1     ross         goto invalid;
   2335  1.1     ross     }
   2336  1.1     ross     else if ( aExp < 0x3FF ) {
   2337  1.1     ross         if ( aExp || aSig ) float_set_inexact();
   2338  1.1     ross         return 0;
   2339  1.1     ross     }
   2340  1.1     ross     aSig |= LIT64( 0x0010000000000000 );
   2341  1.1     ross     shiftCount = 0x433 - aExp;
   2342  1.1     ross     savedASig = aSig;
   2343  1.1     ross     aSig >>= shiftCount;
   2344  1.1     ross     z = aSig;
   2345  1.1     ross     if ( aSign ) z = - z;
   2346  1.1     ross     if ( ( z < 0 ) ^ aSign ) {
   2347  1.1     ross  invalid:
   2348  1.1     ross         float_raise( float_flag_invalid );
   2349  1.1     ross         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
   2350  1.1     ross     }
   2351  1.1     ross     if ( ( aSig<<shiftCount ) != savedASig ) {
   2352  1.1     ross         float_set_inexact();
   2353  1.1     ross     }
   2354  1.1     ross     return z;
   2355  1.1     ross 
   2356  1.1     ross }
   2357  1.1     ross 
   2358  1.1     ross #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   2359  1.1     ross /*
   2360  1.1     ross -------------------------------------------------------------------------------
   2361  1.1     ross Returns the result of converting the double-precision floating-point value
   2362  1.1     ross `a' to the 64-bit two's complement integer format.  The conversion is
   2363  1.1     ross performed according to the IEC/IEEE Standard for Binary Floating-Point
   2364  1.1     ross Arithmetic---which means in particular that the conversion is rounded
   2365  1.1     ross according to the current rounding mode.  If `a' is a NaN, the largest
   2366  1.1     ross positive integer is returned.  Otherwise, if the conversion overflows, the
   2367  1.1     ross largest integer with the same sign as `a' is returned.
   2368  1.1     ross -------------------------------------------------------------------------------
   2369  1.1     ross */
   2370  1.1     ross int64 float64_to_int64( float64 a )
   2371  1.1     ross {
   2372  1.1     ross     flag aSign;
   2373  1.1     ross     int16 aExp, shiftCount;
   2374  1.1     ross     bits64 aSig, aSigExtra;
   2375  1.1     ross 
   2376  1.1     ross     aSig = extractFloat64Frac( a );
   2377  1.1     ross     aExp = extractFloat64Exp( a );
   2378  1.1     ross     aSign = extractFloat64Sign( a );
   2379  1.1     ross     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
   2380  1.1     ross     shiftCount = 0x433 - aExp;
   2381  1.1     ross     if ( shiftCount <= 0 ) {
   2382  1.1     ross         if ( 0x43E < aExp ) {
   2383  1.1     ross             float_raise( float_flag_invalid );
   2384  1.1     ross             if (    ! aSign
   2385  1.1     ross                  || (    ( aExp == 0x7FF )
   2386  1.1     ross                       && ( aSig != LIT64( 0x0010000000000000 ) ) )
   2387  1.1     ross                ) {
   2388  1.1     ross                 return LIT64( 0x7FFFFFFFFFFFFFFF );
   2389  1.1     ross             }
   2390  1.1     ross             return (sbits64) LIT64( 0x8000000000000000 );
   2391  1.1     ross         }
   2392  1.1     ross         aSigExtra = 0;
   2393  1.1     ross         aSig <<= - shiftCount;
   2394  1.1     ross     }
   2395  1.1     ross     else {
   2396  1.1     ross         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
   2397  1.1     ross     }
   2398  1.1     ross     return roundAndPackInt64( aSign, aSig, aSigExtra );
   2399  1.1     ross 
   2400  1.1     ross }
   2401  1.1     ross 
   2402  1.1     ross /*
   2403  1.1     ross -------------------------------------------------------------------------------
   2404  1.1     ross Returns the result of converting the double-precision floating-point value
   2405  1.1     ross `a' to the 64-bit two's complement integer format.  The conversion is
   2406  1.1     ross performed according to the IEC/IEEE Standard for Binary Floating-Point
   2407  1.1     ross Arithmetic, except that the conversion is always rounded toward zero.
   2408  1.1     ross If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
   2409  1.1     ross the conversion overflows, the largest integer with the same sign as `a' is
   2410  1.1     ross returned.
   2411  1.1     ross -------------------------------------------------------------------------------
   2412  1.1     ross */
   2413  1.1     ross int64 float64_to_int64_round_to_zero( float64 a )
   2414  1.1     ross {
   2415  1.1     ross     flag aSign;
   2416  1.1     ross     int16 aExp, shiftCount;
   2417  1.1     ross     bits64 aSig;
   2418  1.1     ross     int64 z;
   2419  1.1     ross 
   2420  1.1     ross     aSig = extractFloat64Frac( a );
   2421  1.1     ross     aExp = extractFloat64Exp( a );
   2422  1.1     ross     aSign = extractFloat64Sign( a );
   2423  1.1     ross     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
   2424  1.1     ross     shiftCount = aExp - 0x433;
   2425  1.1     ross     if ( 0 <= shiftCount ) {
   2426  1.1     ross         if ( 0x43E <= aExp ) {
   2427  1.1     ross             if ( a != LIT64( 0xC3E0000000000000 ) ) {
   2428  1.1     ross                 float_raise( float_flag_invalid );
   2429  1.1     ross                 if (    ! aSign
   2430  1.1     ross                      || (    ( aExp == 0x7FF )
   2431  1.1     ross                           && ( aSig != LIT64( 0x0010000000000000 ) ) )
   2432  1.1     ross                    ) {
   2433  1.1     ross                     return LIT64( 0x7FFFFFFFFFFFFFFF );
   2434  1.1     ross                 }
   2435  1.1     ross             }
   2436  1.1     ross             return (sbits64) LIT64( 0x8000000000000000 );
   2437  1.1     ross         }
   2438  1.1     ross         z = aSig<<shiftCount;
   2439  1.1     ross     }
   2440  1.1     ross     else {
   2441  1.1     ross         if ( aExp < 0x3FE ) {
   2442  1.1     ross             if ( aExp | aSig ) float_set_inexact();
   2443  1.1     ross             return 0;
   2444  1.1     ross         }
   2445  1.1     ross         z = aSig>>( - shiftCount );
   2446  1.1     ross         if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
   2447  1.1     ross             float_set_inexact();
   2448  1.1     ross         }
   2449  1.1     ross     }
   2450  1.1     ross     if ( aSign ) z = - z;
   2451  1.1     ross     return z;
   2452  1.1     ross 
   2453  1.1     ross }
   2454  1.1     ross #endif /* !SOFTFLOAT_FOR_GCC */
   2455  1.1     ross 
   2456  1.1     ross /*
   2457  1.1     ross -------------------------------------------------------------------------------
   2458  1.1     ross Returns the result of converting the double-precision floating-point value
   2459  1.1     ross `a' to the single-precision floating-point format.  The conversion is
   2460  1.1     ross performed according to the IEC/IEEE Standard for Binary Floating-Point
   2461  1.1     ross Arithmetic.
   2462  1.1     ross -------------------------------------------------------------------------------
   2463  1.1     ross */
   2464  1.1     ross float32 float64_to_float32( float64 a )
   2465  1.1     ross {
   2466  1.1     ross     flag aSign;
   2467  1.1     ross     int16 aExp;
   2468  1.1     ross     bits64 aSig;
   2469  1.1     ross     bits32 zSig;
   2470  1.1     ross 
   2471  1.1     ross     aSig = extractFloat64Frac( a );
   2472  1.1     ross     aExp = extractFloat64Exp( a );
   2473  1.1     ross     aSign = extractFloat64Sign( a );
   2474  1.1     ross     if ( aExp == 0x7FF ) {
   2475  1.1     ross         if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
   2476  1.1     ross         return packFloat32( aSign, 0xFF, 0 );
   2477  1.1     ross     }
   2478  1.1     ross     shift64RightJamming( aSig, 22, &aSig );
   2479  1.1     ross     zSig = aSig;
   2480  1.1     ross     if ( aExp || zSig ) {
   2481  1.1     ross         zSig |= 0x40000000;
   2482  1.1     ross         aExp -= 0x381;
   2483  1.1     ross     }
   2484  1.1     ross     return roundAndPackFloat32( aSign, aExp, zSig );
   2485  1.1     ross 
   2486  1.1     ross }
   2487  1.1     ross 
   2488  1.1     ross #ifdef FLOATX80
   2489  1.1     ross 
   2490  1.1     ross /*
   2491  1.1     ross -------------------------------------------------------------------------------
   2492  1.1     ross Returns the result of converting the double-precision floating-point value
   2493  1.1     ross `a' to the extended double-precision floating-point format.  The conversion
   2494  1.1     ross is performed according to the IEC/IEEE Standard for Binary Floating-Point
   2495  1.1     ross Arithmetic.
   2496  1.1     ross -------------------------------------------------------------------------------
   2497  1.1     ross */
   2498  1.1     ross floatx80 float64_to_floatx80( float64 a )
   2499  1.1     ross {
   2500  1.1     ross     flag aSign;
   2501  1.1     ross     int16 aExp;
   2502  1.1     ross     bits64 aSig;
   2503  1.1     ross 
   2504  1.1     ross     aSig = extractFloat64Frac( a );
   2505  1.1     ross     aExp = extractFloat64Exp( a );
   2506  1.1     ross     aSign = extractFloat64Sign( a );
   2507  1.1     ross     if ( aExp == 0x7FF ) {
   2508  1.1     ross         if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
   2509  1.1     ross         return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   2510  1.1     ross     }
   2511  1.1     ross     if ( aExp == 0 ) {
   2512  1.1     ross         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
   2513  1.1     ross         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   2514  1.1     ross     }
   2515  1.1     ross     return
   2516  1.1     ross         packFloatx80(
   2517  1.1     ross             aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
   2518  1.1     ross 
   2519  1.1     ross }
   2520  1.1     ross 
   2521  1.1     ross #endif
   2522  1.1     ross 
   2523  1.1     ross #ifdef FLOAT128
   2524  1.1     ross 
   2525  1.1     ross /*
   2526  1.1     ross -------------------------------------------------------------------------------
   2527  1.1     ross Returns the result of converting the double-precision floating-point value
   2528  1.1     ross `a' to the quadruple-precision floating-point format.  The conversion is
   2529  1.1     ross performed according to the IEC/IEEE Standard for Binary Floating-Point
   2530  1.1     ross Arithmetic.
   2531  1.1     ross -------------------------------------------------------------------------------
   2532  1.1     ross */
   2533  1.1     ross float128 float64_to_float128( float64 a )
   2534  1.1     ross {
   2535  1.1     ross     flag aSign;
   2536  1.1     ross     int16 aExp;
   2537  1.1     ross     bits64 aSig, zSig0, zSig1;
   2538  1.1     ross 
   2539  1.1     ross     aSig = extractFloat64Frac( a );
   2540  1.1     ross     aExp = extractFloat64Exp( a );
   2541  1.1     ross     aSign = extractFloat64Sign( a );
   2542  1.1     ross     if ( aExp == 0x7FF ) {
   2543  1.1     ross         if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) );
   2544  1.1     ross         return packFloat128( aSign, 0x7FFF, 0, 0 );
   2545  1.1     ross     }
   2546  1.1     ross     if ( aExp == 0 ) {
   2547  1.1     ross         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
   2548  1.1     ross         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   2549  1.1     ross         --aExp;
   2550  1.1     ross     }
   2551  1.1     ross     shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
   2552  1.1     ross     return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
   2553  1.1     ross 
   2554  1.1     ross }
   2555  1.1     ross 
   2556  1.1     ross #endif
   2557  1.1     ross 
   2558  1.1     ross #ifndef SOFTFLOAT_FOR_GCC
   2559  1.1     ross /*
   2560  1.1     ross -------------------------------------------------------------------------------
   2561  1.1     ross Rounds the double-precision floating-point value `a' to an integer, and
   2562  1.1     ross returns the result as a double-precision floating-point value.  The
   2563  1.1     ross operation is performed according to the IEC/IEEE Standard for Binary
   2564  1.1     ross Floating-Point Arithmetic.
   2565  1.1     ross -------------------------------------------------------------------------------
   2566  1.1     ross */
   2567  1.1     ross float64 float64_round_to_int( float64 a )
   2568  1.1     ross {
   2569  1.1     ross     flag aSign;
   2570  1.1     ross     int16 aExp;
   2571  1.1     ross     bits64 lastBitMask, roundBitsMask;
   2572  1.1     ross     int8 roundingMode;
   2573  1.1     ross     float64 z;
   2574  1.1     ross 
   2575  1.1     ross     aExp = extractFloat64Exp( a );
   2576  1.1     ross     if ( 0x433 <= aExp ) {
   2577  1.1     ross         if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
   2578  1.1     ross             return propagateFloat64NaN( a, a );
   2579  1.1     ross         }
   2580  1.1     ross         return a;
   2581  1.1     ross     }
   2582  1.1     ross     if ( aExp < 0x3FF ) {
   2583  1.1     ross         if ( (bits64) ( a<<1 ) == 0 ) return a;
   2584  1.1     ross         float_set_inexact();
   2585  1.1     ross         aSign = extractFloat64Sign( a );
   2586  1.1     ross         switch ( float_rounding_mode() ) {
   2587  1.1     ross          case float_round_nearest_even:
   2588  1.1     ross             if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
   2589  1.1     ross                 return packFloat64( aSign, 0x3FF, 0 );
   2590  1.1     ross             }
   2591  1.1     ross             break;
   2592  1.1     ross          case float_round_down:
   2593  1.1     ross             return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
   2594  1.1     ross          case float_round_up:
   2595  1.1     ross             return
   2596  1.1     ross             aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
   2597  1.1     ross         }
   2598  1.1     ross         return packFloat64( aSign, 0, 0 );
   2599  1.1     ross     }
   2600  1.1     ross     lastBitMask = 1;
   2601  1.1     ross     lastBitMask <<= 0x433 - aExp;
   2602  1.1     ross     roundBitsMask = lastBitMask - 1;
   2603  1.1     ross     z = a;
   2604  1.1     ross     roundingMode = float_rounding_mode();
   2605  1.1     ross     if ( roundingMode == float_round_nearest_even ) {
   2606  1.1     ross         z += lastBitMask>>1;
   2607  1.1     ross         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
   2608  1.1     ross     }
   2609  1.1     ross     else if ( roundingMode != float_round_to_zero ) {
   2610  1.1     ross         if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
   2611  1.1     ross             z += roundBitsMask;
   2612  1.1     ross         }
   2613  1.1     ross     }
   2614  1.1     ross     z &= ~ roundBitsMask;
   2615  1.1     ross     if ( z != a ) float_set_inexact();
   2616  1.1     ross     return z;
   2617  1.1     ross 
   2618  1.1     ross }
   2619  1.1     ross #endif
   2620  1.1     ross 
   2621  1.1     ross /*
   2622  1.1     ross -------------------------------------------------------------------------------
   2623  1.1     ross Returns the result of adding the absolute values of the double-precision
   2624  1.1     ross floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
   2625  1.1     ross before being returned.  `zSign' is ignored if the result is a NaN.
   2626  1.1     ross The addition is performed according to the IEC/IEEE Standard for Binary
   2627  1.1     ross Floating-Point Arithmetic.
   2628  1.1     ross -------------------------------------------------------------------------------
   2629  1.1     ross */
   2630  1.1     ross static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
   2631  1.1     ross {
   2632  1.1     ross     int16 aExp, bExp, zExp;
   2633  1.1     ross     bits64 aSig, bSig, zSig;
   2634  1.1     ross     int16 expDiff;
   2635  1.1     ross 
   2636  1.1     ross     aSig = extractFloat64Frac( a );
   2637  1.1     ross     aExp = extractFloat64Exp( a );
   2638  1.1     ross     bSig = extractFloat64Frac( b );
   2639  1.1     ross     bExp = extractFloat64Exp( b );
   2640  1.1     ross     expDiff = aExp - bExp;
   2641  1.1     ross     aSig <<= 9;
   2642  1.1     ross     bSig <<= 9;
   2643  1.1     ross     if ( 0 < expDiff ) {
   2644  1.1     ross         if ( aExp == 0x7FF ) {
   2645  1.1     ross             if ( aSig ) return propagateFloat64NaN( a, b );
   2646  1.1     ross             return a;
   2647  1.1     ross         }
   2648  1.1     ross         if ( bExp == 0 ) {
   2649  1.1     ross             --expDiff;
   2650  1.1     ross         }
   2651  1.1     ross         else {
   2652  1.1     ross             bSig |= LIT64( 0x2000000000000000 );
   2653  1.1     ross         }
   2654  1.1     ross         shift64RightJamming( bSig, expDiff, &bSig );
   2655  1.1     ross         zExp = aExp;
   2656  1.1     ross     }
   2657  1.1     ross     else if ( expDiff < 0 ) {
   2658  1.1     ross         if ( bExp == 0x7FF ) {
   2659  1.1     ross             if ( bSig ) return propagateFloat64NaN( a, b );
   2660  1.1     ross             return packFloat64( zSign, 0x7FF, 0 );
   2661  1.1     ross         }
   2662  1.1     ross         if ( aExp == 0 ) {
   2663  1.1     ross             ++expDiff;
   2664  1.1     ross         }
   2665  1.1     ross         else {
   2666  1.1     ross             aSig |= LIT64( 0x2000000000000000 );
   2667  1.1     ross         }
   2668  1.1     ross         shift64RightJamming( aSig, - expDiff, &aSig );
   2669  1.1     ross         zExp = bExp;
   2670  1.1     ross     }
   2671  1.1     ross     else {
   2672  1.1     ross         if ( aExp == 0x7FF ) {
   2673  1.1     ross             if ( aSig | bSig ) return propagateFloat64NaN( a, b );
   2674  1.1     ross             return a;
   2675  1.1     ross         }
   2676  1.1     ross         if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
   2677  1.1     ross         zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
   2678  1.1     ross         zExp = aExp;
   2679  1.1     ross         goto roundAndPack;
   2680  1.1     ross     }
   2681  1.1     ross     aSig |= LIT64( 0x2000000000000000 );
   2682  1.1     ross     zSig = ( aSig + bSig )<<1;
   2683  1.1     ross     --zExp;
   2684  1.1     ross     if ( (sbits64) zSig < 0 ) {
   2685  1.1     ross         zSig = aSig + bSig;
   2686  1.1     ross         ++zExp;
   2687  1.1     ross     }
   2688  1.1     ross  roundAndPack:
   2689  1.1     ross     return roundAndPackFloat64( zSign, zExp, zSig );
   2690  1.1     ross 
   2691  1.1     ross }
   2692  1.1     ross 
   2693  1.1     ross /*
   2694  1.1     ross -------------------------------------------------------------------------------
   2695  1.1     ross Returns the result of subtracting the absolute values of the double-
   2696  1.1     ross precision floating-point values `a' and `b'.  If `zSign' is 1, the
   2697  1.1     ross difference is negated before being returned.  `zSign' is ignored if the
   2698  1.1     ross result is a NaN.  The subtraction is performed according to the IEC/IEEE
   2699  1.1     ross Standard for Binary Floating-Point Arithmetic.
   2700  1.1     ross -------------------------------------------------------------------------------
   2701  1.1     ross */
   2702  1.1     ross static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
   2703  1.1     ross {
   2704  1.1     ross     int16 aExp, bExp, zExp;
   2705  1.1     ross     bits64 aSig, bSig, zSig;
   2706  1.1     ross     int16 expDiff;
   2707  1.1     ross 
   2708  1.1     ross     aSig = extractFloat64Frac( a );
   2709  1.1     ross     aExp = extractFloat64Exp( a );
   2710  1.1     ross     bSig = extractFloat64Frac( b );
   2711  1.1     ross     bExp = extractFloat64Exp( b );
   2712  1.1     ross     expDiff = aExp - bExp;
   2713  1.1     ross     aSig <<= 10;
   2714  1.1     ross     bSig <<= 10;
   2715  1.1     ross     if ( 0 < expDiff ) goto aExpBigger;
   2716  1.1     ross     if ( expDiff < 0 ) goto bExpBigger;
   2717  1.1     ross     if ( aExp == 0x7FF ) {
   2718  1.1     ross         if ( aSig | bSig ) return propagateFloat64NaN( a, b );
   2719  1.1     ross         float_raise( float_flag_invalid );
   2720  1.1     ross         return float64_default_nan;
   2721  1.1     ross     }
   2722  1.1     ross     if ( aExp == 0 ) {
   2723  1.1     ross         aExp = 1;
   2724  1.1     ross         bExp = 1;
   2725  1.1     ross     }
   2726  1.1     ross     if ( bSig < aSig ) goto aBigger;
   2727  1.1     ross     if ( aSig < bSig ) goto bBigger;
   2728  1.1     ross     return packFloat64( float_rounding_mode() == float_round_down, 0, 0 );
   2729  1.1     ross  bExpBigger:
   2730  1.1     ross     if ( bExp == 0x7FF ) {
   2731  1.1     ross         if ( bSig ) return propagateFloat64NaN( a, b );
   2732  1.1     ross         return packFloat64( zSign ^ 1, 0x7FF, 0 );
   2733  1.1     ross     }
   2734  1.1     ross     if ( aExp == 0 ) {
   2735  1.1     ross         ++expDiff;
   2736  1.1     ross     }
   2737  1.1     ross     else {
   2738  1.1     ross         aSig |= LIT64( 0x4000000000000000 );
   2739  1.1     ross     }
   2740  1.1     ross     shift64RightJamming( aSig, - expDiff, &aSig );
   2741  1.1     ross     bSig |= LIT64( 0x4000000000000000 );
   2742  1.1     ross  bBigger:
   2743  1.1     ross     zSig = bSig - aSig;
   2744  1.1     ross     zExp = bExp;
   2745  1.1     ross     zSign ^= 1;
   2746  1.1     ross     goto normalizeRoundAndPack;
   2747  1.1     ross  aExpBigger:
   2748  1.1     ross     if ( aExp == 0x7FF ) {
   2749  1.1     ross         if ( aSig ) return propagateFloat64NaN( a, b );
   2750  1.1     ross         return a;
   2751  1.1     ross     }
   2752  1.1     ross     if ( bExp == 0 ) {
   2753  1.1     ross         --expDiff;
   2754  1.1     ross     }
   2755  1.1     ross     else {
   2756  1.1     ross         bSig |= LIT64( 0x4000000000000000 );
   2757  1.1     ross     }
   2758  1.1     ross     shift64RightJamming( bSig, expDiff, &bSig );
   2759  1.1     ross     aSig |= LIT64( 0x4000000000000000 );
   2760  1.1     ross  aBigger:
   2761  1.1     ross     zSig = aSig - bSig;
   2762  1.1     ross     zExp = aExp;
   2763  1.1     ross  normalizeRoundAndPack:
   2764  1.1     ross     --zExp;
   2765  1.1     ross     return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
   2766  1.1     ross 
   2767  1.1     ross }
   2768  1.1     ross 
   2769  1.1     ross /*
   2770  1.1     ross -------------------------------------------------------------------------------
   2771  1.1     ross Returns the result of adding the double-precision floating-point values `a'
   2772  1.1     ross and `b'.  The operation is performed according to the IEC/IEEE Standard for
   2773  1.1     ross Binary Floating-Point Arithmetic.
   2774  1.1     ross -------------------------------------------------------------------------------
   2775  1.1     ross */
   2776  1.1     ross float64 float64_add( float64 a, float64 b )
   2777  1.1     ross {
   2778  1.1     ross     flag aSign, bSign;
   2779  1.1     ross 
   2780  1.1     ross     aSign = extractFloat64Sign( a );
   2781  1.1     ross     bSign = extractFloat64Sign( b );
   2782  1.1     ross     if ( aSign == bSign ) {
   2783  1.1     ross         return addFloat64Sigs( a, b, aSign );
   2784  1.1     ross     }
   2785  1.1     ross     else {
   2786  1.1     ross         return subFloat64Sigs( a, b, aSign );
   2787  1.1     ross     }
   2788  1.1     ross 
   2789  1.1     ross }
   2790  1.1     ross 
   2791  1.1     ross /*
   2792  1.1     ross -------------------------------------------------------------------------------
   2793  1.1     ross Returns the result of subtracting the double-precision floating-point values
   2794  1.1     ross `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   2795  1.1     ross for Binary Floating-Point Arithmetic.
   2796  1.1     ross -------------------------------------------------------------------------------
   2797  1.1     ross */
   2798  1.1     ross float64 float64_sub( float64 a, float64 b )
   2799  1.1     ross {
   2800  1.1     ross     flag aSign, bSign;
   2801  1.1     ross 
   2802  1.1     ross     aSign = extractFloat64Sign( a );
   2803  1.1     ross     bSign = extractFloat64Sign( b );
   2804  1.1     ross     if ( aSign == bSign ) {
   2805  1.1     ross         return subFloat64Sigs( a, b, aSign );
   2806  1.1     ross     }
   2807  1.1     ross     else {
   2808  1.1     ross         return addFloat64Sigs( a, b, aSign );
   2809  1.1     ross     }
   2810  1.1     ross 
   2811  1.1     ross }
   2812  1.1     ross 
   2813  1.1     ross /*
   2814  1.1     ross -------------------------------------------------------------------------------
   2815  1.1     ross Returns the result of multiplying the double-precision floating-point values
   2816  1.1     ross `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   2817  1.1     ross for Binary Floating-Point Arithmetic.
   2818  1.1     ross -------------------------------------------------------------------------------
   2819  1.1     ross */
   2820  1.1     ross float64 float64_mul( float64 a, float64 b )
   2821  1.1     ross {
   2822  1.1     ross     flag aSign, bSign, zSign;
   2823  1.1     ross     int16 aExp, bExp, zExp;
   2824  1.1     ross     bits64 aSig, bSig, zSig0, zSig1;
   2825  1.1     ross 
   2826  1.1     ross     aSig = extractFloat64Frac( a );
   2827  1.1     ross     aExp = extractFloat64Exp( a );
   2828  1.1     ross     aSign = extractFloat64Sign( a );
   2829  1.1     ross     bSig = extractFloat64Frac( b );
   2830  1.1     ross     bExp = extractFloat64Exp( b );
   2831  1.1     ross     bSign = extractFloat64Sign( b );
   2832  1.1     ross     zSign = aSign ^ bSign;
   2833  1.1     ross     if ( aExp == 0x7FF ) {
   2834  1.1     ross         if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
   2835  1.1     ross             return propagateFloat64NaN( a, b );
   2836  1.1     ross         }
   2837  1.1     ross         if ( ( bExp | bSig ) == 0 ) {
   2838  1.1     ross             float_raise( float_flag_invalid );
   2839  1.1     ross             return float64_default_nan;
   2840  1.1     ross         }
   2841  1.1     ross         return packFloat64( zSign, 0x7FF, 0 );
   2842  1.1     ross     }
   2843  1.1     ross     if ( bExp == 0x7FF ) {
   2844  1.1     ross         if ( bSig ) return propagateFloat64NaN( a, b );
   2845  1.1     ross         if ( ( aExp | aSig ) == 0 ) {
   2846  1.1     ross             float_raise( float_flag_invalid );
   2847  1.1     ross             return float64_default_nan;
   2848  1.1     ross         }
   2849  1.1     ross         return packFloat64( zSign, 0x7FF, 0 );
   2850  1.1     ross     }
   2851  1.1     ross     if ( aExp == 0 ) {
   2852  1.1     ross         if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
   2853  1.1     ross         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   2854  1.1     ross     }
   2855  1.1     ross     if ( bExp == 0 ) {
   2856  1.1     ross         if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
   2857  1.1     ross         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
   2858  1.1     ross     }
   2859  1.1     ross     zExp = aExp + bExp - 0x3FF;
   2860  1.1     ross     aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
   2861  1.1     ross     bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
   2862  1.1     ross     mul64To128( aSig, bSig, &zSig0, &zSig1 );
   2863  1.1     ross     zSig0 |= ( zSig1 != 0 );
   2864  1.1     ross     if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
   2865  1.1     ross         zSig0 <<= 1;
   2866  1.1     ross         --zExp;
   2867  1.1     ross     }
   2868  1.1     ross     return roundAndPackFloat64( zSign, zExp, zSig0 );
   2869  1.1     ross 
   2870  1.1     ross }
   2871  1.1     ross 
   2872  1.1     ross /*
   2873  1.1     ross -------------------------------------------------------------------------------
   2874  1.1     ross Returns the result of dividing the double-precision floating-point value `a'
   2875  1.1     ross by the corresponding value `b'.  The operation is performed according to
   2876  1.1     ross the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2877  1.1     ross -------------------------------------------------------------------------------
   2878  1.1     ross */
   2879  1.1     ross float64 float64_div( float64 a, float64 b )
   2880  1.1     ross {
   2881  1.1     ross     flag aSign, bSign, zSign;
   2882  1.1     ross     int16 aExp, bExp, zExp;
   2883  1.1     ross     bits64 aSig, bSig, zSig;
   2884  1.1     ross     bits64 rem0, rem1;
   2885  1.1     ross     bits64 term0, term1;
   2886  1.1     ross 
   2887  1.1     ross     aSig = extractFloat64Frac( a );
   2888  1.1     ross     aExp = extractFloat64Exp( a );
   2889  1.1     ross     aSign = extractFloat64Sign( a );
   2890  1.1     ross     bSig = extractFloat64Frac( b );
   2891  1.1     ross     bExp = extractFloat64Exp( b );
   2892  1.1     ross     bSign = extractFloat64Sign( b );
   2893  1.1     ross     zSign = aSign ^ bSign;
   2894  1.1     ross     if ( aExp == 0x7FF ) {
   2895  1.1     ross         if ( aSig ) return propagateFloat64NaN( a, b );
   2896  1.1     ross         if ( bExp == 0x7FF ) {
   2897  1.1     ross             if ( bSig ) return propagateFloat64NaN( a, b );
   2898  1.1     ross             float_raise( float_flag_invalid );
   2899  1.1     ross             return float64_default_nan;
   2900  1.1     ross         }
   2901  1.1     ross         return packFloat64( zSign, 0x7FF, 0 );
   2902  1.1     ross     }
   2903  1.1     ross     if ( bExp == 0x7FF ) {
   2904  1.1     ross         if ( bSig ) return propagateFloat64NaN( a, b );
   2905  1.1     ross         return packFloat64( zSign, 0, 0 );
   2906  1.1     ross     }
   2907  1.1     ross     if ( bExp == 0 ) {
   2908  1.1     ross         if ( bSig == 0 ) {
   2909  1.1     ross             if ( ( aExp | aSig ) == 0 ) {
   2910  1.1     ross                 float_raise( float_flag_invalid );
   2911  1.1     ross                 return float64_default_nan;
   2912  1.1     ross             }
   2913  1.1     ross             float_raise( float_flag_divbyzero );
   2914  1.1     ross             return packFloat64( zSign, 0x7FF, 0 );
   2915  1.1     ross         }
   2916  1.1     ross         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
   2917  1.1     ross     }
   2918  1.1     ross     if ( aExp == 0 ) {
   2919  1.1     ross         if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
   2920  1.1     ross         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   2921  1.1     ross     }
   2922  1.1     ross     zExp = aExp - bExp + 0x3FD;
   2923  1.1     ross     aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
   2924  1.1     ross     bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
   2925  1.1     ross     if ( bSig <= ( aSig + aSig ) ) {
   2926  1.1     ross         aSig >>= 1;
   2927  1.1     ross         ++zExp;
   2928  1.1     ross     }
   2929  1.1     ross     zSig = estimateDiv128To64( aSig, 0, bSig );
   2930  1.1     ross     if ( ( zSig & 0x1FF ) <= 2 ) {
   2931  1.1     ross         mul64To128( bSig, zSig, &term0, &term1 );
   2932  1.1     ross         sub128( aSig, 0, term0, term1, &rem0, &rem1 );
   2933  1.1     ross         while ( (sbits64) rem0 < 0 ) {
   2934  1.1     ross             --zSig;
   2935  1.1     ross             add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
   2936  1.1     ross         }
   2937  1.1     ross         zSig |= ( rem1 != 0 );
   2938  1.1     ross     }
   2939  1.1     ross     return roundAndPackFloat64( zSign, zExp, zSig );
   2940  1.1     ross 
   2941  1.1     ross }
   2942  1.1     ross 
   2943  1.1     ross #ifndef SOFTFLOAT_FOR_GCC
   2944  1.1     ross /*
   2945  1.1     ross -------------------------------------------------------------------------------
   2946  1.1     ross Returns the remainder of the double-precision floating-point value `a'
   2947  1.1     ross with respect to the corresponding value `b'.  The operation is performed
   2948  1.1     ross according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2949  1.1     ross -------------------------------------------------------------------------------
   2950  1.1     ross */
   2951  1.1     ross float64 float64_rem( float64 a, float64 b )
   2952  1.1     ross {
   2953  1.1     ross     flag aSign, bSign, zSign;
   2954  1.1     ross     int16 aExp, bExp, expDiff;
   2955  1.1     ross     bits64 aSig, bSig;
   2956  1.1     ross     bits64 q, alternateASig;
   2957  1.1     ross     sbits64 sigMean;
   2958  1.1     ross 
   2959  1.1     ross     aSig = extractFloat64Frac( a );
   2960  1.1     ross     aExp = extractFloat64Exp( a );
   2961  1.1     ross     aSign = extractFloat64Sign( a );
   2962  1.1     ross     bSig = extractFloat64Frac( b );
   2963  1.1     ross     bExp = extractFloat64Exp( b );
   2964  1.1     ross     bSign = extractFloat64Sign( b );
   2965  1.1     ross     if ( aExp == 0x7FF ) {
   2966  1.1     ross         if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
   2967  1.1     ross             return propagateFloat64NaN( a, b );
   2968  1.1     ross         }
   2969  1.1     ross         float_raise( float_flag_invalid );
   2970  1.1     ross         return float64_default_nan;
   2971  1.1     ross     }
   2972  1.1     ross     if ( bExp == 0x7FF ) {
   2973  1.1     ross         if ( bSig ) return propagateFloat64NaN( a, b );
   2974  1.1     ross         return a;
   2975  1.1     ross     }
   2976  1.1     ross     if ( bExp == 0 ) {
   2977  1.1     ross         if ( bSig == 0 ) {
   2978  1.1     ross             float_raise( float_flag_invalid );
   2979  1.1     ross             return float64_default_nan;
   2980  1.1     ross         }
   2981  1.1     ross         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
   2982  1.1     ross     }
   2983  1.1     ross     if ( aExp == 0 ) {
   2984  1.1     ross         if ( aSig == 0 ) return a;
   2985  1.1     ross         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   2986  1.1     ross     }
   2987  1.1     ross     expDiff = aExp - bExp;
   2988  1.1     ross     aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
   2989  1.1     ross     bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
   2990  1.1     ross     if ( expDiff < 0 ) {
   2991  1.1     ross         if ( expDiff < -1 ) return a;
   2992  1.1     ross         aSig >>= 1;
   2993  1.1     ross     }
   2994  1.1     ross     q = ( bSig <= aSig );
   2995  1.1     ross     if ( q ) aSig -= bSig;
   2996  1.1     ross     expDiff -= 64;
   2997  1.1     ross     while ( 0 < expDiff ) {
   2998  1.1     ross         q = estimateDiv128To64( aSig, 0, bSig );
   2999  1.1     ross         q = ( 2 < q ) ? q - 2 : 0;
   3000  1.1     ross         aSig = - ( ( bSig>>2 ) * q );
   3001  1.1     ross         expDiff -= 62;
   3002  1.1     ross     }
   3003  1.1     ross     expDiff += 64;
   3004  1.1     ross     if ( 0 < expDiff ) {
   3005  1.1     ross         q = estimateDiv128To64( aSig, 0, bSig );
   3006  1.1     ross         q = ( 2 < q ) ? q - 2 : 0;
   3007  1.1     ross         q >>= 64 - expDiff;
   3008  1.1     ross         bSig >>= 2;
   3009  1.1     ross         aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
   3010  1.1     ross     }
   3011  1.1     ross     else {
   3012  1.1     ross         aSig >>= 2;
   3013  1.1     ross         bSig >>= 2;
   3014  1.1     ross     }
   3015  1.1     ross     do {
   3016  1.1     ross         alternateASig = aSig;
   3017  1.1     ross         ++q;
   3018  1.1     ross         aSig -= bSig;
   3019  1.1     ross     } while ( 0 <= (sbits64) aSig );
   3020  1.1     ross     sigMean = aSig + alternateASig;
   3021  1.1     ross     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
   3022  1.1     ross         aSig = alternateASig;
   3023  1.1     ross     }
   3024  1.1     ross     zSign = ( (sbits64) aSig < 0 );
   3025  1.1     ross     if ( zSign ) aSig = - aSig;
   3026  1.1     ross     return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
   3027  1.1     ross 
   3028  1.1     ross }
   3029  1.1     ross 
   3030  1.1     ross /*
   3031  1.1     ross -------------------------------------------------------------------------------
   3032  1.1     ross Returns the square root of the double-precision floating-point value `a'.
   3033  1.1     ross The operation is performed according to the IEC/IEEE Standard for Binary
   3034  1.1     ross Floating-Point Arithmetic.
   3035  1.1     ross -------------------------------------------------------------------------------
   3036  1.1     ross */
   3037  1.1     ross float64 float64_sqrt( float64 a )
   3038  1.1     ross {
   3039  1.1     ross     flag aSign;
   3040  1.1     ross     int16 aExp, zExp;
   3041  1.1     ross     bits64 aSig, zSig, doubleZSig;
   3042  1.1     ross     bits64 rem0, rem1, term0, term1;
   3043  1.1     ross 
   3044  1.1     ross     aSig = extractFloat64Frac( a );
   3045  1.1     ross     aExp = extractFloat64Exp( a );
   3046  1.1     ross     aSign = extractFloat64Sign( a );
   3047  1.1     ross     if ( aExp == 0x7FF ) {
   3048  1.1     ross         if ( aSig ) return propagateFloat64NaN( a, a );
   3049  1.1     ross         if ( ! aSign ) return a;
   3050  1.1     ross         float_raise( float_flag_invalid );
   3051  1.1     ross         return float64_default_nan;
   3052  1.1     ross     }
   3053  1.1     ross     if ( aSign ) {
   3054  1.1     ross         if ( ( aExp | aSig ) == 0 ) return a;
   3055  1.1     ross         float_raise( float_flag_invalid );
   3056  1.1     ross         return float64_default_nan;
   3057  1.1     ross     }
   3058  1.1     ross     if ( aExp == 0 ) {
   3059  1.1     ross         if ( aSig == 0 ) return 0;
   3060  1.1     ross         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   3061  1.1     ross     }
   3062  1.1     ross     zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
   3063  1.1     ross     aSig |= LIT64( 0x0010000000000000 );
   3064  1.1     ross     zSig = estimateSqrt32( aExp, aSig>>21 );
   3065  1.1     ross     aSig <<= 9 - ( aExp & 1 );
   3066  1.1     ross     zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
   3067  1.1     ross     if ( ( zSig & 0x1FF ) <= 5 ) {
   3068  1.1     ross         doubleZSig = zSig<<1;
   3069  1.1     ross         mul64To128( zSig, zSig, &term0, &term1 );
   3070  1.1     ross         sub128( aSig, 0, term0, term1, &rem0, &rem1 );
   3071  1.1     ross         while ( (sbits64) rem0 < 0 ) {
   3072  1.1     ross             --zSig;
   3073  1.1     ross             doubleZSig -= 2;
   3074  1.1     ross             add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
   3075  1.1     ross         }
   3076  1.1     ross         zSig |= ( ( rem0 | rem1 ) != 0 );
   3077  1.1     ross     }
   3078  1.1     ross     return roundAndPackFloat64( 0, zExp, zSig );
   3079  1.1     ross 
   3080  1.1     ross }
   3081  1.1     ross #endif
   3082  1.1     ross 
   3083  1.1     ross /*
   3084  1.1     ross -------------------------------------------------------------------------------
   3085  1.1     ross Returns 1 if the double-precision floating-point value `a' is equal to the
   3086  1.1     ross corresponding value `b', and 0 otherwise.  The comparison is performed
   3087  1.1     ross according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3088  1.1     ross -------------------------------------------------------------------------------
   3089  1.1     ross */
   3090  1.1     ross flag float64_eq( float64 a, float64 b )
   3091  1.1     ross {
   3092  1.1     ross 
   3093  1.1     ross     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3094  1.1     ross          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3095  1.1     ross        ) {
   3096  1.1     ross         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
   3097  1.1     ross             float_raise( float_flag_invalid );
   3098  1.1     ross         }
   3099  1.1     ross         return 0;
   3100  1.1     ross     }
   3101  1.1     ross     return ( a == b ) ||
   3102  1.1     ross 	( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
   3103  1.1     ross 
   3104  1.1     ross }
   3105  1.1     ross 
   3106  1.1     ross /*
   3107  1.1     ross -------------------------------------------------------------------------------
   3108  1.1     ross Returns 1 if the double-precision floating-point value `a' is less than or
   3109  1.1     ross equal to the corresponding value `b', and 0 otherwise.  The comparison is
   3110  1.1     ross performed according to the IEC/IEEE Standard for Binary Floating-Point
   3111  1.1     ross Arithmetic.
   3112  1.1     ross -------------------------------------------------------------------------------
   3113  1.1     ross */
   3114  1.1     ross flag float64_le( float64 a, float64 b )
   3115  1.1     ross {
   3116  1.1     ross     flag aSign, bSign;
   3117  1.1     ross 
   3118  1.1     ross     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3119  1.1     ross          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3120  1.1     ross        ) {
   3121  1.1     ross         float_raise( float_flag_invalid );
   3122  1.1     ross         return 0;
   3123  1.1     ross     }
   3124  1.1     ross     aSign = extractFloat64Sign( a );
   3125  1.1     ross     bSign = extractFloat64Sign( b );
   3126  1.1     ross     if ( aSign != bSign )
   3127  1.1     ross 	return aSign ||
   3128  1.1     ross 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
   3129  1.1     ross 	      0 );
   3130  1.1     ross     return ( a == b ) ||
   3131  1.1     ross 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
   3132  1.1     ross 
   3133  1.1     ross }
   3134  1.1     ross 
   3135  1.1     ross /*
   3136  1.1     ross -------------------------------------------------------------------------------
   3137  1.1     ross Returns 1 if the double-precision floating-point value `a' is less than
   3138  1.1     ross the corresponding value `b', and 0 otherwise.  The comparison is performed
   3139  1.1     ross according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3140  1.1     ross -------------------------------------------------------------------------------
   3141  1.1     ross */
   3142  1.1     ross flag float64_lt( float64 a, float64 b )
   3143  1.1     ross {
   3144  1.1     ross     flag aSign, bSign;
   3145  1.1     ross 
   3146  1.1     ross     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3147  1.1     ross          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3148  1.1     ross        ) {
   3149  1.1     ross         float_raise( float_flag_invalid );
   3150  1.1     ross         return 0;
   3151  1.1     ross     }
   3152  1.1     ross     aSign = extractFloat64Sign( a );
   3153  1.1     ross     bSign = extractFloat64Sign( b );
   3154  1.1     ross     if ( aSign != bSign )
   3155  1.1     ross 	return aSign &&
   3156  1.1     ross 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
   3157  1.1     ross 	      0 );
   3158  1.1     ross     return ( a != b ) &&
   3159  1.1     ross 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
   3160  1.1     ross 
   3161  1.1     ross }
   3162  1.1     ross 
   3163  1.1     ross #ifndef SOFTFLOAT_FOR_GCC
   3164  1.1     ross /*
   3165  1.1     ross -------------------------------------------------------------------------------
   3166  1.1     ross Returns 1 if the double-precision floating-point value `a' is equal to the
   3167  1.1     ross corresponding value `b', and 0 otherwise.  The invalid exception is raised
   3168  1.1     ross if either operand is a NaN.  Otherwise, the comparison is performed
   3169  1.1     ross according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3170  1.1     ross -------------------------------------------------------------------------------
   3171  1.1     ross */
   3172  1.1     ross flag float64_eq_signaling( float64 a, float64 b )
   3173  1.1     ross {
   3174  1.1     ross 
   3175  1.1     ross     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3176  1.1     ross          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3177  1.1     ross        ) {
   3178  1.1     ross         float_raise( float_flag_invalid );
   3179  1.1     ross         return 0;
   3180  1.1     ross     }
   3181  1.1     ross     return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
   3182  1.1     ross 
   3183  1.1     ross }
   3184  1.1     ross 
   3185  1.1     ross /*
   3186  1.1     ross -------------------------------------------------------------------------------
   3187  1.1     ross Returns 1 if the double-precision floating-point value `a' is less than or
   3188  1.1     ross equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
   3189  1.1     ross cause an exception.  Otherwise, the comparison is performed according to the
   3190  1.1     ross IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3191  1.1     ross -------------------------------------------------------------------------------
   3192  1.1     ross */
   3193  1.1     ross flag float64_le_quiet( float64 a, float64 b )
   3194  1.1     ross {
   3195  1.1     ross     flag aSign, bSign;
   3196  1.1     ross 
   3197  1.1     ross     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3198  1.1     ross          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3199  1.1     ross        ) {
   3200  1.1     ross         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
   3201  1.1     ross             float_raise( float_flag_invalid );
   3202  1.1     ross         }
   3203  1.1     ross         return 0;
   3204  1.1     ross     }
   3205  1.1     ross     aSign = extractFloat64Sign( a );
   3206  1.1     ross     bSign = extractFloat64Sign( b );
   3207  1.1     ross     if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
   3208  1.1     ross     return ( a == b ) || ( aSign ^ ( a < b ) );
   3209  1.1     ross 
   3210  1.1     ross }
   3211  1.1     ross 
   3212  1.1     ross /*
   3213  1.1     ross -------------------------------------------------------------------------------
   3214  1.1     ross Returns 1 if the double-precision floating-point value `a' is less than
   3215  1.1     ross the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
   3216  1.1     ross exception.  Otherwise, the comparison is performed according to the IEC/IEEE
   3217  1.1     ross Standard for Binary Floating-Point Arithmetic.
   3218  1.1     ross -------------------------------------------------------------------------------
   3219  1.1     ross */
   3220  1.1     ross flag float64_lt_quiet( float64 a, float64 b )
   3221  1.1     ross {
   3222  1.1     ross     flag aSign, bSign;
   3223  1.1     ross 
   3224  1.1     ross     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3225  1.1     ross          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3226  1.1     ross        ) {
   3227  1.1     ross         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
   3228  1.1     ross             float_raise( float_flag_invalid );
   3229  1.1     ross         }
   3230  1.1     ross         return 0;
   3231  1.1     ross     }
   3232  1.1     ross     aSign = extractFloat64Sign( a );
   3233  1.1     ross     bSign = extractFloat64Sign( b );
   3234  1.1     ross     if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
   3235  1.1     ross     return ( a != b ) && ( aSign ^ ( a < b ) );
   3236  1.1     ross 
   3237  1.1     ross }
   3238  1.1     ross #endif
   3239  1.1     ross 
   3240  1.1     ross #ifdef FLOATX80
   3241  1.1     ross 
   3242  1.1     ross /*
   3243  1.1     ross -------------------------------------------------------------------------------
   3244  1.1     ross Returns the result of converting the extended double-precision floating-
   3245  1.1     ross point value `a' to the 32-bit two's complement integer format.  The
   3246  1.1     ross conversion is performed according to the IEC/IEEE Standard for Binary
   3247  1.1     ross Floating-Point Arithmetic---which means in particular that the conversion
   3248  1.1     ross is rounded according to the current rounding mode.  If `a' is a NaN, the
   3249  1.1     ross largest positive integer is returned.  Otherwise, if the conversion
   3250  1.1     ross overflows, the largest integer with the same sign as `a' is returned.
   3251  1.1     ross -------------------------------------------------------------------------------
   3252  1.1     ross */
   3253  1.1     ross int32 floatx80_to_int32( floatx80 a )
   3254  1.1     ross {
   3255  1.1     ross     flag aSign;
   3256  1.1     ross     int32 aExp, shiftCount;
   3257  1.1     ross     bits64 aSig;
   3258  1.1     ross 
   3259  1.1     ross     aSig = extractFloatx80Frac( a );
   3260  1.1     ross     aExp = extractFloatx80Exp( a );
   3261  1.1     ross     aSign = extractFloatx80Sign( a );
   3262  1.1     ross     if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
   3263  1.1     ross     shiftCount = 0x4037 - aExp;
   3264  1.1     ross     if ( shiftCount <= 0 ) shiftCount = 1;
   3265  1.1     ross     shift64RightJamming( aSig, shiftCount, &aSig );
   3266  1.1     ross     return roundAndPackInt32( aSign, aSig );
   3267  1.1     ross 
   3268  1.1     ross }
   3269  1.1     ross 
   3270  1.1     ross /*
   3271  1.1     ross -------------------------------------------------------------------------------
   3272  1.1     ross Returns the result of converting the extended double-precision floating-
   3273  1.1     ross point value `a' to the 32-bit two's complement integer format.  The
   3274  1.1     ross conversion is performed according to the IEC/IEEE Standard for Binary
   3275  1.1     ross Floating-Point Arithmetic, except that the conversion is always rounded
   3276  1.1     ross toward zero.  If `a' is a NaN, the largest positive integer is returned.
   3277  1.1     ross Otherwise, if the conversion overflows, the largest integer with the same
   3278  1.1     ross sign as `a' is returned.
   3279  1.1     ross -------------------------------------------------------------------------------
   3280  1.1     ross */
   3281  1.1     ross int32 floatx80_to_int32_round_to_zero( floatx80 a )
   3282  1.1     ross {
   3283  1.1     ross     flag aSign;
   3284  1.1     ross     int32 aExp, shiftCount;
   3285  1.1     ross     bits64 aSig, savedASig;
   3286  1.1     ross     int32 z;
   3287  1.1     ross 
   3288  1.1     ross     aSig = extractFloatx80Frac( a );
   3289  1.1     ross     aExp = extractFloatx80Exp( a );
   3290  1.1     ross     aSign = extractFloatx80Sign( a );
   3291  1.1     ross     if ( 0x401E < aExp ) {
   3292  1.1     ross         if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
   3293  1.1     ross         goto invalid;
   3294  1.1     ross     }
   3295  1.1     ross     else if ( aExp < 0x3FFF ) {
   3296  1.1     ross         if ( aExp || aSig ) float_set_inexact();
   3297  1.1     ross         return 0;
   3298  1.1     ross     }
   3299  1.1     ross     shiftCount = 0x403E - aExp;
   3300  1.1     ross     savedASig = aSig;
   3301  1.1     ross     aSig >>= shiftCount;
   3302  1.1     ross     z = aSig;
   3303  1.1     ross     if ( aSign ) z = - z;
   3304  1.1     ross     if ( ( z < 0 ) ^ aSign ) {
   3305  1.1     ross  invalid:
   3306  1.1     ross         float_raise( float_flag_invalid );
   3307  1.1     ross         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
   3308  1.1     ross     }
   3309  1.1     ross     if ( ( aSig<<shiftCount ) != savedASig ) {
   3310  1.1     ross         float_set_inexact();
   3311  1.1     ross     }
   3312  1.1     ross     return z;
   3313  1.1     ross 
   3314  1.1     ross }
   3315  1.1     ross 
   3316  1.1     ross /*
   3317  1.1     ross -------------------------------------------------------------------------------
   3318  1.1     ross Returns the result of converting the extended double-precision floating-
   3319  1.1     ross point value `a' to the 64-bit two's complement integer format.  The
   3320  1.1     ross conversion is performed according to the IEC/IEEE Standard for Binary
   3321  1.1     ross Floating-Point Arithmetic---which means in particular that the conversion
   3322  1.1     ross is rounded according to the current rounding mode.  If `a' is a NaN,
   3323  1.1     ross the largest positive integer is returned.  Otherwise, if the conversion
   3324  1.1     ross overflows, the largest integer with the same sign as `a' is returned.
   3325  1.1     ross -------------------------------------------------------------------------------
   3326  1.1     ross */
   3327  1.1     ross int64 floatx80_to_int64( floatx80 a )
   3328  1.1     ross {
   3329  1.1     ross     flag aSign;
   3330  1.1     ross     int32 aExp, shiftCount;
   3331  1.1     ross     bits64 aSig, aSigExtra;
   3332  1.1     ross 
   3333  1.1     ross     aSig = extractFloatx80Frac( a );
   3334  1.1     ross     aExp = extractFloatx80Exp( a );
   3335  1.1     ross     aSign = extractFloatx80Sign( a );
   3336  1.1     ross     shiftCount = 0x403E - aExp;
   3337  1.1     ross     if ( shiftCount <= 0 ) {
   3338  1.1     ross         if ( shiftCount ) {
   3339  1.1     ross             float_raise( float_flag_invalid );
   3340  1.1     ross             if (    ! aSign
   3341  1.1     ross                  || (    ( aExp == 0x7FFF )
   3342  1.1     ross                       && ( aSig != LIT64( 0x8000000000000000 ) ) )
   3343  1.1     ross                ) {
   3344  1.1     ross                 return LIT64( 0x7FFFFFFFFFFFFFFF );
   3345  1.1     ross             }
   3346  1.1     ross             return (sbits64) LIT64( 0x8000000000000000 );
   3347  1.1     ross         }
   3348  1.1     ross         aSigExtra = 0;
   3349  1.1     ross     }
   3350  1.1     ross     else {
   3351  1.1     ross         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
   3352  1.1     ross     }
   3353  1.1     ross     return roundAndPackInt64( aSign, aSig, aSigExtra );
   3354  1.1     ross 
   3355  1.1     ross }
   3356  1.1     ross 
   3357  1.1     ross /*
   3358  1.1     ross -------------------------------------------------------------------------------
   3359  1.1     ross Returns the result of converting the extended double-precision floating-
   3360  1.1     ross point value `a' to the 64-bit two's complement integer format.  The
   3361  1.1     ross conversion is performed according to the IEC/IEEE Standard for Binary
   3362  1.1     ross Floating-Point Arithmetic, except that the conversion is always rounded
   3363  1.1     ross toward zero.  If `a' is a NaN, the largest positive integer is returned.
   3364  1.1     ross Otherwise, if the conversion overflows, the largest integer with the same
   3365  1.1     ross sign as `a' is returned.
   3366  1.1     ross -------------------------------------------------------------------------------
   3367  1.1     ross */
   3368  1.1     ross int64 floatx80_to_int64_round_to_zero( floatx80 a )
   3369  1.1     ross {
   3370  1.1     ross     flag aSign;
   3371  1.1     ross     int32 aExp, shiftCount;
   3372  1.1     ross     bits64 aSig;
   3373  1.1     ross     int64 z;
   3374  1.1     ross 
   3375  1.1     ross     aSig = extractFloatx80Frac( a );
   3376  1.1     ross     aExp = extractFloatx80Exp( a );
   3377  1.1     ross     aSign = extractFloatx80Sign( a );
   3378  1.1     ross     shiftCount = aExp - 0x403E;
   3379  1.1     ross     if ( 0 <= shiftCount ) {
   3380  1.1     ross         aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
   3381  1.1     ross         if ( ( a.high != 0xC03E ) || aSig ) {
   3382  1.1     ross             float_raise( float_flag_invalid );
   3383  1.1     ross             if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
   3384  1.1     ross                 return LIT64( 0x7FFFFFFFFFFFFFFF );
   3385  1.1     ross             }
   3386  1.1     ross         }
   3387  1.1     ross         return (sbits64) LIT64( 0x8000000000000000 );
   3388  1.1     ross     }
   3389  1.1     ross     else if ( aExp < 0x3FFF ) {
   3390  1.1     ross         if ( aExp | aSig ) float_set_inexact();
   3391  1.1     ross         return 0;
   3392  1.1     ross     }
   3393  1.1     ross     z = aSig>>( - shiftCount );
   3394  1.1     ross     if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
   3395  1.1     ross         float_set_inexact();
   3396  1.1     ross     }
   3397  1.1     ross     if ( aSign ) z = - z;
   3398  1.1     ross     return z;
   3399  1.1     ross 
   3400  1.1     ross }
   3401  1.1     ross 
   3402  1.1     ross /*
   3403  1.1     ross -------------------------------------------------------------------------------
   3404  1.1     ross Returns the result of converting the extended double-precision floating-
   3405  1.1     ross point value `a' to the single-precision floating-point format.  The
   3406  1.1     ross conversion is performed according to the IEC/IEEE Standard for Binary
   3407  1.1     ross Floating-Point Arithmetic.
   3408  1.1     ross -------------------------------------------------------------------------------
   3409  1.1     ross */
   3410  1.1     ross float32 floatx80_to_float32( floatx80 a )
   3411  1.1     ross {
   3412  1.1     ross     flag aSign;
   3413  1.1     ross     int32 aExp;
   3414  1.1     ross     bits64 aSig;
   3415  1.1     ross 
   3416  1.1     ross     aSig = extractFloatx80Frac( a );
   3417  1.1     ross     aExp = extractFloatx80Exp( a );
   3418  1.1     ross     aSign = extractFloatx80Sign( a );
   3419  1.1     ross     if ( aExp == 0x7FFF ) {
   3420  1.1     ross         if ( (bits64) ( aSig<<1 ) ) {
   3421  1.1     ross             return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
   3422  1.1     ross         }
   3423  1.1     ross         return packFloat32( aSign, 0xFF, 0 );
   3424  1.1     ross     }
   3425  1.1     ross     shift64RightJamming( aSig, 33, &aSig );
   3426  1.1     ross     if ( aExp || aSig ) aExp -= 0x3F81;
   3427  1.1     ross     return roundAndPackFloat32( aSign, aExp, aSig );
   3428  1.1     ross 
   3429  1.1     ross }
   3430  1.1     ross 
   3431  1.1     ross /*
   3432  1.1     ross -------------------------------------------------------------------------------
   3433  1.1     ross Returns the result of converting the extended double-precision floating-
   3434  1.1     ross point value `a' to the double-precision floating-point format.  The
   3435  1.1     ross conversion is performed according to the IEC/IEEE Standard for Binary
   3436  1.1     ross Floating-Point Arithmetic.
   3437  1.1     ross -------------------------------------------------------------------------------
   3438  1.1     ross */
   3439  1.1     ross float64 floatx80_to_float64( floatx80 a )
   3440  1.1     ross {
   3441  1.1     ross     flag aSign;
   3442  1.1     ross     int32 aExp;
   3443  1.1     ross     bits64 aSig, zSig;
   3444  1.1     ross 
   3445  1.1     ross     aSig = extractFloatx80Frac( a );
   3446  1.1     ross     aExp = extractFloatx80Exp( a );
   3447  1.1     ross     aSign = extractFloatx80Sign( a );
   3448  1.1     ross     if ( aExp == 0x7FFF ) {
   3449  1.1     ross         if ( (bits64) ( aSig<<1 ) ) {
   3450  1.1     ross             return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
   3451  1.1     ross         }
   3452  1.1     ross         return packFloat64( aSign, 0x7FF, 0 );
   3453  1.1     ross     }
   3454  1.1     ross     shift64RightJamming( aSig, 1, &zSig );
   3455  1.1     ross     if ( aExp || aSig ) aExp -= 0x3C01;
   3456  1.1     ross     return roundAndPackFloat64( aSign, aExp, zSig );
   3457  1.1     ross 
   3458  1.1     ross }
   3459  1.1     ross 
   3460  1.1     ross #ifdef FLOAT128
   3461  1.1     ross 
   3462  1.1     ross /*
   3463  1.1     ross -------------------------------------------------------------------------------
   3464  1.1     ross Returns the result of converting the extended double-precision floating-
   3465  1.1     ross point value `a' to the quadruple-precision floating-point format.  The
   3466  1.1     ross conversion is performed according to the IEC/IEEE Standard for Binary
   3467  1.1     ross Floating-Point Arithmetic.
   3468  1.1     ross -------------------------------------------------------------------------------
   3469  1.1     ross */
   3470  1.1     ross float128 floatx80_to_float128( floatx80 a )
   3471  1.1     ross {
   3472  1.1     ross     flag aSign;
   3473  1.1     ross     int16 aExp;
   3474  1.1     ross     bits64 aSig, zSig0, zSig1;
   3475  1.1     ross 
   3476  1.1     ross     aSig = extractFloatx80Frac( a );
   3477  1.1     ross     aExp = extractFloatx80Exp( a );
   3478  1.1     ross     aSign = extractFloatx80Sign( a );
   3479  1.1     ross     if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
   3480  1.1     ross         return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
   3481  1.1     ross     }
   3482  1.1     ross     shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
   3483  1.1     ross     return packFloat128( aSign, aExp, zSig0, zSig1 );
   3484  1.1     ross 
   3485  1.1     ross }
   3486  1.1     ross 
   3487  1.1     ross #endif
   3488  1.1     ross 
   3489  1.1     ross /*
   3490  1.1     ross -------------------------------------------------------------------------------
   3491  1.1     ross Rounds the extended double-precision floating-point value `a' to an integer,
   3492  1.1     ross and returns the result as an extended double-precision floating-point
   3493  1.1     ross value.  The operation is performed according to the IEC/IEEE Standard for
   3494  1.1     ross Binary Floating-Point Arithmetic.
   3495  1.1     ross -------------------------------------------------------------------------------
   3496  1.1     ross */
   3497  1.1     ross floatx80 floatx80_round_to_int( floatx80 a )
   3498  1.1     ross {
                          /*
                           * Rounds `a' to an integral value using the mode reported by
                           * float_rounding_mode() and returns it as a floatx80.
                           * float_set_inexact() is called whenever the returned value
                           * differs from `a'.
                           */
   3499  1.1     ross     flag aSign;
   3500  1.1     ross     int32 aExp;
   3501  1.1     ross     bits64 lastBitMask, roundBitsMask;
   3502  1.1     ross     int8 roundingMode;
   3503  1.1     ross     floatx80 z;
   3504  1.1     ross 
   3505  1.1     ross     aExp = extractFloatx80Exp( a );
                          /* Exponent 0x403E or larger: `a' is returned unchanged, except
                             that NaNs are routed through propagateFloatx80NaN(). */
   3506  1.1     ross     if ( 0x403E <= aExp ) {
   3507  1.1     ross         if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
   3508  1.1     ross             return propagateFloatx80NaN( a, a );
   3509  1.1     ross         }
   3510  1.1     ross         return a;
   3511  1.1     ross     }
                          /* Exponent below 0x3FFF (the exponent used for the +/-1 results
                             built below): the answer is either a zero or +/-1. */
   3512  1.1     ross     if ( aExp < 0x3FFF ) {
                              /* Exponent field 0 with no bits below the top significand
                                 bit: returned as is, without raising inexact. */
   3513  1.1     ross         if (    ( aExp == 0 )
   3514  1.1     ross              && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
   3515  1.1     ross             return a;
   3516  1.1     ross         }
   3517  1.1     ross         float_set_inexact();
   3518  1.1     ross         aSign = extractFloatx80Sign( a );
   3519  1.1     ross         switch ( float_rounding_mode() ) {
   3520  1.1     ross          case float_round_nearest_even:
                                  /* Exponent 0x3FFE with further significand bits set
                                     rounds to +/-1; anything else falls through to the
                                     signed zero after the switch. */
   3521  1.1     ross             if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
   3522  1.1     ross                ) {
   3523  1.1     ross                 return
   3524  1.1     ross                     packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
   3525  1.1     ross             }
   3526  1.1     ross             break;
   3527  1.1     ross          case float_round_down:
                                  /* Negative inputs go to -1, positive inputs to +0. */
   3528  1.1     ross             return
   3529  1.1     ross                   aSign ?
   3530  1.1     ross                       packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
   3531  1.1     ross                 : packFloatx80( 0, 0, 0 );
   3532  1.1     ross          case float_round_up:
                                  /* Negative inputs go to -0, positive inputs to +1. */
   3533  1.1     ross             return
   3534  1.1     ross                   aSign ? packFloatx80( 1, 0, 0 )
   3535  1.1     ross                 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
   3536  1.1     ross         }
                              /* Remaining modes and cases: zero carrying the sign of `a'. */
   3537  1.1     ross         return packFloatx80( aSign, 0, 0 );
   3538  1.1     ross     }
                          /* 0x3FFF <= aExp <= 0x403D here, so the shift below is 1..63.
                             lastBitMask selects the lowest significand bit that is kept;
                             roundBitsMask covers every bit beneath it. */
   3539  1.1     ross     lastBitMask = 1;
   3540  1.1     ross     lastBitMask <<= 0x403E - aExp;
   3541  1.1     ross     roundBitsMask = lastBitMask - 1;
   3542  1.1     ross     z = a;
   3543  1.1     ross     roundingMode = float_rounding_mode();
   3544  1.1     ross     if ( roundingMode == float_round_nearest_even ) {
                              /* Add half of the last kept bit; if nothing is left in the
                                 discarded bits afterwards the input was an exact tie, so
                                 the last kept bit is cleared to make the result even. */
   3545  1.1     ross         z.low += lastBitMask>>1;
   3546  1.1     ross         if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
   3547  1.1     ross     }
   3548  1.1     ross     else if ( roundingMode != float_round_to_zero ) {
                              /* Directed rounding: bump the magnitude when the sign and
                                 the round-up flag differ (positive rounding up, or
                                 negative rounding down). */
   3549  1.1     ross         if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
   3550  1.1     ross             z.low += roundBitsMask;
   3551  1.1     ross         }
   3552  1.1     ross     }
   3553  1.1     ross     z.low &= ~ roundBitsMask;
                          /* A significand of zero after masking means the addition carried
                             out of the top bit: step the exponent word and restore the
                             top significand bit. */
   3554  1.1     ross     if ( z.low == 0 ) {
   3555  1.1     ross         ++z.high;
   3556  1.1     ross         z.low = LIT64( 0x8000000000000000 );
   3557  1.1     ross     }
   3558  1.1     ross     if ( z.low != a.low ) float_set_inexact();
   3559  1.1     ross     return z;
   3560  1.1     ross 
   3561  1.1     ross }
   3562  1.1     ross 
   3563  1.1     ross /*
   3564  1.1     ross -------------------------------------------------------------------------------
   3565  1.1     ross Returns the result of adding the absolute values of the extended double-
   3566  1.1     ross precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
   3567  1.1     ross negated before being returned.  `zSign' is ignored if the result is a NaN.
   3568  1.1     ross The addition is performed according to the IEC/IEEE Standard for Binary
   3569  1.1     ross Floating-Point Arithmetic.
   3570  1.1     ross -------------------------------------------------------------------------------
   3571  1.1     ross */
   3572  1.1     ross static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
   3573  1.1     ross {
   3574  1.1     ross     int32 aExp, bExp, zExp;
   3575  1.1     ross     bits64 aSig, bSig, zSig0, zSig1;
   3576  1.1     ross     int32 expDiff;
   3577  1.1     ross 
   3578  1.1     ross     aSig = extractFloatx80Frac( a );
   3579  1.1     ross     aExp = extractFloatx80Exp( a );
   3580  1.1     ross     bSig = extractFloatx80Frac( b );
   3581  1.1     ross     bExp = extractFloatx80Exp( b );
   3582  1.1     ross     expDiff = aExp - bExp;
   3583  1.1     ross     if ( 0 < expDiff ) {
   3584  1.1     ross         if ( aExp == 0x7FFF ) {
   3585  1.1     ross             if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3586  1.1     ross             return a;
   3587  1.1     ross         }
   3588  1.1     ross         if ( bExp == 0 ) --expDiff;
   3589  1.1     ross         shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
   3590  1.1     ross         zExp = aExp;
   3591  1.1     ross     }
   3592  1.1     ross     else if ( expDiff < 0 ) {
   3593  1.1     ross         if ( bExp == 0x7FFF ) {
   3594  1.1     ross             if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3595  1.1     ross             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3596  1.1     ross         }
   3597  1.1     ross         if ( aExp == 0 ) ++expDiff;
   3598  1.1     ross         shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
   3599  1.1     ross         zExp = bExp;
   3600  1.1     ross     }
   3601  1.1     ross     else {
   3602  1.1     ross         if ( aExp == 0x7FFF ) {
   3603  1.1     ross             if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
   3604  1.1     ross                 return propagateFloatx80NaN( a, b );
   3605  1.1     ross             }
   3606  1.1     ross             return a;
   3607  1.1     ross         }
   3608  1.1     ross         zSig1 = 0;
   3609  1.1     ross         zSig0 = aSig + bSig;
   3610  1.1     ross         if ( aExp == 0 ) {
   3611  1.1     ross             normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
   3612  1.1     ross             goto roundAndPack;
   3613  1.1     ross         }
   3614  1.1     ross         zExp = aExp;
   3615  1.1     ross         goto shiftRight1;
   3616  1.1     ross     }
   3617  1.1     ross     zSig0 = aSig + bSig;
   3618  1.1     ross     if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
   3619  1.1     ross  shiftRight1:
   3620  1.1     ross     shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
   3621  1.1     ross     zSig0 |= LIT64( 0x8000000000000000 );
   3622  1.1     ross     ++zExp;
   3623  1.1     ross  roundAndPack:
   3624  1.1     ross     return
   3625  1.1     ross         roundAndPackFloatx80(
   3626  1.1     ross             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
   3627  1.1     ross 
   3628  1.1     ross }
   3629  1.1     ross 
   3630  1.1     ross /*
   3631  1.1     ross -------------------------------------------------------------------------------
   3632  1.1     ross Returns the result of subtracting the absolute values of the extended
   3633  1.1     ross double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
   3634  1.1     ross difference is negated before being returned.  `zSign' is ignored if the
   3635  1.1     ross result is a NaN.  The subtraction is performed according to the IEC/IEEE
   3636  1.1     ross Standard for Binary Floating-Point Arithmetic.
   3637  1.1     ross -------------------------------------------------------------------------------
   3638  1.1     ross */
   3639  1.1     ross static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
   3640  1.1     ross {
                          /*
                           * Computes |a| - |b| and applies `zSign' to the result (the sign
                           * is flipped when |b| is the larger magnitude).  Control jumps
                           * between labelled sections depending on which operand has the
                           * larger exponent or significand.
                           */
   3641  1.1     ross     int32 aExp, bExp, zExp;
   3642  1.1     ross     bits64 aSig, bSig, zSig0, zSig1;
   3643  1.1     ross     int32 expDiff;
   3644  1.1     ross     floatx80 z;
   3645  1.1     ross 
   3646  1.1     ross     aSig = extractFloatx80Frac( a );
   3647  1.1     ross     aExp = extractFloatx80Exp( a );
   3648  1.1     ross     bSig = extractFloatx80Frac( b );
   3649  1.1     ross     bExp = extractFloatx80Exp( b );
   3650  1.1     ross     expDiff = aExp - bExp;
   3651  1.1     ross     if ( 0 < expDiff ) goto aExpBigger;
   3652  1.1     ross     if ( expDiff < 0 ) goto bExpBigger;
                          /* From here to the bExpBigger label the exponents are equal. */
   3653  1.1     ross     if ( aExp == 0x7FFF ) {
   3654  1.1     ross         if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
   3655  1.1     ross             return propagateFloatx80NaN( a, b );
   3656  1.1     ross         }
                              /* Both exponents are at maximum and neither operand is a
                                 NaN (infinity minus infinity): raise invalid and return
                                 the default NaN. */
   3657  1.1     ross         float_raise( float_flag_invalid );
   3658  1.1     ross         z.low = floatx80_default_nan_low;
   3659  1.1     ross         z.high = floatx80_default_nan_high;
   3660  1.1     ross         return z;
   3661  1.1     ross     }
                          /* Exponent field 0 is treated as exponent 1 for the result. */
   3662  1.1     ross     if ( aExp == 0 ) {
   3663  1.1     ross         aExp = 1;
   3664  1.1     ross         bExp = 1;
   3665  1.1     ross     }
   3666  1.1     ross     zSig1 = 0;
   3667  1.1     ross     if ( bSig < aSig ) goto aBigger;
   3668  1.1     ross     if ( aSig < bSig ) goto bBigger;
                          /* Equal magnitudes: the difference is an exact zero, negative
                             only when rounding down. */
   3669  1.1     ross     return packFloatx80( float_rounding_mode() == float_round_down, 0, 0 );
   3670  1.1     ross  bExpBigger:
   3671  1.1     ross     if ( bExp == 0x7FFF ) {
   3672  1.1     ross         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
                              /* `b' is an infinity: the result is an infinity with the
                                 sign flipped. */
   3673  1.1     ross         return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3674  1.1     ross     }
                          /* An exponent field of 0 is aligned as if it were 1, so the
                             shift distance shrinks by one. */
   3675  1.1     ross     if ( aExp == 0 ) ++expDiff;
   3676  1.1     ross     shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
   3677  1.1     ross  bBigger:
                          /* |b| is larger: compute b - a and flip the result sign. */
   3678  1.1     ross     sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
   3679  1.1     ross     zExp = bExp;
   3680  1.1     ross     zSign ^= 1;
   3681  1.1     ross     goto normalizeRoundAndPack;
   3682  1.1     ross  aExpBigger:
   3683  1.1     ross     if ( aExp == 0x7FFF ) {
   3684  1.1     ross         if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3685  1.1     ross         return a;
   3686  1.1     ross     }
   3687  1.1     ross     if ( bExp == 0 ) --expDiff;
   3688  1.1     ross     shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
   3689  1.1     ross  aBigger:
                          /* |a| is larger: compute a - b and keep the result sign. */
   3690  1.1     ross     sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
   3691  1.1     ross     zExp = aExp;
   3692  1.1     ross  normalizeRoundAndPack:
   3693  1.1     ross     return
   3694  1.1     ross         normalizeRoundAndPackFloatx80(
   3695  1.1     ross             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
   3696  1.1     ross 
   3697  1.1     ross }
   3698  1.1     ross 
   3699  1.1     ross /*
   3700  1.1     ross -------------------------------------------------------------------------------
   3701  1.1     ross Returns the result of adding the extended double-precision floating-point
   3702  1.1     ross values `a' and `b'.  The operation is performed according to the IEC/IEEE
   3703  1.1     ross Standard for Binary Floating-Point Arithmetic.
   3704  1.1     ross -------------------------------------------------------------------------------
   3705  1.1     ross */
   3706  1.1     ross floatx80 floatx80_add( floatx80 a, floatx80 b )
   3707  1.1     ross {
   3708  1.1     ross     flag aSign, bSign;
   3709  1.1     ross 
   3710  1.1     ross     aSign = extractFloatx80Sign( a );
   3711  1.1     ross     bSign = extractFloatx80Sign( b );
   3712  1.1     ross     if ( aSign == bSign ) {
   3713  1.1     ross         return addFloatx80Sigs( a, b, aSign );
   3714  1.1     ross     }
   3715  1.1     ross     else {
   3716  1.1     ross         return subFloatx80Sigs( a, b, aSign );
   3717  1.1     ross     }
   3718  1.1     ross 
   3719  1.1     ross }
   3720  1.1     ross 
   3721  1.1     ross /*
   3722  1.1     ross -------------------------------------------------------------------------------
   3723  1.1     ross Returns the result of subtracting the extended double-precision floating-
   3724  1.1     ross point values `a' and `b'.  The operation is performed according to the
   3725  1.1     ross IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3726  1.1     ross -------------------------------------------------------------------------------
   3727  1.1     ross */
   3728  1.1     ross floatx80 floatx80_sub( floatx80 a, floatx80 b )
   3729  1.1     ross {
   3730  1.1     ross     flag aSign, bSign;
   3731  1.1     ross 
   3732  1.1     ross     aSign = extractFloatx80Sign( a );
   3733  1.1     ross     bSign = extractFloatx80Sign( b );
   3734  1.1     ross     if ( aSign == bSign ) {
   3735  1.1     ross         return subFloatx80Sigs( a, b, aSign );
   3736  1.1     ross     }
   3737  1.1     ross     else {
   3738  1.1     ross         return addFloatx80Sigs( a, b, aSign );
   3739  1.1     ross     }
   3740  1.1     ross 
   3741  1.1     ross }
   3742  1.1     ross 
   3743  1.1     ross /*
   3744  1.1     ross -------------------------------------------------------------------------------
   3745  1.1     ross Returns the result of multiplying the extended double-precision floating-
   3746  1.1     ross point values `a' and `b'.  The operation is performed according to the
   3747  1.1     ross IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3748  1.1     ross -------------------------------------------------------------------------------
   3749  1.1     ross */
   3750  1.1     ross floatx80 floatx80_mul( floatx80 a, floatx80 b )
   3751  1.1     ross {
   3752  1.1     ross     flag aSign, bSign, zSign;
   3753  1.1     ross     int32 aExp, bExp, zExp;
   3754  1.1     ross     bits64 aSig, bSig, zSig0, zSig1;
   3755  1.1     ross     floatx80 z;
   3756  1.1     ross 
   3757  1.1     ross     aSig = extractFloatx80Frac( a );
   3758  1.1     ross     aExp = extractFloatx80Exp( a );
   3759  1.1     ross     aSign = extractFloatx80Sign( a );
   3760  1.1     ross     bSig = extractFloatx80Frac( b );
   3761  1.1     ross     bExp = extractFloatx80Exp( b );
   3762  1.1     ross     bSign = extractFloatx80Sign( b );
   3763  1.1     ross     zSign = aSign ^ bSign;
   3764  1.1     ross     if ( aExp == 0x7FFF ) {
   3765  1.1     ross         if (    (bits64) ( aSig<<1 )
   3766  1.1     ross              || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
   3767  1.1     ross             return propagateFloatx80NaN( a, b );
   3768  1.1     ross         }
   3769  1.1     ross         if ( ( bExp | bSig ) == 0 ) goto invalid;
   3770  1.1     ross         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3771  1.1     ross     }
   3772  1.1     ross     if ( bExp == 0x7FFF ) {
   3773  1.1     ross         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3774  1.1     ross         if ( ( aExp | aSig ) == 0 ) {
   3775  1.1     ross  invalid:
   3776  1.1     ross             float_raise( float_flag_invalid );
   3777  1.1     ross             z.low = floatx80_default_nan_low;
   3778  1.1     ross             z.high = floatx80_default_nan_high;
   3779  1.1     ross             return z;
   3780  1.1     ross         }
   3781  1.1     ross         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3782  1.1     ross     }
   3783  1.1     ross     if ( aExp == 0 ) {
   3784  1.1     ross         if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
   3785  1.1     ross         normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
   3786  1.1     ross     }
   3787  1.1     ross     if ( bExp == 0 ) {
   3788  1.1     ross         if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
   3789  1.1     ross         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
   3790  1.1     ross     }
   3791  1.1     ross     zExp = aExp + bExp - 0x3FFE;
   3792  1.1     ross     mul64To128( aSig, bSig, &zSig0, &zSig1 );
   3793  1.1     ross     if ( 0 < (sbits64) zSig0 ) {
   3794  1.1     ross         shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
   3795  1.1     ross         --zExp;
   3796  1.1     ross     }
   3797  1.1     ross     return
   3798  1.1     ross         roundAndPackFloatx80(
   3799  1.1     ross             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
   3800  1.1     ross 
   3801  1.1     ross }
   3802  1.1     ross 
   3803  1.1     ross /*
   3804  1.1     ross -------------------------------------------------------------------------------
   3805  1.1     ross Returns the result of dividing the extended double-precision floating-point
   3806  1.1     ross value `a' by the corresponding value `b'.  The operation is performed
   3807  1.1     ross according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3808  1.1     ross -------------------------------------------------------------------------------
   3809  1.1     ross */
   3810  1.1     ross floatx80 floatx80_div( floatx80 a, floatx80 b )
   3811  1.1     ross {
                          /*
                           * Divides `a' by `b'.  Special operands are answered first; the
                           * quotient significand is then built as two 64-bit words
                           * (zSig0, zSig1), each from an estimate that is corrected
                           * downward, and handed to roundAndPackFloatx80().
                           */
   3812  1.1     ross     flag aSign, bSign, zSign;
   3813  1.1     ross     int32 aExp, bExp, zExp;
   3814  1.1     ross     bits64 aSig, bSig, zSig0, zSig1;
   3815  1.1     ross     bits64 rem0, rem1, rem2, term0, term1, term2;
   3816  1.1     ross     floatx80 z;
   3817  1.1     ross 
   3818  1.1     ross     aSig = extractFloatx80Frac( a );
   3819  1.1     ross     aExp = extractFloatx80Exp( a );
   3820  1.1     ross     aSign = extractFloatx80Sign( a );
   3821  1.1     ross     bSig = extractFloatx80Frac( b );
   3822  1.1     ross     bExp = extractFloatx80Exp( b );
   3823  1.1     ross     bSign = extractFloatx80Sign( b );
   3824  1.1     ross     zSign = aSign ^ bSign;
   3825  1.1     ross     if ( aExp == 0x7FFF ) {
   3826  1.1     ross         if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3827  1.1     ross         if ( bExp == 0x7FFF ) {
   3828  1.1     ross             if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
                                  /* Infinity divided by infinity is invalid. */
   3829  1.1     ross             goto invalid;
   3830  1.1     ross         }
   3831  1.1     ross         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3832  1.1     ross     }
   3833  1.1     ross     if ( bExp == 0x7FFF ) {
   3834  1.1     ross         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
                              /* Finite value divided by infinity: signed zero. */
   3835  1.1     ross         return packFloatx80( zSign, 0, 0 );
   3836  1.1     ross     }
   3837  1.1     ross     if ( bExp == 0 ) {
   3838  1.1     ross         if ( bSig == 0 ) {
                                  /* Zero divisor: 0/0 is invalid, anything else raises
                                     divide-by-zero and returns a signed infinity. */
   3839  1.1     ross             if ( ( aExp | aSig ) == 0 ) {
   3840  1.1     ross  invalid:
   3841  1.1     ross                 float_raise( float_flag_invalid );
   3842  1.1     ross                 z.low = floatx80_default_nan_low;
   3843  1.1     ross                 z.high = floatx80_default_nan_high;
   3844  1.1     ross                 return z;
   3845  1.1     ross             }
   3846  1.1     ross             float_raise( float_flag_divbyzero );
   3847  1.1     ross             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3848  1.1     ross         }
   3849  1.1     ross         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
   3850  1.1     ross     }
   3851  1.1     ross     if ( aExp == 0 ) {
   3852  1.1     ross         if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
   3853  1.1     ross         normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
   3854  1.1     ross     }
   3855  1.1     ross     zExp = aExp - bExp + 0x3FFE;
   3856  1.1     ross     rem1 = 0;
                          /* When the dividend significand is not smaller than the
                             divisor's, halve it (the shifted-out bit lands in rem1) and
                             compensate in the exponent. */
   3857  1.1     ross     if ( bSig <= aSig ) {
   3858  1.1     ross         shift128Right( aSig, 0, 1, &aSig, &rem1 );
   3859  1.1     ross         ++zExp;
   3860  1.1     ross     }
                          /* High quotient word: estimate, multiply back, and step the
                             estimate down while the remainder is negative. */
   3861  1.1     ross     zSig0 = estimateDiv128To64( aSig, rem1, bSig );
   3862  1.1     ross     mul64To128( bSig, zSig0, &term0, &term1 );
   3863  1.1     ross     sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
   3864  1.1     ross     while ( (sbits64) rem0 < 0 ) {
   3865  1.1     ross         --zSig0;
   3866  1.1     ross         add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
   3867  1.1     ross     }
                          /* Low quotient word.  The exact correction is only performed
                             when the estimate's bits below the top one are small (at most
                             8 after the shift) -- presumably the only case in which the
                             estimate's error could change the rounding; confirm against
                             estimateDiv128To64().  The lowest bit then also records
                             whether any remainder is left. */
   3868  1.1     ross     zSig1 = estimateDiv128To64( rem1, 0, bSig );
   3869  1.1     ross     if ( (bits64) ( zSig1<<1 ) <= 8 ) {
   3870  1.1     ross         mul64To128( bSig, zSig1, &term1, &term2 );
   3871  1.1     ross         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
   3872  1.1     ross         while ( (sbits64) rem1 < 0 ) {
   3873  1.1     ross             --zSig1;
   3874  1.1     ross             add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
   3875  1.1     ross         }
   3876  1.1     ross         zSig1 |= ( ( rem1 | rem2 ) != 0 );
   3877  1.1     ross     }
   3878  1.1     ross     return
   3879  1.1     ross         roundAndPackFloatx80(
   3880  1.1     ross             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
   3881  1.1     ross 
   3882  1.1     ross }
   3883  1.1     ross 
   3884  1.1     ross /*
   3885  1.1     ross -------------------------------------------------------------------------------
   3886  1.1     ross Returns the remainder of the extended double-precision floating-point value
   3887  1.1     ross `a' with respect to the corresponding value `b'.  The operation is performed
   3888  1.1     ross according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3889  1.1     ross -------------------------------------------------------------------------------
   3890  1.1     ross */
   3891  1.1     ross floatx80 floatx80_rem( floatx80 a, floatx80 b )
   3892  1.1     ross {
                          /*
                           * Computes the remainder of `a' with respect to `b'.  The
                           * quotient is developed in chunks by repeated estimate-and-
                           * subtract steps; at the end the remainder and its complement
                           * with respect to the divisor are compared and the smaller one
                           * is returned.  The result is packed at 80-bit precision
                           * regardless of floatx80_rounding_precision.
                           */
   3893  1.1     ross     flag aSign, bSign, zSign;
   3894  1.1     ross     int32 aExp, bExp, expDiff;
   3895  1.1     ross     bits64 aSig0, aSig1, bSig;
   3896  1.1     ross     bits64 q, term0, term1, alternateASig0, alternateASig1;
   3897  1.1     ross     floatx80 z;
   3898  1.1     ross 
   3899  1.1     ross     aSig0 = extractFloatx80Frac( a );
   3900  1.1     ross     aExp = extractFloatx80Exp( a );
   3901  1.1     ross     aSign = extractFloatx80Sign( a );
   3902  1.1     ross     bSig = extractFloatx80Frac( b );
   3903  1.1     ross     bExp = extractFloatx80Exp( b );
   3904  1.1     ross     bSign = extractFloatx80Sign( b );
   3905  1.1     ross     if ( aExp == 0x7FFF ) {
   3906  1.1     ross         if (    (bits64) ( aSig0<<1 )
   3907  1.1     ross              || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
   3908  1.1     ross             return propagateFloatx80NaN( a, b );
   3909  1.1     ross         }
                              /* Infinite dividend with a non-NaN divisor is invalid. */
   3910  1.1     ross         goto invalid;
   3911  1.1     ross     }
   3912  1.1     ross     if ( bExp == 0x7FFF ) {
   3913  1.1     ross         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
                              /* Infinite divisor: `a' is returned unchanged. */
   3914  1.1     ross         return a;
   3915  1.1     ross     }
   3916  1.1     ross     if ( bExp == 0 ) {
   3917  1.1     ross         if ( bSig == 0 ) {
                                  /* Zero divisor is invalid. */
   3918  1.1     ross  invalid:
   3919  1.1     ross             float_raise( float_flag_invalid );
   3920  1.1     ross             z.low = floatx80_default_nan_low;
   3921  1.1     ross             z.high = floatx80_default_nan_high;
   3922  1.1     ross             return z;
   3923  1.1     ross         }
   3924  1.1     ross         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
   3925  1.1     ross     }
   3926  1.1     ross     if ( aExp == 0 ) {
                              /* Exponent field 0 with no bits below the top significand
                                 bit: `a' is returned unchanged. */
   3927  1.1     ross         if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
   3928  1.1     ross         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
   3929  1.1     ross     }
                          /* Force the divisor's top significand bit on. */
   3930  1.1     ross     bSig |= LIT64( 0x8000000000000000 );
   3931  1.1     ross     zSign = aSign;
   3932  1.1     ross     expDiff = aExp - bExp;
   3933  1.1     ross     aSig1 = 0;
   3934  1.1     ross     if ( expDiff < 0 ) {
                              /* Dividend exponent more than one below the divisor's: `a'
                                 is returned as the remainder.  Exactly one below: halve
                                 the dividend and continue with a difference of 0. */
   3935  1.1     ross         if ( expDiff < -1 ) return a;
   3936  1.1     ross         shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
   3937  1.1     ross         expDiff = 0;
   3938  1.1     ross     }
                          /* Leading quotient bit. */
   3939  1.1     ross     q = ( bSig <= aSig0 );
   3940  1.1     ross     if ( q ) aSig0 -= bSig;
   3941  1.1     ross     expDiff -= 64;
                          /* Main loop: each pass consumes 62 bits of exponent difference.
                             The estimate is lowered by 2 (floored at 0) before use --
                             presumably so it can never exceed the true quotient chunk;
                             confirm against estimateDiv128To64(). */
   3942  1.1     ross     while ( 0 < expDiff ) {
   3943  1.1     ross         q = estimateDiv128To64( aSig0, aSig1, bSig );
   3944  1.1     ross         q = ( 2 < q ) ? q - 2 : 0;
   3945  1.1     ross         mul64To128( bSig, q, &term0, &term1 );
   3946  1.1     ross         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
   3947  1.1     ross         shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
   3948  1.1     ross         expDiff -= 62;
   3949  1.1     ross     }
   3950  1.1     ross     expDiff += 64;
   3951  1.1     ross     if ( 0 < expDiff ) {
                              /* Final partial chunk: keep only the top expDiff bits of the
                                 lowered estimate, subtract, then step q up while the
                                 scaled divisor (term0:term1) still fits. */
   3952  1.1     ross         q = estimateDiv128To64( aSig0, aSig1, bSig );
   3953  1.1     ross         q = ( 2 < q ) ? q - 2 : 0;
   3954  1.1     ross         q >>= 64 - expDiff;
   3955  1.1     ross         mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
   3956  1.1     ross         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
   3957  1.1     ross         shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
   3958  1.1     ross         while ( le128( term0, term1, aSig0, aSig1 ) ) {
   3959  1.1     ross             ++q;
   3960  1.1     ross             sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
   3961  1.1     ross         }
   3962  1.1     ross     }
   3963  1.1     ross     else {
   3964  1.1     ross         term1 = 0;
   3965  1.1     ross         term0 = bSig;
   3966  1.1     ross     }
                          /* alternateASig = divisor - remainder.  It replaces the
                             remainder (with the sign flipped) when it is smaller, or when
                             the two are equal and q is odd. */
   3967  1.1     ross     sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
   3968  1.1     ross     if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
   3969  1.1     ross          || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
   3970  1.1     ross               && ( q & 1 ) )
   3971  1.1     ross        ) {
   3972  1.1     ross         aSig0 = alternateASig0;
   3973  1.1     ross         aSig1 = alternateASig1;
   3974  1.1     ross         zSign = ! zSign;
   3975  1.1     ross     }
   3976  1.1     ross     return
   3977  1.1     ross         normalizeRoundAndPackFloatx80(
   3978  1.1     ross             80, zSign, bExp + expDiff, aSig0, aSig1 );
   3979  1.1     ross 
   3980  1.1     ross }
   3981  1.1     ross 
   3982  1.1     ross /*
   3983  1.1     ross -------------------------------------------------------------------------------
   3984  1.1     ross Returns the square root of the extended double-precision floating-point
   3985  1.1     ross value `a'.  The operation is performed according to the IEC/IEEE Standard
   3986  1.1     ross for Binary Floating-Point Arithmetic.
   3987  1.1     ross -------------------------------------------------------------------------------
   3988  1.1     ross */
   3989  1.1     ross floatx80 floatx80_sqrt( floatx80 a )
   3990  1.1     ross {
                          /*
                           * Computes the square root of `a'.  Special operands are answered
                           * first; the root significand is then built as two words
                           * (zSig0, zSig1) from estimates that are corrected downward, and
                           * packed with roundAndPackFloatx80() using sign 0.
                           */
   3991  1.1     ross     flag aSign;
   3992  1.1     ross     int32 aExp, zExp;
   3993  1.1     ross     bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
   3994  1.1     ross     bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
   3995  1.1     ross     floatx80 z;
   3996  1.1     ross 
   3997  1.1     ross     aSig0 = extractFloatx80Frac( a );
   3998  1.1     ross     aExp = extractFloatx80Exp( a );
   3999  1.1     ross     aSign = extractFloatx80Sign( a );
   4000  1.1     ross     if ( aExp == 0x7FFF ) {
                              /* NaNs propagate; a positive infinity is returned as is; a
                                 negative infinity is invalid. */
   4001  1.1     ross         if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
   4002  1.1     ross         if ( ! aSign ) return a;
   4003  1.1     ross         goto invalid;
   4004  1.1     ross     }
   4005  1.1     ross     if ( aSign ) {
                              /* Negative zero is returned as is; any other negative
                                 operand is invalid. */
   4006  1.1     ross         if ( ( aExp | aSig0 ) == 0 ) return a;
   4007  1.1     ross  invalid:
   4008  1.1     ross         float_raise( float_flag_invalid );
   4009  1.1     ross         z.low = floatx80_default_nan_low;
   4010  1.1     ross         z.high = floatx80_default_nan_high;
   4011  1.1     ross         return z;
   4012  1.1     ross     }
   4013  1.1     ross     if ( aExp == 0 ) {
   4014  1.1     ross         if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
   4015  1.1     ross         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
   4016  1.1     ross     }
                          /* Result exponent: half the distance from 0x3FFF, re-based on
                             0x3FFF. */
   4017  1.1     ross     zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
                          /* Start from a 32-bit root estimate, pre-shift the operand by 2
                             or 3 places depending on the low exponent bit, and extend the
                             estimate with one division step. */
   4018  1.1     ross     zSig0 = estimateSqrt32( aExp, aSig0>>32 );
   4019  1.1     ross     shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
   4020  1.1     ross     zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
   4021  1.1     ross     doubleZSig0 = zSig0<<1;
                          /* rem = operand - zSig0^2; step zSig0 down while the remainder
                             is negative, keeping doubleZSig0 in step with it. */
   4022  1.1     ross     mul64To128( zSig0, zSig0, &term0, &term1 );
   4023  1.1     ross     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
   4024  1.1     ross     while ( (sbits64) rem0 < 0 ) {
   4025  1.1     ross         --zSig0;
   4026  1.1     ross         doubleZSig0 -= 2;
   4027  1.1     ross         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
   4028  1.1     ross     }
                          /* Low root word.  The exact correction only runs when the
                             estimate's low 62 bits are at most 5 -- presumably the only
                             case in which the estimate's error could affect rounding;
                             confirm against estimateDiv128To64().  The lowest bit then
                             also records whether any remainder is left. */
   4029  1.1     ross     zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
   4030  1.1     ross     if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
   4031  1.1     ross         if ( zSig1 == 0 ) zSig1 = 1;
   4032  1.1     ross         mul64To128( doubleZSig0, zSig1, &term1, &term2 );
   4033  1.1     ross         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
   4034  1.1     ross         mul64To128( zSig1, zSig1, &term2, &term3 );
   4035  1.1     ross         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
   4036  1.1     ross         while ( (sbits64) rem1 < 0 ) {
   4037  1.1     ross             --zSig1;
   4038  1.1     ross             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
   4039  1.1     ross             term3 |= 1;
   4040  1.1     ross             term2 |= doubleZSig0;
   4041  1.1     ross             add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
   4042  1.1     ross         }
   4043  1.1     ross         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
   4044  1.1     ross     }
                          /* Shift the low word up one place and merge the bit that falls
                             out of it with doubleZSig0 to form the final high word. */
   4045  1.1     ross     shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
   4046  1.1     ross     zSig0 |= doubleZSig0;
   4047  1.1     ross     return
   4048  1.1     ross         roundAndPackFloatx80(
   4049  1.1     ross             floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
   4050  1.1     ross 
   4051  1.1     ross }
   4052  1.1     ross 
   4053  1.1     ross /*
   4054  1.1     ross -------------------------------------------------------------------------------
   4055  1.1     ross Returns 1 if the extended double-precision floating-point value `a' is
   4056  1.1     ross equal to the corresponding value `b', and 0 otherwise.  The comparison is
   4057  1.1     ross performed according to the IEC/IEEE Standard for Binary Floating-Point
   4058  1.1     ross Arithmetic.
   4059  1.1     ross -------------------------------------------------------------------------------
   4060  1.1     ross */
   4061  1.1     ross flag floatx80_eq( floatx80 a, floatx80 b )
   4062  1.1     ross {
   4063  1.1     ross 
   4064  1.1     ross     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4065  1.1     ross               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4066  1.1     ross          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4067  1.1     ross               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4068  1.1     ross        ) {
   4069  1.1     ross         if (    floatx80_is_signaling_nan( a )
   4070  1.1     ross              || floatx80_is_signaling_nan( b ) ) {
   4071  1.1     ross             float_raise( float_flag_invalid );
   4072  1.1     ross         }
   4073  1.1     ross         return 0;
   4074  1.1     ross     }
   4075  1.1     ross     return
   4076  1.1     ross            ( a.low == b.low )
   4077  1.1     ross         && (    ( a.high == b.high )
   4078  1.1     ross              || (    ( a.low == 0 )
   4079  1.1     ross                   && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
   4080  1.1     ross            );
   4081  1.1     ross 
   4082  1.1     ross }
   4083  1.1     ross 
   4084  1.1     ross /*
   4085  1.1     ross -------------------------------------------------------------------------------
   4086  1.1     ross Returns 1 if the extended double-precision floating-point value `a' is
   4087  1.1     ross less than or equal to the corresponding value `b', and 0 otherwise.  The
   4088  1.1     ross comparison is performed according to the IEC/IEEE Standard for Binary
   4089  1.1     ross Floating-Point Arithmetic.
   4090  1.1     ross -------------------------------------------------------------------------------
   4091  1.1     ross */
   4092  1.1     ross flag floatx80_le( floatx80 a, floatx80 b )
   4093  1.1     ross {
   4094  1.1     ross     flag aSign, bSign;
   4095  1.1     ross 
   4096  1.1     ross     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4097  1.1     ross               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4098  1.1     ross          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4099  1.1     ross               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4100  1.1     ross        ) {
   4101  1.1     ross         float_raise( float_flag_invalid );
   4102  1.1     ross         return 0;
   4103  1.1     ross     }
   4104  1.1     ross     aSign = extractFloatx80Sign( a );
   4105  1.1     ross     bSign = extractFloatx80Sign( b );
   4106  1.1     ross     if ( aSign != bSign ) {
   4107  1.1     ross         return
   4108  1.1     ross                aSign
   4109  1.1     ross             || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   4110  1.1     ross                  == 0 );
   4111  1.1     ross     }
   4112  1.1     ross     return
   4113  1.1     ross           aSign ? le128( b.high, b.low, a.high, a.low )
   4114  1.1     ross         : le128( a.high, a.low, b.high, b.low );
   4115  1.1     ross 
   4116  1.1     ross }
   4117  1.1     ross 
   4118  1.1     ross /*
   4119  1.1     ross -------------------------------------------------------------------------------
   4120  1.1     ross Returns 1 if the extended double-precision floating-point value `a' is
   4121  1.1     ross less than the corresponding value `b', and 0 otherwise.  The comparison
   4122  1.1     ross is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4123  1.1     ross Arithmetic.
   4124  1.1     ross -------------------------------------------------------------------------------
   4125  1.1     ross */
   4126  1.1     ross flag floatx80_lt( floatx80 a, floatx80 b )
   4127  1.1     ross {
   4128  1.1     ross     flag aSign, bSign;
   4129  1.1     ross 
   4130  1.1     ross     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4131  1.1     ross               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4132  1.1     ross          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4133  1.1     ross               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4134  1.1     ross        ) {
   4135  1.1     ross         float_raise( float_flag_invalid );
   4136  1.1     ross         return 0;
   4137  1.1     ross     }
   4138  1.1     ross     aSign = extractFloatx80Sign( a );
   4139  1.1     ross     bSign = extractFloatx80Sign( b );
   4140  1.1     ross     if ( aSign != bSign ) {
   4141  1.1     ross         return
   4142  1.1     ross                aSign
   4143  1.1     ross             && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   4144  1.1     ross                  != 0 );
   4145  1.1     ross     }
   4146  1.1     ross     return
   4147  1.1     ross           aSign ? lt128( b.high, b.low, a.high, a.low )
   4148  1.1     ross         : lt128( a.high, a.low, b.high, b.low );
   4149  1.1     ross 
   4150  1.1     ross }
   4151  1.1     ross 
   4152  1.1     ross /*
   4153  1.1     ross -------------------------------------------------------------------------------
   4154  1.1     ross Returns 1 if the extended double-precision floating-point value `a' is equal
   4155  1.1     ross to the corresponding value `b', and 0 otherwise.  The invalid exception is
   4156  1.1     ross raised if either operand is a NaN.  Otherwise, the comparison is performed
   4157  1.1     ross according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   4158  1.1     ross -------------------------------------------------------------------------------
   4159  1.1     ross */
   4160  1.1     ross flag floatx80_eq_signaling( floatx80 a, floatx80 b )
   4161  1.1     ross {
   4162  1.1     ross 
   4163  1.1     ross     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4164  1.1     ross               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4165  1.1     ross          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4166  1.1     ross               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4167  1.1     ross        ) {
   4168  1.1     ross         float_raise( float_flag_invalid );
   4169  1.1     ross         return 0;
   4170  1.1     ross     }
   4171  1.1     ross     return
   4172  1.1     ross            ( a.low == b.low )
   4173  1.1     ross         && (    ( a.high == b.high )
   4174  1.1     ross              || (    ( a.low == 0 )
   4175  1.1     ross                   && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
   4176  1.1     ross            );
   4177  1.1     ross 
   4178  1.1     ross }
   4179  1.1     ross 
   4180  1.1     ross /*
   4181  1.1     ross -------------------------------------------------------------------------------
   4182  1.1     ross Returns 1 if the extended double-precision floating-point value `a' is less
   4183  1.1     ross than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
   4184  1.1     ross do not cause an exception.  Otherwise, the comparison is performed according
   4185  1.1     ross to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   4186  1.1     ross -------------------------------------------------------------------------------
   4187  1.1     ross */
   4188  1.1     ross flag floatx80_le_quiet( floatx80 a, floatx80 b )
   4189  1.1     ross {
   4190  1.1     ross     flag aSign, bSign;
   4191  1.1     ross 
   4192  1.1     ross     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4193  1.1     ross               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4194  1.1     ross          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4195  1.1     ross               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4196  1.1     ross        ) {
   4197  1.1     ross         if (    floatx80_is_signaling_nan( a )
   4198  1.1     ross              || floatx80_is_signaling_nan( b ) ) {
   4199  1.1     ross             float_raise( float_flag_invalid );
   4200  1.1     ross         }
   4201  1.1     ross         return 0;
   4202  1.1     ross     }
   4203  1.1     ross     aSign = extractFloatx80Sign( a );
   4204  1.1     ross     bSign = extractFloatx80Sign( b );
   4205  1.1     ross     if ( aSign != bSign ) {
   4206  1.1     ross         return
   4207  1.1     ross                aSign
   4208  1.1     ross             || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   4209  1.1     ross                  == 0 );
   4210  1.1     ross     }
   4211  1.1     ross     return
   4212  1.1     ross           aSign ? le128( b.high, b.low, a.high, a.low )
   4213  1.1     ross         : le128( a.high, a.low, b.high, b.low );
   4214  1.1     ross 
   4215  1.1     ross }
   4216  1.1     ross 
   4217  1.1     ross /*
   4218  1.1     ross -------------------------------------------------------------------------------
   4219  1.1     ross Returns 1 if the extended double-precision floating-point value `a' is less
   4220  1.1     ross than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
   4221  1.1     ross an exception.  Otherwise, the comparison is performed according to the
   4222  1.1     ross IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   4223  1.1     ross -------------------------------------------------------------------------------
   4224  1.1     ross */
   4225  1.1     ross flag floatx80_lt_quiet( floatx80 a, floatx80 b )
   4226  1.1     ross {
   4227  1.1     ross     flag aSign, bSign;
   4228  1.1     ross 
   4229  1.1     ross     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4230  1.1     ross               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4231  1.1     ross          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4232  1.1     ross               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4233  1.1     ross        ) {
   4234  1.1     ross         if (    floatx80_is_signaling_nan( a )
   4235  1.1     ross              || floatx80_is_signaling_nan( b ) ) {
   4236  1.1     ross             float_raise( float_flag_invalid );
   4237  1.1     ross         }
   4238  1.1     ross         return 0;
   4239  1.1     ross     }
   4240  1.1     ross     aSign = extractFloatx80Sign( a );
   4241  1.1     ross     bSign = extractFloatx80Sign( b );
   4242  1.1     ross     if ( aSign != bSign ) {
   4243  1.1     ross         return
   4244  1.1     ross                aSign
   4245  1.1     ross             && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   4246  1.1     ross                  != 0 );
   4247  1.1     ross     }
   4248  1.1     ross     return
   4249  1.1     ross           aSign ? lt128( b.high, b.low, a.high, a.low )
   4250  1.1     ross         : lt128( a.high, a.low, b.high, b.low );
   4251  1.1     ross 
   4252  1.1     ross }
   4253  1.1     ross 
   4254  1.1     ross #endif
   4255  1.1     ross 
   4256  1.1     ross #ifdef FLOAT128
   4257  1.1     ross 
   4258  1.1     ross /*
   4259  1.1     ross -------------------------------------------------------------------------------
   4260  1.1     ross Returns the result of converting the quadruple-precision floating-point
   4261  1.1     ross value `a' to the 32-bit two's complement integer format.  The conversion
   4262  1.1     ross is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4263  1.1     ross Arithmetic---which means in particular that the conversion is rounded
   4264  1.1     ross according to the current rounding mode.  If `a' is a NaN, the largest
   4265  1.1     ross positive integer is returned.  Otherwise, if the conversion overflows, the
   4266  1.1     ross largest integer with the same sign as `a' is returned.
   4267  1.1     ross -------------------------------------------------------------------------------
   4268  1.1     ross */
   4269  1.1     ross int32 float128_to_int32( float128 a )
   4270  1.1     ross {
   4271  1.1     ross     flag aSign;
   4272  1.1     ross     int32 aExp, shiftCount;
   4273  1.1     ross     bits64 aSig0, aSig1;
   4274  1.1     ross 
   4275  1.1     ross     aSig1 = extractFloat128Frac1( a );
   4276  1.1     ross     aSig0 = extractFloat128Frac0( a );
   4277  1.1     ross     aExp = extractFloat128Exp( a );
   4278  1.1     ross     aSign = extractFloat128Sign( a );
   4279  1.1     ross     if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
   4280  1.1     ross     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
   4281  1.1     ross     aSig0 |= ( aSig1 != 0 );
   4282  1.1     ross     shiftCount = 0x4028 - aExp;
   4283  1.1     ross     if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
   4284  1.1     ross     return roundAndPackInt32( aSign, aSig0 );
   4285  1.1     ross 
   4286  1.1     ross }
   4287  1.1     ross 
   4288  1.1     ross /*
   4289  1.1     ross -------------------------------------------------------------------------------
   4290  1.1     ross Returns the result of converting the quadruple-precision floating-point
   4291  1.1     ross value `a' to the 32-bit two's complement integer format.  The conversion
   4292  1.1     ross is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4293  1.1     ross Arithmetic, except that the conversion is always rounded toward zero.  If
   4294  1.1     ross `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
   4295  1.1     ross conversion overflows, the largest integer with the same sign as `a' is
   4296  1.1     ross returned.
   4297  1.1     ross -------------------------------------------------------------------------------
   4298  1.1     ross */
   4299  1.1     ross int32 float128_to_int32_round_to_zero( float128 a )
   4300  1.1     ross {
   4301  1.1     ross     flag aSign;
   4302  1.1     ross     int32 aExp, shiftCount;
   4303  1.1     ross     bits64 aSig0, aSig1, savedASig;
   4304  1.1     ross     int32 z;
   4305  1.1     ross 
   4306  1.1     ross     aSig1 = extractFloat128Frac1( a );
   4307  1.1     ross     aSig0 = extractFloat128Frac0( a );
   4308  1.1     ross     aExp = extractFloat128Exp( a );
   4309  1.1     ross     aSign = extractFloat128Sign( a );
   4310  1.1     ross     aSig0 |= ( aSig1 != 0 );
   4311  1.1     ross     if ( 0x401E < aExp ) {
   4312  1.1     ross         if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
   4313  1.1     ross         goto invalid;
   4314  1.1     ross     }
   4315  1.1     ross     else if ( aExp < 0x3FFF ) {
   4316  1.1     ross         if ( aExp || aSig0 ) float_set_inexact();
   4317  1.1     ross         return 0;
   4318  1.1     ross     }
   4319  1.1     ross     aSig0 |= LIT64( 0x0001000000000000 );
   4320  1.1     ross     shiftCount = 0x402F - aExp;
   4321  1.1     ross     savedASig = aSig0;
   4322  1.1     ross     aSig0 >>= shiftCount;
   4323  1.1     ross     z = aSig0;
   4324  1.1     ross     if ( aSign ) z = - z;
   4325  1.1     ross     if ( ( z < 0 ) ^ aSign ) {
   4326  1.1     ross  invalid:
   4327  1.1     ross         float_raise( float_flag_invalid );
   4328  1.1     ross         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
   4329  1.1     ross     }
   4330  1.1     ross     if ( ( aSig0<<shiftCount ) != savedASig ) {
   4331  1.1     ross         float_set_inexact();
   4332  1.1     ross     }
   4333  1.1     ross     return z;
   4334  1.1     ross 
   4335  1.1     ross }
   4336  1.1     ross 
   4337  1.1     ross /*
   4338  1.1     ross -------------------------------------------------------------------------------
   4339  1.1     ross Returns the result of converting the quadruple-precision floating-point
   4340  1.1     ross value `a' to the 64-bit two's complement integer format.  The conversion
   4341  1.1     ross is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4342  1.1     ross Arithmetic---which means in particular that the conversion is rounded
   4343  1.1     ross according to the current rounding mode.  If `a' is a NaN, the largest
   4344  1.1     ross positive integer is returned.  Otherwise, if the conversion overflows, the
   4345  1.1     ross largest integer with the same sign as `a' is returned.
   4346  1.1     ross -------------------------------------------------------------------------------
   4347  1.1     ross */
   4348  1.1     ross int64 float128_to_int64( float128 a )
   4349  1.1     ross {
   4350  1.1     ross     flag aSign;
   4351  1.1     ross     int32 aExp, shiftCount;
   4352  1.1     ross     bits64 aSig0, aSig1;
   4353  1.1     ross 
   4354  1.1     ross     aSig1 = extractFloat128Frac1( a );
   4355  1.1     ross     aSig0 = extractFloat128Frac0( a );
   4356  1.1     ross     aExp = extractFloat128Exp( a );
   4357  1.1     ross     aSign = extractFloat128Sign( a );
   4358  1.1     ross     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
   4359  1.1     ross     shiftCount = 0x402F - aExp;
   4360  1.1     ross     if ( shiftCount <= 0 ) {
   4361  1.1     ross         if ( 0x403E < aExp ) {
   4362  1.1     ross             float_raise( float_flag_invalid );
   4363  1.1     ross             if (    ! aSign
   4364  1.1     ross                  || (    ( aExp == 0x7FFF )
   4365  1.1     ross                       && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
   4366  1.1     ross                     )
   4367  1.1     ross                ) {
   4368  1.1     ross                 return LIT64( 0x7FFFFFFFFFFFFFFF );
   4369  1.1     ross             }
   4370  1.1     ross             return (sbits64) LIT64( 0x8000000000000000 );
   4371  1.1     ross         }
   4372  1.1     ross         shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
   4373  1.1     ross     }
   4374  1.1     ross     else {
   4375  1.1     ross         shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
   4376  1.1     ross     }
   4377  1.1     ross     return roundAndPackInt64( aSign, aSig0, aSig1 );
   4378  1.1     ross 
   4379  1.1     ross }
   4380  1.1     ross 
   4381  1.1     ross /*
   4382  1.1     ross -------------------------------------------------------------------------------
   4383  1.1     ross Returns the result of converting the quadruple-precision floating-point
   4384  1.1     ross value `a' to the 64-bit two's complement integer format.  The conversion
   4385  1.1     ross is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4386  1.1     ross Arithmetic, except that the conversion is always rounded toward zero.
   4387  1.1     ross If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
   4388  1.1     ross the conversion overflows, the largest integer with the same sign as `a' is
   4389  1.1     ross returned.
   4390  1.1     ross -------------------------------------------------------------------------------
   4391  1.1     ross */
   4392  1.1     ross int64 float128_to_int64_round_to_zero( float128 a )
   4393  1.1     ross {
   4394  1.1     ross     flag aSign;
   4395  1.1     ross     int32 aExp, shiftCount;
   4396  1.1     ross     bits64 aSig0, aSig1;
   4397  1.1     ross     int64 z;
   4398  1.1     ross 
   4399  1.1     ross     aSig1 = extractFloat128Frac1( a );
   4400  1.1     ross     aSig0 = extractFloat128Frac0( a );
   4401  1.1     ross     aExp = extractFloat128Exp( a );
   4402  1.1     ross     aSign = extractFloat128Sign( a );
   4403  1.1     ross     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
   4404  1.1     ross     shiftCount = aExp - 0x402F;
   4405  1.1     ross     if ( 0 < shiftCount ) {
   4406  1.1     ross         if ( 0x403E <= aExp ) {
   4407  1.1     ross             aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
   4408  1.1     ross             if (    ( a.high == LIT64( 0xC03E000000000000 ) )
   4409  1.1     ross                  && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
   4410  1.1     ross                 if ( aSig1 ) float_set_inexact();
   4411  1.1     ross             }
   4412  1.1     ross             else {
   4413  1.1     ross                 float_raise( float_flag_invalid );
   4414  1.1     ross                 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
   4415  1.1     ross                     return LIT64( 0x7FFFFFFFFFFFFFFF );
   4416  1.1     ross                 }
   4417  1.1     ross             }
   4418  1.1     ross             return (sbits64) LIT64( 0x8000000000000000 );
   4419  1.1     ross         }
   4420  1.1     ross         z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
   4421  1.1     ross         if ( (bits64) ( aSig1<<shiftCount ) ) {
   4422  1.1     ross             float_set_inexact();
   4423  1.1     ross         }
   4424  1.1     ross     }
   4425  1.1     ross     else {
   4426  1.1     ross         if ( aExp < 0x3FFF ) {
   4427  1.1     ross             if ( aExp | aSig0 | aSig1 ) {
   4428  1.1     ross                 float_set_inexact();
   4429  1.1     ross             }
   4430  1.1     ross             return 0;
   4431  1.1     ross         }
   4432  1.1     ross         z = aSig0>>( - shiftCount );
   4433  1.1     ross         if (    aSig1
   4434  1.1     ross              || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
   4435  1.1     ross             float_set_inexact();
   4436  1.1     ross         }
   4437  1.1     ross     }
   4438  1.1     ross     if ( aSign ) z = - z;
   4439  1.1     ross     return z;
   4440  1.1     ross 
   4441  1.1     ross }
   4442  1.1     ross 
   4443  1.1     ross /*
   4444  1.1     ross -------------------------------------------------------------------------------
   4445  1.1     ross Returns the result of converting the quadruple-precision floating-point
   4446  1.1     ross value `a' to the single-precision floating-point format.  The conversion
   4447  1.1     ross is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4448  1.1     ross Arithmetic.
   4449  1.1     ross -------------------------------------------------------------------------------
   4450  1.1     ross */
   4451  1.1     ross float32 float128_to_float32( float128 a )
   4452  1.1     ross {
   4453  1.1     ross     flag aSign;
   4454  1.1     ross     int32 aExp;
   4455  1.1     ross     bits64 aSig0, aSig1;
   4456  1.1     ross     bits32 zSig;
   4457  1.1     ross 
   4458  1.1     ross     aSig1 = extractFloat128Frac1( a );
   4459  1.1     ross     aSig0 = extractFloat128Frac0( a );
   4460  1.1     ross     aExp = extractFloat128Exp( a );
   4461  1.1     ross     aSign = extractFloat128Sign( a );
   4462  1.1     ross     if ( aExp == 0x7FFF ) {
   4463  1.1     ross         if ( aSig0 | aSig1 ) {
   4464  1.1     ross             return commonNaNToFloat32( float128ToCommonNaN( a ) );
   4465  1.1     ross         }
   4466  1.1     ross         return packFloat32( aSign, 0xFF, 0 );
   4467  1.1     ross     }
   4468  1.1     ross     aSig0 |= ( aSig1 != 0 );
   4469  1.1     ross     shift64RightJamming( aSig0, 18, &aSig0 );
   4470  1.1     ross     zSig = aSig0;
   4471  1.1     ross     if ( aExp || zSig ) {
   4472  1.1     ross         zSig |= 0x40000000;
   4473  1.1     ross         aExp -= 0x3F81;
   4474  1.1     ross     }
   4475  1.1     ross     return roundAndPackFloat32( aSign, aExp, zSig );
   4476  1.1     ross 
   4477  1.1     ross }
   4478  1.1     ross 
   4479  1.1     ross /*
   4480  1.1     ross -------------------------------------------------------------------------------
   4481  1.1     ross Returns the result of converting the quadruple-precision floating-point
   4482  1.1     ross value `a' to the double-precision floating-point format.  The conversion
   4483  1.1     ross is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4484  1.1     ross Arithmetic.
   4485  1.1     ross -------------------------------------------------------------------------------
   4486  1.1     ross */
   4487  1.1     ross float64 float128_to_float64( float128 a )
   4488  1.1     ross {
   4489  1.1     ross     flag aSign;
   4490  1.1     ross     int32 aExp;
   4491  1.1     ross     bits64 aSig0, aSig1;
   4492  1.1     ross 
   4493  1.1     ross     aSig1 = extractFloat128Frac1( a );
   4494  1.1     ross     aSig0 = extractFloat128Frac0( a );
   4495  1.1     ross     aExp = extractFloat128Exp( a );
   4496  1.1     ross     aSign = extractFloat128Sign( a );
   4497  1.1     ross     if ( aExp == 0x7FFF ) {
   4498  1.1     ross         if ( aSig0 | aSig1 ) {
   4499  1.1     ross             return commonNaNToFloat64( float128ToCommonNaN( a ) );
   4500  1.1     ross         }
   4501  1.1     ross         return packFloat64( aSign, 0x7FF, 0 );
   4502  1.1     ross     }
   4503  1.1     ross     shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
   4504  1.1     ross     aSig0 |= ( aSig1 != 0 );
   4505  1.1     ross     if ( aExp || aSig0 ) {
   4506  1.1     ross         aSig0 |= LIT64( 0x4000000000000000 );
   4507  1.1     ross         aExp -= 0x3C01;
   4508  1.1     ross     }
   4509  1.1     ross     return roundAndPackFloat64( aSign, aExp, aSig0 );
   4510  1.1     ross 
   4511  1.1     ross }
   4512  1.1     ross 
   4513  1.1     ross #ifdef FLOATX80
   4514  1.1     ross 
   4515  1.1     ross /*
   4516  1.1     ross -------------------------------------------------------------------------------
   4517  1.1     ross Returns the result of converting the quadruple-precision floating-point
   4518  1.1     ross value `a' to the extended double-precision floating-point format.  The
   4519  1.1     ross conversion is performed according to the IEC/IEEE Standard for Binary
   4520  1.1     ross Floating-Point Arithmetic.
   4521  1.1     ross -------------------------------------------------------------------------------
   4522  1.1     ross */
   4523  1.1     ross floatx80 float128_to_floatx80( float128 a )
   4524  1.1     ross {
   4525  1.1     ross     flag aSign;
   4526  1.1     ross     int32 aExp;
   4527  1.1     ross     bits64 aSig0, aSig1;
   4528  1.1     ross 
   4529  1.1     ross     aSig1 = extractFloat128Frac1( a );
   4530  1.1     ross     aSig0 = extractFloat128Frac0( a );
   4531  1.1     ross     aExp = extractFloat128Exp( a );
   4532  1.1     ross     aSign = extractFloat128Sign( a );
   4533  1.1     ross     if ( aExp == 0x7FFF ) {
   4534  1.1     ross         if ( aSig0 | aSig1 ) {
   4535  1.1     ross             return commonNaNToFloatx80( float128ToCommonNaN( a ) );
   4536  1.1     ross         }
   4537  1.1     ross         return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   4538  1.1     ross     }
   4539  1.1     ross     if ( aExp == 0 ) {
   4540  1.1     ross         if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
   4541  1.1     ross         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   4542  1.1     ross     }
   4543  1.1     ross     else {
   4544  1.1     ross         aSig0 |= LIT64( 0x0001000000000000 );
   4545  1.1     ross     }
   4546  1.1     ross     shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
   4547  1.1     ross     return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
   4548  1.1     ross 
   4549  1.1     ross }
   4550  1.1     ross 
   4551  1.1     ross #endif
   4552  1.1     ross 
   4553  1.1     ross /*
   4554  1.1     ross -------------------------------------------------------------------------------
   4555  1.1     ross Rounds the quadruple-precision floating-point value `a' to an integer, and
   4556  1.1     ross returns the result as a quadruple-precision floating-point value.  The
   4557  1.1     ross operation is performed according to the IEC/IEEE Standard for Binary
   4558  1.1     ross Floating-Point Arithmetic.
   4559  1.1     ross -------------------------------------------------------------------------------
   4560  1.1     ross */
   4561  1.1     ross float128 float128_round_to_int( float128 a )
   4562  1.1     ross {
   4563  1.1     ross     flag aSign;
   4564  1.1     ross     int32 aExp;
   4565  1.1     ross     bits64 lastBitMask, roundBitsMask;
   4566  1.1     ross     int8 roundingMode;
   4567  1.1     ross     float128 z;
   4568  1.1     ross 
   4569  1.1     ross     aExp = extractFloat128Exp( a );
   4570  1.1     ross     if ( 0x402F <= aExp ) {
   4571  1.1     ross         if ( 0x406F <= aExp ) {
   4572  1.1     ross             if (    ( aExp == 0x7FFF )
   4573  1.1     ross                  && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
   4574  1.1     ross                ) {
   4575  1.1     ross                 return propagateFloat128NaN( a, a );
   4576  1.1     ross             }
   4577  1.1     ross             return a;
   4578  1.1     ross         }
   4579  1.1     ross         lastBitMask = 1;
   4580  1.1     ross         lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
   4581  1.1     ross         roundBitsMask = lastBitMask - 1;
   4582  1.1     ross         z = a;
   4583  1.1     ross         roundingMode = float_rounding_mode();
   4584  1.1     ross         if ( roundingMode == float_round_nearest_even ) {
   4585  1.1     ross             if ( lastBitMask ) {
   4586  1.1     ross                 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
   4587  1.1     ross                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
   4588  1.1     ross             }
   4589  1.1     ross             else {
   4590  1.1     ross                 if ( (sbits64) z.low < 0 ) {
   4591  1.1     ross                     ++z.high;
   4592  1.1     ross                     if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
   4593  1.1     ross                 }
   4594  1.1     ross             }
   4595  1.1     ross         }
   4596  1.1     ross         else if ( roundingMode != float_round_to_zero ) {
   4597  1.1     ross             if (   extractFloat128Sign( z )
   4598  1.1     ross                  ^ ( roundingMode == float_round_up ) ) {
   4599  1.1     ross                 add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
   4600  1.1     ross             }
   4601  1.1     ross         }
   4602  1.1     ross         z.low &= ~ roundBitsMask;
   4603  1.1     ross     }
   4604  1.1     ross     else {
   4605  1.1     ross         if ( aExp < 0x3FFF ) {
   4606  1.1     ross             if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
   4607  1.1     ross             float_set_inexact();
   4608  1.1     ross             aSign = extractFloat128Sign( a );
   4609  1.1     ross             switch ( float_rounding_mode() ) {
   4610  1.1     ross              case float_round_nearest_even:
   4611  1.1     ross                 if (    ( aExp == 0x3FFE )
   4612  1.1     ross                      && (   extractFloat128Frac0( a )
   4613  1.1     ross                           | extractFloat128Frac1( a ) )
   4614  1.1     ross                    ) {
   4615  1.1     ross                     return packFloat128( aSign, 0x3FFF, 0, 0 );
   4616  1.1     ross                 }
   4617  1.1     ross                 break;
   4618  1.1     ross              case float_round_down:
   4619  1.1     ross                 return
   4620  1.1     ross                       aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
   4621  1.1     ross                     : packFloat128( 0, 0, 0, 0 );
   4622  1.1     ross              case float_round_up:
   4623  1.1     ross                 return
   4624  1.1     ross                       aSign ? packFloat128( 1, 0, 0, 0 )
   4625  1.1     ross                     : packFloat128( 0, 0x3FFF, 0, 0 );
   4626  1.1     ross             }
   4627  1.1     ross             return packFloat128( aSign, 0, 0, 0 );
   4628  1.1     ross         }
   4629  1.1     ross         lastBitMask = 1;
   4630  1.1     ross         lastBitMask <<= 0x402F - aExp;
   4631  1.1     ross         roundBitsMask = lastBitMask - 1;
   4632  1.1     ross         z.low = 0;
   4633  1.1     ross         z.high = a.high;
   4634  1.1     ross         roundingMode = float_rounding_mode();
   4635  1.1     ross         if ( roundingMode == float_round_nearest_even ) {
   4636  1.1     ross             z.high += lastBitMask>>1;
   4637  1.1     ross             if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
   4638  1.1     ross                 z.high &= ~ lastBitMask;
   4639  1.1     ross             }
   4640  1.1     ross         }
   4641  1.1     ross         else if ( roundingMode != float_round_to_zero ) {
   4642  1.1     ross             if (   extractFloat128Sign( z )
   4643  1.1     ross                  ^ ( roundingMode == float_round_up ) ) {
   4644  1.1     ross                 z.high |= ( a.low != 0 );
   4645  1.1     ross                 z.high += roundBitsMask;
   4646  1.1     ross             }
   4647  1.1     ross         }
   4648  1.1     ross         z.high &= ~ roundBitsMask;
   4649  1.1     ross     }
   4650  1.1     ross     if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
   4651  1.1     ross         float_set_inexact();
   4652  1.1     ross     }
   4653  1.1     ross     return z;
   4654  1.1     ross 
   4655  1.1     ross }
   4656  1.1     ross 
   4657  1.1     ross /*
   4658  1.1     ross -------------------------------------------------------------------------------
   4659  1.1     ross Returns the result of adding the absolute values of the quadruple-precision
   4660  1.1     ross floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
   4661  1.1     ross before being returned.  `zSign' is ignored if the result is a NaN.
   4662  1.1     ross The addition is performed according to the IEC/IEEE Standard for Binary
   4663  1.1     ross Floating-Point Arithmetic.
   4664  1.1     ross -------------------------------------------------------------------------------
   4665  1.1     ross */
   4666  1.1     ross static float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
   4667  1.1     ross {
   4668  1.1     ross     int32 aExp, bExp, zExp;
   4669  1.1     ross     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
   4670  1.1     ross     int32 expDiff;
   4671  1.1     ross 
   4672  1.1     ross     aSig1 = extractFloat128Frac1( a );
   4673  1.1     ross     aSig0 = extractFloat128Frac0( a );
   4674  1.1     ross     aExp = extractFloat128Exp( a );
   4675  1.1     ross     bSig1 = extractFloat128Frac1( b );
   4676  1.1     ross     bSig0 = extractFloat128Frac0( b );
   4677  1.1     ross     bExp = extractFloat128Exp( b );
   4678  1.1     ross     expDiff = aExp - bExp;
   4679  1.1     ross     if ( 0 < expDiff ) {
   4680  1.1     ross         if ( aExp == 0x7FFF ) {
   4681  1.1     ross             if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
   4682  1.1     ross             return a;
   4683  1.1     ross         }
   4684  1.1     ross         if ( bExp == 0 ) {
   4685  1.1     ross             --expDiff;
   4686  1.1     ross         }
   4687  1.1     ross         else {
   4688  1.1     ross             bSig0 |= LIT64( 0x0001000000000000 );
   4689  1.1     ross         }
   4690  1.1     ross         shift128ExtraRightJamming(
   4691  1.1     ross             bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
   4692  1.1     ross         zExp = aExp;
   4693  1.1     ross     }
   4694  1.1     ross     else if ( expDiff < 0 ) {
   4695  1.1     ross         if ( bExp == 0x7FFF ) {
   4696  1.1     ross             if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   4697  1.1     ross             return packFloat128( zSign, 0x7FFF, 0, 0 );
   4698  1.1     ross         }
   4699  1.1     ross         if ( aExp == 0 ) {
   4700  1.1     ross             ++expDiff;
   4701  1.1     ross         }
   4702  1.1     ross         else {
   4703  1.1     ross             aSig0 |= LIT64( 0x0001000000000000 );
   4704  1.1     ross         }
   4705  1.1     ross         shift128ExtraRightJamming(
   4706  1.1     ross             aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
   4707  1.1     ross         zExp = bExp;
   4708  1.1     ross     }
   4709  1.1     ross     else {
   4710  1.1     ross         if ( aExp == 0x7FFF ) {
   4711  1.1     ross             if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
   4712  1.1     ross                 return propagateFloat128NaN( a, b );
   4713  1.1     ross             }
   4714  1.1     ross             return a;
   4715  1.1     ross         }
   4716  1.1     ross         add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
   4717  1.1     ross         if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
   4718  1.1     ross         zSig2 = 0;
   4719  1.1     ross         zSig0 |= LIT64( 0x0002000000000000 );
   4720  1.1     ross         zExp = aExp;
   4721  1.1     ross         goto shiftRight1;
   4722  1.1     ross     }
   4723  1.1     ross     aSig0 |= LIT64( 0x0001000000000000 );
   4724  1.1     ross     add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
   4725  1.1     ross     --zExp;
   4726  1.1     ross     if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
   4727  1.1     ross     ++zExp;
   4728  1.1     ross  shiftRight1:
   4729  1.1     ross     shift128ExtraRightJamming(
   4730  1.1     ross         zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
   4731  1.1     ross  roundAndPack:
   4732  1.1     ross     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
   4733  1.1     ross 
   4734  1.1     ross }
   4735  1.1     ross 
   4736  1.1     ross /*
   4737  1.1     ross -------------------------------------------------------------------------------
   4738  1.1     ross Returns the result of subtracting the absolute values of the quadruple-
   4739  1.1     ross precision floating-point values `a' and `b'.  If `zSign' is 1, the
   4740  1.1     ross difference is negated before being returned.  `zSign' is ignored if the
   4741  1.1     ross result is a NaN.  The subtraction is performed according to the IEC/IEEE
   4742  1.1     ross Standard for Binary Floating-Point Arithmetic.
   4743  1.1     ross -------------------------------------------------------------------------------
   4744  1.1     ross */
   4745  1.1     ross static float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
   4746  1.1     ross {
   4747  1.1     ross     int32 aExp, bExp, zExp;
   4748  1.1     ross     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
   4749  1.1     ross     int32 expDiff;
   4750  1.1     ross     float128 z;
   4751  1.1     ross 
   4752  1.1     ross     aSig1 = extractFloat128Frac1( a );
   4753  1.1     ross     aSig0 = extractFloat128Frac0( a );
   4754  1.1     ross     aExp = extractFloat128Exp( a );
   4755  1.1     ross     bSig1 = extractFloat128Frac1( b );
   4756  1.1     ross     bSig0 = extractFloat128Frac0( b );
   4757  1.1     ross     bExp = extractFloat128Exp( b );
   4758  1.1     ross     expDiff = aExp - bExp;
   4759  1.1     ross     shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
   4760  1.1     ross     shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
   4761  1.1     ross     if ( 0 < expDiff ) goto aExpBigger;
   4762  1.1     ross     if ( expDiff < 0 ) goto bExpBigger;
   4763  1.1     ross     if ( aExp == 0x7FFF ) {
   4764  1.1     ross         if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
   4765  1.1     ross             return propagateFloat128NaN( a, b );
   4766  1.1     ross         }
   4767  1.1     ross         float_raise( float_flag_invalid );
   4768  1.1     ross         z.low = float128_default_nan_low;
   4769  1.1     ross         z.high = float128_default_nan_high;
   4770  1.1     ross         return z;
   4771  1.1     ross     }
   4772  1.1     ross     if ( aExp == 0 ) {
   4773  1.1     ross         aExp = 1;
   4774  1.1     ross         bExp = 1;
   4775  1.1     ross     }
   4776  1.1     ross     if ( bSig0 < aSig0 ) goto aBigger;
   4777  1.1     ross     if ( aSig0 < bSig0 ) goto bBigger;
   4778  1.1     ross     if ( bSig1 < aSig1 ) goto aBigger;
   4779  1.1     ross     if ( aSig1 < bSig1 ) goto bBigger;
   4780  1.1     ross     return packFloat128( float_rounding_mode() == float_round_down, 0, 0, 0 );
   4781  1.1     ross  bExpBigger:
   4782  1.1     ross     if ( bExp == 0x7FFF ) {
   4783  1.1     ross         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   4784  1.1     ross         return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
   4785  1.1     ross     }
   4786  1.1     ross     if ( aExp == 0 ) {
   4787  1.1     ross         ++expDiff;
   4788  1.1     ross     }
   4789  1.1     ross     else {
   4790  1.1     ross         aSig0 |= LIT64( 0x4000000000000000 );
   4791  1.1     ross     }
   4792  1.1     ross     shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
   4793  1.1     ross     bSig0 |= LIT64( 0x4000000000000000 );
   4794  1.1     ross  bBigger:
   4795  1.1     ross     sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
   4796  1.1     ross     zExp = bExp;
   4797  1.1     ross     zSign ^= 1;
   4798  1.1     ross     goto normalizeRoundAndPack;
   4799  1.1     ross  aExpBigger:
   4800  1.1     ross     if ( aExp == 0x7FFF ) {
   4801  1.1     ross         if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
   4802  1.1     ross         return a;
   4803  1.1     ross     }
   4804  1.1     ross     if ( bExp == 0 ) {
   4805  1.1     ross         --expDiff;
   4806  1.1     ross     }
   4807  1.1     ross     else {
   4808  1.1     ross         bSig0 |= LIT64( 0x4000000000000000 );
   4809  1.1     ross     }
   4810  1.1     ross     shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
   4811  1.1     ross     aSig0 |= LIT64( 0x4000000000000000 );
   4812  1.1     ross  aBigger:
   4813  1.1     ross     sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
   4814  1.1     ross     zExp = aExp;
   4815  1.1     ross  normalizeRoundAndPack:
   4816  1.1     ross     --zExp;
   4817  1.1     ross     return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
   4818  1.1     ross 
   4819  1.1     ross }
   4820  1.1     ross 
   4821  1.1     ross /*
   4822  1.1     ross -------------------------------------------------------------------------------
   4823  1.1     ross Returns the result of adding the quadruple-precision floating-point values
   4824  1.1     ross `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   4825  1.1     ross for Binary Floating-Point Arithmetic.
   4826  1.1     ross -------------------------------------------------------------------------------
   4827  1.1     ross */
   4828  1.1     ross float128 float128_add( float128 a, float128 b )
   4829  1.1     ross {
   4830  1.1     ross     flag aSign, bSign;
   4831  1.1     ross 
   4832  1.1     ross     aSign = extractFloat128Sign( a );
   4833  1.1     ross     bSign = extractFloat128Sign( b );
   4834  1.1     ross     if ( aSign == bSign ) {
   4835  1.1     ross         return addFloat128Sigs( a, b, aSign );
   4836  1.1     ross     }
   4837  1.1     ross     else {
   4838  1.1     ross         return subFloat128Sigs( a, b, aSign );
   4839  1.1     ross     }
   4840  1.1     ross 
   4841  1.1     ross }
   4842  1.1     ross 
   4843  1.1     ross /*
   4844  1.1     ross -------------------------------------------------------------------------------
   4845  1.1     ross Returns the result of subtracting the quadruple-precision floating-point
   4846  1.1     ross values `a' and `b'.  The operation is performed according to the IEC/IEEE
   4847  1.1     ross Standard for Binary Floating-Point Arithmetic.
   4848  1.1     ross -------------------------------------------------------------------------------
   4849  1.1     ross */
   4850  1.1     ross float128 float128_sub( float128 a, float128 b )
   4851  1.1     ross {
   4852  1.1     ross     flag aSign, bSign;
   4853  1.1     ross 
   4854  1.1     ross     aSign = extractFloat128Sign( a );
   4855  1.1     ross     bSign = extractFloat128Sign( b );
   4856  1.1     ross     if ( aSign == bSign ) {
   4857  1.1     ross         return subFloat128Sigs( a, b, aSign );
   4858  1.1     ross     }
   4859  1.1     ross     else {
   4860  1.1     ross         return addFloat128Sigs( a, b, aSign );
   4861  1.1     ross     }
   4862  1.1     ross 
   4863  1.1     ross }
   4864  1.1     ross 
   4865  1.1     ross /*
   4866  1.1     ross -------------------------------------------------------------------------------
   4867  1.1     ross Returns the result of multiplying the quadruple-precision floating-point
   4868  1.1     ross values `a' and `b'.  The operation is performed according to the IEC/IEEE
   4869  1.1     ross Standard for Binary Floating-Point Arithmetic.
   4870  1.1     ross -------------------------------------------------------------------------------
   4871  1.1     ross */
   4872  1.1     ross float128 float128_mul( float128 a, float128 b )
   4873  1.1     ross {
   4874  1.1     ross     flag aSign, bSign, zSign;
   4875  1.1     ross     int32 aExp, bExp, zExp;
   4876  1.1     ross     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
   4877  1.1     ross     float128 z;
   4878  1.1     ross 
   4879  1.1     ross     aSig1 = extractFloat128Frac1( a );
   4880  1.1     ross     aSig0 = extractFloat128Frac0( a );
   4881  1.1     ross     aExp = extractFloat128Exp( a );
   4882  1.1     ross     aSign = extractFloat128Sign( a );
   4883  1.1     ross     bSig1 = extractFloat128Frac1( b );
   4884  1.1     ross     bSig0 = extractFloat128Frac0( b );
   4885  1.1     ross     bExp = extractFloat128Exp( b );
   4886  1.1     ross     bSign = extractFloat128Sign( b );
   4887  1.1     ross     zSign = aSign ^ bSign;
   4888  1.1     ross     if ( aExp == 0x7FFF ) {
   4889  1.1     ross         if (    ( aSig0 | aSig1 )
   4890  1.1     ross              || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
   4891  1.1     ross             return propagateFloat128NaN( a, b );
   4892  1.1     ross         }
   4893  1.1     ross         if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
   4894  1.1     ross         return packFloat128( zSign, 0x7FFF, 0, 0 );
   4895  1.1     ross     }
   4896  1.1     ross     if ( bExp == 0x7FFF ) {
   4897  1.1     ross         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   4898  1.1     ross         if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
   4899  1.1     ross  invalid:
   4900  1.1     ross             float_raise( float_flag_invalid );
   4901  1.1     ross             z.low = float128_default_nan_low;
   4902  1.1     ross             z.high = float128_default_nan_high;
   4903  1.1     ross             return z;
   4904  1.1     ross         }
   4905  1.1     ross         return packFloat128( zSign, 0x7FFF, 0, 0 );
   4906  1.1     ross     }
   4907  1.1     ross     if ( aExp == 0 ) {
   4908  1.1     ross         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
   4909  1.1     ross         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   4910  1.1     ross     }
   4911  1.1     ross     if ( bExp == 0 ) {
   4912  1.1     ross         if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
   4913  1.1     ross         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
   4914  1.1     ross     }
   4915  1.1     ross     zExp = aExp + bExp - 0x4000;
   4916  1.1     ross     aSig0 |= LIT64( 0x0001000000000000 );
   4917  1.1     ross     shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
   4918  1.1     ross     mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
   4919  1.1     ross     add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
   4920  1.1     ross     zSig2 |= ( zSig3 != 0 );
   4921  1.1     ross     if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
   4922  1.1     ross         shift128ExtraRightJamming(
   4923  1.1     ross             zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
   4924  1.1     ross         ++zExp;
   4925  1.1     ross     }
   4926  1.1     ross     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
   4927  1.1     ross 
   4928  1.1     ross }
   4929  1.1     ross 
   4930  1.1     ross /*
   4931  1.1     ross -------------------------------------------------------------------------------
   4932  1.1     ross Returns the result of dividing the quadruple-precision floating-point value
   4933  1.1     ross `a' by the corresponding value `b'.  The operation is performed according to
   4934  1.1     ross the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   4935  1.1     ross -------------------------------------------------------------------------------
   4936  1.1     ross */
   4937  1.1     ross float128 float128_div( float128 a, float128 b )
   4938  1.1     ross {
   4939  1.1     ross     flag aSign, bSign, zSign;
   4940  1.1     ross     int32 aExp, bExp, zExp;
   4941  1.1     ross     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
   4942  1.1     ross     bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
   4943  1.1     ross     float128 z;
   4944  1.1     ross 
   4945  1.1     ross     aSig1 = extractFloat128Frac1( a );
   4946  1.1     ross     aSig0 = extractFloat128Frac0( a );
   4947  1.1     ross     aExp = extractFloat128Exp( a );
   4948  1.1     ross     aSign = extractFloat128Sign( a );
   4949  1.1     ross     bSig1 = extractFloat128Frac1( b );
   4950  1.1     ross     bSig0 = extractFloat128Frac0( b );
   4951  1.1     ross     bExp = extractFloat128Exp( b );
   4952  1.1     ross     bSign = extractFloat128Sign( b );
   4953  1.1     ross     zSign = aSign ^ bSign;
   4954  1.1     ross     if ( aExp == 0x7FFF ) {
   4955  1.1     ross         if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
   4956  1.1     ross         if ( bExp == 0x7FFF ) {
   4957  1.1     ross             if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   4958  1.1     ross             goto invalid;
   4959  1.1     ross         }
   4960  1.1     ross         return packFloat128( zSign, 0x7FFF, 0, 0 );
   4961  1.1     ross     }
   4962  1.1     ross     if ( bExp == 0x7FFF ) {
   4963  1.1     ross         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   4964  1.1     ross         return packFloat128( zSign, 0, 0, 0 );
   4965  1.1     ross     }
   4966  1.1     ross     if ( bExp == 0 ) {
   4967  1.1     ross         if ( ( bSig0 | bSig1 ) == 0 ) {
   4968  1.1     ross             if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
   4969  1.1     ross  invalid:
   4970  1.1     ross                 float_raise( float_flag_invalid );
   4971  1.1     ross                 z.low = float128_default_nan_low;
   4972  1.1     ross                 z.high = float128_default_nan_high;
   4973  1.1     ross                 return z;
   4974  1.1     ross             }
   4975  1.1     ross             float_raise( float_flag_divbyzero );
   4976  1.1     ross             return packFloat128( zSign, 0x7FFF, 0, 0 );
   4977  1.1     ross         }
   4978  1.1     ross         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
   4979  1.1     ross     }
   4980  1.1     ross     if ( aExp == 0 ) {
   4981  1.1     ross         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
   4982  1.1     ross         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   4983  1.1     ross     }
   4984  1.1     ross     zExp = aExp - bExp + 0x3FFD;
   4985  1.1     ross     shortShift128Left(
   4986  1.1     ross         aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
   4987  1.1     ross     shortShift128Left(
   4988  1.1     ross         bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
   4989  1.1     ross     if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
   4990  1.1     ross         shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
   4991  1.1     ross         ++zExp;
   4992  1.1     ross     }
   4993  1.1     ross     zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
   4994  1.1     ross     mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
   4995  1.1     ross     sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
   4996  1.1     ross     while ( (sbits64) rem0 < 0 ) {
   4997  1.1     ross         --zSig0;
   4998  1.1     ross         add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
   4999  1.1     ross     }
   5000  1.1     ross     zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
   5001  1.1     ross     if ( ( zSig1 & 0x3FFF ) <= 4 ) {
   5002  1.1     ross         mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
   5003  1.1     ross         sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
   5004  1.1     ross         while ( (sbits64) rem1 < 0 ) {
   5005  1.1     ross             --zSig1;
   5006  1.1     ross             add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
   5007  1.1     ross         }
   5008  1.1     ross         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
   5009  1.1     ross     }
   5010  1.1     ross     shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
   5011  1.1     ross     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
   5012  1.1     ross 
   5013  1.1     ross }
   5014  1.1     ross 
   5015  1.1     ross /*
   5016  1.1     ross -------------------------------------------------------------------------------
   5017  1.1     ross Returns the remainder of the quadruple-precision floating-point value `a'
   5018  1.1     ross with respect to the corresponding value `b'.  The operation is performed
   5019  1.1     ross according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5020  1.1     ross -------------------------------------------------------------------------------
   5021  1.1     ross */
   5022  1.1     ross float128 float128_rem( float128 a, float128 b )
   5023  1.1     ross {
   5024  1.1     ross     flag aSign, bSign, zSign;
   5025  1.1     ross     int32 aExp, bExp, expDiff;
   5026  1.1     ross     bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
   5027  1.1     ross     bits64 allZero, alternateASig0, alternateASig1, sigMean1;
   5028  1.1     ross     sbits64 sigMean0;
   5029  1.1     ross     float128 z;
   5030  1.1     ross 
   5031  1.1     ross     aSig1 = extractFloat128Frac1( a );
   5032  1.1     ross     aSig0 = extractFloat128Frac0( a );
   5033  1.1     ross     aExp = extractFloat128Exp( a );
   5034  1.1     ross     aSign = extractFloat128Sign( a );
   5035  1.1     ross     bSig1 = extractFloat128Frac1( b );
   5036  1.1     ross     bSig0 = extractFloat128Frac0( b );
   5037  1.1     ross     bExp = extractFloat128Exp( b );
   5038  1.1     ross     bSign = extractFloat128Sign( b );
   5039  1.1     ross     if ( aExp == 0x7FFF ) {
   5040  1.1     ross         if (    ( aSig0 | aSig1 )
   5041  1.1     ross              || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
   5042  1.1     ross             return propagateFloat128NaN( a, b );
   5043  1.1     ross         }
   5044  1.1     ross         goto invalid;
   5045  1.1     ross     }
   5046  1.1     ross     if ( bExp == 0x7FFF ) {
   5047  1.1     ross         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   5048  1.1     ross         return a;
   5049  1.1     ross     }
   5050  1.1     ross     if ( bExp == 0 ) {
   5051  1.1     ross         if ( ( bSig0 | bSig1 ) == 0 ) {
   5052  1.1     ross  invalid:
   5053  1.1     ross             float_raise( float_flag_invalid );
   5054  1.1     ross             z.low = float128_default_nan_low;
   5055  1.1     ross             z.high = float128_default_nan_high;
   5056  1.1     ross             return z;
   5057  1.1     ross         }
   5058  1.1     ross         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
   5059  1.1     ross     }
   5060  1.1     ross     if ( aExp == 0 ) {
   5061  1.1     ross         if ( ( aSig0 | aSig1 ) == 0 ) return a;
   5062  1.1     ross         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   5063  1.1     ross     }
   5064  1.1     ross     expDiff = aExp - bExp;
   5065  1.1     ross     if ( expDiff < -1 ) return a;
   5066  1.1     ross     shortShift128Left(
   5067  1.1     ross         aSig0 | LIT64( 0x0001000000000000 ),
   5068  1.1     ross         aSig1,
   5069  1.1     ross         15 - ( expDiff < 0 ),
   5070  1.1     ross         &aSig0,
   5071  1.1     ross         &aSig1
   5072  1.1     ross     );
   5073  1.1     ross     shortShift128Left(
   5074  1.1     ross         bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
   5075  1.1     ross     q = le128( bSig0, bSig1, aSig0, aSig1 );
   5076  1.1     ross     if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
   5077  1.1     ross     expDiff -= 64;
   5078  1.1     ross     while ( 0 < expDiff ) {
   5079  1.1     ross         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
   5080  1.1     ross         q = ( 4 < q ) ? q - 4 : 0;
   5081  1.1     ross         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
   5082  1.1     ross         shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
   5083  1.1     ross         shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
   5084  1.1     ross         sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
   5085  1.1     ross         expDiff -= 61;
   5086  1.1     ross     }
   5087  1.1     ross     if ( -64 < expDiff ) {
   5088  1.1     ross         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
   5089  1.1     ross         q = ( 4 < q ) ? q - 4 : 0;
   5090  1.1     ross         q >>= - expDiff;
   5091  1.1     ross         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
   5092  1.1     ross         expDiff += 52;
   5093  1.1     ross         if ( expDiff < 0 ) {
   5094  1.1     ross             shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
   5095  1.1     ross         }
   5096  1.1     ross         else {
   5097  1.1     ross             shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
   5098  1.1     ross         }
   5099  1.1     ross         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
   5100  1.1     ross         sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
   5101  1.1     ross     }
   5102  1.1     ross     else {
   5103  1.1     ross         shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
   5104  1.1     ross         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
   5105  1.1     ross     }
   5106  1.1     ross     do {
   5107  1.1     ross         alternateASig0 = aSig0;
   5108  1.1     ross         alternateASig1 = aSig1;
   5109  1.1     ross         ++q;
   5110  1.1     ross         sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
   5111  1.1     ross     } while ( 0 <= (sbits64) aSig0 );
   5112  1.1     ross     add128(
   5113  1.1     ross         aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
   5114  1.1     ross     if (    ( sigMean0 < 0 )
   5115  1.1     ross          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
   5116  1.1     ross         aSig0 = alternateASig0;
   5117  1.1     ross         aSig1 = alternateASig1;
   5118  1.1     ross     }
   5119  1.1     ross     zSign = ( (sbits64) aSig0 < 0 );
   5120  1.1     ross     if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
   5121  1.1     ross     return
   5122  1.1     ross         normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
   5123  1.1     ross 
   5124  1.1     ross }
   5125  1.1     ross 
   5126  1.1     ross /*
   5127  1.1     ross -------------------------------------------------------------------------------
   5128  1.1     ross Returns the square root of the quadruple-precision floating-point value `a'.
   5129  1.1     ross The operation is performed according to the IEC/IEEE Standard for Binary
   5130  1.1     ross Floating-Point Arithmetic.
   5131  1.1     ross -------------------------------------------------------------------------------
   5132  1.1     ross */
   5133  1.1     ross float128 float128_sqrt( float128 a )
   5134  1.1     ross {
                          /*
                           * Special operands are handled first (NaN, infinity, negative values,
                           * zero, subnormals).  The root is then assembled from a 64-bit word
                           * zSig0 and a second word zSig1, each corrected against the remainder,
                           * and the result is rounded by roundAndPackFloat128().
                           */
   5135  1.1     ross     flag aSign;
   5136  1.1     ross     int32 aExp, zExp;
   5137  1.1     ross     bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
   5138  1.1     ross     bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
   5139  1.1     ross     float128 z;
   5140  1.1     ross 
   5141  1.1     ross     aSig1 = extractFloat128Frac1( a );
   5142  1.1     ross     aSig0 = extractFloat128Frac0( a );
   5143  1.1     ross     aExp = extractFloat128Exp( a );
   5144  1.1     ross     aSign = extractFloat128Sign( a );
                          /*
                           * Exponent field 0x7FFF: a non-zero fraction is a NaN and is propagated;
                           * otherwise a positive operand is returned unchanged and a negative one
                           * takes the invalid path below.
                           */
   5145  1.1     ross     if ( aExp == 0x7FFF ) {
   5146  1.1     ross         if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a );
   5147  1.1     ross         if ( ! aSign ) return a;
   5148  1.1     ross         goto invalid;
   5149  1.1     ross     }
                          /*
                           * A negative operand is invalid unless every exponent and fraction bit
                           * is zero (negative zero), in which case it is returned unchanged.
                           */
   5150  1.1     ross     if ( aSign ) {
   5151  1.1     ross         if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
   5152  1.1     ross  invalid:
                              /* Raise the invalid exception and return the default NaN. */
   5153  1.1     ross         float_raise( float_flag_invalid );
   5154  1.1     ross         z.low = float128_default_nan_low;
   5155  1.1     ross         z.high = float128_default_nan_high;
   5156  1.1     ross         return z;
   5157  1.1     ross     }
                          /*
                           * Zero exponent field: an all-zero fraction gives +0; anything else is
                           * passed through normalizeFloat128Subnormal(), which rewrites aExp and
                           * the significand words.
                           */
   5158  1.1     ross     if ( aExp == 0 ) {
   5159  1.1     ross         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
   5160  1.1     ross         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   5161  1.1     ross     }
                          /* Result exponent: half of ( aExp - 0x3FFF ), re-biased with 0x3FFE. */
   5162  1.1     ross     zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
                          /* Set bit 48 of the high word (presumably the implicit leading one). */
   5163  1.1     ross     aSig0 |= LIT64( 0x0001000000000000 );
                          /*
                           * First approximation: a 32-bit estimate from estimateSqrt32(), refined
                           * into the 64-bit zSig0 with one estimateDiv128To64() step.  The
                           * significand is shifted left by 13, or by 12 when aExp is odd.
                           */
   5164  1.1     ross     zSig0 = estimateSqrt32( aExp, aSig0>>17 );
   5165  1.1     ross     shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
   5166  1.1     ross     zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
   5167  1.1     ross     doubleZSig0 = zSig0<<1;
                          /*
                           * rem = a - zSig0 * zSig0.  While rem is negative the estimate is too
                           * large: step zSig0 down by one and add 2 * zSig0 + 1 (the difference
                           * between consecutive squares) back into the remainder.
                           */
   5168  1.1     ross     mul64To128( zSig0, zSig0, &term0, &term1 );
   5169  1.1     ross     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
   5170  1.1     ross     while ( (sbits64) rem0 < 0 ) {
   5171  1.1     ross         --zSig0;
   5172  1.1     ross         doubleZSig0 -= 2;
   5173  1.1     ross         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
   5174  1.1     ross     }
                          /* Estimate the second root word from the remainder and 2 * zSig0. */
   5175  1.1     ross     zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
                          /*
                           * Only when the low 13 bits of the estimate are 5 or less is the exact
                           * remainder computed and zSig1 corrected downward; its low bit is then
                           * set if the final remainder is non-zero.
                           * NOTE(review): presumably the estimate's error is too small for other
                           * values to affect rounding -- confirm against the error bound of
                           * estimateDiv128To64().
                           */
   5176  1.1     ross     if ( ( zSig1 & 0x1FFF ) <= 5 ) {
   5177  1.1     ross         if ( zSig1 == 0 ) zSig1 = 1;
   5178  1.1     ross         mul64To128( doubleZSig0, zSig1, &term1, &term2 );
   5179  1.1     ross         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
   5180  1.1     ross         mul64To128( zSig1, zSig1, &term2, &term3 );
   5181  1.1     ross         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
   5182  1.1     ross         while ( (sbits64) rem1 < 0 ) {
   5183  1.1     ross             --zSig1;
   5184  1.1     ross             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
   5185  1.1     ross             term3 |= 1;
   5186  1.1     ross             term2 |= doubleZSig0;
   5187  1.1     ross             add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
   5188  1.1     ross         }
   5189  1.1     ross         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
   5190  1.1     ross     }
                          /*
                           * Shift the root right by 14 bits, collecting the shifted-out bits in
                           * zSig2, then round and pack with a positive sign.
                           */
   5191  1.1     ross     shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
   5192  1.1     ross     return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 );
   5193  1.1     ross 
   5194  1.1     ross }
   5195  1.1     ross 
   5196  1.1     ross /*
   5197  1.1     ross -------------------------------------------------------------------------------
   5198  1.1     ross Returns 1 if the quadruple-precision floating-point value `a' is equal to
   5199  1.1     ross the corresponding value `b', and 0 otherwise.  The comparison is performed
   5200  1.1     ross according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5201  1.1     ross -------------------------------------------------------------------------------
   5202  1.1     ross */
   5203  1.1     ross flag float128_eq( float128 a, float128 b )
   5204  1.1     ross {
   5205  1.1     ross 
                          /*
                           * If either operand is a NaN (exponent field 0x7FFF with a non-zero
                           * fraction) the result is 0; the invalid exception is raised only when
                           * one of the operands is a signaling NaN.
                           */
   5206  1.1     ross     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5207  1.1     ross               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5208  1.1     ross          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5209  1.1     ross               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5210  1.1     ross        ) {
   5211  1.1     ross         if (    float128_is_signaling_nan( a )
   5212  1.1     ross              || float128_is_signaling_nan( b ) ) {
   5213  1.1     ross             float_raise( float_flag_invalid );
   5214  1.1     ross         }
   5215  1.1     ross         return 0;
   5216  1.1     ross     }
                          /*
                           * Equal when both words match, or when the low words are zero and the
                           * high words are zero once their top bit has been shifted out (the
                           * operands then differ at most in that bit -- presumably +0 versus -0).
                           */
   5217  1.1     ross     return
   5218  1.1     ross            ( a.low == b.low )
   5219  1.1     ross         && (    ( a.high == b.high )
   5220  1.1     ross              || (    ( a.low == 0 )
   5221  1.1     ross                   && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
   5222  1.1     ross            );
   5223  1.1     ross 
   5224  1.1     ross }
   5225  1.1     ross 
   5226  1.1     ross /*
   5227  1.1     ross -------------------------------------------------------------------------------
   5228  1.1     ross Returns 1 if the quadruple-precision floating-point value `a' is less than
   5229  1.1     ross or equal to the corresponding value `b', and 0 otherwise.  The comparison
   5230  1.1     ross is performed according to the IEC/IEEE Standard for Binary Floating-Point
   5231  1.1     ross Arithmetic.
   5232  1.1     ross -------------------------------------------------------------------------------
   5233  1.1     ross */
   5234  1.1     ross flag float128_le( float128 a, float128 b )
   5235  1.1     ross {
   5236  1.1     ross     flag aSign, bSign;
   5237  1.1     ross 
                          /*
                           * Any NaN operand (exponent field 0x7FFF with a non-zero fraction)
                           * raises the invalid exception and makes the result 0.
                           */
   5238  1.1     ross     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5239  1.1     ross               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5240  1.1     ross          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5241  1.1     ross               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5242  1.1     ross        ) {
   5243  1.1     ross         float_raise( float_flag_invalid );
   5244  1.1     ross         return 0;
   5245  1.1     ross     }
   5246  1.1     ross     aSign = extractFloat128Sign( a );
   5247  1.1     ross     bSign = extractFloat128Sign( b );
                          /*
                           * Signs differ: a <= b if a is the negative operand, or if every bit
                           * other than the top bit of the high words is clear in both operands
                           * (both are zeros).
                           */
   5248  1.1     ross     if ( aSign != bSign ) {
   5249  1.1     ross         return
   5250  1.1     ross                aSign
   5251  1.1     ross             || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   5252  1.1     ross                  == 0 );
   5253  1.1     ross     }
                          /*
                           * Same sign: compare the raw 128-bit words with le128(), with the
                           * operands swapped when both are negative.
                           */
   5254  1.1     ross     return
   5255  1.1     ross           aSign ? le128( b.high, b.low, a.high, a.low )
   5256  1.1     ross         : le128( a.high, a.low, b.high, b.low );
   5257  1.1     ross 
   5258  1.1     ross }
   5259  1.1     ross 
   5260  1.1     ross /*
   5261  1.1     ross -------------------------------------------------------------------------------
   5262  1.1     ross Returns 1 if the quadruple-precision floating-point value `a' is less than
   5263  1.1     ross the corresponding value `b', and 0 otherwise.  The comparison is performed
   5264  1.1     ross according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5265  1.1     ross -------------------------------------------------------------------------------
   5266  1.1     ross */
   5267  1.1     ross flag float128_lt( float128 a, float128 b )
   5268  1.1     ross {
   5269  1.1     ross     flag aSign, bSign;
   5270  1.1     ross 
                          /*
                           * Any NaN operand (exponent field 0x7FFF with a non-zero fraction)
                           * raises the invalid exception and makes the result 0.
                           */
   5271  1.1     ross     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5272  1.1     ross               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5273  1.1     ross          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5274  1.1     ross               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5275  1.1     ross        ) {
   5276  1.1     ross         float_raise( float_flag_invalid );
   5277  1.1     ross         return 0;
   5278  1.1     ross     }
   5279  1.1     ross     aSign = extractFloat128Sign( a );
   5280  1.1     ross     bSign = extractFloat128Sign( b );
                          /*
                           * Signs differ: a < b only if a is the negative operand and the two are
                           * not both zeros (some bit other than the top bit of the high words is
                           * set in at least one operand).
                           */
   5281  1.1     ross     if ( aSign != bSign ) {
   5282  1.1     ross         return
   5283  1.1     ross                aSign
   5284  1.1     ross             && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   5285  1.1     ross                  != 0 );
   5286  1.1     ross     }
                          /*
                           * Same sign: compare the raw 128-bit words with lt128(), with the
                           * operands swapped when both are negative.
                           */
   5287  1.1     ross     return
   5288  1.1     ross           aSign ? lt128( b.high, b.low, a.high, a.low )
   5289  1.1     ross         : lt128( a.high, a.low, b.high, b.low );
   5290  1.1     ross 
   5291  1.1     ross }
   5292  1.1     ross 
   5293  1.1     ross /*
   5294  1.1     ross -------------------------------------------------------------------------------
   5295  1.1     ross Returns 1 if the quadruple-precision floating-point value `a' is equal to
   5296  1.1     ross the corresponding value `b', and 0 otherwise.  The invalid exception is
   5297  1.1     ross raised if either operand is a NaN.  Otherwise, the comparison is performed
   5298  1.1     ross according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5299  1.1     ross -------------------------------------------------------------------------------
   5300  1.1     ross */
   5301  1.1     ross flag float128_eq_signaling( float128 a, float128 b )
   5302  1.1     ross {
   5303  1.1     ross 
                          /*
                           * Unlike float128_eq(), any NaN operand (quiet or signaling) raises the
                           * invalid exception here; the result is then 0.
                           */
   5304  1.1     ross     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5305  1.1     ross               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5306  1.1     ross          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5307  1.1     ross               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5308  1.1     ross        ) {
   5309  1.1     ross         float_raise( float_flag_invalid );
   5310  1.1     ross         return 0;
   5311  1.1     ross     }
                          /*
                           * Equal when both words match, or when the low words are zero and the
                           * high words are zero once their top bit has been shifted out (the
                           * operands then differ at most in that bit -- presumably +0 versus -0).
                           */
   5312  1.1     ross     return
   5313  1.1     ross            ( a.low == b.low )
   5314  1.1     ross         && (    ( a.high == b.high )
   5315  1.1     ross              || (    ( a.low == 0 )
   5316  1.1     ross                   && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
   5317  1.1     ross            );
   5318  1.1     ross 
   5319  1.1     ross }
   5320  1.1     ross 
   5321  1.1     ross /*
   5322  1.1     ross -------------------------------------------------------------------------------
   5323  1.1     ross Returns 1 if the quadruple-precision floating-point value `a' is less than
   5324  1.1     ross or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
   5325  1.1     ross cause an exception.  Otherwise, the comparison is performed according to the
   5326  1.1     ross IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5327  1.1     ross -------------------------------------------------------------------------------
   5328  1.1     ross */
   5329  1.1     ross flag float128_le_quiet( float128 a, float128 b )
   5330  1.1     ross {
   5331  1.1     ross     flag aSign, bSign;
   5332  1.1     ross 
                          /*
                           * If either operand is a NaN (exponent field 0x7FFF with a non-zero
                           * fraction) the result is 0; the invalid exception is raised only when
                           * one of the operands is a signaling NaN.
                           */
   5333  1.1     ross     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5334  1.1     ross               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5335  1.1     ross          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5336  1.1     ross               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5337  1.1     ross        ) {
   5338  1.1     ross         if (    float128_is_signaling_nan( a )
   5339  1.1     ross              || float128_is_signaling_nan( b ) ) {
   5340  1.1     ross             float_raise( float_flag_invalid );
   5341  1.1     ross         }
   5342  1.1     ross         return 0;
   5343  1.1     ross     }
   5344  1.1     ross     aSign = extractFloat128Sign( a );
   5345  1.1     ross     bSign = extractFloat128Sign( b );
                          /*
                           * Signs differ: a <= b if a is the negative operand, or if every bit
                           * other than the top bit of the high words is clear in both operands
                           * (both are zeros).
                           */
   5346  1.1     ross     if ( aSign != bSign ) {
   5347  1.1     ross         return
   5348  1.1     ross                aSign
   5349  1.1     ross             || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   5350  1.1     ross                  == 0 );
   5351  1.1     ross     }
                          /*
                           * Same sign: compare the raw 128-bit words with le128(), with the
                           * operands swapped when both are negative.
                           */
   5352  1.1     ross     return
   5353  1.1     ross           aSign ? le128( b.high, b.low, a.high, a.low )
   5354  1.1     ross         : le128( a.high, a.low, b.high, b.low );
   5355  1.1     ross 
   5356  1.1     ross }
   5357  1.1     ross 
   5358  1.1     ross /*
   5359  1.1     ross -------------------------------------------------------------------------------
   5360  1.1     ross Returns 1 if the quadruple-precision floating-point value `a' is less than
   5361  1.1     ross the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
   5362  1.1     ross exception.  Otherwise, the comparison is performed according to the IEC/IEEE
   5363  1.1     ross Standard for Binary Floating-Point Arithmetic.
   5364  1.1     ross -------------------------------------------------------------------------------
   5365  1.1     ross */
   5366  1.1     ross flag float128_lt_quiet( float128 a, float128 b )
   5367  1.1     ross {
   5368  1.1     ross     flag aSign, bSign;
   5369  1.1     ross 
                          /*
                           * If either operand is a NaN (exponent field 0x7FFF with a non-zero
                           * fraction) the result is 0; the invalid exception is raised only when
                           * one of the operands is a signaling NaN.
                           */
   5370  1.1     ross     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5371  1.1     ross               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5372  1.1     ross          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5373  1.1     ross               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5374  1.1     ross        ) {
   5375  1.1     ross         if (    float128_is_signaling_nan( a )
   5376  1.1     ross              || float128_is_signaling_nan( b ) ) {
   5377  1.1     ross             float_raise( float_flag_invalid );
   5378  1.1     ross         }
   5379  1.1     ross         return 0;
   5380  1.1     ross     }
   5381  1.1     ross     aSign = extractFloat128Sign( a );
   5382  1.1     ross     bSign = extractFloat128Sign( b );
                          /*
                           * Signs differ: a < b only if a is the negative operand and the two are
                           * not both zeros (some bit other than the top bit of the high words is
                           * set in at least one operand).
                           */
   5383  1.1     ross     if ( aSign != bSign ) {
   5384  1.1     ross         return
   5385  1.1     ross                aSign
   5386  1.1     ross             && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   5387  1.1     ross                  != 0 );
   5388  1.1     ross     }
                          /*
                           * Same sign: compare the raw 128-bit words with lt128(), with the
                           * operands swapped when both are negative.
                           */
   5389  1.1     ross     return
   5390  1.1     ross           aSign ? lt128( b.high, b.low, a.high, a.low )
   5391  1.1     ross         : lt128( a.high, a.low, b.high, b.low );
   5392  1.1     ross 
   5393  1.1     ross }
   5394  1.1     ross 
   5395  1.1     ross #endif
   5396  1.1     ross 
   5397  1.1     ross 
   5398  1.1     ross #if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS)
   5399  1.1     ross 
   5400  1.1     ross /*
   5401  1.1     ross  * These two routines are not part of the original softfloat distribution.
   5402  1.1     ross  *
   5403  1.1     ross  * They are based on the corresponding conversions to integer but return
   5404  1.1     ross  * unsigned numbers instead since these functions are required by GCC.
   5405  1.1     ross  *
   5406  1.1     ross  * Added by Mark Brinicombe <mark (at) netbsd.org>	27/09/97
   5407  1.1     ross  *
   5408  1.1     ross  * float64 version overhauled for SoftFloat 2a [bjh21 2000-07-15]
   5409  1.1     ross  */
   5410  1.1     ross 
   5411  1.1     ross /*
   5412  1.1     ross -------------------------------------------------------------------------------
   5413  1.1     ross Returns the result of converting the double-precision floating-point value
   5414  1.1     ross `a' to the 32-bit unsigned integer format.  The conversion is
   5415  1.1     ross performed according to the IEC/IEEE Standard for Binary Floating-point
   5416  1.1     ross Arithmetic, except that the conversion is always rounded toward zero.  If
   5417  1.1     ross `a' has its sign bit set, the invalid exception is raised and 0 is returned.
   5418  1.1     ross Otherwise, a NaN or an overflowing value yields the largest positive integer.
   5419  1.1     ross -------------------------------------------------------------------------------
   5420  1.1     ross */
   5421  1.1     ross uint32 float64_to_uint32_round_to_zero( float64 a )
   5422  1.1     ross {
   5423  1.1     ross     flag aSign;
   5424  1.1     ross     int16 aExp, shiftCount;
   5425  1.1     ross     bits64 aSig, savedASig;
   5426  1.1     ross     uint32 z;
   5427  1.1     ross 
   5428  1.1     ross     aSig = extractFloat64Frac( a );
   5429  1.1     ross     aExp = extractFloat64Exp( a );
   5430  1.1     ross     aSign = extractFloat64Sign( a );
   5431  1.1     ross 
                          /*
                           * Any operand with the sign set is rejected before its magnitude is
                           * examined: invalid is raised and 0 is returned.
                           * NOTE(review): this includes negative zero and values between -1 and
                           * 0, which would otherwise truncate to 0 -- confirm this is intended.
                           */
   5432  1.1     ross     if (aSign) {
   5433  1.1     ross         float_raise( float_flag_invalid );
   5434  1.1     ross     	return(0);
   5435  1.1     ross     }
   5436  1.1     ross 
                          /*
                           * Exponent field above 0x41E (0x3FF + 31): the value does not fit in 32
                           * bits (this also covers the all-ones exponent of infinities and NaNs).
                           */
   5437  1.1     ross     if ( 0x41E < aExp ) {
   5438  1.1     ross         float_raise( float_flag_invalid );
   5439  1.1     ross         return 0xffffffff;
   5440  1.1     ross     }
                          /*
                           * Exponent field below 0x3FF: the magnitude is less than 1, so the
                           * result is 0, flagged inexact unless the operand is itself zero.
                           */
   5441  1.1     ross     else if ( aExp < 0x3FF ) {
   5442  1.1     ross         if ( aExp || aSig ) float_set_inexact();
   5443  1.1     ross         return 0;
   5444  1.1     ross     }
                          /* Set bit 52 (presumably the implicit leading one of the significand). */
   5445  1.1     ross     aSig |= LIT64( 0x0010000000000000 );
                          /* 0x433 = 0x3FF + 52; with aExp in 0x3FF..0x41E the shift is 21..52. */
   5446  1.1     ross     shiftCount = 0x433 - aExp;
   5447  1.1     ross     savedASig = aSig;
   5448  1.1     ross     aSig >>= shiftCount;
   5449  1.1     ross     z = aSig;
                          /* Inexact if shifting back does not reproduce the original bits. */
   5450  1.1     ross     if ( ( aSig<<shiftCount ) != savedASig ) {
   5451  1.1     ross         float_set_inexact();
   5452  1.1     ross     }
   5453  1.1     ross     return z;
   5454  1.1     ross 
   5455  1.1     ross }
   5456  1.1     ross 
   5457  1.1     ross /*
   5458  1.1     ross -------------------------------------------------------------------------------
   5459  1.1     ross Returns the result of converting the single-precision floating-point value
   5460  1.1     ross `a' to the 32-bit unsigned integer format.  The conversion is
   5461  1.1     ross performed according to the IEC/IEEE Standard for Binary Floating-point
   5462  1.1     ross Arithmetic, except that the conversion is always rounded toward zero.  If
   5463  1.1     ross `a' has its sign bit set, the invalid exception is raised and 0 is returned.
   5464  1.1     ross Otherwise, a NaN or an overflowing value yields the largest positive integer.
   5465  1.1     ross -------------------------------------------------------------------------------
   5466  1.1     ross */
   5467  1.1     ross uint32 float32_to_uint32_round_to_zero( float32 a )
   5468  1.1     ross {
   5469  1.1     ross     flag aSign;
   5470  1.1     ross     int16 aExp, shiftCount;
   5471  1.1     ross     bits32 aSig;
   5472  1.1     ross     uint32 z;
   5473  1.1     ross 
   5474  1.1     ross     aSig = extractFloat32Frac( a );
   5475  1.1     ross     aExp = extractFloat32Exp( a );
   5476  1.1     ross     aSign = extractFloat32Sign( a );
   5477  1.1     ross     shiftCount = aExp - 0x9E;
   5478  1.1     ross 
   5479  1.1     ross     if (aSign) {
   5480  1.1     ross         float_raise( float_flag_invalid );
   5481  1.1     ross     	return(0);
   5482  1.1     ross     }
   5483  1.1     ross     if ( 0 < shiftCount ) {
   5484  1.1     ross         float_raise( float_flag_invalid );
   5485  1.1     ross         return 0xFFFFFFFF;
   5486  1.1     ross     }
   5487  1.1     ross     else if ( aExp <= 0x7E ) {
   5488  1.1     ross         if ( aExp | aSig ) float_set_inexact();
   5489  1.1     ross         return 0;
   5490  1.1     ross     }
   5491  1.1     ross     aSig = ( aSig | 0x800000 )<<8;
   5492  1.1     ross     z = aSig>>( - shiftCount );
   5493  1.1     ross     if ( aSig<<( shiftCount & 31 ) ) {
   5494  1.1     ross         float_set_inexact();
   5495  1.1     ross     }
   5496  1.1     ross     return z;
   5497  1.1     ross 
   5498  1.1     ross }
   5499  1.1     ross 
   5500  1.1     ross #endif
   5501  1.2  thorpej 
   5502  1.2  thorpej #endif /* _STANDALONE */
   5503