Home | History | Annotate | Line # | Download | only in bits64
      1  1.16       nat /* $NetBSD: softfloat.c,v 1.16 2025/09/17 11:37:38 nat Exp $ */
      2   1.1     bjh21 
      3   1.1     bjh21 /*
      4   1.1     bjh21  * This version hacked for use with gcc -msoft-float by bjh21.
      5   1.1     bjh21  * (Mostly a case of #ifdefing out things GCC doesn't need or provides
      6   1.1     bjh21  *  itself).
      7   1.1     bjh21  */
      8   1.1     bjh21 
      9   1.1     bjh21 /*
     10   1.1     bjh21  * Things you may want to define:
     11   1.1     bjh21  *
     12   1.1     bjh21  * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
     13   1.1     bjh21  *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
     14   1.1     bjh21  *   properly renamed.
     15   1.1     bjh21  */
     16   1.1     bjh21 
     17   1.1     bjh21 /*
     18   1.1     bjh21 ===============================================================================
     19   1.1     bjh21 
     20   1.1     bjh21 This C source file is part of the SoftFloat IEC/IEEE Floating-point
     21   1.1     bjh21 Arithmetic Package, Release 2a.
     22   1.1     bjh21 
     23   1.1     bjh21 Written by John R. Hauser.  This work was made possible in part by the
     24   1.1     bjh21 International Computer Science Institute, located at Suite 600, 1947 Center
     25   1.1     bjh21 Street, Berkeley, California 94704.  Funding was partially provided by the
     26   1.1     bjh21 National Science Foundation under grant MIP-9311980.  The original version
     27   1.1     bjh21 of this code was written as part of a project to build a fixed-point vector
     28   1.1     bjh21 processor in collaboration with the University of California at Berkeley,
     29   1.1     bjh21 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
     30   1.1     bjh21 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
     31   1.1     bjh21 arithmetic/SoftFloat.html'.
     32   1.1     bjh21 
     33   1.1     bjh21 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
     34   1.1     bjh21 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
     35   1.1     bjh21 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
     36   1.1     bjh21 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
     37   1.1     bjh21 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
     38   1.1     bjh21 
     39   1.1     bjh21 Derivative works are acceptable, even for commercial purposes, so long as
     40   1.1     bjh21 (1) they include prominent notice that the work is derivative, and (2) they
     41   1.1     bjh21 include prominent notice akin to these four paragraphs for those parts of
     42   1.1     bjh21 this code that are retained.
     43   1.1     bjh21 
     44   1.1     bjh21 ===============================================================================
     45   1.1     bjh21 */
     46   1.1     bjh21 
     47   1.1     bjh21 #include <sys/cdefs.h>
     48   1.1     bjh21 #if defined(LIBC_SCCS) && !defined(lint)
     49  1.16       nat __RCSID("$NetBSD: softfloat.c,v 1.16 2025/09/17 11:37:38 nat Exp $");
     50   1.1     bjh21 #endif /* LIBC_SCCS and not lint */
     51   1.1     bjh21 
     52   1.1     bjh21 #ifdef SOFTFLOAT_FOR_GCC
     53   1.1     bjh21 #include "softfloat-for-gcc.h"
     54   1.1     bjh21 #endif
     55   1.1     bjh21 
     56   1.1     bjh21 #include "milieu.h"
     57   1.1     bjh21 #include "softfloat.h"
     58   1.1     bjh21 
     59   1.1     bjh21 /*
     60   1.1     bjh21  * Conversions between floats as stored in memory and floats as
     61   1.1     bjh21  * SoftFloat uses them
     62   1.1     bjh21  */
     63   1.1     bjh21 #ifndef FLOAT64_DEMANGLE
     64   1.1     bjh21 #define FLOAT64_DEMANGLE(a)	(a)
     65   1.1     bjh21 #endif
     66   1.1     bjh21 #ifndef FLOAT64_MANGLE
     67   1.1     bjh21 #define FLOAT64_MANGLE(a)	(a)
     68   1.1     bjh21 #endif
     69   1.1     bjh21 
     70  1.16       nat #ifndef X80SHIFT
     71  1.16       nat #define X80SHIFT		0
     72  1.16       nat #endif
     73   1.1     bjh21 /*
     74   1.1     bjh21 -------------------------------------------------------------------------------
     75   1.1     bjh21 Floating-point rounding mode, extended double-precision rounding precision,
     76   1.1     bjh21 and exception flags.
     77   1.1     bjh21 -------------------------------------------------------------------------------
     78   1.1     bjh21 */
     79  1.12      matt #ifndef set_float_rounding_mode
     80   1.1     bjh21 fp_rnd float_rounding_mode = float_round_nearest_even;
     81   1.1     bjh21 fp_except float_exception_flags = 0;
     82  1.12      matt #endif
     83  1.12      matt #ifndef set_float_exception_inexact_flag
     84  1.12      matt #define	set_float_exception_inexact_flag() \
     85  1.12      matt 	((void)(float_exception_flags |= float_flag_inexact))
     86  1.12      matt #endif
     87   1.1     bjh21 #ifdef FLOATX80
     88   1.1     bjh21 int8 floatx80_rounding_precision = 80;
     89   1.1     bjh21 #endif
     90   1.1     bjh21 
     91   1.1     bjh21 /*
     92   1.1     bjh21 -------------------------------------------------------------------------------
     93   1.1     bjh21 Primitive arithmetic functions, including multi-word arithmetic, and
     94   1.1     bjh21 division and square root approximations.  (Can be specialized to target if
     95   1.1     bjh21 desired.)
     96   1.1     bjh21 -------------------------------------------------------------------------------
     97   1.1     bjh21 */
     98   1.1     bjh21 #include "softfloat-macros"
     99   1.1     bjh21 
    100   1.1     bjh21 /*
    101   1.1     bjh21 -------------------------------------------------------------------------------
    102   1.1     bjh21 Functions and definitions to determine:  (1) whether tininess for underflow
    103   1.1     bjh21 is detected before or after rounding by default, (2) what (if anything)
    104   1.1     bjh21 happens when exceptions are raised, (3) how signaling NaNs are distinguished
    105   1.1     bjh21 from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
    106   1.1     bjh21 are propagated from function inputs to output.  These details are target-
    107   1.1     bjh21 specific.
    108   1.1     bjh21 -------------------------------------------------------------------------------
    109   1.1     bjh21 */
    110   1.1     bjh21 #include "softfloat-specialize"
    111   1.1     bjh21 
    112   1.1     bjh21 #if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128)
    113   1.1     bjh21 /*
    114   1.1     bjh21 -------------------------------------------------------------------------------
    115   1.1     bjh21 Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
    116   1.1     bjh21 and 7, and returns the properly rounded 32-bit integer corresponding to the
    117   1.1     bjh21 input.  If `zSign' is 1, the input is negated before being converted to an
    118   1.1     bjh21 integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
    119   1.1     bjh21 is simply rounded to an integer, with the inexact exception raised if the
    120   1.1     bjh21 input cannot be represented exactly as an integer.  However, if the fixed-
    121   1.1     bjh21 point input is too large, the invalid exception is raised and the largest
    122   1.1     bjh21 positive or negative integer is returned.
    123   1.1     bjh21 -------------------------------------------------------------------------------
    124   1.1     bjh21 */
    125   1.1     bjh21 static int32 roundAndPackInt32( flag zSign, bits64 absZ )
    126   1.1     bjh21 {
    127   1.1     bjh21     int8 roundingMode;
    128   1.1     bjh21     flag roundNearestEven;
    129   1.1     bjh21     int8 roundIncrement, roundBits;
    130   1.1     bjh21     int32 z;
    131   1.1     bjh21 
    132   1.1     bjh21     roundingMode = float_rounding_mode;
    133   1.1     bjh21     roundNearestEven = ( roundingMode == float_round_nearest_even );
    134   1.1     bjh21     roundIncrement = 0x40;
    135   1.1     bjh21     if ( ! roundNearestEven ) {
    136   1.1     bjh21         if ( roundingMode == float_round_to_zero ) {
    137   1.1     bjh21             roundIncrement = 0;
    138   1.1     bjh21         }
    139   1.1     bjh21         else {
    140   1.1     bjh21             roundIncrement = 0x7F;
    141   1.1     bjh21             if ( zSign ) {
    142   1.1     bjh21                 if ( roundingMode == float_round_up ) roundIncrement = 0;
    143   1.1     bjh21             }
    144   1.1     bjh21             else {
    145   1.1     bjh21                 if ( roundingMode == float_round_down ) roundIncrement = 0;
    146   1.1     bjh21             }
    147   1.1     bjh21         }
    148   1.1     bjh21     }
    149  1.10  christos     roundBits = (int8)(absZ & 0x7F);
    150   1.1     bjh21     absZ = ( absZ + roundIncrement )>>7;
    151   1.1     bjh21     absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
    152  1.10  christos     z = (int32)absZ;
    153   1.1     bjh21     if ( zSign ) z = - z;
    154   1.1     bjh21     if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
    155   1.1     bjh21         float_raise( float_flag_invalid );
    156   1.1     bjh21         return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
    157   1.1     bjh21     }
    158  1.12      matt     if ( roundBits ) set_float_exception_inexact_flag();
    159   1.1     bjh21     return z;
    160   1.1     bjh21 
    161   1.1     bjh21 }
    162   1.1     bjh21 
    163   1.1     bjh21 /*
    164   1.1     bjh21 -------------------------------------------------------------------------------
    165   1.1     bjh21 Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
    166   1.1     bjh21 `absZ1', with binary point between bits 63 and 64 (between the input words),
    167   1.1     bjh21 and returns the properly rounded 64-bit integer corresponding to the input.
    168   1.1     bjh21 If `zSign' is 1, the input is negated before being converted to an integer.
    169   1.1     bjh21 Ordinarily, the fixed-point input is simply rounded to an integer, with
    170   1.1     bjh21 the inexact exception raised if the input cannot be represented exactly as
    171   1.1     bjh21 an integer.  However, if the fixed-point input is too large, the invalid
    172   1.1     bjh21 exception is raised and the largest positive or negative integer is
    173   1.1     bjh21 returned.
    174   1.1     bjh21 -------------------------------------------------------------------------------
    175   1.1     bjh21 */
    176   1.1     bjh21 static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 )
    177   1.1     bjh21 {
    178   1.1     bjh21     int8 roundingMode;
    179   1.1     bjh21     flag roundNearestEven, increment;
    180   1.1     bjh21     int64 z;
    181   1.1     bjh21 
    182   1.1     bjh21     roundingMode = float_rounding_mode;
    183   1.1     bjh21     roundNearestEven = ( roundingMode == float_round_nearest_even );
    184   1.1     bjh21     increment = ( (sbits64) absZ1 < 0 );
    185   1.1     bjh21     if ( ! roundNearestEven ) {
    186   1.1     bjh21         if ( roundingMode == float_round_to_zero ) {
    187   1.1     bjh21             increment = 0;
    188   1.1     bjh21         }
    189   1.1     bjh21         else {
    190   1.1     bjh21             if ( zSign ) {
    191   1.1     bjh21                 increment = ( roundingMode == float_round_down ) && absZ1;
    192   1.1     bjh21             }
    193   1.1     bjh21             else {
    194   1.1     bjh21                 increment = ( roundingMode == float_round_up ) && absZ1;
    195   1.1     bjh21             }
    196   1.1     bjh21         }
    197   1.1     bjh21     }
    198   1.1     bjh21     if ( increment ) {
    199   1.1     bjh21         ++absZ0;
    200   1.1     bjh21         if ( absZ0 == 0 ) goto overflow;
    201   1.1     bjh21         absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
    202   1.1     bjh21     }
    203   1.1     bjh21     z = absZ0;
    204   1.1     bjh21     if ( zSign ) z = - z;
    205   1.1     bjh21     if ( z && ( ( z < 0 ) ^ zSign ) ) {
    206   1.1     bjh21  overflow:
    207   1.1     bjh21         float_raise( float_flag_invalid );
    208   1.1     bjh21         return
    209   1.1     bjh21               zSign ? (sbits64) LIT64( 0x8000000000000000 )
    210   1.1     bjh21             : LIT64( 0x7FFFFFFFFFFFFFFF );
    211   1.1     bjh21     }
    212  1.12      matt     if ( absZ1 ) set_float_exception_inexact_flag();
    213   1.1     bjh21     return z;
    214   1.1     bjh21 
    215   1.1     bjh21 }
    216   1.1     bjh21 #endif
    217   1.1     bjh21 
    218   1.1     bjh21 /*
    219   1.1     bjh21 -------------------------------------------------------------------------------
    220   1.1     bjh21 Returns the fraction bits of the single-precision floating-point value `a'.
    221   1.1     bjh21 -------------------------------------------------------------------------------
    222   1.1     bjh21 */
    223   1.1     bjh21 INLINE bits32 extractFloat32Frac( float32 a )
    224   1.1     bjh21 {
    225   1.1     bjh21 
    226   1.1     bjh21     return a & 0x007FFFFF;
    227   1.1     bjh21 
    228   1.1     bjh21 }
    229   1.1     bjh21 
    230   1.1     bjh21 /*
    231   1.1     bjh21 -------------------------------------------------------------------------------
    232   1.1     bjh21 Returns the exponent bits of the single-precision floating-point value `a'.
    233   1.1     bjh21 -------------------------------------------------------------------------------
    234   1.1     bjh21 */
    235   1.1     bjh21 INLINE int16 extractFloat32Exp( float32 a )
    236   1.1     bjh21 {
    237   1.1     bjh21 
    238   1.1     bjh21     return ( a>>23 ) & 0xFF;
    239   1.1     bjh21 
    240   1.1     bjh21 }
    241   1.1     bjh21 
    242   1.1     bjh21 /*
    243   1.1     bjh21 -------------------------------------------------------------------------------
    244   1.1     bjh21 Returns the sign bit of the single-precision floating-point value `a'.
    245   1.1     bjh21 -------------------------------------------------------------------------------
    246   1.1     bjh21 */
    247   1.1     bjh21 INLINE flag extractFloat32Sign( float32 a )
    248   1.1     bjh21 {
    249   1.1     bjh21 
    250   1.1     bjh21     return a>>31;
    251   1.1     bjh21 
    252   1.1     bjh21 }
    253   1.1     bjh21 
    254   1.1     bjh21 /*
    255   1.1     bjh21 -------------------------------------------------------------------------------
    256   1.1     bjh21 Normalizes the subnormal single-precision floating-point value represented
    257   1.1     bjh21 by the denormalized significand `aSig'.  The normalized exponent and
    258   1.1     bjh21 significand are stored at the locations pointed to by `zExpPtr' and
    259   1.1     bjh21 `zSigPtr', respectively.
    260   1.1     bjh21 -------------------------------------------------------------------------------
    261   1.1     bjh21 */
    262   1.1     bjh21 static void
    263   1.1     bjh21  normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
    264   1.1     bjh21 {
    265   1.1     bjh21     int8 shiftCount;
    266   1.1     bjh21 
    267   1.1     bjh21     shiftCount = countLeadingZeros32( aSig ) - 8;
    268   1.1     bjh21     *zSigPtr = aSig<<shiftCount;
    269   1.1     bjh21     *zExpPtr = 1 - shiftCount;
    270   1.1     bjh21 
    271   1.1     bjh21 }
    272   1.1     bjh21 
    273   1.1     bjh21 /*
    274   1.1     bjh21 -------------------------------------------------------------------------------
    275   1.1     bjh21 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
    276   1.1     bjh21 single-precision floating-point value, returning the result.  After being
    277   1.1     bjh21 shifted into the proper positions, the three fields are simply added
    278   1.1     bjh21 together to form the result.  This means that any integer portion of `zSig'
    279   1.1     bjh21 will be added into the exponent.  Since a properly normalized significand
    280   1.1     bjh21 will have an integer portion equal to 1, the `zExp' input should be 1 less
    281   1.1     bjh21 than the desired result exponent whenever `zSig' is a complete, normalized
    282   1.1     bjh21 significand.
    283   1.1     bjh21 -------------------------------------------------------------------------------
    284   1.1     bjh21 */
    285   1.1     bjh21 INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
    286   1.1     bjh21 {
    287   1.1     bjh21 
    288   1.1     bjh21     return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
    289   1.1     bjh21 
    290   1.1     bjh21 }
    291   1.1     bjh21 
    292   1.1     bjh21 /*
    293   1.1     bjh21 -------------------------------------------------------------------------------
    294   1.1     bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    295   1.1     bjh21 and significand `zSig', and returns the proper single-precision floating-
    296   1.1     bjh21 point value corresponding to the abstract input.  Ordinarily, the abstract
    297   1.1     bjh21 value is simply rounded and packed into the single-precision format, with
    298   1.1     bjh21 the inexact exception raised if the abstract input cannot be represented
    299   1.1     bjh21 exactly.  However, if the abstract value is too large, the overflow and
    300   1.1     bjh21 inexact exceptions are raised and an infinity or maximal finite value is
    301   1.1     bjh21 returned.  If the abstract value is too small, the input value is rounded to
    302   1.1     bjh21 a subnormal number, and the underflow and inexact exceptions are raised if
    303   1.1     bjh21 the abstract input cannot be represented exactly as a subnormal single-
    304   1.1     bjh21 precision floating-point number.
    305   1.1     bjh21     The input significand `zSig' has its binary point between bits 30
    306   1.1     bjh21 and 29, which is 7 bits to the left of the usual location.  This shifted
    307   1.1     bjh21 significand must be normalized or smaller.  If `zSig' is not normalized,
    308   1.1     bjh21 `zExp' must be 0; in that case, the result returned is a subnormal number,
    309   1.1     bjh21 and it must not require rounding.  In the usual case that `zSig' is
    310   1.1     bjh21 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
    311   1.1     bjh21 The handling of underflow and overflow follows the IEC/IEEE Standard for
    312   1.1     bjh21 Binary Floating-Point Arithmetic.
    313   1.1     bjh21 -------------------------------------------------------------------------------
    314   1.1     bjh21 */
    315   1.1     bjh21 static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
    316   1.1     bjh21 {
    317   1.1     bjh21     int8 roundingMode;
    318   1.1     bjh21     flag roundNearestEven;
    319   1.1     bjh21     int8 roundIncrement, roundBits;
    320   1.1     bjh21     flag isTiny;
    321   1.1     bjh21 
    322   1.1     bjh21     roundingMode = float_rounding_mode;
    323   1.1     bjh21     roundNearestEven = ( roundingMode == float_round_nearest_even );
    324   1.1     bjh21     roundIncrement = 0x40;
    325   1.1     bjh21     if ( ! roundNearestEven ) {
    326   1.1     bjh21         if ( roundingMode == float_round_to_zero ) {
    327   1.1     bjh21             roundIncrement = 0;
    328   1.1     bjh21         }
    329   1.1     bjh21         else {
    330   1.1     bjh21             roundIncrement = 0x7F;
    331   1.1     bjh21             if ( zSign ) {
    332   1.1     bjh21                 if ( roundingMode == float_round_up ) roundIncrement = 0;
    333   1.1     bjh21             }
    334   1.1     bjh21             else {
    335   1.1     bjh21                 if ( roundingMode == float_round_down ) roundIncrement = 0;
    336   1.1     bjh21             }
    337   1.1     bjh21         }
    338   1.1     bjh21     }
    339   1.1     bjh21     roundBits = zSig & 0x7F;
    340   1.1     bjh21     if ( 0xFD <= (bits16) zExp ) {
    341   1.1     bjh21         if (    ( 0xFD < zExp )
    342   1.1     bjh21              || (    ( zExp == 0xFD )
    343   1.1     bjh21                   && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
    344   1.1     bjh21            ) {
    345   1.1     bjh21             float_raise( float_flag_overflow | float_flag_inexact );
    346   1.1     bjh21             return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
    347   1.1     bjh21         }
    348   1.1     bjh21         if ( zExp < 0 ) {
    349   1.1     bjh21             isTiny =
    350   1.1     bjh21                    ( float_detect_tininess == float_tininess_before_rounding )
    351   1.1     bjh21                 || ( zExp < -1 )
    352  1.10  christos                 || ( zSig + roundIncrement < 0x80000000U );
    353   1.1     bjh21             shift32RightJamming( zSig, - zExp, &zSig );
    354   1.1     bjh21             zExp = 0;
    355   1.1     bjh21             roundBits = zSig & 0x7F;
    356   1.1     bjh21             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
    357   1.1     bjh21         }
    358   1.1     bjh21     }
    359  1.12      matt     if ( roundBits ) set_float_exception_inexact_flag();
    360   1.1     bjh21     zSig = ( zSig + roundIncrement )>>7;
    361   1.1     bjh21     zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
    362   1.1     bjh21     if ( zSig == 0 ) zExp = 0;
    363   1.1     bjh21     return packFloat32( zSign, zExp, zSig );
    364   1.1     bjh21 
    365   1.1     bjh21 }
    366   1.1     bjh21 
    367   1.1     bjh21 /*
    368   1.1     bjh21 -------------------------------------------------------------------------------
    369   1.1     bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    370   1.1     bjh21 and significand `zSig', and returns the proper single-precision floating-
    371   1.1     bjh21 point value corresponding to the abstract input.  This routine is just like
    372   1.1     bjh21 `roundAndPackFloat32' except that `zSig' does not have to be normalized.
    373   1.1     bjh21 Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
    374   1.1     bjh21 floating-point exponent.
    375   1.1     bjh21 -------------------------------------------------------------------------------
    376   1.1     bjh21 */
    377   1.1     bjh21 static float32
    378   1.1     bjh21  normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
    379   1.1     bjh21 {
    380   1.1     bjh21     int8 shiftCount;
    381   1.1     bjh21 
    382   1.1     bjh21     shiftCount = countLeadingZeros32( zSig ) - 1;
    383   1.1     bjh21     return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
    384   1.1     bjh21 
    385   1.1     bjh21 }
    386   1.1     bjh21 
    387   1.1     bjh21 /*
    388   1.1     bjh21 -------------------------------------------------------------------------------
    389   1.1     bjh21 Returns the fraction bits of the double-precision floating-point value `a'.
    390   1.1     bjh21 -------------------------------------------------------------------------------
    391   1.1     bjh21 */
    392   1.1     bjh21 INLINE bits64 extractFloat64Frac( float64 a )
    393   1.1     bjh21 {
    394   1.1     bjh21 
    395   1.1     bjh21     return FLOAT64_DEMANGLE(a) & LIT64( 0x000FFFFFFFFFFFFF );
    396   1.1     bjh21 
    397   1.1     bjh21 }
    398   1.1     bjh21 
    399   1.1     bjh21 /*
    400   1.1     bjh21 -------------------------------------------------------------------------------
    401   1.1     bjh21 Returns the exponent bits of the double-precision floating-point value `a'.
    402   1.1     bjh21 -------------------------------------------------------------------------------
    403   1.1     bjh21 */
    404   1.1     bjh21 INLINE int16 extractFloat64Exp( float64 a )
    405   1.1     bjh21 {
    406   1.1     bjh21 
    407  1.10  christos     return (int16)((FLOAT64_DEMANGLE(a) >> 52) & 0x7FF);
    408   1.1     bjh21 
    409   1.1     bjh21 }
    410   1.1     bjh21 
    411   1.1     bjh21 /*
    412   1.1     bjh21 -------------------------------------------------------------------------------
    413   1.1     bjh21 Returns the sign bit of the double-precision floating-point value `a'.
    414   1.1     bjh21 -------------------------------------------------------------------------------
    415   1.1     bjh21 */
    416   1.1     bjh21 INLINE flag extractFloat64Sign( float64 a )
    417   1.1     bjh21 {
    418   1.1     bjh21 
    419  1.10  christos     return (flag)(FLOAT64_DEMANGLE(a) >> 63);
    420   1.1     bjh21 
    421   1.1     bjh21 }
    422   1.1     bjh21 
    423   1.1     bjh21 /*
    424   1.1     bjh21 -------------------------------------------------------------------------------
    425   1.1     bjh21 Normalizes the subnormal double-precision floating-point value represented
    426   1.1     bjh21 by the denormalized significand `aSig'.  The normalized exponent and
    427   1.1     bjh21 significand are stored at the locations pointed to by `zExpPtr' and
    428   1.1     bjh21 `zSigPtr', respectively.
    429   1.1     bjh21 -------------------------------------------------------------------------------
    430   1.1     bjh21 */
    431   1.1     bjh21 static void
    432   1.1     bjh21  normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
    433   1.1     bjh21 {
    434   1.1     bjh21     int8 shiftCount;
    435   1.1     bjh21 
    436   1.1     bjh21     shiftCount = countLeadingZeros64( aSig ) - 11;
    437   1.1     bjh21     *zSigPtr = aSig<<shiftCount;
    438   1.1     bjh21     *zExpPtr = 1 - shiftCount;
    439   1.1     bjh21 
    440   1.1     bjh21 }
    441   1.1     bjh21 
    442   1.1     bjh21 /*
    443   1.1     bjh21 -------------------------------------------------------------------------------
    444   1.1     bjh21 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
    445   1.1     bjh21 double-precision floating-point value, returning the result.  After being
    446   1.1     bjh21 shifted into the proper positions, the three fields are simply added
    447   1.1     bjh21 together to form the result.  This means that any integer portion of `zSig'
    448   1.1     bjh21 will be added into the exponent.  Since a properly normalized significand
    449   1.1     bjh21 will have an integer portion equal to 1, the `zExp' input should be 1 less
    450   1.1     bjh21 than the desired result exponent whenever `zSig' is a complete, normalized
    451   1.1     bjh21 significand.
    452   1.1     bjh21 -------------------------------------------------------------------------------
    453   1.1     bjh21 */
    454   1.1     bjh21 INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
    455   1.1     bjh21 {
    456   1.1     bjh21 
    457   1.1     bjh21     return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
    458   1.1     bjh21 			   ( ( (bits64) zExp )<<52 ) + zSig );
    459   1.1     bjh21 
    460   1.1     bjh21 }
    461   1.1     bjh21 
    462   1.1     bjh21 /*
    463   1.1     bjh21 -------------------------------------------------------------------------------
    464   1.1     bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    465   1.1     bjh21 and significand `zSig', and returns the proper double-precision floating-
    466   1.1     bjh21 point value corresponding to the abstract input.  Ordinarily, the abstract
    467   1.1     bjh21 value is simply rounded and packed into the double-precision format, with
    468   1.1     bjh21 the inexact exception raised if the abstract input cannot be represented
    469   1.1     bjh21 exactly.  However, if the abstract value is too large, the overflow and
    470   1.1     bjh21 inexact exceptions are raised and an infinity or maximal finite value is
    471   1.1     bjh21 returned.  If the abstract value is too small, the input value is rounded to
    472   1.1     bjh21 a subnormal number, and the underflow and inexact exceptions are raised if
    473   1.1     bjh21 the abstract input cannot be represented exactly as a subnormal double-
    474   1.1     bjh21 precision floating-point number.
    475   1.1     bjh21     The input significand `zSig' has its binary point between bits 62
    476   1.1     bjh21 and 61, which is 10 bits to the left of the usual location.  This shifted
    477   1.1     bjh21 significand must be normalized or smaller.  If `zSig' is not normalized,
    478   1.1     bjh21 `zExp' must be 0; in that case, the result returned is a subnormal number,
    479   1.1     bjh21 and it must not require rounding.  In the usual case that `zSig' is
    480   1.1     bjh21 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
    481   1.1     bjh21 The handling of underflow and overflow follows the IEC/IEEE Standard for
    482   1.1     bjh21 Binary Floating-Point Arithmetic.
    483   1.1     bjh21 -------------------------------------------------------------------------------
    484   1.1     bjh21 */
    485   1.1     bjh21 static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
    486   1.1     bjh21 {
    487   1.1     bjh21     int8 roundingMode;
    488   1.1     bjh21     flag roundNearestEven;
    489   1.1     bjh21     int16 roundIncrement, roundBits;
    490   1.1     bjh21     flag isTiny;
    491   1.1     bjh21 
    492   1.1     bjh21     roundingMode = float_rounding_mode;
    493   1.1     bjh21     roundNearestEven = ( roundingMode == float_round_nearest_even );
    494   1.1     bjh21     roundIncrement = 0x200;
    495   1.1     bjh21     if ( ! roundNearestEven ) {
    496   1.1     bjh21         if ( roundingMode == float_round_to_zero ) {
    497   1.1     bjh21             roundIncrement = 0;
    498   1.1     bjh21         }
    499   1.1     bjh21         else {
    500   1.1     bjh21             roundIncrement = 0x3FF;
    501   1.1     bjh21             if ( zSign ) {
    502   1.1     bjh21                 if ( roundingMode == float_round_up ) roundIncrement = 0;
    503   1.1     bjh21             }
    504   1.1     bjh21             else {
    505   1.1     bjh21                 if ( roundingMode == float_round_down ) roundIncrement = 0;
    506   1.1     bjh21             }
    507   1.1     bjh21         }
    508   1.1     bjh21     }
    509  1.10  christos     roundBits = (int16)(zSig & 0x3FF);
    510   1.1     bjh21     if ( 0x7FD <= (bits16) zExp ) {
    511   1.1     bjh21         if (    ( 0x7FD < zExp )
    512   1.1     bjh21              || (    ( zExp == 0x7FD )
    513   1.1     bjh21                   && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
    514   1.1     bjh21            ) {
    515   1.1     bjh21             float_raise( float_flag_overflow | float_flag_inexact );
    516   1.1     bjh21             return FLOAT64_MANGLE(
    517   1.1     bjh21 		FLOAT64_DEMANGLE(packFloat64( zSign, 0x7FF, 0 )) -
    518   1.1     bjh21 		( roundIncrement == 0 ));
    519   1.1     bjh21         }
    520   1.1     bjh21         if ( zExp < 0 ) {
    521   1.1     bjh21             isTiny =
    522   1.1     bjh21                    ( float_detect_tininess == float_tininess_before_rounding )
    523   1.1     bjh21                 || ( zExp < -1 )
    524  1.10  christos                 || ( zSig + roundIncrement < (bits64)LIT64( 0x8000000000000000 ) );
    525   1.1     bjh21             shift64RightJamming( zSig, - zExp, &zSig );
    526   1.1     bjh21             zExp = 0;
    527  1.10  christos             roundBits = (int16)(zSig & 0x3FF);
    528   1.1     bjh21             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
    529   1.1     bjh21         }
    530   1.1     bjh21     }
    531  1.12      matt     if ( roundBits ) set_float_exception_inexact_flag();
    532   1.1     bjh21     zSig = ( zSig + roundIncrement )>>10;
    533   1.1     bjh21     zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
    534   1.1     bjh21     if ( zSig == 0 ) zExp = 0;
    535   1.1     bjh21     return packFloat64( zSign, zExp, zSig );
    536   1.1     bjh21 
    537   1.1     bjh21 }
    538   1.1     bjh21 
    539   1.1     bjh21 /*
    540   1.1     bjh21 -------------------------------------------------------------------------------
    541   1.1     bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    542   1.1     bjh21 and significand `zSig', and returns the proper double-precision floating-
    543   1.1     bjh21 point value corresponding to the abstract input.  This routine is just like
    544   1.1     bjh21 `roundAndPackFloat64' except that `zSig' does not have to be normalized.
    545   1.1     bjh21 Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
    546   1.1     bjh21 floating-point exponent.
    547   1.1     bjh21 -------------------------------------------------------------------------------
    548   1.1     bjh21 */
    549   1.1     bjh21 static float64
    550   1.1     bjh21  normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
    551   1.1     bjh21 {
    552   1.1     bjh21     int8 shiftCount;
    553   1.1     bjh21 
    554   1.1     bjh21     shiftCount = countLeadingZeros64( zSig ) - 1;
    555   1.1     bjh21     return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
    556   1.1     bjh21 
    557   1.1     bjh21 }
    558   1.1     bjh21 
    559   1.1     bjh21 #ifdef FLOATX80
    560   1.1     bjh21 
    561   1.1     bjh21 /*
    562   1.1     bjh21 -------------------------------------------------------------------------------
    563   1.1     bjh21 Returns the fraction bits of the extended double-precision floating-point
    564   1.1     bjh21 value `a'.
    565   1.1     bjh21 -------------------------------------------------------------------------------
    566   1.1     bjh21 */
    567   1.1     bjh21 INLINE bits64 extractFloatx80Frac( floatx80 a )
    568   1.1     bjh21 {
    569   1.1     bjh21 
    570   1.1     bjh21     return a.low;
    571   1.1     bjh21 
    572   1.1     bjh21 }
    573   1.1     bjh21 
    574   1.1     bjh21 /*
    575   1.1     bjh21 -------------------------------------------------------------------------------
    576   1.1     bjh21 Returns the exponent bits of the extended double-precision floating-point
    577   1.1     bjh21 value `a'.
    578   1.1     bjh21 -------------------------------------------------------------------------------
    579   1.1     bjh21 */
    580   1.1     bjh21 INLINE int32 extractFloatx80Exp( floatx80 a )
    581   1.1     bjh21 {
    582   1.1     bjh21 
    583  1.16       nat     return (a.high>>X80SHIFT) & 0x7FFF;
    584   1.1     bjh21 
    585   1.1     bjh21 }
    586   1.1     bjh21 
    587   1.1     bjh21 /*
    588   1.1     bjh21 -------------------------------------------------------------------------------
    589   1.1     bjh21 Returns the sign bit of the extended double-precision floating-point value
    590   1.1     bjh21 `a'.
    591   1.1     bjh21 -------------------------------------------------------------------------------
    592   1.1     bjh21 */
    593   1.1     bjh21 INLINE flag extractFloatx80Sign( floatx80 a )
    594   1.1     bjh21 {
    595   1.1     bjh21 
    596  1.16       nat     return a.high>>(15 + X80SHIFT);
    597   1.1     bjh21 
    598   1.1     bjh21 }
    599   1.1     bjh21 
    600   1.1     bjh21 /*
    601   1.1     bjh21 -------------------------------------------------------------------------------
    602   1.1     bjh21 Normalizes the subnormal extended double-precision floating-point value
    603   1.1     bjh21 represented by the denormalized significand `aSig'.  The normalized exponent
    604   1.1     bjh21 and significand are stored at the locations pointed to by `zExpPtr' and
    605   1.1     bjh21 `zSigPtr', respectively.
    606   1.1     bjh21 -------------------------------------------------------------------------------
    607   1.1     bjh21 */
    608   1.1     bjh21 static void
    609   1.1     bjh21  normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
    610   1.1     bjh21 {
    611   1.1     bjh21     int8 shiftCount;
    612   1.1     bjh21 
    613   1.1     bjh21     shiftCount = countLeadingZeros64( aSig );
    614   1.1     bjh21     *zSigPtr = aSig<<shiftCount;
    615   1.1     bjh21     *zExpPtr = 1 - shiftCount;
    616   1.1     bjh21 
    617   1.1     bjh21 }
    618   1.1     bjh21 
    619   1.1     bjh21 /*
    620   1.1     bjh21 -------------------------------------------------------------------------------
    621   1.1     bjh21 Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
    622   1.1     bjh21 extended double-precision floating-point value, returning the result.
    623   1.1     bjh21 -------------------------------------------------------------------------------
    624   1.1     bjh21 */
    625   1.1     bjh21 INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
    626   1.1     bjh21 {
    627   1.1     bjh21     floatx80 z;
    628   1.1     bjh21 
    629  1.16       nat     z.low = (bits64)zSig;
    630  1.16       nat     z.high = (bits32)( ( ( (bits32) zSign )<<15 ) + zExp )<<X80SHIFT;
    631   1.1     bjh21     return z;
    632   1.1     bjh21 
    633   1.1     bjh21 }
    634   1.1     bjh21 
    635   1.1     bjh21 /*
    636   1.1     bjh21 -------------------------------------------------------------------------------
    637   1.1     bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    638   1.1     bjh21 and extended significand formed by the concatenation of `zSig0' and `zSig1',
    639   1.1     bjh21 and returns the proper extended double-precision floating-point value
    640   1.1     bjh21 corresponding to the abstract input.  Ordinarily, the abstract value is
    641   1.1     bjh21 rounded and packed into the extended double-precision format, with the
    642   1.1     bjh21 inexact exception raised if the abstract input cannot be represented
    643   1.1     bjh21 exactly.  However, if the abstract value is too large, the overflow and
    644   1.1     bjh21 inexact exceptions are raised and an infinity or maximal finite value is
    645   1.1     bjh21 returned.  If the abstract value is too small, the input value is rounded to
    646   1.1     bjh21 a subnormal number, and the underflow and inexact exceptions are raised if
    647   1.1     bjh21 the abstract input cannot be represented exactly as a subnormal extended
    648   1.1     bjh21 double-precision floating-point number.
    649   1.1     bjh21     If `roundingPrecision' is 32 or 64, the result is rounded to the same
    650   1.1     bjh21 number of bits as single or double precision, respectively.  Otherwise, the
    651   1.1     bjh21 result is rounded to the full precision of the extended double-precision
    652   1.1     bjh21 format.
    653   1.1     bjh21     The input significand must be normalized or smaller.  If the input
    654   1.1     bjh21 significand is not normalized, `zExp' must be 0; in that case, the result
    655   1.1     bjh21 returned is a subnormal number, and it must not require rounding.  The
    656   1.1     bjh21 handling of underflow and overflow follows the IEC/IEEE Standard for Binary
    657   1.1     bjh21 Floating-Point Arithmetic.
    658   1.1     bjh21 -------------------------------------------------------------------------------
    659   1.1     bjh21 */
    660   1.1     bjh21 static floatx80
    661   1.1     bjh21  roundAndPackFloatx80(
    662   1.1     bjh21      int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
    663   1.1     bjh21  )
    664   1.1     bjh21 {
    665   1.1     bjh21     int8 roundingMode;
    666   1.1     bjh21     flag roundNearestEven, increment, isTiny;
    667  1.14    martin     uint64 roundIncrement, roundMask, roundBits;
    668   1.1     bjh21 
    669   1.1     bjh21     roundingMode = float_rounding_mode;
    670   1.1     bjh21     roundNearestEven = ( roundingMode == float_round_nearest_even );
    671   1.1     bjh21     if ( roundingPrecision == 80 ) goto precision80;
    672   1.1     bjh21     if ( roundingPrecision == 64 ) {
    673   1.1     bjh21         roundIncrement = LIT64( 0x0000000000000400 );
    674   1.1     bjh21         roundMask = LIT64( 0x00000000000007FF );
    675   1.1     bjh21     }
    676   1.1     bjh21     else if ( roundingPrecision == 32 ) {
    677   1.1     bjh21         roundIncrement = LIT64( 0x0000008000000000 );
    678   1.1     bjh21         roundMask = LIT64( 0x000000FFFFFFFFFF );
    679   1.1     bjh21     }
    680   1.1     bjh21     else {
    681   1.1     bjh21         goto precision80;
    682   1.1     bjh21     }
    683   1.1     bjh21     zSig0 |= ( zSig1 != 0 );
    684   1.1     bjh21     if ( ! roundNearestEven ) {
    685   1.1     bjh21         if ( roundingMode == float_round_to_zero ) {
    686   1.1     bjh21             roundIncrement = 0;
    687   1.1     bjh21         }
    688   1.1     bjh21         else {
    689   1.1     bjh21             roundIncrement = roundMask;
    690   1.1     bjh21             if ( zSign ) {
    691   1.1     bjh21                 if ( roundingMode == float_round_up ) roundIncrement = 0;
    692   1.1     bjh21             }
    693   1.1     bjh21             else {
    694   1.1     bjh21                 if ( roundingMode == float_round_down ) roundIncrement = 0;
    695   1.1     bjh21             }
    696   1.1     bjh21         }
    697   1.1     bjh21     }
    698   1.1     bjh21     roundBits = zSig0 & roundMask;
    699   1.1     bjh21     if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
    700   1.1     bjh21         if (    ( 0x7FFE < zExp )
    701   1.1     bjh21              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
    702   1.1     bjh21            ) {
    703   1.1     bjh21             goto overflow;
    704   1.1     bjh21         }
    705   1.1     bjh21         if ( zExp <= 0 ) {
    706   1.1     bjh21             isTiny =
    707   1.1     bjh21                    ( float_detect_tininess == float_tininess_before_rounding )
    708   1.1     bjh21                 || ( zExp < 0 )
    709   1.1     bjh21                 || ( zSig0 <= zSig0 + roundIncrement );
    710   1.1     bjh21             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
    711   1.1     bjh21             zExp = 0;
    712   1.1     bjh21             roundBits = zSig0 & roundMask;
    713   1.1     bjh21             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
    714  1.12      matt             if ( roundBits ) set_float_exception_inexact_flag();
    715   1.1     bjh21             zSig0 += roundIncrement;
    716   1.1     bjh21             if ( (sbits64) zSig0 < 0 ) zExp = 1;
    717   1.1     bjh21             roundIncrement = roundMask + 1;
    718   1.1     bjh21             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
    719   1.1     bjh21                 roundMask |= roundIncrement;
    720   1.1     bjh21             }
    721   1.1     bjh21             zSig0 &= ~ roundMask;
    722   1.1     bjh21             return packFloatx80( zSign, zExp, zSig0 );
    723   1.1     bjh21         }
    724   1.1     bjh21     }
    725  1.12      matt     if ( roundBits ) set_float_exception_inexact_flag();
    726   1.1     bjh21     zSig0 += roundIncrement;
    727   1.1     bjh21     if ( zSig0 < roundIncrement ) {
    728   1.1     bjh21         ++zExp;
    729   1.1     bjh21         zSig0 = LIT64( 0x8000000000000000 );
    730   1.1     bjh21     }
    731   1.1     bjh21     roundIncrement = roundMask + 1;
    732   1.1     bjh21     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
    733   1.1     bjh21         roundMask |= roundIncrement;
    734   1.1     bjh21     }
    735   1.1     bjh21     zSig0 &= ~ roundMask;
    736   1.1     bjh21     if ( zSig0 == 0 ) zExp = 0;
    737   1.1     bjh21     return packFloatx80( zSign, zExp, zSig0 );
    738   1.1     bjh21  precision80:
    739   1.1     bjh21     increment = ( (sbits64) zSig1 < 0 );
    740   1.1     bjh21     if ( ! roundNearestEven ) {
    741   1.1     bjh21         if ( roundingMode == float_round_to_zero ) {
    742   1.1     bjh21             increment = 0;
    743   1.1     bjh21         }
    744   1.1     bjh21         else {
    745   1.1     bjh21             if ( zSign ) {
    746   1.1     bjh21                 increment = ( roundingMode == float_round_down ) && zSig1;
    747   1.1     bjh21             }
    748   1.1     bjh21             else {
    749   1.1     bjh21                 increment = ( roundingMode == float_round_up ) && zSig1;
    750   1.1     bjh21             }
    751   1.1     bjh21         }
    752   1.1     bjh21     }
    753   1.1     bjh21     if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
    754   1.1     bjh21         if (    ( 0x7FFE < zExp )
    755   1.1     bjh21              || (    ( zExp == 0x7FFE )
    756   1.1     bjh21                   && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
    757   1.1     bjh21                   && increment
    758   1.1     bjh21                 )
    759   1.1     bjh21            ) {
    760   1.1     bjh21             roundMask = 0;
    761   1.1     bjh21  overflow:
    762   1.1     bjh21             float_raise( float_flag_overflow | float_flag_inexact );
    763   1.1     bjh21             if (    ( roundingMode == float_round_to_zero )
    764   1.1     bjh21                  || ( zSign && ( roundingMode == float_round_up ) )
    765   1.1     bjh21                  || ( ! zSign && ( roundingMode == float_round_down ) )
    766   1.1     bjh21                ) {
    767   1.1     bjh21                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
    768   1.1     bjh21             }
    769   1.1     bjh21             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
    770   1.1     bjh21         }
    771   1.1     bjh21         if ( zExp <= 0 ) {
    772   1.1     bjh21             isTiny =
    773   1.1     bjh21                    ( float_detect_tininess == float_tininess_before_rounding )
    774   1.1     bjh21                 || ( zExp < 0 )
    775   1.1     bjh21                 || ! increment
    776   1.1     bjh21                 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
    777   1.1     bjh21             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
    778   1.1     bjh21             zExp = 0;
    779   1.1     bjh21             if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
    780  1.12      matt             if ( zSig1 ) set_float_exception_inexact_flag();
    781   1.1     bjh21             if ( roundNearestEven ) {
    782   1.1     bjh21                 increment = ( (sbits64) zSig1 < 0 );
    783   1.1     bjh21             }
    784   1.1     bjh21             else {
    785   1.1     bjh21                 if ( zSign ) {
    786   1.1     bjh21                     increment = ( roundingMode == float_round_down ) && zSig1;
    787   1.1     bjh21                 }
    788   1.1     bjh21                 else {
    789   1.1     bjh21                     increment = ( roundingMode == float_round_up ) && zSig1;
    790   1.1     bjh21                 }
    791   1.1     bjh21             }
    792   1.1     bjh21             if ( increment ) {
    793   1.1     bjh21                 ++zSig0;
    794   1.1     bjh21                 zSig0 &=
    795   1.1     bjh21                     ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
    796   1.1     bjh21                 if ( (sbits64) zSig0 < 0 ) zExp = 1;
    797   1.1     bjh21             }
    798   1.1     bjh21             return packFloatx80( zSign, zExp, zSig0 );
    799   1.1     bjh21         }
    800   1.1     bjh21     }
    801  1.12      matt     if ( zSig1 ) set_float_exception_inexact_flag();
    802   1.1     bjh21     if ( increment ) {
    803   1.1     bjh21         ++zSig0;
    804   1.1     bjh21         if ( zSig0 == 0 ) {
    805   1.1     bjh21             ++zExp;
    806   1.1     bjh21             zSig0 = LIT64( 0x8000000000000000 );
    807   1.1     bjh21         }
    808   1.1     bjh21         else {
    809   1.1     bjh21             zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
    810   1.1     bjh21         }
    811   1.1     bjh21     }
    812   1.1     bjh21     else {
    813   1.1     bjh21         if ( zSig0 == 0 ) zExp = 0;
    814   1.1     bjh21     }
    815   1.1     bjh21     return packFloatx80( zSign, zExp, zSig0 );
    816   1.1     bjh21 
    817   1.1     bjh21 }
    818   1.1     bjh21 
    819   1.1     bjh21 /*
    820   1.1     bjh21 -------------------------------------------------------------------------------
    821   1.1     bjh21 Takes an abstract floating-point value having sign `zSign', exponent
    822   1.1     bjh21 `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
    823   1.1     bjh21 and returns the proper extended double-precision floating-point value
    824   1.1     bjh21 corresponding to the abstract input.  This routine is just like
    825   1.1     bjh21 `roundAndPackFloatx80' except that the input significand does not have to be
    826   1.1     bjh21 normalized.
    827   1.1     bjh21 -------------------------------------------------------------------------------
    828   1.1     bjh21 */
    829   1.1     bjh21 static floatx80
    830   1.1     bjh21  normalizeRoundAndPackFloatx80(
    831   1.1     bjh21      int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
    832   1.1     bjh21  )
    833   1.1     bjh21 {
    834   1.1     bjh21     int8 shiftCount;
    835   1.1     bjh21 
    836   1.1     bjh21     if ( zSig0 == 0 ) {
    837   1.1     bjh21         zSig0 = zSig1;
    838   1.1     bjh21         zSig1 = 0;
    839   1.1     bjh21         zExp -= 64;
    840   1.1     bjh21     }
    841   1.1     bjh21     shiftCount = countLeadingZeros64( zSig0 );
    842   1.1     bjh21     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    843   1.1     bjh21     zExp -= shiftCount;
    844   1.1     bjh21     return
    845   1.1     bjh21         roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
    846   1.1     bjh21 
    847   1.1     bjh21 }
    848   1.1     bjh21 
    849   1.1     bjh21 #endif
    850   1.1     bjh21 
    851   1.1     bjh21 #ifdef FLOAT128
    852   1.1     bjh21 
    853   1.1     bjh21 /*
    854   1.1     bjh21 -------------------------------------------------------------------------------
    855   1.1     bjh21 Returns the least-significant 64 fraction bits of the quadruple-precision
    856   1.1     bjh21 floating-point value `a'.
    857   1.1     bjh21 -------------------------------------------------------------------------------
    858   1.1     bjh21 */
    859   1.1     bjh21 INLINE bits64 extractFloat128Frac1( float128 a )
    860   1.1     bjh21 {
    861   1.1     bjh21 
    862   1.1     bjh21     return a.low;
    863   1.1     bjh21 
    864   1.1     bjh21 }
    865   1.1     bjh21 
    866   1.1     bjh21 /*
    867   1.1     bjh21 -------------------------------------------------------------------------------
    868   1.1     bjh21 Returns the most-significant 48 fraction bits of the quadruple-precision
    869   1.1     bjh21 floating-point value `a'.
    870   1.1     bjh21 -------------------------------------------------------------------------------
    871   1.1     bjh21 */
    872   1.1     bjh21 INLINE bits64 extractFloat128Frac0( float128 a )
    873   1.1     bjh21 {
    874   1.1     bjh21 
    875   1.1     bjh21     return a.high & LIT64( 0x0000FFFFFFFFFFFF );
    876   1.1     bjh21 
    877   1.1     bjh21 }
    878   1.1     bjh21 
    879   1.1     bjh21 /*
    880   1.1     bjh21 -------------------------------------------------------------------------------
    881   1.1     bjh21 Returns the exponent bits of the quadruple-precision floating-point value
    882   1.1     bjh21 `a'.
    883   1.1     bjh21 -------------------------------------------------------------------------------
    884   1.1     bjh21 */
    885   1.1     bjh21 INLINE int32 extractFloat128Exp( float128 a )
    886   1.1     bjh21 {
    887   1.1     bjh21 
    888  1.10  christos     return (int32)((a.high >> 48) & 0x7FFF);
    889   1.1     bjh21 
    890   1.1     bjh21 }
    891   1.1     bjh21 
    892   1.1     bjh21 /*
    893   1.1     bjh21 -------------------------------------------------------------------------------
    894   1.1     bjh21 Returns the sign bit of the quadruple-precision floating-point value `a'.
    895   1.1     bjh21 -------------------------------------------------------------------------------
    896   1.1     bjh21 */
    897   1.1     bjh21 INLINE flag extractFloat128Sign( float128 a )
    898   1.1     bjh21 {
    899   1.1     bjh21 
    900  1.10  christos     return (flag)(a.high >> 63);
    901   1.1     bjh21 
    902   1.1     bjh21 }
    903   1.1     bjh21 
    904   1.1     bjh21 /*
    905   1.1     bjh21 -------------------------------------------------------------------------------
    906   1.1     bjh21 Normalizes the subnormal quadruple-precision floating-point value
    907   1.1     bjh21 represented by the denormalized significand formed by the concatenation of
    908   1.1     bjh21 `aSig0' and `aSig1'.  The normalized exponent is stored at the location
    909   1.1     bjh21 pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
    910   1.1     bjh21 significand are stored at the location pointed to by `zSig0Ptr', and the
    911   1.1     bjh21 least significant 64 bits of the normalized significand are stored at the
    912   1.1     bjh21 location pointed to by `zSig1Ptr'.
    913   1.1     bjh21 -------------------------------------------------------------------------------
    914   1.1     bjh21 */
    915   1.1     bjh21 static void
    916   1.1     bjh21  normalizeFloat128Subnormal(
    917   1.1     bjh21      bits64 aSig0,
    918   1.1     bjh21      bits64 aSig1,
    919   1.1     bjh21      int32 *zExpPtr,
    920   1.1     bjh21      bits64 *zSig0Ptr,
    921   1.1     bjh21      bits64 *zSig1Ptr
    922   1.1     bjh21  )
    923   1.1     bjh21 {
    924   1.1     bjh21     int8 shiftCount;
    925   1.1     bjh21 
    926   1.1     bjh21     if ( aSig0 == 0 ) {
    927   1.1     bjh21         shiftCount = countLeadingZeros64( aSig1 ) - 15;
    928   1.1     bjh21         if ( shiftCount < 0 ) {
    929   1.1     bjh21             *zSig0Ptr = aSig1>>( - shiftCount );
    930   1.1     bjh21             *zSig1Ptr = aSig1<<( shiftCount & 63 );
    931   1.1     bjh21         }
    932   1.1     bjh21         else {
    933   1.1     bjh21             *zSig0Ptr = aSig1<<shiftCount;
    934   1.1     bjh21             *zSig1Ptr = 0;
    935   1.1     bjh21         }
    936   1.1     bjh21         *zExpPtr = - shiftCount - 63;
    937   1.1     bjh21     }
    938   1.1     bjh21     else {
    939   1.1     bjh21         shiftCount = countLeadingZeros64( aSig0 ) - 15;
    940   1.1     bjh21         shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
    941   1.1     bjh21         *zExpPtr = 1 - shiftCount;
    942   1.1     bjh21     }
    943   1.1     bjh21 
    944   1.1     bjh21 }
    945   1.1     bjh21 
    946   1.1     bjh21 /*
    947   1.1     bjh21 -------------------------------------------------------------------------------
    948   1.1     bjh21 Packs the sign `zSign', the exponent `zExp', and the significand formed
    949   1.1     bjh21 by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
    950   1.1     bjh21 floating-point value, returning the result.  After being shifted into the
    951   1.1     bjh21 proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
    952   1.1     bjh21 added together to form the most significant 32 bits of the result.  This
    953   1.1     bjh21 means that any integer portion of `zSig0' will be added into the exponent.
    954   1.1     bjh21 Since a properly normalized significand will have an integer portion equal
    955   1.1     bjh21 to 1, the `zExp' input should be 1 less than the desired result exponent
    956   1.1     bjh21 whenever `zSig0' and `zSig1' concatenated form a complete, normalized
    957   1.1     bjh21 significand.
    958   1.1     bjh21 -------------------------------------------------------------------------------
    959   1.1     bjh21 */
    960   1.1     bjh21 INLINE float128
    961   1.1     bjh21  packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
    962   1.1     bjh21 {
    963   1.1     bjh21     float128 z;
    964   1.1     bjh21 
    965   1.1     bjh21     z.low = zSig1;
    966   1.1     bjh21     z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
    967   1.1     bjh21     return z;
    968   1.1     bjh21 
    969   1.1     bjh21 }
    970   1.1     bjh21 
    971   1.1     bjh21 /*
    972   1.1     bjh21 -------------------------------------------------------------------------------
    973   1.1     bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    974   1.1     bjh21 and extended significand formed by the concatenation of `zSig0', `zSig1',
    975   1.1     bjh21 and `zSig2', and returns the proper quadruple-precision floating-point value
    976   1.1     bjh21 corresponding to the abstract input.  Ordinarily, the abstract value is
    977   1.1     bjh21 simply rounded and packed into the quadruple-precision format, with the
    978   1.1     bjh21 inexact exception raised if the abstract input cannot be represented
    979   1.1     bjh21 exactly.  However, if the abstract value is too large, the overflow and
    980   1.1     bjh21 inexact exceptions are raised and an infinity or maximal finite value is
    981   1.1     bjh21 returned.  If the abstract value is too small, the input value is rounded to
    982   1.1     bjh21 a subnormal number, and the underflow and inexact exceptions are raised if
    983   1.1     bjh21 the abstract input cannot be represented exactly as a subnormal quadruple-
    984   1.1     bjh21 precision floating-point number.
    985   1.1     bjh21     The input significand must be normalized or smaller.  If the input
    986   1.1     bjh21 significand is not normalized, `zExp' must be 0; in that case, the result
    987   1.1     bjh21 returned is a subnormal number, and it must not require rounding.  In the
    988   1.1     bjh21 usual case that the input significand is normalized, `zExp' must be 1 less
    989   1.1     bjh21 than the ``true'' floating-point exponent.  The handling of underflow and
    990   1.1     bjh21 overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
    991   1.1     bjh21 -------------------------------------------------------------------------------
    992   1.1     bjh21 */
    993   1.1     bjh21 static float128
    994   1.1     bjh21  roundAndPackFloat128(
    995   1.1     bjh21      flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
    996   1.1     bjh21 {
    997   1.1     bjh21     int8 roundingMode;
    998   1.1     bjh21     flag roundNearestEven, increment, isTiny;
    999   1.1     bjh21 
   1000   1.1     bjh21     roundingMode = float_rounding_mode;
   1001   1.1     bjh21     roundNearestEven = ( roundingMode == float_round_nearest_even );
   1002   1.1     bjh21     increment = ( (sbits64) zSig2 < 0 );
   1003   1.1     bjh21     if ( ! roundNearestEven ) {
   1004   1.1     bjh21         if ( roundingMode == float_round_to_zero ) {
   1005   1.1     bjh21             increment = 0;
   1006   1.1     bjh21         }
   1007   1.1     bjh21         else {
   1008   1.1     bjh21             if ( zSign ) {
   1009   1.1     bjh21                 increment = ( roundingMode == float_round_down ) && zSig2;
   1010   1.1     bjh21             }
   1011   1.1     bjh21             else {
   1012   1.1     bjh21                 increment = ( roundingMode == float_round_up ) && zSig2;
   1013   1.1     bjh21             }
   1014   1.1     bjh21         }
   1015   1.1     bjh21     }
   1016   1.1     bjh21     if ( 0x7FFD <= (bits32) zExp ) {
   1017   1.1     bjh21         if (    ( 0x7FFD < zExp )
   1018   1.1     bjh21              || (    ( zExp == 0x7FFD )
   1019   1.1     bjh21                   && eq128(
   1020   1.1     bjh21                          LIT64( 0x0001FFFFFFFFFFFF ),
   1021   1.1     bjh21                          LIT64( 0xFFFFFFFFFFFFFFFF ),
   1022   1.1     bjh21                          zSig0,
   1023   1.1     bjh21                          zSig1
   1024   1.1     bjh21                      )
   1025   1.1     bjh21                   && increment
   1026   1.1     bjh21                 )
   1027   1.1     bjh21            ) {
   1028   1.1     bjh21             float_raise( float_flag_overflow | float_flag_inexact );
   1029   1.1     bjh21             if (    ( roundingMode == float_round_to_zero )
   1030   1.1     bjh21                  || ( zSign && ( roundingMode == float_round_up ) )
   1031   1.1     bjh21                  || ( ! zSign && ( roundingMode == float_round_down ) )
   1032   1.1     bjh21                ) {
   1033   1.1     bjh21                 return
   1034   1.1     bjh21                     packFloat128(
   1035   1.1     bjh21                         zSign,
   1036   1.1     bjh21                         0x7FFE,
   1037   1.1     bjh21                         LIT64( 0x0000FFFFFFFFFFFF ),
   1038   1.1     bjh21                         LIT64( 0xFFFFFFFFFFFFFFFF )
   1039   1.1     bjh21                     );
   1040   1.1     bjh21             }
   1041   1.1     bjh21             return packFloat128( zSign, 0x7FFF, 0, 0 );
   1042   1.1     bjh21         }
   1043   1.1     bjh21         if ( zExp < 0 ) {
   1044   1.1     bjh21             isTiny =
   1045   1.1     bjh21                    ( float_detect_tininess == float_tininess_before_rounding )
   1046   1.1     bjh21                 || ( zExp < -1 )
   1047   1.1     bjh21                 || ! increment
   1048   1.1     bjh21                 || lt128(
   1049   1.1     bjh21                        zSig0,
   1050   1.1     bjh21                        zSig1,
   1051   1.1     bjh21                        LIT64( 0x0001FFFFFFFFFFFF ),
   1052   1.1     bjh21                        LIT64( 0xFFFFFFFFFFFFFFFF )
   1053   1.1     bjh21                    );
   1054   1.1     bjh21             shift128ExtraRightJamming(
   1055   1.1     bjh21                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
   1056   1.1     bjh21             zExp = 0;
   1057   1.1     bjh21             if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
   1058   1.1     bjh21             if ( roundNearestEven ) {
   1059   1.1     bjh21                 increment = ( (sbits64) zSig2 < 0 );
   1060   1.1     bjh21             }
   1061   1.1     bjh21             else {
   1062   1.1     bjh21                 if ( zSign ) {
   1063   1.1     bjh21                     increment = ( roundingMode == float_round_down ) && zSig2;
   1064   1.1     bjh21                 }
   1065   1.1     bjh21                 else {
   1066   1.1     bjh21                     increment = ( roundingMode == float_round_up ) && zSig2;
   1067   1.1     bjh21                 }
   1068   1.1     bjh21             }
   1069   1.1     bjh21         }
   1070   1.1     bjh21     }
   1071  1.12      matt     if ( zSig2 ) set_float_exception_inexact_flag();
   1072   1.1     bjh21     if ( increment ) {
   1073   1.1     bjh21         add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
   1074   1.1     bjh21         zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
   1075   1.1     bjh21     }
   1076   1.1     bjh21     else {
   1077   1.1     bjh21         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
   1078   1.1     bjh21     }
   1079   1.1     bjh21     return packFloat128( zSign, zExp, zSig0, zSig1 );
   1080   1.1     bjh21 
   1081   1.1     bjh21 }
   1082   1.1     bjh21 
   1083   1.1     bjh21 /*
   1084   1.1     bjh21 -------------------------------------------------------------------------------
   1085   1.1     bjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
   1086   1.1     bjh21 and significand formed by the concatenation of `zSig0' and `zSig1', and
   1087   1.1     bjh21 returns the proper quadruple-precision floating-point value corresponding
   1088   1.1     bjh21 to the abstract input.  This routine is just like `roundAndPackFloat128'
   1089   1.1     bjh21 except that the input significand has fewer bits and does not have to be
   1090   1.1     bjh21 normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
   1091   1.1     bjh21 point exponent.
   1092   1.1     bjh21 -------------------------------------------------------------------------------
   1093   1.1     bjh21 */
   1094   1.1     bjh21 static float128
   1095   1.1     bjh21  normalizeRoundAndPackFloat128(
   1096   1.1     bjh21      flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
   1097   1.1     bjh21 {
   1098   1.1     bjh21     int8 shiftCount;
   1099   1.1     bjh21     bits64 zSig2;
   1100   1.1     bjh21 
   1101   1.1     bjh21     if ( zSig0 == 0 ) {
   1102   1.1     bjh21         zSig0 = zSig1;
   1103   1.1     bjh21         zSig1 = 0;
   1104   1.1     bjh21         zExp -= 64;
   1105   1.1     bjh21     }
   1106   1.1     bjh21     shiftCount = countLeadingZeros64( zSig0 ) - 15;
   1107   1.1     bjh21     if ( 0 <= shiftCount ) {
   1108   1.1     bjh21         zSig2 = 0;
   1109   1.1     bjh21         shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
   1110   1.1     bjh21     }
   1111   1.1     bjh21     else {
   1112   1.1     bjh21         shift128ExtraRightJamming(
   1113   1.1     bjh21             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
   1114   1.1     bjh21     }
   1115   1.1     bjh21     zExp -= shiftCount;
   1116   1.1     bjh21     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
   1117   1.1     bjh21 
   1118   1.1     bjh21 }
   1119   1.1     bjh21 
   1120   1.1     bjh21 #endif
   1121   1.1     bjh21 
   1122   1.1     bjh21 /*
   1123   1.1     bjh21 -------------------------------------------------------------------------------
   1124   1.1     bjh21 Returns the result of converting the 32-bit two's complement integer `a'
   1125   1.1     bjh21 to the single-precision floating-point format.  The conversion is performed
   1126   1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1127   1.1     bjh21 -------------------------------------------------------------------------------
   1128   1.1     bjh21 */
   1129   1.1     bjh21 float32 int32_to_float32( int32 a )
   1130   1.1     bjh21 {
   1131   1.1     bjh21     flag zSign;
   1132   1.1     bjh21 
   1133   1.1     bjh21     if ( a == 0 ) return 0;
   1134   1.1     bjh21     if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
   1135   1.1     bjh21     zSign = ( a < 0 );
   1136  1.10  christos     return normalizeRoundAndPackFloat32(zSign, 0x9C, (uint32)(zSign ? - a : a));
   1137   1.1     bjh21 
   1138   1.1     bjh21 }
   1139   1.1     bjh21 
   1140   1.6      matt float32 uint32_to_float32( uint32 a )
   1141   1.6      matt {
   1142   1.6      matt     if ( a == 0 ) return 0;
   1143   1.6      matt     if ( a & (bits32) 0x80000000 )
   1144   1.6      matt 	return normalizeRoundAndPackFloat32( 0, 0x9D, a >> 1 );
   1145   1.6      matt     return normalizeRoundAndPackFloat32( 0, 0x9C, a );
   1146   1.6      matt }
   1147   1.6      matt 
   1148   1.6      matt 
   1149   1.1     bjh21 /*
   1150   1.1     bjh21 -------------------------------------------------------------------------------
   1151   1.1     bjh21 Returns the result of converting the 32-bit two's complement integer `a'
   1152   1.1     bjh21 to the double-precision floating-point format.  The conversion is performed
   1153   1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1154   1.1     bjh21 -------------------------------------------------------------------------------
   1155   1.1     bjh21 */
   1156   1.1     bjh21 float64 int32_to_float64( int32 a )
   1157   1.1     bjh21 {
   1158   1.1     bjh21     flag zSign;
   1159   1.1     bjh21     uint32 absA;
   1160   1.1     bjh21     int8 shiftCount;
   1161   1.1     bjh21     bits64 zSig;
   1162   1.1     bjh21 
   1163   1.1     bjh21     if ( a == 0 ) return 0;
   1164   1.1     bjh21     zSign = ( a < 0 );
   1165   1.1     bjh21     absA = zSign ? - a : a;
   1166   1.1     bjh21     shiftCount = countLeadingZeros32( absA ) + 21;
   1167   1.1     bjh21     zSig = absA;
   1168   1.1     bjh21     return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
   1169   1.1     bjh21 
   1170   1.1     bjh21 }
   1171   1.1     bjh21 
   1172   1.6      matt float64 uint32_to_float64( uint32 a )
   1173   1.6      matt {
   1174   1.6      matt     int8 shiftCount;
   1175   1.6      matt     bits64 zSig = a;
   1176   1.6      matt 
   1177   1.6      matt     if ( a == 0 ) return 0;
   1178   1.6      matt     shiftCount = countLeadingZeros32( a ) + 21;
   1179   1.6      matt     return packFloat64( 0, 0x432 - shiftCount, zSig<<shiftCount );
   1180   1.6      matt 
   1181   1.6      matt }
   1182   1.6      matt 
   1183   1.1     bjh21 #ifdef FLOATX80
   1184   1.1     bjh21 
   1185   1.1     bjh21 /*
   1186   1.1     bjh21 -------------------------------------------------------------------------------
   1187   1.1     bjh21 Returns the result of converting the 32-bit two's complement integer `a'
   1188   1.1     bjh21 to the extended double-precision floating-point format.  The conversion
   1189   1.1     bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   1190   1.1     bjh21 Arithmetic.
   1191   1.1     bjh21 -------------------------------------------------------------------------------
   1192   1.1     bjh21 */
   1193   1.1     bjh21 floatx80 int32_to_floatx80( int32 a )
   1194   1.1     bjh21 {
   1195   1.1     bjh21     flag zSign;
   1196   1.1     bjh21     uint32 absA;
   1197   1.1     bjh21     int8 shiftCount;
   1198   1.1     bjh21     bits64 zSig;
   1199   1.1     bjh21 
   1200   1.1     bjh21     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
   1201   1.1     bjh21     zSign = ( a < 0 );
   1202   1.1     bjh21     absA = zSign ? - a : a;
   1203   1.1     bjh21     shiftCount = countLeadingZeros32( absA ) + 32;
   1204   1.1     bjh21     zSig = absA;
   1205   1.1     bjh21     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
   1206   1.1     bjh21 
   1207   1.1     bjh21 }
   1208   1.1     bjh21 
   1209   1.6      matt floatx80 uint32_to_floatx80( uint32 a )
   1210   1.6      matt {
   1211   1.6      matt     int8 shiftCount;
   1212   1.6      matt     bits64 zSig = a;
   1213   1.6      matt 
   1214   1.6      matt     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
   1215   1.6      matt     shiftCount = countLeadingZeros32( a ) + 32;
   1216   1.6      matt     return packFloatx80( 0, 0x403E - shiftCount, zSig<<shiftCount );
   1217   1.6      matt 
   1218   1.6      matt }
   1219   1.6      matt 
   1220   1.1     bjh21 #endif
   1221   1.1     bjh21 
   1222   1.1     bjh21 #ifdef FLOAT128
   1223   1.1     bjh21 
   1224   1.1     bjh21 /*
   1225   1.1     bjh21 -------------------------------------------------------------------------------
   1226   1.1     bjh21 Returns the result of converting the 32-bit two's complement integer `a' to
   1227   1.1     bjh21 the quadruple-precision floating-point format.  The conversion is performed
   1228   1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1229   1.1     bjh21 -------------------------------------------------------------------------------
   1230   1.1     bjh21 */
   1231   1.1     bjh21 float128 int32_to_float128( int32 a )
   1232   1.1     bjh21 {
   1233   1.1     bjh21     flag zSign;
   1234   1.1     bjh21     uint32 absA;
   1235   1.1     bjh21     int8 shiftCount;
   1236   1.1     bjh21     bits64 zSig0;
   1237   1.1     bjh21 
   1238   1.1     bjh21     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
   1239   1.1     bjh21     zSign = ( a < 0 );
   1240   1.1     bjh21     absA = zSign ? - a : a;
   1241   1.1     bjh21     shiftCount = countLeadingZeros32( absA ) + 17;
   1242   1.1     bjh21     zSig0 = absA;
   1243   1.1     bjh21     return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
   1244   1.1     bjh21 
   1245   1.1     bjh21 }
   1246   1.1     bjh21 
   1247   1.6      matt float128 uint32_to_float128( uint32 a )
   1248   1.6      matt {
   1249   1.6      matt     int8 shiftCount;
   1250   1.6      matt     bits64 zSig0 = a;
   1251   1.6      matt 
   1252   1.6      matt     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
   1253   1.6      matt     shiftCount = countLeadingZeros32( a ) + 17;
   1254   1.6      matt     return packFloat128( 0, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
   1255   1.6      matt 
   1256   1.6      matt }
   1257   1.6      matt 
   1258   1.1     bjh21 #endif
   1259   1.1     bjh21 
   1260   1.1     bjh21 #ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */
   1261   1.1     bjh21 /*
   1262   1.1     bjh21 -------------------------------------------------------------------------------
   1263   1.1     bjh21 Returns the result of converting the 64-bit two's complement integer `a'
   1264   1.1     bjh21 to the single-precision floating-point format.  The conversion is performed
   1265   1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1266   1.1     bjh21 -------------------------------------------------------------------------------
   1267   1.1     bjh21 */
   1268   1.1     bjh21 float32 int64_to_float32( int64 a )
   1269   1.1     bjh21 {
   1270   1.1     bjh21     flag zSign;
   1271   1.1     bjh21     uint64 absA;
   1272   1.1     bjh21     int8 shiftCount;
   1273   1.1     bjh21 
   1274   1.1     bjh21     if ( a == 0 ) return 0;
   1275   1.1     bjh21     zSign = ( a < 0 );
   1276   1.1     bjh21     absA = zSign ? - a : a;
   1277   1.1     bjh21     shiftCount = countLeadingZeros64( absA ) - 40;
   1278   1.1     bjh21     if ( 0 <= shiftCount ) {
   1279   1.1     bjh21         return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
   1280   1.1     bjh21     }
   1281   1.1     bjh21     else {
   1282   1.1     bjh21         shiftCount += 7;
   1283   1.1     bjh21         if ( shiftCount < 0 ) {
   1284   1.1     bjh21             shift64RightJamming( absA, - shiftCount, &absA );
   1285   1.1     bjh21         }
   1286   1.1     bjh21         else {
   1287   1.1     bjh21             absA <<= shiftCount;
   1288   1.1     bjh21         }
   1289   1.1     bjh21         return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA );
   1290   1.1     bjh21     }
   1291   1.1     bjh21 
   1292   1.1     bjh21 }
   1293   1.1     bjh21 
   1294   1.1     bjh21 /*
   1295   1.1     bjh21 -------------------------------------------------------------------------------
   1296   1.1     bjh21 Returns the result of converting the 64-bit two's complement integer `a'
   1297   1.1     bjh21 to the double-precision floating-point format.  The conversion is performed
   1298   1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1299   1.1     bjh21 -------------------------------------------------------------------------------
   1300   1.1     bjh21 */
   1301   1.1     bjh21 float64 int64_to_float64( int64 a )
   1302   1.1     bjh21 {
   1303   1.1     bjh21     flag zSign;
   1304   1.1     bjh21 
   1305   1.1     bjh21     if ( a == 0 ) return 0;
   1306   1.1     bjh21     if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
   1307   1.1     bjh21         return packFloat64( 1, 0x43E, 0 );
   1308   1.1     bjh21     }
   1309   1.1     bjh21     zSign = ( a < 0 );
   1310   1.1     bjh21     return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a );
   1311   1.1     bjh21 
   1312   1.1     bjh21 }
   1313   1.1     bjh21 
   1314   1.1     bjh21 #ifdef FLOATX80
   1315   1.1     bjh21 
   1316   1.1     bjh21 /*
   1317   1.1     bjh21 -------------------------------------------------------------------------------
   1318   1.1     bjh21 Returns the result of converting the 64-bit two's complement integer `a'
   1319   1.1     bjh21 to the extended double-precision floating-point format.  The conversion
   1320   1.1     bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   1321   1.1     bjh21 Arithmetic.
   1322   1.1     bjh21 -------------------------------------------------------------------------------
   1323   1.1     bjh21 */
   1324   1.1     bjh21 floatx80 int64_to_floatx80( int64 a )
   1325   1.1     bjh21 {
   1326   1.1     bjh21     flag zSign;
   1327   1.1     bjh21     uint64 absA;
   1328   1.1     bjh21     int8 shiftCount;
   1329   1.1     bjh21 
   1330   1.1     bjh21     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
   1331   1.1     bjh21     zSign = ( a < 0 );
   1332   1.1     bjh21     absA = zSign ? - a : a;
   1333   1.1     bjh21     shiftCount = countLeadingZeros64( absA );
   1334   1.1     bjh21     return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
   1335   1.1     bjh21 
   1336   1.1     bjh21 }
   1337   1.1     bjh21 
   1338   1.1     bjh21 #endif
   1339   1.1     bjh21 
   1340   1.1     bjh21 #endif /* !SOFTFLOAT_FOR_GCC */
   1341   1.1     bjh21 
   1342   1.1     bjh21 #ifdef FLOAT128
   1343   1.1     bjh21 
   1344   1.1     bjh21 /*
   1345   1.1     bjh21 -------------------------------------------------------------------------------
   1346   1.1     bjh21 Returns the result of converting the 64-bit two's complement integer `a' to
   1347   1.1     bjh21 the quadruple-precision floating-point format.  The conversion is performed
   1348   1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1349   1.1     bjh21 -------------------------------------------------------------------------------
   1350   1.1     bjh21 */
   1351   1.1     bjh21 float128 int64_to_float128( int64 a )
   1352   1.1     bjh21 {
   1353   1.1     bjh21     flag zSign;
   1354   1.1     bjh21     uint64 absA;
   1355   1.1     bjh21     int8 shiftCount;
   1356   1.1     bjh21     int32 zExp;
   1357   1.1     bjh21     bits64 zSig0, zSig1;
   1358   1.1     bjh21 
   1359   1.1     bjh21     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
   1360   1.1     bjh21     zSign = ( a < 0 );
   1361   1.1     bjh21     absA = zSign ? - a : a;
   1362   1.1     bjh21     shiftCount = countLeadingZeros64( absA ) + 49;
   1363   1.1     bjh21     zExp = 0x406E - shiftCount;
   1364   1.1     bjh21     if ( 64 <= shiftCount ) {
   1365   1.1     bjh21         zSig1 = 0;
   1366   1.1     bjh21         zSig0 = absA;
   1367   1.1     bjh21         shiftCount -= 64;
   1368   1.1     bjh21     }
   1369   1.1     bjh21     else {
   1370   1.1     bjh21         zSig1 = absA;
   1371   1.1     bjh21         zSig0 = 0;
   1372   1.1     bjh21     }
   1373   1.1     bjh21     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
   1374   1.1     bjh21     return packFloat128( zSign, zExp, zSig0, zSig1 );
   1375   1.1     bjh21 
   1376   1.1     bjh21 }
   1377   1.1     bjh21 
   1378   1.1     bjh21 #endif
   1379   1.1     bjh21 
   1380   1.1     bjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   1381   1.1     bjh21 /*
   1382   1.1     bjh21 -------------------------------------------------------------------------------
   1383   1.1     bjh21 Returns the result of converting the single-precision floating-point value
   1384   1.1     bjh21 `a' to the 32-bit two's complement integer format.  The conversion is
   1385   1.1     bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1386   1.1     bjh21 Arithmetic---which means in particular that the conversion is rounded
   1387   1.1     bjh21 according to the current rounding mode.  If `a' is a NaN, the largest
   1388   1.1     bjh21 positive integer is returned.  Otherwise, if the conversion overflows, the
   1389   1.1     bjh21 largest integer with the same sign as `a' is returned.
   1390   1.1     bjh21 -------------------------------------------------------------------------------
   1391   1.1     bjh21 */
   1392   1.1     bjh21 int32 float32_to_int32( float32 a )
   1393   1.1     bjh21 {
   1394   1.1     bjh21     flag aSign;
   1395   1.1     bjh21     int16 aExp, shiftCount;
   1396   1.1     bjh21     bits32 aSig;
   1397   1.1     bjh21     bits64 aSig64;
   1398   1.1     bjh21 
   1399   1.1     bjh21     aSig = extractFloat32Frac( a );
   1400   1.1     bjh21     aExp = extractFloat32Exp( a );
   1401   1.1     bjh21     aSign = extractFloat32Sign( a );
   1402   1.1     bjh21     if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
   1403   1.1     bjh21     if ( aExp ) aSig |= 0x00800000;
   1404   1.1     bjh21     shiftCount = 0xAF - aExp;
   1405   1.1     bjh21     aSig64 = aSig;
   1406   1.1     bjh21     aSig64 <<= 32;
   1407   1.1     bjh21     if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
   1408   1.1     bjh21     return roundAndPackInt32( aSign, aSig64 );
   1409   1.1     bjh21 
   1410   1.1     bjh21 }
   1411   1.1     bjh21 #endif /* !SOFTFLOAT_FOR_GCC */
   1412   1.1     bjh21 
   1413   1.1     bjh21 /*
   1414   1.1     bjh21 -------------------------------------------------------------------------------
   1415   1.1     bjh21 Returns the result of converting the single-precision floating-point value
   1416   1.1     bjh21 `a' to the 32-bit two's complement integer format.  The conversion is
   1417   1.1     bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1418   1.1     bjh21 Arithmetic, except that the conversion is always rounded toward zero.
   1419   1.1     bjh21 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
   1420   1.1     bjh21 the conversion overflows, the largest integer with the same sign as `a' is
   1421   1.1     bjh21 returned.
   1422   1.1     bjh21 -------------------------------------------------------------------------------
   1423   1.1     bjh21 */
   1424   1.1     bjh21 int32 float32_to_int32_round_to_zero( float32 a )
   1425   1.1     bjh21 {
   1426   1.1     bjh21     flag aSign;
   1427   1.1     bjh21     int16 aExp, shiftCount;
   1428   1.1     bjh21     bits32 aSig;
   1429   1.1     bjh21     int32 z;
   1430   1.1     bjh21 
   1431   1.1     bjh21     aSig = extractFloat32Frac( a );
   1432   1.1     bjh21     aExp = extractFloat32Exp( a );
   1433   1.1     bjh21     aSign = extractFloat32Sign( a );
   1434   1.1     bjh21     shiftCount = aExp - 0x9E;
   1435   1.1     bjh21     if ( 0 <= shiftCount ) {
   1436   1.1     bjh21         if ( a != 0xCF000000 ) {
   1437   1.1     bjh21             float_raise( float_flag_invalid );
   1438   1.1     bjh21             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
   1439   1.1     bjh21         }
   1440   1.1     bjh21         return (sbits32) 0x80000000;
   1441   1.1     bjh21     }
   1442   1.1     bjh21     else if ( aExp <= 0x7E ) {
   1443  1.12      matt         if ( aExp | aSig ) set_float_exception_inexact_flag();
   1444   1.1     bjh21         return 0;
   1445   1.1     bjh21     }
   1446   1.1     bjh21     aSig = ( aSig | 0x00800000 )<<8;
   1447   1.1     bjh21     z = aSig>>( - shiftCount );
   1448   1.1     bjh21     if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
   1449  1.12      matt         set_float_exception_inexact_flag();
   1450   1.1     bjh21     }
   1451   1.1     bjh21     if ( aSign ) z = - z;
   1452   1.1     bjh21     return z;
   1453   1.1     bjh21 
   1454   1.1     bjh21 }
   1455   1.1     bjh21 
   1456   1.1     bjh21 #ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */
   1457   1.1     bjh21 /*
   1458   1.1     bjh21 -------------------------------------------------------------------------------
   1459   1.1     bjh21 Returns the result of converting the single-precision floating-point value
   1460   1.1     bjh21 `a' to the 64-bit two's complement integer format.  The conversion is
   1461   1.1     bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1462   1.1     bjh21 Arithmetic---which means in particular that the conversion is rounded
   1463   1.1     bjh21 according to the current rounding mode.  If `a' is a NaN, the largest
   1464   1.1     bjh21 positive integer is returned.  Otherwise, if the conversion overflows, the
   1465   1.1     bjh21 largest integer with the same sign as `a' is returned.
   1466   1.1     bjh21 -------------------------------------------------------------------------------
   1467   1.1     bjh21 */
   1468   1.1     bjh21 int64 float32_to_int64( float32 a )
   1469   1.1     bjh21 {
   1470   1.1     bjh21     flag aSign;
   1471   1.1     bjh21     int16 aExp, shiftCount;
   1472   1.1     bjh21     bits32 aSig;
   1473   1.1     bjh21     bits64 aSig64, aSigExtra;
   1474   1.1     bjh21 
   1475   1.1     bjh21     aSig = extractFloat32Frac( a );
   1476   1.1     bjh21     aExp = extractFloat32Exp( a );
   1477   1.1     bjh21     aSign = extractFloat32Sign( a );
   1478   1.1     bjh21     shiftCount = 0xBE - aExp;
   1479   1.1     bjh21     if ( shiftCount < 0 ) {
   1480   1.1     bjh21         float_raise( float_flag_invalid );
   1481   1.1     bjh21         if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
   1482   1.1     bjh21             return LIT64( 0x7FFFFFFFFFFFFFFF );
   1483   1.1     bjh21         }
   1484   1.1     bjh21         return (sbits64) LIT64( 0x8000000000000000 );
   1485   1.1     bjh21     }
   1486   1.1     bjh21     if ( aExp ) aSig |= 0x00800000;
   1487   1.1     bjh21     aSig64 = aSig;
   1488   1.1     bjh21     aSig64 <<= 40;
   1489   1.1     bjh21     shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
   1490   1.1     bjh21     return roundAndPackInt64( aSign, aSig64, aSigExtra );
   1491   1.1     bjh21 
   1492   1.1     bjh21 }
   1493   1.1     bjh21 
   1494   1.1     bjh21 /*
   1495   1.1     bjh21 -------------------------------------------------------------------------------
   1496   1.1     bjh21 Returns the result of converting the single-precision floating-point value
   1497   1.1     bjh21 `a' to the 64-bit two's complement integer format.  The conversion is
   1498   1.1     bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1499   1.1     bjh21 Arithmetic, except that the conversion is always rounded toward zero.  If
   1500   1.1     bjh21 `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
   1501   1.1     bjh21 conversion overflows, the largest integer with the same sign as `a' is
   1502   1.1     bjh21 returned.
   1503   1.1     bjh21 -------------------------------------------------------------------------------
   1504   1.1     bjh21 */
   1505   1.1     bjh21 int64 float32_to_int64_round_to_zero( float32 a )
   1506   1.1     bjh21 {
   1507   1.1     bjh21     flag aSign;
   1508   1.1     bjh21     int16 aExp, shiftCount;
   1509   1.1     bjh21     bits32 aSig;
   1510   1.1     bjh21     bits64 aSig64;
   1511   1.1     bjh21     int64 z;
   1512   1.1     bjh21 
   1513   1.1     bjh21     aSig = extractFloat32Frac( a );
   1514   1.1     bjh21     aExp = extractFloat32Exp( a );
   1515   1.1     bjh21     aSign = extractFloat32Sign( a );
   1516   1.1     bjh21     shiftCount = aExp - 0xBE;
   1517   1.1     bjh21     if ( 0 <= shiftCount ) {
   1518   1.1     bjh21         if ( a != 0xDF000000 ) {
   1519   1.1     bjh21             float_raise( float_flag_invalid );
   1520   1.1     bjh21             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
   1521   1.1     bjh21                 return LIT64( 0x7FFFFFFFFFFFFFFF );
   1522   1.1     bjh21             }
   1523   1.1     bjh21         }
   1524   1.1     bjh21         return (sbits64) LIT64( 0x8000000000000000 );
   1525   1.1     bjh21     }
   1526   1.1     bjh21     else if ( aExp <= 0x7E ) {
   1527  1.12      matt         if ( aExp | aSig ) set_float_exception_inexact_flag();
   1528   1.1     bjh21         return 0;
   1529   1.1     bjh21     }
   1530   1.1     bjh21     aSig64 = aSig | 0x00800000;
   1531   1.1     bjh21     aSig64 <<= 40;
   1532   1.1     bjh21     z = aSig64>>( - shiftCount );
   1533   1.1     bjh21     if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
   1534  1.12      matt         set_float_exception_inexact_flag();
   1535   1.1     bjh21     }
   1536   1.1     bjh21     if ( aSign ) z = - z;
   1537   1.1     bjh21     return z;
   1538   1.1     bjh21 
   1539   1.1     bjh21 }
   1540   1.1     bjh21 #endif /* !SOFTFLOAT_FOR_GCC */
   1541   1.1     bjh21 
   1542   1.1     bjh21 /*
   1543   1.1     bjh21 -------------------------------------------------------------------------------
   1544   1.1     bjh21 Returns the result of converting the single-precision floating-point value
   1545   1.1     bjh21 `a' to the double-precision floating-point format.  The conversion is
   1546   1.1     bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1547   1.1     bjh21 Arithmetic.
   1548   1.1     bjh21 -------------------------------------------------------------------------------
   1549   1.1     bjh21 */
   1550   1.1     bjh21 float64 float32_to_float64( float32 a )
   1551   1.1     bjh21 {
   1552   1.1     bjh21     flag aSign;
   1553   1.1     bjh21     int16 aExp;
   1554   1.1     bjh21     bits32 aSig;
   1555   1.1     bjh21 
   1556   1.1     bjh21     aSig = extractFloat32Frac( a );
   1557   1.1     bjh21     aExp = extractFloat32Exp( a );
   1558   1.1     bjh21     aSign = extractFloat32Sign( a );
   1559   1.1     bjh21     if ( aExp == 0xFF ) {
   1560   1.1     bjh21         if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
   1561   1.1     bjh21         return packFloat64( aSign, 0x7FF, 0 );
   1562   1.1     bjh21     }
   1563   1.1     bjh21     if ( aExp == 0 ) {
   1564   1.1     bjh21         if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
   1565   1.1     bjh21         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1566   1.1     bjh21         --aExp;
   1567   1.1     bjh21     }
   1568   1.1     bjh21     return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
   1569   1.1     bjh21 
   1570   1.1     bjh21 }
   1571   1.1     bjh21 
   1572   1.1     bjh21 #ifdef FLOATX80
   1573   1.1     bjh21 
   1574   1.1     bjh21 /*
   1575   1.1     bjh21 -------------------------------------------------------------------------------
   1576   1.1     bjh21 Returns the result of converting the single-precision floating-point value
   1577   1.1     bjh21 `a' to the extended double-precision floating-point format.  The conversion
   1578   1.1     bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   1579   1.1     bjh21 Arithmetic.
   1580   1.1     bjh21 -------------------------------------------------------------------------------
   1581   1.1     bjh21 */
   1582   1.1     bjh21 floatx80 float32_to_floatx80( float32 a )
   1583   1.1     bjh21 {
   1584   1.1     bjh21     flag aSign;
   1585   1.1     bjh21     int16 aExp;
   1586   1.1     bjh21     bits32 aSig;
   1587   1.1     bjh21 
   1588   1.1     bjh21     aSig = extractFloat32Frac( a );
   1589   1.1     bjh21     aExp = extractFloat32Exp( a );
   1590   1.1     bjh21     aSign = extractFloat32Sign( a );
   1591   1.1     bjh21     if ( aExp == 0xFF ) {
   1592   1.1     bjh21         if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
   1593   1.1     bjh21         return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   1594   1.1     bjh21     }
   1595   1.1     bjh21     if ( aExp == 0 ) {
   1596   1.1     bjh21         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
   1597   1.1     bjh21         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1598   1.1     bjh21     }
   1599   1.1     bjh21     aSig |= 0x00800000;
   1600   1.1     bjh21     return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
   1601   1.1     bjh21 
   1602   1.1     bjh21 }
   1603   1.1     bjh21 
   1604   1.1     bjh21 #endif
   1605   1.1     bjh21 
   1606   1.1     bjh21 #ifdef FLOAT128
   1607   1.1     bjh21 
   1608   1.1     bjh21 /*
   1609   1.1     bjh21 -------------------------------------------------------------------------------
   1610   1.1     bjh21 Returns the result of converting the single-precision floating-point value
   1611   1.1     bjh21 `a' to the double-precision floating-point format.  The conversion is
   1612   1.1     bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1613   1.1     bjh21 Arithmetic.
   1614   1.1     bjh21 -------------------------------------------------------------------------------
   1615   1.1     bjh21 */
   1616   1.1     bjh21 float128 float32_to_float128( float32 a )
   1617   1.1     bjh21 {
   1618   1.1     bjh21     flag aSign;
   1619   1.1     bjh21     int16 aExp;
   1620   1.1     bjh21     bits32 aSig;
   1621   1.1     bjh21 
   1622   1.1     bjh21     aSig = extractFloat32Frac( a );
   1623   1.1     bjh21     aExp = extractFloat32Exp( a );
   1624   1.1     bjh21     aSign = extractFloat32Sign( a );
   1625   1.1     bjh21     if ( aExp == 0xFF ) {
   1626   1.1     bjh21         if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) );
   1627   1.1     bjh21         return packFloat128( aSign, 0x7FFF, 0, 0 );
   1628   1.1     bjh21     }
   1629   1.1     bjh21     if ( aExp == 0 ) {
   1630   1.1     bjh21         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
   1631   1.1     bjh21         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1632   1.1     bjh21         --aExp;
   1633   1.1     bjh21     }
   1634   1.1     bjh21     return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
   1635   1.1     bjh21 
   1636   1.1     bjh21 }
   1637   1.1     bjh21 
   1638   1.1     bjh21 #endif
   1639   1.1     bjh21 
   1640   1.1     bjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   1641   1.1     bjh21 /*
   1642   1.1     bjh21 -------------------------------------------------------------------------------
   1643   1.1     bjh21 Rounds the single-precision floating-point value `a' to an integer, and
   1644   1.1     bjh21 returns the result as a single-precision floating-point value.  The
   1645   1.1     bjh21 operation is performed according to the IEC/IEEE Standard for Binary
   1646   1.1     bjh21 Floating-Point Arithmetic.
   1647   1.1     bjh21 -------------------------------------------------------------------------------
   1648   1.1     bjh21 */
   1649   1.1     bjh21 float32 float32_round_to_int( float32 a )
   1650   1.1     bjh21 {
   1651   1.1     bjh21     flag aSign;
   1652   1.1     bjh21     int16 aExp;
   1653   1.1     bjh21     bits32 lastBitMask, roundBitsMask;
   1654   1.1     bjh21     int8 roundingMode;
   1655   1.1     bjh21     float32 z;
   1656   1.1     bjh21 
   1657   1.1     bjh21     aExp = extractFloat32Exp( a );
   1658   1.1     bjh21     if ( 0x96 <= aExp ) {
   1659   1.1     bjh21         if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
   1660   1.1     bjh21             return propagateFloat32NaN( a, a );
   1661   1.1     bjh21         }
   1662   1.1     bjh21         return a;
   1663   1.1     bjh21     }
   1664   1.1     bjh21     if ( aExp <= 0x7E ) {
   1665   1.1     bjh21         if ( (bits32) ( a<<1 ) == 0 ) return a;
   1666  1.12      matt         set_float_exception_inexact_flag();
   1667   1.1     bjh21         aSign = extractFloat32Sign( a );
   1668   1.1     bjh21         switch ( float_rounding_mode ) {
   1669   1.1     bjh21          case float_round_nearest_even:
   1670   1.1     bjh21             if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
   1671   1.1     bjh21                 return packFloat32( aSign, 0x7F, 0 );
   1672   1.1     bjh21             }
   1673   1.1     bjh21             break;
   1674   1.1     bjh21 	 case float_round_to_zero:
   1675   1.1     bjh21 	    break;
   1676   1.1     bjh21          case float_round_down:
   1677   1.1     bjh21             return aSign ? 0xBF800000 : 0;
   1678   1.1     bjh21          case float_round_up:
   1679   1.1     bjh21             return aSign ? 0x80000000 : 0x3F800000;
   1680   1.1     bjh21         }
   1681   1.1     bjh21         return packFloat32( aSign, 0, 0 );
   1682   1.1     bjh21     }
   1683   1.1     bjh21     lastBitMask = 1;
   1684   1.1     bjh21     lastBitMask <<= 0x96 - aExp;
   1685   1.1     bjh21     roundBitsMask = lastBitMask - 1;
   1686   1.1     bjh21     z = a;
   1687   1.1     bjh21     roundingMode = float_rounding_mode;
   1688   1.1     bjh21     if ( roundingMode == float_round_nearest_even ) {
   1689   1.1     bjh21         z += lastBitMask>>1;
   1690   1.1     bjh21         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
   1691   1.1     bjh21     }
   1692   1.1     bjh21     else if ( roundingMode != float_round_to_zero ) {
   1693   1.1     bjh21         if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
   1694   1.1     bjh21             z += roundBitsMask;
   1695   1.1     bjh21         }
   1696   1.1     bjh21     }
   1697   1.1     bjh21     z &= ~ roundBitsMask;
   1698  1.12      matt     if ( z != a ) set_float_exception_inexact_flag();
   1699   1.1     bjh21     return z;
   1700   1.1     bjh21 
   1701   1.1     bjh21 }
   1702   1.1     bjh21 #endif /* !SOFTFLOAT_FOR_GCC */
   1703   1.1     bjh21 
   1704   1.1     bjh21 /*
   1705   1.1     bjh21 -------------------------------------------------------------------------------
   1706   1.1     bjh21 Returns the result of adding the absolute values of the single-precision
   1707   1.1     bjh21 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
   1708   1.1     bjh21 before being returned.  `zSign' is ignored if the result is a NaN.
   1709   1.1     bjh21 The addition is performed according to the IEC/IEEE Standard for Binary
   1710   1.1     bjh21 Floating-Point Arithmetic.
   1711   1.1     bjh21 -------------------------------------------------------------------------------
   1712   1.1     bjh21 */
   1713   1.1     bjh21 static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
   1714   1.1     bjh21 {
   1715   1.1     bjh21     int16 aExp, bExp, zExp;
   1716   1.1     bjh21     bits32 aSig, bSig, zSig;
   1717   1.1     bjh21     int16 expDiff;
   1718   1.1     bjh21 
   1719   1.1     bjh21     aSig = extractFloat32Frac( a );
   1720   1.1     bjh21     aExp = extractFloat32Exp( a );
   1721   1.1     bjh21     bSig = extractFloat32Frac( b );
   1722   1.1     bjh21     bExp = extractFloat32Exp( b );
   1723   1.1     bjh21     expDiff = aExp - bExp;
   1724   1.1     bjh21     aSig <<= 6;
   1725   1.1     bjh21     bSig <<= 6;
   1726   1.1     bjh21     if ( 0 < expDiff ) {
   1727   1.1     bjh21         if ( aExp == 0xFF ) {
   1728   1.1     bjh21             if ( aSig ) return propagateFloat32NaN( a, b );
   1729   1.1     bjh21             return a;
   1730   1.1     bjh21         }
   1731   1.1     bjh21         if ( bExp == 0 ) {
   1732   1.1     bjh21             --expDiff;
   1733   1.1     bjh21         }
   1734   1.1     bjh21         else {
   1735   1.1     bjh21             bSig |= 0x20000000;
   1736   1.1     bjh21         }
   1737   1.1     bjh21         shift32RightJamming( bSig, expDiff, &bSig );
   1738   1.1     bjh21         zExp = aExp;
   1739   1.1     bjh21     }
   1740   1.1     bjh21     else if ( expDiff < 0 ) {
   1741   1.1     bjh21         if ( bExp == 0xFF ) {
   1742   1.1     bjh21             if ( bSig ) return propagateFloat32NaN( a, b );
   1743   1.1     bjh21             return packFloat32( zSign, 0xFF, 0 );
   1744   1.1     bjh21         }
   1745   1.1     bjh21         if ( aExp == 0 ) {
   1746   1.1     bjh21             ++expDiff;
   1747   1.1     bjh21         }
   1748   1.1     bjh21         else {
   1749   1.1     bjh21             aSig |= 0x20000000;
   1750   1.1     bjh21         }
   1751   1.1     bjh21         shift32RightJamming( aSig, - expDiff, &aSig );
   1752   1.1     bjh21         zExp = bExp;
   1753   1.1     bjh21     }
   1754   1.1     bjh21     else {
   1755   1.1     bjh21         if ( aExp == 0xFF ) {
   1756   1.1     bjh21             if ( aSig | bSig ) return propagateFloat32NaN( a, b );
   1757   1.1     bjh21             return a;
   1758   1.1     bjh21         }
   1759   1.1     bjh21         if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
   1760   1.1     bjh21         zSig = 0x40000000 + aSig + bSig;
   1761   1.1     bjh21         zExp = aExp;
   1762   1.1     bjh21         goto roundAndPack;
   1763   1.1     bjh21     }
   1764   1.1     bjh21     aSig |= 0x20000000;
   1765   1.1     bjh21     zSig = ( aSig + bSig )<<1;
   1766   1.1     bjh21     --zExp;
   1767   1.1     bjh21     if ( (sbits32) zSig < 0 ) {
   1768   1.1     bjh21         zSig = aSig + bSig;
   1769   1.1     bjh21         ++zExp;
   1770   1.1     bjh21     }
   1771   1.1     bjh21  roundAndPack:
   1772   1.1     bjh21     return roundAndPackFloat32( zSign, zExp, zSig );
   1773   1.1     bjh21 
   1774   1.1     bjh21 }
   1775   1.1     bjh21 
   1776   1.1     bjh21 /*
   1777   1.1     bjh21 -------------------------------------------------------------------------------
   1778   1.1     bjh21 Returns the result of subtracting the absolute values of the single-
   1779   1.1     bjh21 precision floating-point values `a' and `b'.  If `zSign' is 1, the
   1780   1.1     bjh21 difference is negated before being returned.  `zSign' is ignored if the
   1781   1.1     bjh21 result is a NaN.  The subtraction is performed according to the IEC/IEEE
   1782   1.1     bjh21 Standard for Binary Floating-Point Arithmetic.
   1783   1.1     bjh21 -------------------------------------------------------------------------------
   1784   1.1     bjh21 */
   1785   1.1     bjh21 static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
   1786   1.1     bjh21 {
   1787   1.1     bjh21     int16 aExp, bExp, zExp;
   1788   1.1     bjh21     bits32 aSig, bSig, zSig;
   1789   1.1     bjh21     int16 expDiff;
   1790   1.1     bjh21 
   1791   1.1     bjh21     aSig = extractFloat32Frac( a );
   1792   1.1     bjh21     aExp = extractFloat32Exp( a );
   1793   1.1     bjh21     bSig = extractFloat32Frac( b );
   1794   1.1     bjh21     bExp = extractFloat32Exp( b );
   1795   1.1     bjh21     expDiff = aExp - bExp;
   1796   1.1     bjh21     aSig <<= 7;
   1797   1.1     bjh21     bSig <<= 7;
   1798   1.1     bjh21     if ( 0 < expDiff ) goto aExpBigger;
   1799   1.1     bjh21     if ( expDiff < 0 ) goto bExpBigger;
   1800   1.1     bjh21     if ( aExp == 0xFF ) {
   1801   1.1     bjh21         if ( aSig | bSig ) return propagateFloat32NaN( a, b );
   1802   1.1     bjh21         float_raise( float_flag_invalid );
   1803   1.1     bjh21         return float32_default_nan;
   1804   1.1     bjh21     }
   1805   1.1     bjh21     if ( aExp == 0 ) {
   1806   1.1     bjh21         aExp = 1;
   1807   1.1     bjh21         bExp = 1;
   1808   1.1     bjh21     }
   1809   1.1     bjh21     if ( bSig < aSig ) goto aBigger;
   1810   1.1     bjh21     if ( aSig < bSig ) goto bBigger;
   1811   1.1     bjh21     return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
   1812   1.1     bjh21  bExpBigger:
   1813   1.1     bjh21     if ( bExp == 0xFF ) {
   1814   1.1     bjh21         if ( bSig ) return propagateFloat32NaN( a, b );
   1815   1.1     bjh21         return packFloat32( zSign ^ 1, 0xFF, 0 );
   1816   1.1     bjh21     }
   1817   1.1     bjh21     if ( aExp == 0 ) {
   1818   1.1     bjh21         ++expDiff;
   1819   1.1     bjh21     }
   1820   1.1     bjh21     else {
   1821   1.1     bjh21         aSig |= 0x40000000;
   1822   1.1     bjh21     }
   1823   1.1     bjh21     shift32RightJamming( aSig, - expDiff, &aSig );
   1824   1.1     bjh21     bSig |= 0x40000000;
   1825   1.1     bjh21  bBigger:
   1826   1.1     bjh21     zSig = bSig - aSig;
   1827   1.1     bjh21     zExp = bExp;
   1828   1.1     bjh21     zSign ^= 1;
   1829   1.1     bjh21     goto normalizeRoundAndPack;
   1830   1.1     bjh21  aExpBigger:
   1831   1.1     bjh21     if ( aExp == 0xFF ) {
   1832   1.1     bjh21         if ( aSig ) return propagateFloat32NaN( a, b );
   1833   1.1     bjh21         return a;
   1834   1.1     bjh21     }
   1835   1.1     bjh21     if ( bExp == 0 ) {
   1836   1.1     bjh21         --expDiff;
   1837   1.1     bjh21     }
   1838   1.1     bjh21     else {
   1839   1.1     bjh21         bSig |= 0x40000000;
   1840   1.1     bjh21     }
   1841   1.1     bjh21     shift32RightJamming( bSig, expDiff, &bSig );
   1842   1.1     bjh21     aSig |= 0x40000000;
   1843   1.1     bjh21  aBigger:
   1844   1.1     bjh21     zSig = aSig - bSig;
   1845   1.1     bjh21     zExp = aExp;
   1846   1.1     bjh21  normalizeRoundAndPack:
   1847   1.1     bjh21     --zExp;
   1848   1.1     bjh21     return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
   1849   1.1     bjh21 
   1850   1.1     bjh21 }
   1851   1.1     bjh21 
   1852   1.1     bjh21 /*
   1853   1.1     bjh21 -------------------------------------------------------------------------------
   1854   1.1     bjh21 Returns the result of adding the single-precision floating-point values `a'
   1855   1.1     bjh21 and `b'.  The operation is performed according to the IEC/IEEE Standard for
   1856   1.1     bjh21 Binary Floating-Point Arithmetic.
   1857   1.1     bjh21 -------------------------------------------------------------------------------
   1858   1.1     bjh21 */
   1859   1.1     bjh21 float32 float32_add( float32 a, float32 b )
   1860   1.1     bjh21 {
   1861   1.1     bjh21     flag aSign, bSign;
   1862   1.1     bjh21 
   1863   1.1     bjh21     aSign = extractFloat32Sign( a );
   1864   1.1     bjh21     bSign = extractFloat32Sign( b );
   1865   1.1     bjh21     if ( aSign == bSign ) {
   1866   1.1     bjh21         return addFloat32Sigs( a, b, aSign );
   1867   1.1     bjh21     }
   1868   1.1     bjh21     else {
   1869   1.1     bjh21         return subFloat32Sigs( a, b, aSign );
   1870   1.1     bjh21     }
   1871   1.1     bjh21 
   1872   1.1     bjh21 }
   1873   1.1     bjh21 
   1874   1.1     bjh21 /*
   1875   1.1     bjh21 -------------------------------------------------------------------------------
   1876   1.1     bjh21 Returns the result of subtracting the single-precision floating-point values
   1877   1.1     bjh21 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   1878   1.1     bjh21 for Binary Floating-Point Arithmetic.
   1879   1.1     bjh21 -------------------------------------------------------------------------------
   1880   1.1     bjh21 */
   1881   1.1     bjh21 float32 float32_sub( float32 a, float32 b )
   1882   1.1     bjh21 {
   1883   1.1     bjh21     flag aSign, bSign;
   1884   1.1     bjh21 
   1885   1.1     bjh21     aSign = extractFloat32Sign( a );
   1886   1.1     bjh21     bSign = extractFloat32Sign( b );
   1887   1.1     bjh21     if ( aSign == bSign ) {
   1888   1.1     bjh21         return subFloat32Sigs( a, b, aSign );
   1889   1.1     bjh21     }
   1890   1.1     bjh21     else {
   1891   1.1     bjh21         return addFloat32Sigs( a, b, aSign );
   1892   1.1     bjh21     }
   1893   1.1     bjh21 
   1894   1.1     bjh21 }
   1895   1.1     bjh21 
   1896   1.1     bjh21 /*
   1897   1.1     bjh21 -------------------------------------------------------------------------------
   1898   1.1     bjh21 Returns the result of multiplying the single-precision floating-point values
   1899   1.1     bjh21 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   1900   1.1     bjh21 for Binary Floating-Point Arithmetic.
   1901   1.1     bjh21 -------------------------------------------------------------------------------
   1902   1.1     bjh21 */
   1903   1.1     bjh21 float32 float32_mul( float32 a, float32 b )
   1904   1.1     bjh21 {
   1905   1.1     bjh21     flag aSign, bSign, zSign;
   1906   1.1     bjh21     int16 aExp, bExp, zExp;
   1907   1.1     bjh21     bits32 aSig, bSig;
   1908   1.1     bjh21     bits64 zSig64;
   1909   1.1     bjh21     bits32 zSig;
   1910   1.1     bjh21 
   1911   1.1     bjh21     aSig = extractFloat32Frac( a );
   1912   1.1     bjh21     aExp = extractFloat32Exp( a );
   1913   1.1     bjh21     aSign = extractFloat32Sign( a );
   1914   1.1     bjh21     bSig = extractFloat32Frac( b );
   1915   1.1     bjh21     bExp = extractFloat32Exp( b );
   1916   1.1     bjh21     bSign = extractFloat32Sign( b );
   1917   1.1     bjh21     zSign = aSign ^ bSign;
   1918   1.1     bjh21     if ( aExp == 0xFF ) {
   1919   1.1     bjh21         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
   1920   1.1     bjh21             return propagateFloat32NaN( a, b );
   1921   1.1     bjh21         }
   1922   1.1     bjh21         if ( ( bExp | bSig ) == 0 ) {
   1923   1.1     bjh21             float_raise( float_flag_invalid );
   1924   1.1     bjh21             return float32_default_nan;
   1925   1.1     bjh21         }
   1926   1.1     bjh21         return packFloat32( zSign, 0xFF, 0 );
   1927   1.1     bjh21     }
   1928   1.1     bjh21     if ( bExp == 0xFF ) {
   1929   1.1     bjh21         if ( bSig ) return propagateFloat32NaN( a, b );
   1930   1.1     bjh21         if ( ( aExp | aSig ) == 0 ) {
   1931   1.1     bjh21             float_raise( float_flag_invalid );
   1932   1.1     bjh21             return float32_default_nan;
   1933   1.1     bjh21         }
   1934   1.1     bjh21         return packFloat32( zSign, 0xFF, 0 );
   1935   1.1     bjh21     }
   1936   1.1     bjh21     if ( aExp == 0 ) {
   1937   1.1     bjh21         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
   1938   1.1     bjh21         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1939   1.1     bjh21     }
   1940   1.1     bjh21     if ( bExp == 0 ) {
   1941   1.1     bjh21         if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
   1942   1.1     bjh21         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
   1943   1.1     bjh21     }
   1944   1.1     bjh21     zExp = aExp + bExp - 0x7F;
   1945   1.1     bjh21     aSig = ( aSig | 0x00800000 )<<7;
   1946   1.1     bjh21     bSig = ( bSig | 0x00800000 )<<8;
   1947   1.1     bjh21     shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
   1948  1.10  christos     zSig = (bits32)zSig64;
   1949   1.1     bjh21     if ( 0 <= (sbits32) ( zSig<<1 ) ) {
   1950   1.1     bjh21         zSig <<= 1;
   1951   1.1     bjh21         --zExp;
   1952   1.1     bjh21     }
   1953   1.1     bjh21     return roundAndPackFloat32( zSign, zExp, zSig );
   1954   1.1     bjh21 
   1955   1.1     bjh21 }
   1956   1.1     bjh21 
   1957   1.1     bjh21 /*
   1958   1.1     bjh21 -------------------------------------------------------------------------------
   1959   1.1     bjh21 Returns the result of dividing the single-precision floating-point value `a'
   1960   1.1     bjh21 by the corresponding value `b'.  The operation is performed according to the
   1961   1.1     bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1962   1.1     bjh21 -------------------------------------------------------------------------------
   1963   1.1     bjh21 */
   1964   1.1     bjh21 float32 float32_div( float32 a, float32 b )
   1965   1.1     bjh21 {
   1966   1.1     bjh21     flag aSign, bSign, zSign;
   1967   1.1     bjh21     int16 aExp, bExp, zExp;
   1968   1.1     bjh21     bits32 aSig, bSig, zSig;
   1969   1.1     bjh21 
   1970   1.1     bjh21     aSig = extractFloat32Frac( a );
   1971   1.1     bjh21     aExp = extractFloat32Exp( a );
   1972   1.1     bjh21     aSign = extractFloat32Sign( a );
   1973   1.1     bjh21     bSig = extractFloat32Frac( b );
   1974   1.1     bjh21     bExp = extractFloat32Exp( b );
   1975   1.1     bjh21     bSign = extractFloat32Sign( b );
   1976   1.1     bjh21     zSign = aSign ^ bSign;
   1977   1.1     bjh21     if ( aExp == 0xFF ) {
   1978   1.1     bjh21         if ( aSig ) return propagateFloat32NaN( a, b );
   1979   1.1     bjh21         if ( bExp == 0xFF ) {
   1980   1.1     bjh21             if ( bSig ) return propagateFloat32NaN( a, b );
   1981   1.1     bjh21             float_raise( float_flag_invalid );
   1982   1.1     bjh21             return float32_default_nan;
   1983   1.1     bjh21         }
   1984   1.1     bjh21         return packFloat32( zSign, 0xFF, 0 );
   1985   1.1     bjh21     }
   1986   1.1     bjh21     if ( bExp == 0xFF ) {
   1987   1.1     bjh21         if ( bSig ) return propagateFloat32NaN( a, b );
   1988   1.1     bjh21         return packFloat32( zSign, 0, 0 );
   1989   1.1     bjh21     }
   1990   1.1     bjh21     if ( bExp == 0 ) {
   1991   1.1     bjh21         if ( bSig == 0 ) {
   1992   1.1     bjh21             if ( ( aExp | aSig ) == 0 ) {
   1993   1.1     bjh21                 float_raise( float_flag_invalid );
   1994   1.1     bjh21                 return float32_default_nan;
   1995   1.1     bjh21             }
   1996   1.1     bjh21             float_raise( float_flag_divbyzero );
   1997   1.1     bjh21             return packFloat32( zSign, 0xFF, 0 );
   1998   1.1     bjh21         }
   1999   1.1     bjh21         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
   2000   1.1     bjh21     }
   2001   1.1     bjh21     if ( aExp == 0 ) {
   2002   1.1     bjh21         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
   2003   1.1     bjh21         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   2004   1.1     bjh21     }
   2005   1.1     bjh21     zExp = aExp - bExp + 0x7D;
   2006   1.1     bjh21     aSig = ( aSig | 0x00800000 )<<7;
   2007   1.1     bjh21     bSig = ( bSig | 0x00800000 )<<8;
   2008   1.1     bjh21     if ( bSig <= ( aSig + aSig ) ) {
   2009   1.1     bjh21         aSig >>= 1;
   2010   1.1     bjh21         ++zExp;
   2011   1.1     bjh21     }
   2012  1.11      matt     zSig = (bits32)((((bits64) aSig) << 32) / bSig);
   2013   1.1     bjh21     if ( ( zSig & 0x3F ) == 0 ) {
   2014   1.1     bjh21         zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
   2015   1.1     bjh21     }
   2016   1.1     bjh21     return roundAndPackFloat32( zSign, zExp, zSig );
   2017   1.1     bjh21 
   2018   1.1     bjh21 }
   2019   1.1     bjh21 
   2020   1.1     bjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   2021   1.1     bjh21 /*
   2022   1.1     bjh21 -------------------------------------------------------------------------------
   2023   1.1     bjh21 Returns the remainder of the single-precision floating-point value `a'
   2024   1.1     bjh21 with respect to the corresponding value `b'.  The operation is performed
   2025   1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2026   1.1     bjh21 -------------------------------------------------------------------------------
   2027   1.1     bjh21 */
   2028   1.1     bjh21 float32 float32_rem( float32 a, float32 b )
   2029   1.1     bjh21 {
   2030   1.1     bjh21     flag aSign, bSign, zSign;
   2031   1.1     bjh21     int16 aExp, bExp, expDiff;
   2032   1.1     bjh21     bits32 aSig, bSig;
   2033   1.1     bjh21     bits32 q;
   2034   1.1     bjh21     bits64 aSig64, bSig64, q64;
   2035   1.1     bjh21     bits32 alternateASig;
   2036   1.1     bjh21     sbits32 sigMean;
   2037   1.1     bjh21 
   2038   1.1     bjh21     aSig = extractFloat32Frac( a );
   2039   1.1     bjh21     aExp = extractFloat32Exp( a );
   2040   1.1     bjh21     aSign = extractFloat32Sign( a );
   2041   1.1     bjh21     bSig = extractFloat32Frac( b );
   2042   1.1     bjh21     bExp = extractFloat32Exp( b );
   2043   1.1     bjh21     bSign = extractFloat32Sign( b );
   2044   1.1     bjh21     if ( aExp == 0xFF ) {
   2045   1.1     bjh21         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
   2046   1.1     bjh21             return propagateFloat32NaN( a, b );
   2047   1.1     bjh21         }
   2048   1.1     bjh21         float_raise( float_flag_invalid );
   2049   1.1     bjh21         return float32_default_nan;
   2050   1.1     bjh21     }
   2051   1.1     bjh21     if ( bExp == 0xFF ) {
   2052   1.1     bjh21         if ( bSig ) return propagateFloat32NaN( a, b );
   2053   1.1     bjh21         return a;
   2054   1.1     bjh21     }
   2055   1.1     bjh21     if ( bExp == 0 ) {
   2056   1.1     bjh21         if ( bSig == 0 ) {
   2057   1.1     bjh21             float_raise( float_flag_invalid );
   2058   1.1     bjh21             return float32_default_nan;
   2059   1.1     bjh21         }
   2060   1.1     bjh21         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
   2061   1.1     bjh21     }
   2062   1.1     bjh21     if ( aExp == 0 ) {
   2063   1.1     bjh21         if ( aSig == 0 ) return a;
   2064   1.1     bjh21         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   2065   1.1     bjh21     }
   2066   1.1     bjh21     expDiff = aExp - bExp;
   2067   1.1     bjh21     aSig |= 0x00800000;
   2068   1.1     bjh21     bSig |= 0x00800000;
   2069   1.1     bjh21     if ( expDiff < 32 ) {
   2070   1.1     bjh21         aSig <<= 8;
   2071   1.1     bjh21         bSig <<= 8;
   2072   1.1     bjh21         if ( expDiff < 0 ) {
   2073   1.1     bjh21             if ( expDiff < -1 ) return a;
   2074   1.1     bjh21             aSig >>= 1;
   2075   1.1     bjh21         }
   2076   1.1     bjh21         q = ( bSig <= aSig );
   2077   1.1     bjh21         if ( q ) aSig -= bSig;
   2078   1.1     bjh21         if ( 0 < expDiff ) {
   2079   1.1     bjh21             q = ( ( (bits64) aSig )<<32 ) / bSig;
   2080   1.1     bjh21             q >>= 32 - expDiff;
   2081   1.1     bjh21             bSig >>= 2;
   2082   1.1     bjh21             aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
   2083   1.1     bjh21         }
   2084   1.1     bjh21         else {
   2085   1.1     bjh21             aSig >>= 2;
   2086   1.1     bjh21             bSig >>= 2;
   2087   1.1     bjh21         }
   2088   1.1     bjh21     }
   2089   1.1     bjh21     else {
   2090   1.1     bjh21         if ( bSig <= aSig ) aSig -= bSig;
   2091   1.1     bjh21         aSig64 = ( (bits64) aSig )<<40;
   2092   1.1     bjh21         bSig64 = ( (bits64) bSig )<<40;
   2093   1.1     bjh21         expDiff -= 64;
   2094   1.1     bjh21         while ( 0 < expDiff ) {
   2095   1.1     bjh21             q64 = estimateDiv128To64( aSig64, 0, bSig64 );
   2096   1.1     bjh21             q64 = ( 2 < q64 ) ? q64 - 2 : 0;
   2097   1.1     bjh21             aSig64 = - ( ( bSig * q64 )<<38 );
   2098   1.1     bjh21             expDiff -= 62;
   2099   1.1     bjh21         }
   2100   1.1     bjh21         expDiff += 64;
   2101   1.1     bjh21         q64 = estimateDiv128To64( aSig64, 0, bSig64 );
   2102   1.1     bjh21         q64 = ( 2 < q64 ) ? q64 - 2 : 0;
   2103   1.1     bjh21         q = q64>>( 64 - expDiff );
   2104   1.1     bjh21         bSig <<= 6;
   2105   1.1     bjh21         aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
   2106   1.1     bjh21     }
   2107   1.1     bjh21     do {
   2108   1.1     bjh21         alternateASig = aSig;
   2109   1.1     bjh21         ++q;
   2110   1.1     bjh21         aSig -= bSig;
   2111   1.1     bjh21     } while ( 0 <= (sbits32) aSig );
   2112   1.1     bjh21     sigMean = aSig + alternateASig;
   2113   1.1     bjh21     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
   2114   1.1     bjh21         aSig = alternateASig;
   2115   1.1     bjh21     }
   2116   1.1     bjh21     zSign = ( (sbits32) aSig < 0 );
   2117   1.1     bjh21     if ( zSign ) aSig = - aSig;
   2118   1.1     bjh21     return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
   2119   1.1     bjh21 
   2120   1.1     bjh21 }
   2121   1.1     bjh21 #endif /* !SOFTFLOAT_FOR_GCC */
   2122   1.1     bjh21 
   2123   1.1     bjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   2124   1.1     bjh21 /*
   2125   1.1     bjh21 -------------------------------------------------------------------------------
   2126   1.1     bjh21 Returns the square root of the single-precision floating-point value `a'.
   2127   1.1     bjh21 The operation is performed according to the IEC/IEEE Standard for Binary
   2128   1.1     bjh21 Floating-Point Arithmetic.
   2129   1.1     bjh21 -------------------------------------------------------------------------------
   2130   1.1     bjh21 */
   2131   1.1     bjh21 float32 float32_sqrt( float32 a )
   2132   1.1     bjh21 {
   2133   1.1     bjh21     flag aSign;
   2134   1.1     bjh21     int16 aExp, zExp;
   2135   1.1     bjh21     bits32 aSig, zSig;
   2136   1.1     bjh21     bits64 rem, term;
   2137   1.1     bjh21 
   2138   1.1     bjh21     aSig = extractFloat32Frac( a );
   2139   1.1     bjh21     aExp = extractFloat32Exp( a );
   2140   1.1     bjh21     aSign = extractFloat32Sign( a );
   2141   1.1     bjh21     if ( aExp == 0xFF ) {
   2142   1.1     bjh21         if ( aSig ) return propagateFloat32NaN( a, 0 );
   2143   1.1     bjh21         if ( ! aSign ) return a;
   2144   1.1     bjh21         float_raise( float_flag_invalid );
   2145   1.1     bjh21         return float32_default_nan;
   2146   1.1     bjh21     }
   2147   1.1     bjh21     if ( aSign ) {
   2148   1.1     bjh21         if ( ( aExp | aSig ) == 0 ) return a;
   2149   1.1     bjh21         float_raise( float_flag_invalid );
   2150   1.1     bjh21         return float32_default_nan;
   2151   1.1     bjh21     }
   2152   1.1     bjh21     if ( aExp == 0 ) {
   2153   1.1     bjh21         if ( aSig == 0 ) return 0;
   2154   1.1     bjh21         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   2155   1.1     bjh21     }
   2156   1.1     bjh21     zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
   2157   1.1     bjh21     aSig = ( aSig | 0x00800000 )<<8;
   2158   1.1     bjh21     zSig = estimateSqrt32( aExp, aSig ) + 2;
   2159   1.1     bjh21     if ( ( zSig & 0x7F ) <= 5 ) {
   2160   1.1     bjh21         if ( zSig < 2 ) {
   2161   1.1     bjh21             zSig = 0x7FFFFFFF;
   2162   1.1     bjh21             goto roundAndPack;
   2163   1.1     bjh21         }
   2164   1.1     bjh21         aSig >>= aExp & 1;
   2165   1.1     bjh21         term = ( (bits64) zSig ) * zSig;
   2166   1.1     bjh21         rem = ( ( (bits64) aSig )<<32 ) - term;
   2167   1.1     bjh21         while ( (sbits64) rem < 0 ) {
   2168   1.1     bjh21             --zSig;
   2169   1.1     bjh21             rem += ( ( (bits64) zSig )<<1 ) | 1;
   2170   1.1     bjh21         }
   2171   1.1     bjh21         zSig |= ( rem != 0 );
   2172   1.1     bjh21     }
   2173   1.1     bjh21     shift32RightJamming( zSig, 1, &zSig );
   2174   1.1     bjh21  roundAndPack:
   2175   1.1     bjh21     return roundAndPackFloat32( 0, zExp, zSig );
   2176   1.1     bjh21 
   2177   1.1     bjh21 }
   2178   1.1     bjh21 #endif /* !SOFTFLOAT_FOR_GCC */
   2179   1.1     bjh21 
   2180   1.1     bjh21 /*
   2181   1.1     bjh21 -------------------------------------------------------------------------------
   2182   1.1     bjh21 Returns 1 if the single-precision floating-point value `a' is equal to
   2183   1.1     bjh21 the corresponding value `b', and 0 otherwise.  The comparison is performed
   2184   1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2185   1.1     bjh21 -------------------------------------------------------------------------------
   2186   1.1     bjh21 */
   2187   1.1     bjh21 flag float32_eq( float32 a, float32 b )
   2188   1.1     bjh21 {
   2189   1.1     bjh21 
   2190   1.1     bjh21     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2191   1.1     bjh21          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2192   1.1     bjh21        ) {
   2193   1.1     bjh21         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
   2194   1.1     bjh21             float_raise( float_flag_invalid );
   2195   1.1     bjh21         }
   2196   1.1     bjh21         return 0;
   2197   1.1     bjh21     }
   2198   1.1     bjh21     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
   2199   1.1     bjh21 
   2200   1.1     bjh21 }
   2201   1.1     bjh21 
   2202   1.1     bjh21 /*
   2203   1.1     bjh21 -------------------------------------------------------------------------------
   2204   1.1     bjh21 Returns 1 if the single-precision floating-point value `a' is less than
   2205   1.1     bjh21 or equal to the corresponding value `b', and 0 otherwise.  The comparison
   2206   1.1     bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   2207   1.1     bjh21 Arithmetic.
   2208   1.1     bjh21 -------------------------------------------------------------------------------
   2209   1.1     bjh21 */
   2210   1.1     bjh21 flag float32_le( float32 a, float32 b )
   2211   1.1     bjh21 {
   2212   1.1     bjh21     flag aSign, bSign;
   2213   1.1     bjh21 
   2214   1.1     bjh21     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2215   1.1     bjh21          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2216   1.1     bjh21        ) {
   2217   1.1     bjh21         float_raise( float_flag_invalid );
   2218   1.1     bjh21         return 0;
   2219   1.1     bjh21     }
   2220   1.1     bjh21     aSign = extractFloat32Sign( a );
   2221   1.1     bjh21     bSign = extractFloat32Sign( b );
   2222   1.1     bjh21     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
   2223   1.1     bjh21     return ( a == b ) || ( aSign ^ ( a < b ) );
   2224   1.1     bjh21 
   2225   1.1     bjh21 }
   2226   1.1     bjh21 
   2227   1.1     bjh21 /*
   2228   1.1     bjh21 -------------------------------------------------------------------------------
   2229   1.1     bjh21 Returns 1 if the single-precision floating-point value `a' is less than
   2230   1.1     bjh21 the corresponding value `b', and 0 otherwise.  The comparison is performed
   2231   1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2232   1.1     bjh21 -------------------------------------------------------------------------------
   2233   1.1     bjh21 */
   2234   1.1     bjh21 flag float32_lt( float32 a, float32 b )
   2235   1.1     bjh21 {
   2236   1.1     bjh21     flag aSign, bSign;
   2237   1.1     bjh21 
   2238   1.1     bjh21     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2239   1.1     bjh21          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2240   1.1     bjh21        ) {
   2241   1.1     bjh21         float_raise( float_flag_invalid );
   2242   1.1     bjh21         return 0;
   2243   1.1     bjh21     }
   2244   1.1     bjh21     aSign = extractFloat32Sign( a );
   2245   1.1     bjh21     bSign = extractFloat32Sign( b );
   2246   1.1     bjh21     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
   2247   1.1     bjh21     return ( a != b ) && ( aSign ^ ( a < b ) );
   2248   1.1     bjh21 
   2249   1.1     bjh21 }
   2250   1.1     bjh21 
   2251   1.1     bjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   2252   1.1     bjh21 /*
   2253   1.1     bjh21 -------------------------------------------------------------------------------
   2254   1.1     bjh21 Returns 1 if the single-precision floating-point value `a' is equal to
   2255   1.1     bjh21 the corresponding value `b', and 0 otherwise.  The invalid exception is
   2256   1.1     bjh21 raised if either operand is a NaN.  Otherwise, the comparison is performed
   2257   1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2258   1.1     bjh21 -------------------------------------------------------------------------------
   2259   1.1     bjh21 */
   2260   1.1     bjh21 flag float32_eq_signaling( float32 a, float32 b )
   2261   1.1     bjh21 {
   2262   1.1     bjh21 
   2263   1.1     bjh21     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2264   1.1     bjh21          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2265   1.1     bjh21        ) {
   2266   1.1     bjh21         float_raise( float_flag_invalid );
   2267   1.1     bjh21         return 0;
   2268   1.1     bjh21     }
   2269   1.1     bjh21     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
   2270   1.1     bjh21 
   2271   1.1     bjh21 }
   2272   1.1     bjh21 
   2273   1.1     bjh21 /*
   2274   1.1     bjh21 -------------------------------------------------------------------------------
   2275   1.1     bjh21 Returns 1 if the single-precision floating-point value `a' is less than or
   2276   1.1     bjh21 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
   2277   1.1     bjh21 cause an exception.  Otherwise, the comparison is performed according to the
   2278   1.1     bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2279   1.1     bjh21 -------------------------------------------------------------------------------
   2280   1.1     bjh21 */
   2281   1.1     bjh21 flag float32_le_quiet( float32 a, float32 b )
   2282   1.1     bjh21 {
   2283   1.1     bjh21     flag aSign, bSign;
   2284   1.1     bjh21 
   2285   1.1     bjh21     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2286   1.1     bjh21          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2287   1.1     bjh21        ) {
   2288   1.1     bjh21         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
   2289   1.1     bjh21             float_raise( float_flag_invalid );
   2290   1.1     bjh21         }
   2291   1.1     bjh21         return 0;
   2292   1.1     bjh21     }
   2293   1.1     bjh21     aSign = extractFloat32Sign( a );
   2294   1.1     bjh21     bSign = extractFloat32Sign( b );
   2295   1.1     bjh21     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
   2296   1.1     bjh21     return ( a == b ) || ( aSign ^ ( a < b ) );
   2297   1.1     bjh21 
   2298   1.1     bjh21 }
   2299   1.1     bjh21 
   2300   1.1     bjh21 /*
   2301   1.1     bjh21 -------------------------------------------------------------------------------
   2302   1.1     bjh21 Returns 1 if the single-precision floating-point value `a' is less than
   2303   1.1     bjh21 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
   2304   1.1     bjh21 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
   2305   1.1     bjh21 Standard for Binary Floating-Point Arithmetic.
   2306   1.1     bjh21 -------------------------------------------------------------------------------
   2307   1.1     bjh21 */
   2308   1.1     bjh21 flag float32_lt_quiet( float32 a, float32 b )
   2309   1.1     bjh21 {
   2310   1.1     bjh21     flag aSign, bSign;
   2311   1.1     bjh21 
   2312   1.1     bjh21     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   2313   1.1     bjh21          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   2314   1.1     bjh21        ) {
   2315   1.1     bjh21         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
   2316   1.1     bjh21             float_raise( float_flag_invalid );
   2317   1.1     bjh21         }
   2318   1.1     bjh21         return 0;
   2319   1.1     bjh21     }
   2320   1.1     bjh21     aSign = extractFloat32Sign( a );
   2321   1.1     bjh21     bSign = extractFloat32Sign( b );
   2322   1.1     bjh21     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
   2323   1.1     bjh21     return ( a != b ) && ( aSign ^ ( a < b ) );
   2324   1.1     bjh21 
   2325   1.1     bjh21 }
   2326   1.1     bjh21 #endif /* !SOFTFLOAT_FOR_GCC */
   2327   1.1     bjh21 
   2328   1.1     bjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   2329   1.1     bjh21 /*
   2330   1.1     bjh21 -------------------------------------------------------------------------------
   2331   1.1     bjh21 Returns the result of converting the double-precision floating-point value
   2332   1.1     bjh21 `a' to the 32-bit two's complement integer format.  The conversion is
   2333   1.1     bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
   2334   1.1     bjh21 Arithmetic---which means in particular that the conversion is rounded
   2335   1.1     bjh21 according to the current rounding mode.  If `a' is a NaN, the largest
   2336   1.1     bjh21 positive integer is returned.  Otherwise, if the conversion overflows, the
   2337   1.1     bjh21 largest integer with the same sign as `a' is returned.
   2338   1.1     bjh21 -------------------------------------------------------------------------------
   2339   1.1     bjh21 */
   2340   1.1     bjh21 int32 float64_to_int32( float64 a )
   2341   1.1     bjh21 {
   2342   1.1     bjh21     flag aSign;
   2343   1.1     bjh21     int16 aExp, shiftCount;
   2344   1.1     bjh21     bits64 aSig;
   2345   1.1     bjh21 
   2346   1.1     bjh21     aSig = extractFloat64Frac( a );
   2347   1.1     bjh21     aExp = extractFloat64Exp( a );
   2348   1.1     bjh21     aSign = extractFloat64Sign( a );
   2349   1.1     bjh21     if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
   2350   1.1     bjh21     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
   2351   1.1     bjh21     shiftCount = 0x42C - aExp;
   2352   1.1     bjh21     if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
   2353   1.1     bjh21     return roundAndPackInt32( aSign, aSig );
   2354   1.1     bjh21 
   2355   1.1     bjh21 }
   2356   1.1     bjh21 #endif /* !SOFTFLOAT_FOR_GCC */
   2357   1.1     bjh21 
   2358   1.1     bjh21 /*
   2359   1.1     bjh21 -------------------------------------------------------------------------------
   2360   1.1     bjh21 Returns the result of converting the double-precision floating-point value
   2361   1.1     bjh21 `a' to the 32-bit two's complement integer format.  The conversion is
   2362   1.1     bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
   2363   1.1     bjh21 Arithmetic, except that the conversion is always rounded toward zero.
   2364   1.1     bjh21 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
   2365   1.1     bjh21 the conversion overflows, the largest integer with the same sign as `a' is
   2366   1.1     bjh21 returned.
   2367   1.1     bjh21 -------------------------------------------------------------------------------
   2368   1.1     bjh21 */
   2369   1.1     bjh21 int32 float64_to_int32_round_to_zero( float64 a )
   2370   1.1     bjh21 {
   2371   1.1     bjh21     flag aSign;
   2372   1.1     bjh21     int16 aExp, shiftCount;
   2373   1.1     bjh21     bits64 aSig, savedASig;
   2374   1.1     bjh21     int32 z;
   2375   1.1     bjh21 
   2376   1.1     bjh21     aSig = extractFloat64Frac( a );
   2377   1.1     bjh21     aExp = extractFloat64Exp( a );
   2378   1.1     bjh21     aSign = extractFloat64Sign( a );
   2379   1.1     bjh21     if ( 0x41E < aExp ) {
   2380   1.1     bjh21         if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
   2381   1.1     bjh21         goto invalid;
   2382   1.1     bjh21     }
   2383   1.1     bjh21     else if ( aExp < 0x3FF ) {
   2384  1.12      matt         if ( aExp || aSig ) set_float_exception_inexact_flag();
   2385   1.1     bjh21         return 0;
   2386   1.1     bjh21     }
   2387   1.1     bjh21     aSig |= LIT64( 0x0010000000000000 );
   2388   1.1     bjh21     shiftCount = 0x433 - aExp;
   2389   1.1     bjh21     savedASig = aSig;
   2390   1.1     bjh21     aSig >>= shiftCount;
   2391  1.10  christos     z = (int32)aSig;
   2392   1.1     bjh21     if ( aSign ) z = - z;
   2393   1.1     bjh21     if ( ( z < 0 ) ^ aSign ) {
   2394   1.1     bjh21  invalid:
   2395   1.1     bjh21         float_raise( float_flag_invalid );
   2396   1.1     bjh21         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
   2397   1.1     bjh21     }
   2398   1.1     bjh21     if ( ( aSig<<shiftCount ) != savedASig ) {
   2399  1.12      matt         set_float_exception_inexact_flag();
   2400   1.1     bjh21     }
   2401   1.1     bjh21     return z;
   2402   1.1     bjh21 
   2403   1.1     bjh21 }
   2404   1.1     bjh21 
   2405   1.1     bjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   2406   1.1     bjh21 /*
   2407   1.1     bjh21 -------------------------------------------------------------------------------
   2408   1.1     bjh21 Returns the result of converting the double-precision floating-point value
   2409   1.1     bjh21 `a' to the 64-bit two's complement integer format.  The conversion is
   2410   1.1     bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
   2411   1.1     bjh21 Arithmetic---which means in particular that the conversion is rounded
   2412   1.1     bjh21 according to the current rounding mode.  If `a' is a NaN, the largest
   2413   1.1     bjh21 positive integer is returned.  Otherwise, if the conversion overflows, the
   2414   1.1     bjh21 largest integer with the same sign as `a' is returned.
   2415   1.1     bjh21 -------------------------------------------------------------------------------
   2416   1.1     bjh21 */
   2417   1.1     bjh21 int64 float64_to_int64( float64 a )
   2418   1.1     bjh21 {
   2419   1.1     bjh21     flag aSign;
   2420   1.1     bjh21     int16 aExp, shiftCount;
   2421   1.1     bjh21     bits64 aSig, aSigExtra;
   2422   1.1     bjh21 
   2423   1.1     bjh21     aSig = extractFloat64Frac( a );
   2424   1.1     bjh21     aExp = extractFloat64Exp( a );
   2425   1.1     bjh21     aSign = extractFloat64Sign( a );
   2426   1.1     bjh21     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
   2427   1.1     bjh21     shiftCount = 0x433 - aExp;
   2428   1.1     bjh21     if ( shiftCount <= 0 ) {
   2429   1.1     bjh21         if ( 0x43E < aExp ) {
   2430   1.1     bjh21             float_raise( float_flag_invalid );
   2431   1.1     bjh21             if (    ! aSign
   2432   1.1     bjh21                  || (    ( aExp == 0x7FF )
   2433   1.1     bjh21                       && ( aSig != LIT64( 0x0010000000000000 ) ) )
   2434   1.1     bjh21                ) {
   2435   1.1     bjh21                 return LIT64( 0x7FFFFFFFFFFFFFFF );
   2436   1.1     bjh21             }
   2437   1.1     bjh21             return (sbits64) LIT64( 0x8000000000000000 );
   2438   1.1     bjh21         }
   2439   1.1     bjh21         aSigExtra = 0;
   2440   1.1     bjh21         aSig <<= - shiftCount;
   2441   1.1     bjh21     }
   2442   1.1     bjh21     else {
   2443   1.1     bjh21         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
   2444   1.1     bjh21     }
   2445   1.1     bjh21     return roundAndPackInt64( aSign, aSig, aSigExtra );
   2446   1.1     bjh21 
   2447   1.1     bjh21 }
   2448   1.1     bjh21 
   2449   1.1     bjh21 /*
   2450   1.1     bjh21 -------------------------------------------------------------------------------
   2451   1.1     bjh21 Returns the result of converting the double-precision floating-point value
   2452   1.1     bjh21 `a' to the 64-bit two's complement integer format.  The conversion is
   2453   1.1     bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
   2454   1.1     bjh21 Arithmetic, except that the conversion is always rounded toward zero.
   2455   1.1     bjh21 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
   2456   1.1     bjh21 the conversion overflows, the largest integer with the same sign as `a' is
   2457   1.1     bjh21 returned.
   2458   1.1     bjh21 -------------------------------------------------------------------------------
   2459   1.1     bjh21 */
   2460   1.1     bjh21 int64 float64_to_int64_round_to_zero( float64 a )
   2461   1.1     bjh21 {
   2462   1.1     bjh21     flag aSign;
   2463   1.1     bjh21     int16 aExp, shiftCount;
   2464   1.1     bjh21     bits64 aSig;
   2465   1.1     bjh21     int64 z;
   2466   1.1     bjh21 
   2467   1.1     bjh21     aSig = extractFloat64Frac( a );
   2468   1.1     bjh21     aExp = extractFloat64Exp( a );
   2469   1.1     bjh21     aSign = extractFloat64Sign( a );
   2470   1.1     bjh21     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
   2471   1.1     bjh21     shiftCount = aExp - 0x433;
   2472   1.1     bjh21     if ( 0 <= shiftCount ) {
   2473   1.1     bjh21         if ( 0x43E <= aExp ) {
   2474   1.1     bjh21             if ( a != LIT64( 0xC3E0000000000000 ) ) {
   2475   1.1     bjh21                 float_raise( float_flag_invalid );
   2476   1.1     bjh21                 if (    ! aSign
   2477   1.1     bjh21                      || (    ( aExp == 0x7FF )
   2478   1.1     bjh21                           && ( aSig != LIT64( 0x0010000000000000 ) ) )
   2479   1.1     bjh21                    ) {
   2480   1.1     bjh21                     return LIT64( 0x7FFFFFFFFFFFFFFF );
   2481   1.1     bjh21                 }
   2482   1.1     bjh21             }
   2483   1.1     bjh21             return (sbits64) LIT64( 0x8000000000000000 );
   2484   1.1     bjh21         }
   2485   1.1     bjh21         z = aSig<<shiftCount;
   2486   1.1     bjh21     }
   2487   1.1     bjh21     else {
   2488   1.1     bjh21         if ( aExp < 0x3FE ) {
   2489  1.12      matt             if ( aExp | aSig ) set_float_exception_inexact_flag();
   2490   1.1     bjh21             return 0;
   2491   1.1     bjh21         }
   2492   1.1     bjh21         z = aSig>>( - shiftCount );
   2493   1.1     bjh21         if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
   2494  1.12      matt             set_float_exception_inexact_flag();
   2495   1.1     bjh21         }
   2496   1.1     bjh21     }
   2497   1.1     bjh21     if ( aSign ) z = - z;
   2498   1.1     bjh21     return z;
   2499   1.1     bjh21 
   2500   1.1     bjh21 }
   2501   1.1     bjh21 #endif /* !SOFTFLOAT_FOR_GCC */
   2502   1.1     bjh21 
   2503   1.1     bjh21 /*
   2504   1.1     bjh21 -------------------------------------------------------------------------------
   2505   1.1     bjh21 Returns the result of converting the double-precision floating-point value
   2506   1.1     bjh21 `a' to the single-precision floating-point format.  The conversion is
   2507   1.1     bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
   2508   1.1     bjh21 Arithmetic.
   2509   1.1     bjh21 -------------------------------------------------------------------------------
   2510   1.1     bjh21 */
   2511   1.1     bjh21 float32 float64_to_float32( float64 a )
   2512   1.1     bjh21 {
   2513   1.1     bjh21     flag aSign;
   2514   1.1     bjh21     int16 aExp;
   2515   1.1     bjh21     bits64 aSig;
   2516   1.1     bjh21     bits32 zSig;
   2517   1.1     bjh21 
   2518   1.1     bjh21     aSig = extractFloat64Frac( a );
   2519   1.1     bjh21     aExp = extractFloat64Exp( a );
   2520   1.1     bjh21     aSign = extractFloat64Sign( a );
   2521   1.1     bjh21     if ( aExp == 0x7FF ) {
   2522   1.1     bjh21         if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
   2523   1.1     bjh21         return packFloat32( aSign, 0xFF, 0 );
   2524   1.1     bjh21     }
   2525   1.1     bjh21     shift64RightJamming( aSig, 22, &aSig );
   2526  1.10  christos     zSig = (bits32)aSig;
   2527   1.1     bjh21     if ( aExp || zSig ) {
   2528   1.1     bjh21         zSig |= 0x40000000;
   2529   1.1     bjh21         aExp -= 0x381;
   2530   1.1     bjh21     }
   2531   1.1     bjh21     return roundAndPackFloat32( aSign, aExp, zSig );
   2532   1.1     bjh21 
   2533   1.1     bjh21 }
   2534   1.1     bjh21 
   2535   1.1     bjh21 #ifdef FLOATX80
   2536   1.1     bjh21 
   2537   1.1     bjh21 /*
   2538   1.1     bjh21 -------------------------------------------------------------------------------
   2539   1.1     bjh21 Returns the result of converting the double-precision floating-point value
   2540   1.1     bjh21 `a' to the extended double-precision floating-point format.  The conversion
   2541   1.1     bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   2542   1.1     bjh21 Arithmetic.
   2543   1.1     bjh21 -------------------------------------------------------------------------------
   2544   1.1     bjh21 */
   2545   1.1     bjh21 floatx80 float64_to_floatx80( float64 a )
   2546   1.1     bjh21 {
   2547   1.1     bjh21     flag aSign;
   2548   1.1     bjh21     int16 aExp;
   2549   1.1     bjh21     bits64 aSig;
   2550   1.1     bjh21 
   2551   1.1     bjh21     aSig = extractFloat64Frac( a );
   2552   1.1     bjh21     aExp = extractFloat64Exp( a );
   2553   1.1     bjh21     aSign = extractFloat64Sign( a );
   2554   1.1     bjh21     if ( aExp == 0x7FF ) {
   2555   1.1     bjh21         if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
   2556   1.1     bjh21         return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   2557   1.1     bjh21     }
   2558   1.1     bjh21     if ( aExp == 0 ) {
   2559   1.1     bjh21         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
   2560   1.1     bjh21         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   2561   1.1     bjh21     }
   2562   1.1     bjh21     return
   2563   1.1     bjh21         packFloatx80(
   2564   1.1     bjh21             aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
   2565   1.1     bjh21 
   2566   1.1     bjh21 }
   2567   1.1     bjh21 
   2568   1.1     bjh21 #endif
   2569   1.1     bjh21 
   2570   1.1     bjh21 #ifdef FLOAT128
   2571   1.1     bjh21 
   2572   1.1     bjh21 /*
   2573   1.1     bjh21 -------------------------------------------------------------------------------
   2574   1.1     bjh21 Returns the result of converting the double-precision floating-point value
   2575   1.1     bjh21 `a' to the quadruple-precision floating-point format.  The conversion is
   2576   1.1     bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
   2577   1.1     bjh21 Arithmetic.
   2578   1.1     bjh21 -------------------------------------------------------------------------------
   2579   1.1     bjh21 */
   2580   1.1     bjh21 float128 float64_to_float128( float64 a )
   2581   1.1     bjh21 {
   2582   1.1     bjh21     flag aSign;
   2583   1.1     bjh21     int16 aExp;
   2584   1.1     bjh21     bits64 aSig, zSig0, zSig1;
   2585   1.1     bjh21 
   2586   1.1     bjh21     aSig = extractFloat64Frac( a );
   2587   1.1     bjh21     aExp = extractFloat64Exp( a );
   2588   1.1     bjh21     aSign = extractFloat64Sign( a );
   2589   1.1     bjh21     if ( aExp == 0x7FF ) {
   2590   1.1     bjh21         if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) );
   2591   1.1     bjh21         return packFloat128( aSign, 0x7FFF, 0, 0 );
   2592   1.1     bjh21     }
   2593   1.1     bjh21     if ( aExp == 0 ) {
   2594   1.1     bjh21         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
   2595   1.1     bjh21         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   2596   1.1     bjh21         --aExp;
   2597   1.1     bjh21     }
   2598   1.1     bjh21     shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
   2599   1.1     bjh21     return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
   2600   1.1     bjh21 
   2601   1.1     bjh21 }
   2602   1.1     bjh21 
   2603   1.1     bjh21 #endif
   2604   1.1     bjh21 
   2605   1.1     bjh21 #ifndef SOFTFLOAT_FOR_GCC
   2606   1.1     bjh21 /*
   2607   1.1     bjh21 -------------------------------------------------------------------------------
   2608   1.1     bjh21 Rounds the double-precision floating-point value `a' to an integer, and
   2609   1.1     bjh21 returns the result as a double-precision floating-point value.  The
   2610   1.1     bjh21 operation is performed according to the IEC/IEEE Standard for Binary
   2611   1.1     bjh21 Floating-Point Arithmetic.
   2612   1.1     bjh21 -------------------------------------------------------------------------------
   2613   1.1     bjh21 */
   2614   1.1     bjh21 float64 float64_round_to_int( float64 a )
   2615   1.1     bjh21 {
   2616   1.1     bjh21     flag aSign;
   2617   1.1     bjh21     int16 aExp;
   2618   1.1     bjh21     bits64 lastBitMask, roundBitsMask;
   2619   1.1     bjh21     int8 roundingMode;
   2620   1.1     bjh21     float64 z;
   2621   1.1     bjh21 
   2622   1.1     bjh21     aExp = extractFloat64Exp( a );
   2623   1.1     bjh21     if ( 0x433 <= aExp ) {
   2624   1.1     bjh21         if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
   2625   1.1     bjh21             return propagateFloat64NaN( a, a );
   2626   1.1     bjh21         }
   2627   1.1     bjh21         return a;
   2628   1.1     bjh21     }
   2629   1.1     bjh21     if ( aExp < 0x3FF ) {
   2630   1.1     bjh21         if ( (bits64) ( a<<1 ) == 0 ) return a;
   2631  1.12      matt         set_float_exception_inexact_flag();
   2632   1.1     bjh21         aSign = extractFloat64Sign( a );
   2633   1.1     bjh21         switch ( float_rounding_mode ) {
   2634   1.1     bjh21          case float_round_nearest_even:
   2635   1.1     bjh21             if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
   2636   1.1     bjh21                 return packFloat64( aSign, 0x3FF, 0 );
   2637   1.1     bjh21             }
   2638   1.1     bjh21             break;
   2639   1.1     bjh21 	 case float_round_to_zero:
   2640   1.1     bjh21 	    break;
   2641   1.1     bjh21          case float_round_down:
   2642   1.1     bjh21             return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
   2643   1.1     bjh21          case float_round_up:
   2644   1.1     bjh21             return
   2645   1.1     bjh21             aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
   2646   1.1     bjh21         }
   2647   1.1     bjh21         return packFloat64( aSign, 0, 0 );
   2648   1.1     bjh21     }
   2649   1.1     bjh21     lastBitMask = 1;
   2650   1.1     bjh21     lastBitMask <<= 0x433 - aExp;
   2651   1.1     bjh21     roundBitsMask = lastBitMask - 1;
   2652   1.1     bjh21     z = a;
   2653   1.1     bjh21     roundingMode = float_rounding_mode;
   2654   1.1     bjh21     if ( roundingMode == float_round_nearest_even ) {
   2655   1.1     bjh21         z += lastBitMask>>1;
   2656   1.1     bjh21         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
   2657   1.1     bjh21     }
   2658   1.1     bjh21     else if ( roundingMode != float_round_to_zero ) {
   2659   1.1     bjh21         if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
   2660   1.1     bjh21             z += roundBitsMask;
   2661   1.1     bjh21         }
   2662   1.1     bjh21     }
   2663   1.1     bjh21     z &= ~ roundBitsMask;
   2664  1.12      matt     if ( z != a ) set_float_exception_inexact_flag();
   2665   1.1     bjh21     return z;
   2666   1.1     bjh21 
   2667   1.1     bjh21 }
   2668   1.1     bjh21 #endif
   2669   1.1     bjh21 
   2670   1.1     bjh21 /*
   2671   1.1     bjh21 -------------------------------------------------------------------------------
   2672   1.1     bjh21 Returns the result of adding the absolute values of the double-precision
   2673   1.1     bjh21 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
   2674   1.1     bjh21 before being returned.  `zSign' is ignored if the result is a NaN.
   2675   1.1     bjh21 The addition is performed according to the IEC/IEEE Standard for Binary
   2676   1.1     bjh21 Floating-Point Arithmetic.
   2677   1.1     bjh21 -------------------------------------------------------------------------------
   2678   1.1     bjh21 */
   2679   1.1     bjh21 static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
   2680   1.1     bjh21 {
   2681   1.1     bjh21     int16 aExp, bExp, zExp;
   2682   1.1     bjh21     bits64 aSig, bSig, zSig;
   2683   1.1     bjh21     int16 expDiff;
   2684   1.1     bjh21 
   2685   1.1     bjh21     aSig = extractFloat64Frac( a );
   2686   1.1     bjh21     aExp = extractFloat64Exp( a );
   2687   1.1     bjh21     bSig = extractFloat64Frac( b );
   2688   1.1     bjh21     bExp = extractFloat64Exp( b );
   2689   1.1     bjh21     expDiff = aExp - bExp;
   2690   1.1     bjh21     aSig <<= 9;
   2691   1.1     bjh21     bSig <<= 9;
   2692   1.1     bjh21     if ( 0 < expDiff ) {
   2693   1.1     bjh21         if ( aExp == 0x7FF ) {
   2694   1.1     bjh21             if ( aSig ) return propagateFloat64NaN( a, b );
   2695   1.1     bjh21             return a;
   2696   1.1     bjh21         }
   2697   1.1     bjh21         if ( bExp == 0 ) {
   2698   1.1     bjh21             --expDiff;
   2699   1.1     bjh21         }
   2700   1.1     bjh21         else {
   2701   1.1     bjh21             bSig |= LIT64( 0x2000000000000000 );
   2702   1.1     bjh21         }
   2703   1.1     bjh21         shift64RightJamming( bSig, expDiff, &bSig );
   2704   1.1     bjh21         zExp = aExp;
   2705   1.1     bjh21     }
   2706   1.1     bjh21     else if ( expDiff < 0 ) {
   2707   1.1     bjh21         if ( bExp == 0x7FF ) {
   2708   1.1     bjh21             if ( bSig ) return propagateFloat64NaN( a, b );
   2709   1.1     bjh21             return packFloat64( zSign, 0x7FF, 0 );
   2710   1.1     bjh21         }
   2711   1.1     bjh21         if ( aExp == 0 ) {
   2712   1.1     bjh21             ++expDiff;
   2713   1.1     bjh21         }
   2714   1.1     bjh21         else {
   2715   1.1     bjh21             aSig |= LIT64( 0x2000000000000000 );
   2716   1.1     bjh21         }
   2717   1.1     bjh21         shift64RightJamming( aSig, - expDiff, &aSig );
   2718   1.1     bjh21         zExp = bExp;
   2719   1.1     bjh21     }
   2720   1.1     bjh21     else {
   2721   1.1     bjh21         if ( aExp == 0x7FF ) {
   2722   1.1     bjh21             if ( aSig | bSig ) return propagateFloat64NaN( a, b );
   2723   1.1     bjh21             return a;
   2724   1.1     bjh21         }
   2725   1.1     bjh21         if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
   2726   1.1     bjh21         zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
   2727   1.1     bjh21         zExp = aExp;
   2728   1.1     bjh21         goto roundAndPack;
   2729   1.1     bjh21     }
   2730   1.1     bjh21     aSig |= LIT64( 0x2000000000000000 );
   2731   1.1     bjh21     zSig = ( aSig + bSig )<<1;
   2732   1.1     bjh21     --zExp;
   2733   1.1     bjh21     if ( (sbits64) zSig < 0 ) {
   2734   1.1     bjh21         zSig = aSig + bSig;
   2735   1.1     bjh21         ++zExp;
   2736   1.1     bjh21     }
   2737   1.1     bjh21  roundAndPack:
   2738   1.1     bjh21     return roundAndPackFloat64( zSign, zExp, zSig );
   2739   1.1     bjh21 
   2740   1.1     bjh21 }
   2741   1.1     bjh21 
   2742   1.1     bjh21 /*
   2743   1.1     bjh21 -------------------------------------------------------------------------------
   2744   1.1     bjh21 Returns the result of subtracting the absolute values of the double-
   2745   1.1     bjh21 precision floating-point values `a' and `b'.  If `zSign' is 1, the
   2746   1.1     bjh21 difference is negated before being returned.  `zSign' is ignored if the
   2747   1.1     bjh21 result is a NaN.  The subtraction is performed according to the IEC/IEEE
   2748   1.1     bjh21 Standard for Binary Floating-Point Arithmetic.
   2749   1.1     bjh21 -------------------------------------------------------------------------------
   2750   1.1     bjh21 */
   2751   1.1     bjh21 static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
   2752   1.1     bjh21 {
   2753   1.1     bjh21     int16 aExp, bExp, zExp;
   2754   1.1     bjh21     bits64 aSig, bSig, zSig;
   2755   1.1     bjh21     int16 expDiff;
   2756   1.1     bjh21 
   2757   1.1     bjh21     aSig = extractFloat64Frac( a );
   2758   1.1     bjh21     aExp = extractFloat64Exp( a );
   2759   1.1     bjh21     bSig = extractFloat64Frac( b );
   2760   1.1     bjh21     bExp = extractFloat64Exp( b );
   2761   1.1     bjh21     expDiff = aExp - bExp;
   2762   1.1     bjh21     aSig <<= 10;
   2763   1.1     bjh21     bSig <<= 10;
   2764   1.1     bjh21     if ( 0 < expDiff ) goto aExpBigger;
   2765   1.1     bjh21     if ( expDiff < 0 ) goto bExpBigger;
   2766   1.1     bjh21     if ( aExp == 0x7FF ) {
   2767   1.1     bjh21         if ( aSig | bSig ) return propagateFloat64NaN( a, b );
   2768   1.1     bjh21         float_raise( float_flag_invalid );
   2769   1.1     bjh21         return float64_default_nan;
   2770   1.1     bjh21     }
   2771   1.1     bjh21     if ( aExp == 0 ) {
   2772   1.1     bjh21         aExp = 1;
   2773   1.1     bjh21         bExp = 1;
   2774   1.1     bjh21     }
   2775   1.1     bjh21     if ( bSig < aSig ) goto aBigger;
   2776   1.1     bjh21     if ( aSig < bSig ) goto bBigger;
   2777   1.1     bjh21     return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
   2778   1.1     bjh21  bExpBigger:
   2779   1.1     bjh21     if ( bExp == 0x7FF ) {
   2780   1.1     bjh21         if ( bSig ) return propagateFloat64NaN( a, b );
   2781   1.1     bjh21         return packFloat64( zSign ^ 1, 0x7FF, 0 );
   2782   1.1     bjh21     }
   2783   1.1     bjh21     if ( aExp == 0 ) {
   2784   1.1     bjh21         ++expDiff;
   2785   1.1     bjh21     }
   2786   1.1     bjh21     else {
   2787   1.1     bjh21         aSig |= LIT64( 0x4000000000000000 );
   2788   1.1     bjh21     }
   2789   1.1     bjh21     shift64RightJamming( aSig, - expDiff, &aSig );
   2790   1.1     bjh21     bSig |= LIT64( 0x4000000000000000 );
   2791   1.1     bjh21  bBigger:
   2792   1.1     bjh21     zSig = bSig - aSig;
   2793   1.1     bjh21     zExp = bExp;
   2794   1.1     bjh21     zSign ^= 1;
   2795   1.1     bjh21     goto normalizeRoundAndPack;
   2796   1.1     bjh21  aExpBigger:
   2797   1.1     bjh21     if ( aExp == 0x7FF ) {
   2798   1.1     bjh21         if ( aSig ) return propagateFloat64NaN( a, b );
   2799   1.1     bjh21         return a;
   2800   1.1     bjh21     }
   2801   1.1     bjh21     if ( bExp == 0 ) {
   2802   1.1     bjh21         --expDiff;
   2803   1.1     bjh21     }
   2804   1.1     bjh21     else {
   2805   1.1     bjh21         bSig |= LIT64( 0x4000000000000000 );
   2806   1.1     bjh21     }
   2807   1.1     bjh21     shift64RightJamming( bSig, expDiff, &bSig );
   2808   1.1     bjh21     aSig |= LIT64( 0x4000000000000000 );
   2809   1.1     bjh21  aBigger:
   2810   1.1     bjh21     zSig = aSig - bSig;
   2811   1.1     bjh21     zExp = aExp;
   2812   1.1     bjh21  normalizeRoundAndPack:
   2813   1.1     bjh21     --zExp;
   2814   1.1     bjh21     return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
   2815   1.1     bjh21 
   2816   1.1     bjh21 }
   2817   1.1     bjh21 
   2818   1.1     bjh21 /*
   2819   1.1     bjh21 -------------------------------------------------------------------------------
   2820   1.1     bjh21 Returns the result of adding the double-precision floating-point values `a'
   2821   1.1     bjh21 and `b'.  The operation is performed according to the IEC/IEEE Standard for
   2822   1.1     bjh21 Binary Floating-Point Arithmetic.
   2823   1.1     bjh21 -------------------------------------------------------------------------------
   2824   1.1     bjh21 */
   2825   1.1     bjh21 float64 float64_add( float64 a, float64 b )
   2826   1.1     bjh21 {
   2827   1.1     bjh21     flag aSign, bSign;
   2828   1.1     bjh21 
   2829   1.1     bjh21     aSign = extractFloat64Sign( a );
   2830   1.1     bjh21     bSign = extractFloat64Sign( b );
   2831   1.1     bjh21     if ( aSign == bSign ) {
   2832   1.1     bjh21         return addFloat64Sigs( a, b, aSign );
   2833   1.1     bjh21     }
   2834   1.1     bjh21     else {
   2835   1.1     bjh21         return subFloat64Sigs( a, b, aSign );
   2836   1.1     bjh21     }
   2837   1.1     bjh21 
   2838   1.1     bjh21 }
   2839   1.1     bjh21 
   2840   1.1     bjh21 /*
   2841   1.1     bjh21 -------------------------------------------------------------------------------
   2842   1.1     bjh21 Returns the result of subtracting the double-precision floating-point values
   2843   1.1     bjh21 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   2844   1.1     bjh21 for Binary Floating-Point Arithmetic.
   2845   1.1     bjh21 -------------------------------------------------------------------------------
   2846   1.1     bjh21 */
   2847   1.1     bjh21 float64 float64_sub( float64 a, float64 b )
   2848   1.1     bjh21 {
   2849   1.1     bjh21     flag aSign, bSign;
   2850   1.1     bjh21 
   2851   1.1     bjh21     aSign = extractFloat64Sign( a );
   2852   1.1     bjh21     bSign = extractFloat64Sign( b );
   2853   1.1     bjh21     if ( aSign == bSign ) {
   2854   1.1     bjh21         return subFloat64Sigs( a, b, aSign );
   2855   1.1     bjh21     }
   2856   1.1     bjh21     else {
   2857   1.1     bjh21         return addFloat64Sigs( a, b, aSign );
   2858   1.1     bjh21     }
   2859   1.1     bjh21 
   2860   1.1     bjh21 }
   2861   1.1     bjh21 
   2862   1.1     bjh21 /*
   2863   1.1     bjh21 -------------------------------------------------------------------------------
   2864   1.1     bjh21 Returns the result of multiplying the double-precision floating-point values
   2865   1.1     bjh21 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   2866   1.1     bjh21 for Binary Floating-Point Arithmetic.
   2867   1.1     bjh21 -------------------------------------------------------------------------------
   2868   1.1     bjh21 */
   2869   1.1     bjh21 float64 float64_mul( float64 a, float64 b )
   2870   1.1     bjh21 {
   2871   1.1     bjh21     flag aSign, bSign, zSign;
   2872   1.1     bjh21     int16 aExp, bExp, zExp;
   2873   1.1     bjh21     bits64 aSig, bSig, zSig0, zSig1;
   2874   1.1     bjh21 
   2875   1.1     bjh21     aSig = extractFloat64Frac( a );
   2876   1.1     bjh21     aExp = extractFloat64Exp( a );
   2877   1.1     bjh21     aSign = extractFloat64Sign( a );
   2878   1.1     bjh21     bSig = extractFloat64Frac( b );
   2879   1.1     bjh21     bExp = extractFloat64Exp( b );
   2880   1.1     bjh21     bSign = extractFloat64Sign( b );
   2881   1.1     bjh21     zSign = aSign ^ bSign;
   2882   1.1     bjh21     if ( aExp == 0x7FF ) {
   2883   1.1     bjh21         if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
   2884   1.1     bjh21             return propagateFloat64NaN( a, b );
   2885   1.1     bjh21         }
   2886   1.1     bjh21         if ( ( bExp | bSig ) == 0 ) {
   2887   1.1     bjh21             float_raise( float_flag_invalid );
   2888   1.1     bjh21             return float64_default_nan;
   2889   1.1     bjh21         }
   2890   1.1     bjh21         return packFloat64( zSign, 0x7FF, 0 );
   2891   1.1     bjh21     }
   2892   1.1     bjh21     if ( bExp == 0x7FF ) {
   2893   1.1     bjh21         if ( bSig ) return propagateFloat64NaN( a, b );
   2894   1.1     bjh21         if ( ( aExp | aSig ) == 0 ) {
   2895   1.1     bjh21             float_raise( float_flag_invalid );
   2896   1.1     bjh21             return float64_default_nan;
   2897   1.1     bjh21         }
   2898   1.1     bjh21         return packFloat64( zSign, 0x7FF, 0 );
   2899   1.1     bjh21     }
   2900   1.1     bjh21     if ( aExp == 0 ) {
   2901   1.1     bjh21         if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
   2902   1.1     bjh21         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   2903   1.1     bjh21     }
   2904   1.1     bjh21     if ( bExp == 0 ) {
   2905   1.1     bjh21         if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
   2906   1.1     bjh21         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
   2907   1.1     bjh21     }
   2908   1.1     bjh21     zExp = aExp + bExp - 0x3FF;
   2909   1.1     bjh21     aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
   2910   1.1     bjh21     bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
   2911   1.1     bjh21     mul64To128( aSig, bSig, &zSig0, &zSig1 );
   2912   1.1     bjh21     zSig0 |= ( zSig1 != 0 );
   2913   1.1     bjh21     if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
   2914   1.1     bjh21         zSig0 <<= 1;
   2915   1.1     bjh21         --zExp;
   2916   1.1     bjh21     }
   2917   1.1     bjh21     return roundAndPackFloat64( zSign, zExp, zSig0 );
   2918   1.1     bjh21 
   2919   1.1     bjh21 }
   2920   1.1     bjh21 
   2921   1.1     bjh21 /*
   2922   1.1     bjh21 -------------------------------------------------------------------------------
   2923   1.1     bjh21 Returns the result of dividing the double-precision floating-point value `a'
   2924   1.1     bjh21 by the corresponding value `b'.  The operation is performed according to
   2925   1.1     bjh21 the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2926   1.1     bjh21 -------------------------------------------------------------------------------
   2927   1.1     bjh21 */
   2928   1.1     bjh21 float64 float64_div( float64 a, float64 b )
   2929   1.1     bjh21 {
   2930   1.1     bjh21     flag aSign, bSign, zSign;
   2931   1.1     bjh21     int16 aExp, bExp, zExp;
   2932   1.1     bjh21     bits64 aSig, bSig, zSig;
   2933   1.1     bjh21     bits64 rem0, rem1;
   2934   1.1     bjh21     bits64 term0, term1;
   2935   1.1     bjh21 
   2936   1.1     bjh21     aSig = extractFloat64Frac( a );
   2937   1.1     bjh21     aExp = extractFloat64Exp( a );
   2938   1.1     bjh21     aSign = extractFloat64Sign( a );
   2939   1.1     bjh21     bSig = extractFloat64Frac( b );
   2940   1.1     bjh21     bExp = extractFloat64Exp( b );
   2941   1.1     bjh21     bSign = extractFloat64Sign( b );
   2942   1.1     bjh21     zSign = aSign ^ bSign;
   2943   1.1     bjh21     if ( aExp == 0x7FF ) {
   2944   1.1     bjh21         if ( aSig ) return propagateFloat64NaN( a, b );
   2945   1.1     bjh21         if ( bExp == 0x7FF ) {
   2946   1.1     bjh21             if ( bSig ) return propagateFloat64NaN( a, b );
   2947   1.1     bjh21             float_raise( float_flag_invalid );
   2948   1.1     bjh21             return float64_default_nan;
   2949   1.1     bjh21         }
   2950   1.1     bjh21         return packFloat64( zSign, 0x7FF, 0 );
   2951   1.1     bjh21     }
   2952   1.1     bjh21     if ( bExp == 0x7FF ) {
   2953   1.1     bjh21         if ( bSig ) return propagateFloat64NaN( a, b );
   2954   1.1     bjh21         return packFloat64( zSign, 0, 0 );
   2955   1.1     bjh21     }
   2956   1.1     bjh21     if ( bExp == 0 ) {
   2957   1.1     bjh21         if ( bSig == 0 ) {
   2958   1.1     bjh21             if ( ( aExp | aSig ) == 0 ) {
   2959   1.1     bjh21                 float_raise( float_flag_invalid );
   2960   1.1     bjh21                 return float64_default_nan;
   2961   1.1     bjh21             }
   2962   1.1     bjh21             float_raise( float_flag_divbyzero );
   2963   1.1     bjh21             return packFloat64( zSign, 0x7FF, 0 );
   2964   1.1     bjh21         }
   2965   1.1     bjh21         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
   2966   1.1     bjh21     }
   2967   1.1     bjh21     if ( aExp == 0 ) {
   2968   1.1     bjh21         if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
   2969   1.1     bjh21         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   2970   1.1     bjh21     }
   2971   1.1     bjh21     zExp = aExp - bExp + 0x3FD;
   2972   1.1     bjh21     aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
   2973   1.1     bjh21     bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
   2974   1.1     bjh21     if ( bSig <= ( aSig + aSig ) ) {
   2975   1.1     bjh21         aSig >>= 1;
   2976   1.1     bjh21         ++zExp;
   2977   1.1     bjh21     }
   2978   1.1     bjh21     zSig = estimateDiv128To64( aSig, 0, bSig );
   2979   1.1     bjh21     if ( ( zSig & 0x1FF ) <= 2 ) {
   2980   1.1     bjh21         mul64To128( bSig, zSig, &term0, &term1 );
   2981   1.1     bjh21         sub128( aSig, 0, term0, term1, &rem0, &rem1 );
   2982   1.1     bjh21         while ( (sbits64) rem0 < 0 ) {
   2983   1.1     bjh21             --zSig;
   2984   1.1     bjh21             add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
   2985   1.1     bjh21         }
   2986   1.1     bjh21         zSig |= ( rem1 != 0 );
   2987   1.1     bjh21     }
   2988   1.1     bjh21     return roundAndPackFloat64( zSign, zExp, zSig );
   2989   1.1     bjh21 
   2990   1.1     bjh21 }
   2991   1.1     bjh21 
   2992   1.1     bjh21 #ifndef SOFTFLOAT_FOR_GCC
   2993   1.1     bjh21 /*
   2994   1.1     bjh21 -------------------------------------------------------------------------------
   2995   1.1     bjh21 Returns the remainder of the double-precision floating-point value `a'
   2996   1.1     bjh21 with respect to the corresponding value `b'.  The operation is performed
   2997   1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2998   1.1     bjh21 -------------------------------------------------------------------------------
   2999   1.1     bjh21 */
   3000   1.1     bjh21 float64 float64_rem( float64 a, float64 b )
   3001   1.1     bjh21 {
   3002   1.1     bjh21     flag aSign, bSign, zSign;
   3003   1.1     bjh21     int16 aExp, bExp, expDiff;
   3004   1.1     bjh21     bits64 aSig, bSig;
   3005   1.1     bjh21     bits64 q, alternateASig;
   3006   1.1     bjh21     sbits64 sigMean;
   3007   1.1     bjh21 
   3008   1.1     bjh21     aSig = extractFloat64Frac( a );
   3009   1.1     bjh21     aExp = extractFloat64Exp( a );
   3010   1.1     bjh21     aSign = extractFloat64Sign( a );
   3011   1.1     bjh21     bSig = extractFloat64Frac( b );
   3012   1.1     bjh21     bExp = extractFloat64Exp( b );
   3013   1.1     bjh21     bSign = extractFloat64Sign( b );
   3014   1.1     bjh21     if ( aExp == 0x7FF ) {
   3015   1.1     bjh21         if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
   3016   1.1     bjh21             return propagateFloat64NaN( a, b );
   3017   1.1     bjh21         }
   3018   1.1     bjh21         float_raise( float_flag_invalid );
   3019   1.1     bjh21         return float64_default_nan;
   3020   1.1     bjh21     }
   3021   1.1     bjh21     if ( bExp == 0x7FF ) {
   3022   1.1     bjh21         if ( bSig ) return propagateFloat64NaN( a, b );
   3023   1.1     bjh21         return a;
   3024   1.1     bjh21     }
   3025   1.1     bjh21     if ( bExp == 0 ) {
   3026   1.1     bjh21         if ( bSig == 0 ) {
   3027   1.1     bjh21             float_raise( float_flag_invalid );
   3028   1.1     bjh21             return float64_default_nan;
   3029   1.1     bjh21         }
   3030   1.1     bjh21         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
   3031   1.1     bjh21     }
   3032   1.1     bjh21     if ( aExp == 0 ) {
   3033   1.1     bjh21         if ( aSig == 0 ) return a;
   3034   1.1     bjh21         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   3035   1.1     bjh21     }
   3036   1.1     bjh21     expDiff = aExp - bExp;
   3037   1.1     bjh21     aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
   3038   1.1     bjh21     bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
   3039   1.1     bjh21     if ( expDiff < 0 ) {
   3040   1.1     bjh21         if ( expDiff < -1 ) return a;
   3041   1.1     bjh21         aSig >>= 1;
   3042   1.1     bjh21     }
   3043   1.1     bjh21     q = ( bSig <= aSig );
   3044   1.1     bjh21     if ( q ) aSig -= bSig;
   3045   1.1     bjh21     expDiff -= 64;
   3046   1.1     bjh21     while ( 0 < expDiff ) {
   3047   1.1     bjh21         q = estimateDiv128To64( aSig, 0, bSig );
   3048   1.1     bjh21         q = ( 2 < q ) ? q - 2 : 0;
   3049   1.1     bjh21         aSig = - ( ( bSig>>2 ) * q );
   3050   1.1     bjh21         expDiff -= 62;
   3051   1.1     bjh21     }
   3052   1.1     bjh21     expDiff += 64;
   3053   1.1     bjh21     if ( 0 < expDiff ) {
   3054   1.1     bjh21         q = estimateDiv128To64( aSig, 0, bSig );
   3055   1.1     bjh21         q = ( 2 < q ) ? q - 2 : 0;
   3056   1.1     bjh21         q >>= 64 - expDiff;
   3057   1.1     bjh21         bSig >>= 2;
   3058   1.1     bjh21         aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
   3059   1.1     bjh21     }
   3060   1.1     bjh21     else {
   3061   1.1     bjh21         aSig >>= 2;
   3062   1.1     bjh21         bSig >>= 2;
   3063   1.1     bjh21     }
   3064   1.1     bjh21     do {
   3065   1.1     bjh21         alternateASig = aSig;
   3066   1.1     bjh21         ++q;
   3067   1.1     bjh21         aSig -= bSig;
   3068   1.1     bjh21     } while ( 0 <= (sbits64) aSig );
   3069   1.1     bjh21     sigMean = aSig + alternateASig;
   3070   1.1     bjh21     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
   3071   1.1     bjh21         aSig = alternateASig;
   3072   1.1     bjh21     }
   3073   1.1     bjh21     zSign = ( (sbits64) aSig < 0 );
   3074   1.1     bjh21     if ( zSign ) aSig = - aSig;
   3075   1.1     bjh21     return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
   3076   1.1     bjh21 
   3077   1.1     bjh21 }
   3078   1.1     bjh21 
   3079   1.1     bjh21 /*
   3080   1.1     bjh21 -------------------------------------------------------------------------------
   3081   1.1     bjh21 Returns the square root of the double-precision floating-point value `a'.
   3082   1.1     bjh21 The operation is performed according to the IEC/IEEE Standard for Binary
   3083   1.1     bjh21 Floating-Point Arithmetic.
   3084   1.1     bjh21 -------------------------------------------------------------------------------
   3085   1.1     bjh21 */
   3086   1.1     bjh21 float64 float64_sqrt( float64 a )
   3087   1.1     bjh21 {
   3088   1.1     bjh21     flag aSign;
   3089   1.1     bjh21     int16 aExp, zExp;
   3090   1.1     bjh21     bits64 aSig, zSig, doubleZSig;
   3091   1.1     bjh21     bits64 rem0, rem1, term0, term1;
   3092   1.1     bjh21 
   3093   1.1     bjh21     aSig = extractFloat64Frac( a );
   3094   1.1     bjh21     aExp = extractFloat64Exp( a );
   3095   1.1     bjh21     aSign = extractFloat64Sign( a );
   3096   1.1     bjh21     if ( aExp == 0x7FF ) {
   3097   1.1     bjh21         if ( aSig ) return propagateFloat64NaN( a, a );
   3098   1.1     bjh21         if ( ! aSign ) return a;
   3099   1.1     bjh21         float_raise( float_flag_invalid );
   3100   1.1     bjh21         return float64_default_nan;
   3101   1.1     bjh21     }
   3102   1.1     bjh21     if ( aSign ) {
   3103   1.1     bjh21         if ( ( aExp | aSig ) == 0 ) return a;
   3104   1.1     bjh21         float_raise( float_flag_invalid );
   3105   1.1     bjh21         return float64_default_nan;
   3106   1.1     bjh21     }
   3107   1.1     bjh21     if ( aExp == 0 ) {
   3108   1.1     bjh21         if ( aSig == 0 ) return 0;
   3109   1.1     bjh21         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
   3110   1.1     bjh21     }
   3111   1.1     bjh21     zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
   3112   1.1     bjh21     aSig |= LIT64( 0x0010000000000000 );
   3113   1.1     bjh21     zSig = estimateSqrt32( aExp, aSig>>21 );
   3114   1.1     bjh21     aSig <<= 9 - ( aExp & 1 );
   3115   1.1     bjh21     zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
   3116   1.1     bjh21     if ( ( zSig & 0x1FF ) <= 5 ) {
   3117   1.1     bjh21         doubleZSig = zSig<<1;
   3118   1.1     bjh21         mul64To128( zSig, zSig, &term0, &term1 );
   3119   1.1     bjh21         sub128( aSig, 0, term0, term1, &rem0, &rem1 );
   3120   1.1     bjh21         while ( (sbits64) rem0 < 0 ) {
   3121   1.1     bjh21             --zSig;
   3122   1.1     bjh21             doubleZSig -= 2;
   3123   1.1     bjh21             add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
   3124   1.1     bjh21         }
   3125   1.1     bjh21         zSig |= ( ( rem0 | rem1 ) != 0 );
   3126   1.1     bjh21     }
   3127   1.1     bjh21     return roundAndPackFloat64( 0, zExp, zSig );
   3128   1.1     bjh21 
   3129   1.1     bjh21 }
   3130   1.1     bjh21 #endif
   3131   1.1     bjh21 
   3132   1.1     bjh21 /*
   3133   1.1     bjh21 -------------------------------------------------------------------------------
   3134   1.1     bjh21 Returns 1 if the double-precision floating-point value `a' is equal to the
   3135   1.1     bjh21 corresponding value `b', and 0 otherwise.  The comparison is performed
   3136   1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3137   1.1     bjh21 -------------------------------------------------------------------------------
   3138   1.1     bjh21 */
   3139   1.1     bjh21 flag float64_eq( float64 a, float64 b )
   3140   1.1     bjh21 {
   3141   1.1     bjh21 
   3142   1.1     bjh21     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3143   1.1     bjh21          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3144   1.1     bjh21        ) {
   3145   1.1     bjh21         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
   3146   1.1     bjh21             float_raise( float_flag_invalid );
   3147   1.1     bjh21         }
   3148   1.1     bjh21         return 0;
   3149   1.1     bjh21     }
   3150   1.1     bjh21     return ( a == b ) ||
   3151   1.1     bjh21 	( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
   3152   1.1     bjh21 
   3153   1.1     bjh21 }
   3154   1.1     bjh21 
   3155   1.1     bjh21 /*
   3156   1.1     bjh21 -------------------------------------------------------------------------------
   3157   1.1     bjh21 Returns 1 if the double-precision floating-point value `a' is less than or
   3158   1.1     bjh21 equal to the corresponding value `b', and 0 otherwise.  The comparison is
   3159   1.1     bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
   3160   1.1     bjh21 Arithmetic.
   3161   1.1     bjh21 -------------------------------------------------------------------------------
   3162   1.1     bjh21 */
   3163   1.1     bjh21 flag float64_le( float64 a, float64 b )
   3164   1.1     bjh21 {
   3165   1.1     bjh21     flag aSign, bSign;
   3166   1.1     bjh21 
   3167   1.1     bjh21     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3168   1.1     bjh21          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3169   1.1     bjh21        ) {
   3170   1.1     bjh21         float_raise( float_flag_invalid );
   3171   1.1     bjh21         return 0;
   3172   1.1     bjh21     }
   3173   1.1     bjh21     aSign = extractFloat64Sign( a );
   3174   1.1     bjh21     bSign = extractFloat64Sign( b );
   3175   1.1     bjh21     if ( aSign != bSign )
   3176   1.1     bjh21 	return aSign ||
   3177   1.1     bjh21 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
   3178   1.1     bjh21 	      0 );
   3179   1.1     bjh21     return ( a == b ) ||
   3180   1.1     bjh21 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
   3181   1.1     bjh21 
   3182   1.1     bjh21 }
   3183   1.1     bjh21 
   3184   1.1     bjh21 /*
   3185   1.1     bjh21 -------------------------------------------------------------------------------
   3186   1.1     bjh21 Returns 1 if the double-precision floating-point value `a' is less than
   3187   1.1     bjh21 the corresponding value `b', and 0 otherwise.  The comparison is performed
   3188   1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3189   1.1     bjh21 -------------------------------------------------------------------------------
   3190   1.1     bjh21 */
   3191   1.1     bjh21 flag float64_lt( float64 a, float64 b )
   3192   1.1     bjh21 {
   3193   1.1     bjh21     flag aSign, bSign;
   3194   1.1     bjh21 
   3195   1.1     bjh21     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3196   1.1     bjh21          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3197   1.1     bjh21        ) {
   3198   1.1     bjh21         float_raise( float_flag_invalid );
   3199   1.1     bjh21         return 0;
   3200   1.1     bjh21     }
   3201   1.1     bjh21     aSign = extractFloat64Sign( a );
   3202   1.1     bjh21     bSign = extractFloat64Sign( b );
   3203   1.1     bjh21     if ( aSign != bSign )
   3204   1.1     bjh21 	return aSign &&
   3205   1.1     bjh21 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
   3206   1.1     bjh21 	      0 );
   3207   1.1     bjh21     return ( a != b ) &&
   3208   1.1     bjh21 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
   3209   1.1     bjh21 
   3210   1.1     bjh21 }
   3211   1.1     bjh21 
   3212   1.1     bjh21 #ifndef SOFTFLOAT_FOR_GCC
   3213   1.1     bjh21 /*
   3214   1.1     bjh21 -------------------------------------------------------------------------------
   3215   1.1     bjh21 Returns 1 if the double-precision floating-point value `a' is equal to the
   3216   1.1     bjh21 corresponding value `b', and 0 otherwise.  The invalid exception is raised
   3217   1.1     bjh21 if either operand is a NaN.  Otherwise, the comparison is performed
   3218   1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3219   1.1     bjh21 -------------------------------------------------------------------------------
   3220   1.1     bjh21 */
   3221   1.1     bjh21 flag float64_eq_signaling( float64 a, float64 b )
   3222   1.1     bjh21 {
   3223   1.1     bjh21 
   3224   1.1     bjh21     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3225   1.1     bjh21          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3226   1.1     bjh21        ) {
   3227   1.1     bjh21         float_raise( float_flag_invalid );
   3228   1.1     bjh21         return 0;
   3229   1.1     bjh21     }
   3230   1.1     bjh21     return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
   3231   1.1     bjh21 
   3232   1.1     bjh21 }
   3233   1.1     bjh21 
   3234   1.1     bjh21 /*
   3235   1.1     bjh21 -------------------------------------------------------------------------------
   3236   1.1     bjh21 Returns 1 if the double-precision floating-point value `a' is less than or
   3237   1.1     bjh21 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
   3238   1.1     bjh21 cause an exception.  Otherwise, the comparison is performed according to the
   3239   1.1     bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3240   1.1     bjh21 -------------------------------------------------------------------------------
   3241   1.1     bjh21 */
   3242   1.1     bjh21 flag float64_le_quiet( float64 a, float64 b )
   3243   1.1     bjh21 {
   3244   1.1     bjh21     flag aSign, bSign;
   3245   1.1     bjh21 
   3246   1.1     bjh21     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3247   1.1     bjh21          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3248   1.1     bjh21        ) {
   3249   1.1     bjh21         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
   3250   1.1     bjh21             float_raise( float_flag_invalid );
   3251   1.1     bjh21         }
   3252   1.1     bjh21         return 0;
   3253   1.1     bjh21     }
   3254   1.1     bjh21     aSign = extractFloat64Sign( a );
   3255   1.1     bjh21     bSign = extractFloat64Sign( b );
   3256   1.1     bjh21     if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
   3257   1.1     bjh21     return ( a == b ) || ( aSign ^ ( a < b ) );
   3258   1.1     bjh21 
   3259   1.1     bjh21 }
   3260   1.1     bjh21 
   3261   1.1     bjh21 /*
   3262   1.1     bjh21 -------------------------------------------------------------------------------
   3263   1.1     bjh21 Returns 1 if the double-precision floating-point value `a' is less than
   3264   1.1     bjh21 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
   3265   1.1     bjh21 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
   3266   1.1     bjh21 Standard for Binary Floating-Point Arithmetic.
   3267   1.1     bjh21 -------------------------------------------------------------------------------
   3268   1.1     bjh21 */
   3269   1.1     bjh21 flag float64_lt_quiet( float64 a, float64 b )
   3270   1.1     bjh21 {
   3271   1.1     bjh21     flag aSign, bSign;
   3272   1.1     bjh21 
   3273   1.1     bjh21     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
   3274   1.1     bjh21          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
   3275   1.1     bjh21        ) {
   3276   1.1     bjh21         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
   3277   1.1     bjh21             float_raise( float_flag_invalid );
   3278   1.1     bjh21         }
   3279   1.1     bjh21         return 0;
   3280   1.1     bjh21     }
   3281   1.1     bjh21     aSign = extractFloat64Sign( a );
   3282   1.1     bjh21     bSign = extractFloat64Sign( b );
   3283   1.1     bjh21     if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
   3284   1.1     bjh21     return ( a != b ) && ( aSign ^ ( a < b ) );
   3285   1.1     bjh21 
   3286   1.1     bjh21 }
   3287   1.1     bjh21 #endif
   3288   1.1     bjh21 
   3289   1.1     bjh21 #ifdef FLOATX80
   3290   1.1     bjh21 
   3291   1.1     bjh21 /*
   3292   1.1     bjh21 -------------------------------------------------------------------------------
   3293   1.1     bjh21 Returns the result of converting the extended double-precision floating-
   3294   1.1     bjh21 point value `a' to the 32-bit two's complement integer format.  The
   3295   1.1     bjh21 conversion is performed according to the IEC/IEEE Standard for Binary
   3296   1.1     bjh21 Floating-Point Arithmetic---which means in particular that the conversion
   3297   1.1     bjh21 is rounded according to the current rounding mode.  If `a' is a NaN, the
   3298   1.1     bjh21 largest positive integer is returned.  Otherwise, if the conversion
   3299   1.1     bjh21 overflows, the largest integer with the same sign as `a' is returned.
   3300   1.1     bjh21 -------------------------------------------------------------------------------
   3301   1.1     bjh21 */
   3302   1.1     bjh21 int32 floatx80_to_int32( floatx80 a )
   3303   1.1     bjh21 {
   3304   1.1     bjh21     flag aSign;
   3305   1.1     bjh21     int32 aExp, shiftCount;
   3306   1.1     bjh21     bits64 aSig;
   3307   1.1     bjh21 
   3308   1.1     bjh21     aSig = extractFloatx80Frac( a );
   3309   1.1     bjh21     aExp = extractFloatx80Exp( a );
   3310   1.1     bjh21     aSign = extractFloatx80Sign( a );
   3311   1.1     bjh21     if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
   3312   1.1     bjh21     shiftCount = 0x4037 - aExp;
   3313   1.1     bjh21     if ( shiftCount <= 0 ) shiftCount = 1;
   3314   1.1     bjh21     shift64RightJamming( aSig, shiftCount, &aSig );
   3315   1.1     bjh21     return roundAndPackInt32( aSign, aSig );
   3316   1.1     bjh21 
   3317   1.1     bjh21 }
   3318   1.1     bjh21 
   3319   1.1     bjh21 /*
   3320   1.1     bjh21 -------------------------------------------------------------------------------
   3321   1.1     bjh21 Returns the result of converting the extended double-precision floating-
   3322   1.1     bjh21 point value `a' to the 32-bit two's complement integer format.  The
   3323   1.1     bjh21 conversion is performed according to the IEC/IEEE Standard for Binary
   3324   1.1     bjh21 Floating-Point Arithmetic, except that the conversion is always rounded
   3325   1.1     bjh21 toward zero.  If `a' is a NaN, the largest positive integer is returned.
   3326   1.1     bjh21 Otherwise, if the conversion overflows, the largest integer with the same
   3327   1.1     bjh21 sign as `a' is returned.
   3328   1.1     bjh21 -------------------------------------------------------------------------------
   3329   1.1     bjh21 */
   3330   1.1     bjh21 int32 floatx80_to_int32_round_to_zero( floatx80 a )
   3331   1.1     bjh21 {
   3332   1.1     bjh21     flag aSign;
   3333   1.1     bjh21     int32 aExp, shiftCount;
   3334   1.1     bjh21     bits64 aSig, savedASig;
   3335   1.1     bjh21     int32 z;
   3336   1.1     bjh21 
   3337   1.1     bjh21     aSig = extractFloatx80Frac( a );
   3338   1.1     bjh21     aExp = extractFloatx80Exp( a );
   3339   1.1     bjh21     aSign = extractFloatx80Sign( a );
   3340   1.1     bjh21     if ( 0x401E < aExp ) {
   3341   1.1     bjh21         if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
   3342   1.1     bjh21         goto invalid;
   3343   1.1     bjh21     }
   3344   1.1     bjh21     else if ( aExp < 0x3FFF ) {
   3345  1.12      matt         if ( aExp || aSig ) set_float_exception_inexact_flag();
   3346   1.1     bjh21         return 0;
   3347   1.1     bjh21     }
   3348   1.1     bjh21     shiftCount = 0x403E - aExp;
   3349   1.1     bjh21     savedASig = aSig;
   3350   1.1     bjh21     aSig >>= shiftCount;
   3351   1.1     bjh21     z = aSig;
   3352   1.1     bjh21     if ( aSign ) z = - z;
   3353   1.1     bjh21     if ( ( z < 0 ) ^ aSign ) {
   3354   1.1     bjh21  invalid:
   3355   1.1     bjh21         float_raise( float_flag_invalid );
   3356   1.1     bjh21         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
   3357   1.1     bjh21     }
   3358   1.1     bjh21     if ( ( aSig<<shiftCount ) != savedASig ) {
   3359  1.12      matt         set_float_exception_inexact_flag();
   3360   1.1     bjh21     }
   3361   1.1     bjh21     return z;
   3362   1.1     bjh21 
   3363   1.1     bjh21 }
   3364   1.1     bjh21 
   3365   1.1     bjh21 /*
   3366   1.1     bjh21 -------------------------------------------------------------------------------
   3367   1.1     bjh21 Returns the result of converting the extended double-precision floating-
   3368   1.1     bjh21 point value `a' to the 64-bit two's complement integer format.  The
   3369   1.1     bjh21 conversion is performed according to the IEC/IEEE Standard for Binary
   3370   1.1     bjh21 Floating-Point Arithmetic---which means in particular that the conversion
   3371   1.1     bjh21 is rounded according to the current rounding mode.  If `a' is a NaN,
   3372   1.1     bjh21 the largest positive integer is returned.  Otherwise, if the conversion
   3373   1.1     bjh21 overflows, the largest integer with the same sign as `a' is returned.
   3374   1.1     bjh21 -------------------------------------------------------------------------------
   3375   1.1     bjh21 */
   3376   1.1     bjh21 int64 floatx80_to_int64( floatx80 a )
   3377   1.1     bjh21 {
   3378   1.1     bjh21     flag aSign;
   3379   1.1     bjh21     int32 aExp, shiftCount;
   3380   1.1     bjh21     bits64 aSig, aSigExtra;
   3381   1.1     bjh21 
   3382   1.1     bjh21     aSig = extractFloatx80Frac( a );
   3383   1.1     bjh21     aExp = extractFloatx80Exp( a );
   3384   1.1     bjh21     aSign = extractFloatx80Sign( a );
   3385   1.1     bjh21     shiftCount = 0x403E - aExp;
   3386   1.1     bjh21     if ( shiftCount <= 0 ) {
   3387   1.1     bjh21         if ( shiftCount ) {
   3388   1.1     bjh21             float_raise( float_flag_invalid );
   3389   1.1     bjh21             if (    ! aSign
   3390   1.1     bjh21                  || (    ( aExp == 0x7FFF )
   3391   1.1     bjh21                       && ( aSig != LIT64( 0x8000000000000000 ) ) )
   3392   1.1     bjh21                ) {
   3393   1.1     bjh21                 return LIT64( 0x7FFFFFFFFFFFFFFF );
   3394   1.1     bjh21             }
   3395   1.1     bjh21             return (sbits64) LIT64( 0x8000000000000000 );
   3396   1.1     bjh21         }
   3397   1.1     bjh21         aSigExtra = 0;
   3398   1.1     bjh21     }
   3399   1.1     bjh21     else {
   3400   1.1     bjh21         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
   3401   1.1     bjh21     }
   3402   1.1     bjh21     return roundAndPackInt64( aSign, aSig, aSigExtra );
   3403   1.1     bjh21 
   3404   1.1     bjh21 }
   3405   1.1     bjh21 
   3406   1.1     bjh21 /*
   3407   1.1     bjh21 -------------------------------------------------------------------------------
   3408   1.1     bjh21 Returns the result of converting the extended double-precision floating-
   3409   1.1     bjh21 point value `a' to the 64-bit two's complement integer format.  The
   3410   1.1     bjh21 conversion is performed according to the IEC/IEEE Standard for Binary
   3411   1.1     bjh21 Floating-Point Arithmetic, except that the conversion is always rounded
   3412   1.1     bjh21 toward zero.  If `a' is a NaN, the largest positive integer is returned.
   3413   1.1     bjh21 Otherwise, if the conversion overflows, the largest integer with the same
   3414   1.1     bjh21 sign as `a' is returned.
   3415   1.1     bjh21 -------------------------------------------------------------------------------
   3416   1.1     bjh21 */
   3417   1.1     bjh21 int64 floatx80_to_int64_round_to_zero( floatx80 a )
   3418   1.1     bjh21 {
   3419   1.1     bjh21     flag aSign;
   3420   1.1     bjh21     int32 aExp, shiftCount;
   3421   1.1     bjh21     bits64 aSig;
   3422   1.1     bjh21     int64 z;
   3423   1.1     bjh21 
   3424   1.1     bjh21     aSig = extractFloatx80Frac( a );
   3425   1.1     bjh21     aExp = extractFloatx80Exp( a );
   3426   1.1     bjh21     aSign = extractFloatx80Sign( a );
   3427   1.1     bjh21     shiftCount = aExp - 0x403E;
   3428   1.1     bjh21     if ( 0 <= shiftCount ) {
   3429   1.1     bjh21         aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
   3430  1.16       nat         if ( ( (a.high>>X80SHIFT) != 0xC03E ) || aSig ) {
   3431   1.1     bjh21             float_raise( float_flag_invalid );
   3432   1.1     bjh21             if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
   3433   1.1     bjh21                 return LIT64( 0x7FFFFFFFFFFFFFFF );
   3434   1.1     bjh21             }
   3435   1.1     bjh21         }
   3436   1.1     bjh21         return (sbits64) LIT64( 0x8000000000000000 );
   3437   1.1     bjh21     }
   3438   1.1     bjh21     else if ( aExp < 0x3FFF ) {
   3439  1.12      matt         if ( aExp | aSig ) set_float_exception_inexact_flag();
   3440   1.1     bjh21         return 0;
   3441   1.1     bjh21     }
   3442   1.1     bjh21     z = aSig>>( - shiftCount );
   3443   1.1     bjh21     if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
   3444  1.12      matt         set_float_exception_inexact_flag();
   3445   1.1     bjh21     }
   3446   1.1     bjh21     if ( aSign ) z = - z;
   3447   1.1     bjh21     return z;
   3448   1.1     bjh21 
   3449   1.1     bjh21 }
   3450   1.1     bjh21 
   3451   1.1     bjh21 /*
   3452   1.1     bjh21 -------------------------------------------------------------------------------
   3453   1.1     bjh21 Returns the result of converting the extended double-precision floating-
   3454   1.1     bjh21 point value `a' to the single-precision floating-point format.  The
   3455   1.1     bjh21 conversion is performed according to the IEC/IEEE Standard for Binary
   3456   1.1     bjh21 Floating-Point Arithmetic.
   3457   1.1     bjh21 -------------------------------------------------------------------------------
   3458   1.1     bjh21 */
   3459   1.1     bjh21 float32 floatx80_to_float32( floatx80 a )
   3460   1.1     bjh21 {
   3461   1.1     bjh21     flag aSign;
   3462   1.1     bjh21     int32 aExp;
   3463   1.1     bjh21     bits64 aSig;
   3464   1.1     bjh21 
   3465   1.1     bjh21     aSig = extractFloatx80Frac( a );
   3466   1.1     bjh21     aExp = extractFloatx80Exp( a );
   3467   1.1     bjh21     aSign = extractFloatx80Sign( a );
   3468   1.1     bjh21     if ( aExp == 0x7FFF ) {
   3469   1.1     bjh21         if ( (bits64) ( aSig<<1 ) ) {
   3470   1.1     bjh21             return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
   3471   1.1     bjh21         }
   3472   1.1     bjh21         return packFloat32( aSign, 0xFF, 0 );
   3473   1.1     bjh21     }
   3474   1.1     bjh21     shift64RightJamming( aSig, 33, &aSig );
   3475   1.1     bjh21     if ( aExp || aSig ) aExp -= 0x3F81;
   3476   1.1     bjh21     return roundAndPackFloat32( aSign, aExp, aSig );
   3477   1.1     bjh21 
   3478   1.1     bjh21 }
   3479   1.1     bjh21 
   3480   1.1     bjh21 /*
   3481   1.1     bjh21 -------------------------------------------------------------------------------
   3482   1.1     bjh21 Returns the result of converting the extended double-precision floating-
   3483   1.1     bjh21 point value `a' to the double-precision floating-point format.  The
   3484   1.1     bjh21 conversion is performed according to the IEC/IEEE Standard for Binary
   3485   1.1     bjh21 Floating-Point Arithmetic.
   3486   1.1     bjh21 -------------------------------------------------------------------------------
   3487   1.1     bjh21 */
   3488   1.1     bjh21 float64 floatx80_to_float64( floatx80 a )
   3489   1.1     bjh21 {
   3490   1.1     bjh21     flag aSign;
   3491   1.1     bjh21     int32 aExp;
   3492   1.1     bjh21     bits64 aSig, zSig;
   3493   1.1     bjh21 
   3494   1.1     bjh21     aSig = extractFloatx80Frac( a );
   3495   1.1     bjh21     aExp = extractFloatx80Exp( a );
   3496   1.1     bjh21     aSign = extractFloatx80Sign( a );
   3497   1.1     bjh21     if ( aExp == 0x7FFF ) {
   3498   1.1     bjh21         if ( (bits64) ( aSig<<1 ) ) {
   3499   1.1     bjh21             return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
   3500   1.1     bjh21         }
   3501   1.1     bjh21         return packFloat64( aSign, 0x7FF, 0 );
   3502   1.1     bjh21     }
   3503   1.1     bjh21     shift64RightJamming( aSig, 1, &zSig );
   3504   1.1     bjh21     if ( aExp || aSig ) aExp -= 0x3C01;
   3505   1.1     bjh21     return roundAndPackFloat64( aSign, aExp, zSig );
   3506   1.1     bjh21 
   3507   1.1     bjh21 }
   3508   1.1     bjh21 
   3509   1.1     bjh21 #ifdef FLOAT128
   3510   1.1     bjh21 
   3511   1.1     bjh21 /*
   3512   1.1     bjh21 -------------------------------------------------------------------------------
   3513   1.1     bjh21 Returns the result of converting the extended double-precision floating-
   3514   1.1     bjh21 point value `a' to the quadruple-precision floating-point format.  The
   3515   1.1     bjh21 conversion is performed according to the IEC/IEEE Standard for Binary
   3516   1.1     bjh21 Floating-Point Arithmetic.
   3517   1.1     bjh21 -------------------------------------------------------------------------------
   3518   1.1     bjh21 */
   3519   1.1     bjh21 float128 floatx80_to_float128( floatx80 a )
   3520   1.1     bjh21 {
   3521   1.1     bjh21     flag aSign;
   3522   1.1     bjh21     int16 aExp;
   3523   1.1     bjh21     bits64 aSig, zSig0, zSig1;
   3524   1.1     bjh21 
   3525   1.1     bjh21     aSig = extractFloatx80Frac( a );
   3526   1.1     bjh21     aExp = extractFloatx80Exp( a );
   3527   1.1     bjh21     aSign = extractFloatx80Sign( a );
   3528   1.1     bjh21     if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
   3529   1.1     bjh21         return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
   3530   1.1     bjh21     }
   3531   1.1     bjh21     shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
   3532   1.1     bjh21     return packFloat128( aSign, aExp, zSig0, zSig1 );
   3533   1.1     bjh21 
   3534   1.1     bjh21 }
   3535   1.1     bjh21 
   3536   1.1     bjh21 #endif
   3537   1.1     bjh21 
   3538   1.1     bjh21 /*
   3539   1.1     bjh21 -------------------------------------------------------------------------------
   3540   1.1     bjh21 Rounds the extended double-precision floating-point value `a' to an integer,
   3541   1.1     bjh21 and returns the result as an extended quadruple-precision floating-point
   3542   1.1     bjh21 value.  The operation is performed according to the IEC/IEEE Standard for
   3543   1.1     bjh21 Binary Floating-Point Arithmetic.
   3544   1.1     bjh21 -------------------------------------------------------------------------------
   3545   1.1     bjh21 */
   3546   1.1     bjh21 floatx80 floatx80_round_to_int( floatx80 a )
   3547   1.1     bjh21 {
   3548   1.1     bjh21     flag aSign;
   3549   1.1     bjh21     int32 aExp;
   3550   1.1     bjh21     bits64 lastBitMask, roundBitsMask;
   3551   1.1     bjh21     int8 roundingMode;
   3552   1.1     bjh21     floatx80 z;
   3553   1.1     bjh21 
   3554   1.1     bjh21     aExp = extractFloatx80Exp( a );
   3555   1.1     bjh21     if ( 0x403E <= aExp ) {
   3556   1.1     bjh21         if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
   3557   1.1     bjh21             return propagateFloatx80NaN( a, a );
   3558   1.1     bjh21         }
   3559   1.1     bjh21         return a;
   3560   1.1     bjh21     }
   3561   1.1     bjh21     if ( aExp < 0x3FFF ) {
   3562   1.1     bjh21         if (    ( aExp == 0 )
   3563   1.1     bjh21              && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
   3564   1.1     bjh21             return a;
   3565   1.1     bjh21         }
   3566  1.12      matt         set_float_exception_inexact_flag();
   3567   1.1     bjh21         aSign = extractFloatx80Sign( a );
   3568   1.1     bjh21         switch ( float_rounding_mode ) {
   3569   1.1     bjh21          case float_round_nearest_even:
   3570   1.1     bjh21             if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
   3571   1.1     bjh21                ) {
   3572   1.1     bjh21                 return
   3573   1.1     bjh21                     packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
   3574   1.1     bjh21             }
   3575   1.1     bjh21             break;
   3576   1.1     bjh21 	 case float_round_to_zero:
   3577   1.1     bjh21 	    break;
   3578   1.1     bjh21          case float_round_down:
   3579   1.1     bjh21             return
   3580   1.1     bjh21                   aSign ?
   3581   1.1     bjh21                       packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
   3582   1.1     bjh21                 : packFloatx80( 0, 0, 0 );
   3583   1.1     bjh21          case float_round_up:
   3584   1.1     bjh21             return
   3585   1.1     bjh21                   aSign ? packFloatx80( 1, 0, 0 )
   3586   1.1     bjh21                 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
   3587   1.1     bjh21         }
   3588   1.1     bjh21         return packFloatx80( aSign, 0, 0 );
   3589   1.1     bjh21     }
   3590   1.1     bjh21     lastBitMask = 1;
   3591   1.1     bjh21     lastBitMask <<= 0x403E - aExp;
   3592   1.1     bjh21     roundBitsMask = lastBitMask - 1;
   3593   1.1     bjh21     z = a;
   3594   1.1     bjh21     roundingMode = float_rounding_mode;
   3595   1.1     bjh21     if ( roundingMode == float_round_nearest_even ) {
   3596   1.1     bjh21         z.low += lastBitMask>>1;
   3597   1.1     bjh21         if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
   3598   1.1     bjh21     }
   3599   1.1     bjh21     else if ( roundingMode != float_round_to_zero ) {
   3600   1.1     bjh21         if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
   3601   1.1     bjh21             z.low += roundBitsMask;
   3602   1.1     bjh21         }
   3603   1.1     bjh21     }
   3604   1.1     bjh21     z.low &= ~ roundBitsMask;
   3605   1.1     bjh21     if ( z.low == 0 ) {
   3606   1.1     bjh21         ++z.high;
   3607   1.1     bjh21         z.low = LIT64( 0x8000000000000000 );
   3608   1.1     bjh21     }
   3609  1.16       nat     z.high <<= X80SHIFT;
   3610  1.12      matt     if ( z.low != a.low ) set_float_exception_inexact_flag();
   3611   1.1     bjh21     return z;
   3612   1.1     bjh21 
   3613   1.1     bjh21 }
   3614   1.1     bjh21 
   3615   1.1     bjh21 /*
   3616   1.1     bjh21 -------------------------------------------------------------------------------
   3617   1.1     bjh21 Returns the result of adding the absolute values of the extended double-
   3618   1.1     bjh21 precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
   3619   1.1     bjh21 negated before being returned.  `zSign' is ignored if the result is a NaN.
   3620   1.1     bjh21 The addition is performed according to the IEC/IEEE Standard for Binary
   3621   1.1     bjh21 Floating-Point Arithmetic.
   3622   1.1     bjh21 -------------------------------------------------------------------------------
   3623   1.1     bjh21 */
   3624   1.1     bjh21 static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
   3625   1.1     bjh21 {
   3626   1.1     bjh21     int32 aExp, bExp, zExp;
   3627   1.1     bjh21     bits64 aSig, bSig, zSig0, zSig1;
   3628   1.1     bjh21     int32 expDiff;
   3629   1.1     bjh21 
   3630   1.1     bjh21     aSig = extractFloatx80Frac( a );
   3631   1.1     bjh21     aExp = extractFloatx80Exp( a );
   3632   1.1     bjh21     bSig = extractFloatx80Frac( b );
   3633   1.1     bjh21     bExp = extractFloatx80Exp( b );
   3634   1.1     bjh21     expDiff = aExp - bExp;
   3635   1.1     bjh21     if ( 0 < expDiff ) {
   3636   1.1     bjh21         if ( aExp == 0x7FFF ) {
   3637   1.1     bjh21             if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3638   1.1     bjh21             return a;
   3639   1.1     bjh21         }
   3640   1.1     bjh21         if ( bExp == 0 ) --expDiff;
   3641   1.1     bjh21         shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
   3642   1.1     bjh21         zExp = aExp;
   3643   1.1     bjh21     }
   3644   1.1     bjh21     else if ( expDiff < 0 ) {
   3645   1.1     bjh21         if ( bExp == 0x7FFF ) {
   3646   1.1     bjh21             if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3647   1.1     bjh21             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3648   1.1     bjh21         }
   3649   1.1     bjh21         if ( aExp == 0 ) ++expDiff;
   3650   1.1     bjh21         shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
   3651   1.1     bjh21         zExp = bExp;
   3652   1.1     bjh21     }
   3653   1.1     bjh21     else {
   3654   1.1     bjh21         if ( aExp == 0x7FFF ) {
   3655   1.1     bjh21             if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
   3656   1.1     bjh21                 return propagateFloatx80NaN( a, b );
   3657   1.1     bjh21             }
   3658   1.1     bjh21             return a;
   3659   1.1     bjh21         }
   3660   1.1     bjh21         zSig1 = 0;
   3661   1.1     bjh21         zSig0 = aSig + bSig;
   3662   1.1     bjh21         if ( aExp == 0 ) {
   3663   1.1     bjh21             normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
   3664   1.1     bjh21             goto roundAndPack;
   3665   1.1     bjh21         }
   3666   1.1     bjh21         zExp = aExp;
   3667   1.1     bjh21         goto shiftRight1;
   3668   1.1     bjh21     }
   3669   1.1     bjh21     zSig0 = aSig + bSig;
   3670   1.1     bjh21     if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
   3671   1.1     bjh21  shiftRight1:
   3672   1.1     bjh21     shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
   3673   1.1     bjh21     zSig0 |= LIT64( 0x8000000000000000 );
   3674   1.1     bjh21     ++zExp;
   3675   1.1     bjh21  roundAndPack:
   3676   1.1     bjh21     return
   3677   1.1     bjh21         roundAndPackFloatx80(
   3678   1.1     bjh21             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
   3679   1.1     bjh21 
   3680   1.1     bjh21 }
   3681   1.1     bjh21 
   3682   1.1     bjh21 /*
   3683   1.1     bjh21 -------------------------------------------------------------------------------
   3684   1.1     bjh21 Returns the result of subtracting the absolute values of the extended
   3685   1.1     bjh21 double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
   3686   1.1     bjh21 difference is negated before being returned.  `zSign' is ignored if the
   3687   1.1     bjh21 result is a NaN.  The subtraction is performed according to the IEC/IEEE
   3688   1.1     bjh21 Standard for Binary Floating-Point Arithmetic.
   3689   1.1     bjh21 -------------------------------------------------------------------------------
   3690   1.1     bjh21 */
   3691   1.1     bjh21 static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
   3692   1.1     bjh21 {
   3693   1.1     bjh21     int32 aExp, bExp, zExp;
   3694   1.1     bjh21     bits64 aSig, bSig, zSig0, zSig1;
   3695   1.1     bjh21     int32 expDiff;
   3696   1.1     bjh21     floatx80 z;
   3697   1.1     bjh21 
   3698   1.1     bjh21     aSig = extractFloatx80Frac( a );
   3699   1.1     bjh21     aExp = extractFloatx80Exp( a );
   3700   1.1     bjh21     bSig = extractFloatx80Frac( b );
   3701   1.1     bjh21     bExp = extractFloatx80Exp( b );
   3702   1.1     bjh21     expDiff = aExp - bExp;
   3703   1.1     bjh21     if ( 0 < expDiff ) goto aExpBigger;
   3704   1.1     bjh21     if ( expDiff < 0 ) goto bExpBigger;
   3705   1.1     bjh21     if ( aExp == 0x7FFF ) {
   3706   1.1     bjh21         if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
   3707   1.1     bjh21             return propagateFloatx80NaN( a, b );
   3708   1.1     bjh21         }
   3709   1.1     bjh21         float_raise( float_flag_invalid );
   3710   1.1     bjh21         z.low = floatx80_default_nan_low;
   3711  1.16       nat         z.high = floatx80_default_nan_high<<X80SHIFT;
   3712   1.1     bjh21         return z;
   3713   1.1     bjh21     }
   3714   1.1     bjh21     if ( aExp == 0 ) {
   3715   1.1     bjh21         aExp = 1;
   3716   1.1     bjh21         bExp = 1;
   3717   1.1     bjh21     }
   3718   1.1     bjh21     zSig1 = 0;
   3719   1.1     bjh21     if ( bSig < aSig ) goto aBigger;
   3720   1.1     bjh21     if ( aSig < bSig ) goto bBigger;
   3721   1.1     bjh21     return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
   3722   1.1     bjh21  bExpBigger:
   3723   1.1     bjh21     if ( bExp == 0x7FFF ) {
   3724   1.1     bjh21         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3725   1.1     bjh21         return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3726   1.1     bjh21     }
   3727   1.1     bjh21     if ( aExp == 0 ) ++expDiff;
   3728   1.1     bjh21     shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
   3729   1.1     bjh21  bBigger:
   3730   1.1     bjh21     sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
   3731   1.1     bjh21     zExp = bExp;
   3732   1.1     bjh21     zSign ^= 1;
   3733   1.1     bjh21     goto normalizeRoundAndPack;
   3734   1.1     bjh21  aExpBigger:
   3735   1.1     bjh21     if ( aExp == 0x7FFF ) {
   3736   1.1     bjh21         if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3737   1.1     bjh21         return a;
   3738   1.1     bjh21     }
   3739   1.1     bjh21     if ( bExp == 0 ) --expDiff;
   3740   1.1     bjh21     shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
   3741   1.1     bjh21  aBigger:
   3742   1.1     bjh21     sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
   3743   1.1     bjh21     zExp = aExp;
   3744   1.1     bjh21  normalizeRoundAndPack:
   3745   1.1     bjh21     return
   3746   1.1     bjh21         normalizeRoundAndPackFloatx80(
   3747   1.1     bjh21             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
   3748   1.1     bjh21 
   3749   1.1     bjh21 }
   3750   1.1     bjh21 
   3751   1.1     bjh21 /*
   3752   1.1     bjh21 -------------------------------------------------------------------------------
   3753   1.1     bjh21 Returns the result of adding the extended double-precision floating-point
   3754   1.1     bjh21 values `a' and `b'.  The operation is performed according to the IEC/IEEE
   3755   1.1     bjh21 Standard for Binary Floating-Point Arithmetic.
   3756   1.1     bjh21 -------------------------------------------------------------------------------
   3757   1.1     bjh21 */
   3758   1.1     bjh21 floatx80 floatx80_add( floatx80 a, floatx80 b )
   3759   1.1     bjh21 {
   3760   1.1     bjh21     flag aSign, bSign;
   3761   1.1     bjh21 
   3762   1.1     bjh21     aSign = extractFloatx80Sign( a );
   3763   1.1     bjh21     bSign = extractFloatx80Sign( b );
   3764   1.1     bjh21     if ( aSign == bSign ) {
   3765   1.1     bjh21         return addFloatx80Sigs( a, b, aSign );
   3766   1.1     bjh21     }
   3767   1.1     bjh21     else {
   3768   1.1     bjh21         return subFloatx80Sigs( a, b, aSign );
   3769   1.1     bjh21     }
   3770   1.1     bjh21 
   3771   1.1     bjh21 }
   3772   1.1     bjh21 
   3773   1.1     bjh21 /*
   3774   1.1     bjh21 -------------------------------------------------------------------------------
   3775   1.1     bjh21 Returns the result of subtracting the extended double-precision floating-
   3776   1.1     bjh21 point values `a' and `b'.  The operation is performed according to the
   3777   1.1     bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3778   1.1     bjh21 -------------------------------------------------------------------------------
   3779   1.1     bjh21 */
   3780   1.1     bjh21 floatx80 floatx80_sub( floatx80 a, floatx80 b )
   3781   1.1     bjh21 {
   3782   1.1     bjh21     flag aSign, bSign;
   3783   1.1     bjh21 
   3784   1.1     bjh21     aSign = extractFloatx80Sign( a );
   3785   1.1     bjh21     bSign = extractFloatx80Sign( b );
   3786   1.1     bjh21     if ( aSign == bSign ) {
   3787   1.1     bjh21         return subFloatx80Sigs( a, b, aSign );
   3788   1.1     bjh21     }
   3789   1.1     bjh21     else {
   3790   1.1     bjh21         return addFloatx80Sigs( a, b, aSign );
   3791   1.1     bjh21     }
   3792   1.1     bjh21 
   3793   1.1     bjh21 }
   3794   1.1     bjh21 
   3795   1.1     bjh21 /*
   3796   1.1     bjh21 -------------------------------------------------------------------------------
   3797   1.1     bjh21 Returns the result of multiplying the extended double-precision floating-
   3798   1.1     bjh21 point values `a' and `b'.  The operation is performed according to the
   3799   1.1     bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3800   1.1     bjh21 -------------------------------------------------------------------------------
   3801   1.1     bjh21 */
   3802   1.1     bjh21 floatx80 floatx80_mul( floatx80 a, floatx80 b )
   3803   1.1     bjh21 {
   3804   1.1     bjh21     flag aSign, bSign, zSign;
   3805   1.1     bjh21     int32 aExp, bExp, zExp;
   3806   1.1     bjh21     bits64 aSig, bSig, zSig0, zSig1;
   3807   1.1     bjh21     floatx80 z;
   3808   1.1     bjh21 
   3809   1.1     bjh21     aSig = extractFloatx80Frac( a );
   3810   1.1     bjh21     aExp = extractFloatx80Exp( a );
   3811   1.1     bjh21     aSign = extractFloatx80Sign( a );
   3812   1.1     bjh21     bSig = extractFloatx80Frac( b );
   3813   1.1     bjh21     bExp = extractFloatx80Exp( b );
   3814   1.1     bjh21     bSign = extractFloatx80Sign( b );
   3815   1.1     bjh21     zSign = aSign ^ bSign;
   3816   1.1     bjh21     if ( aExp == 0x7FFF ) {
   3817   1.1     bjh21         if (    (bits64) ( aSig<<1 )
   3818   1.1     bjh21              || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
   3819   1.1     bjh21             return propagateFloatx80NaN( a, b );
   3820   1.1     bjh21         }
   3821   1.1     bjh21         if ( ( bExp | bSig ) == 0 ) goto invalid;
   3822   1.1     bjh21         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3823   1.1     bjh21     }
   3824   1.1     bjh21     if ( bExp == 0x7FFF ) {
   3825   1.1     bjh21         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3826   1.1     bjh21         if ( ( aExp | aSig ) == 0 ) {
   3827   1.1     bjh21  invalid:
   3828   1.1     bjh21             float_raise( float_flag_invalid );
   3829   1.1     bjh21             z.low = floatx80_default_nan_low;
   3830  1.16       nat             z.high = floatx80_default_nan_high<<X80SHIFT;
   3831   1.1     bjh21             return z;
   3832   1.1     bjh21         }
   3833   1.1     bjh21         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3834   1.1     bjh21     }
   3835   1.1     bjh21     if ( aExp == 0 ) {
   3836   1.1     bjh21         if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
   3837   1.1     bjh21         normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
   3838   1.1     bjh21     }
   3839   1.1     bjh21     if ( bExp == 0 ) {
   3840   1.1     bjh21         if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
   3841   1.1     bjh21         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
   3842   1.1     bjh21     }
   3843   1.1     bjh21     zExp = aExp + bExp - 0x3FFE;
   3844   1.1     bjh21     mul64To128( aSig, bSig, &zSig0, &zSig1 );
   3845   1.1     bjh21     if ( 0 < (sbits64) zSig0 ) {
   3846   1.1     bjh21         shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
   3847   1.1     bjh21         --zExp;
   3848   1.1     bjh21     }
   3849   1.1     bjh21     return
   3850   1.1     bjh21         roundAndPackFloatx80(
   3851   1.1     bjh21             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
   3852   1.1     bjh21 
   3853   1.1     bjh21 }
   3854   1.1     bjh21 
   3855   1.1     bjh21 /*
   3856   1.1     bjh21 -------------------------------------------------------------------------------
   3857   1.1     bjh21 Returns the result of dividing the extended double-precision floating-point
   3858   1.1     bjh21 value `a' by the corresponding value `b'.  The operation is performed
   3859   1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3860   1.1     bjh21 -------------------------------------------------------------------------------
   3861   1.1     bjh21 */
   3862   1.1     bjh21 floatx80 floatx80_div( floatx80 a, floatx80 b )
   3863   1.1     bjh21 {
   3864   1.1     bjh21     flag aSign, bSign, zSign;
   3865   1.1     bjh21     int32 aExp, bExp, zExp;
   3866   1.1     bjh21     bits64 aSig, bSig, zSig0, zSig1;
   3867   1.1     bjh21     bits64 rem0, rem1, rem2, term0, term1, term2;
   3868   1.1     bjh21     floatx80 z;
   3869   1.1     bjh21 
   3870   1.1     bjh21     aSig = extractFloatx80Frac( a );
   3871   1.1     bjh21     aExp = extractFloatx80Exp( a );
   3872   1.1     bjh21     aSign = extractFloatx80Sign( a );
   3873   1.1     bjh21     bSig = extractFloatx80Frac( b );
   3874   1.1     bjh21     bExp = extractFloatx80Exp( b );
   3875   1.1     bjh21     bSign = extractFloatx80Sign( b );
   3876   1.1     bjh21     zSign = aSign ^ bSign;
   3877   1.1     bjh21     if ( aExp == 0x7FFF ) {
   3878   1.1     bjh21         if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3879   1.1     bjh21         if ( bExp == 0x7FFF ) {
   3880   1.1     bjh21             if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3881   1.1     bjh21             goto invalid;
   3882   1.1     bjh21         }
   3883   1.1     bjh21         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3884   1.1     bjh21     }
   3885   1.1     bjh21     if ( bExp == 0x7FFF ) {
   3886   1.1     bjh21         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3887   1.1     bjh21         return packFloatx80( zSign, 0, 0 );
   3888   1.1     bjh21     }
   3889   1.1     bjh21     if ( bExp == 0 ) {
   3890   1.1     bjh21         if ( bSig == 0 ) {
   3891   1.1     bjh21             if ( ( aExp | aSig ) == 0 ) {
   3892   1.1     bjh21  invalid:
   3893   1.1     bjh21                 float_raise( float_flag_invalid );
   3894   1.1     bjh21                 z.low = floatx80_default_nan_low;
   3895  1.16       nat                 z.high = floatx80_default_nan_high<<X80SHIFT;
   3896   1.1     bjh21                 return z;
   3897   1.1     bjh21             }
   3898   1.1     bjh21             float_raise( float_flag_divbyzero );
   3899   1.1     bjh21             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   3900   1.1     bjh21         }
   3901   1.1     bjh21         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
   3902   1.1     bjh21     }
   3903   1.1     bjh21     if ( aExp == 0 ) {
   3904   1.1     bjh21         if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
   3905   1.1     bjh21         normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
   3906   1.1     bjh21     }
   3907   1.1     bjh21     zExp = aExp - bExp + 0x3FFE;
   3908   1.1     bjh21     rem1 = 0;
   3909   1.1     bjh21     if ( bSig <= aSig ) {
   3910   1.1     bjh21         shift128Right( aSig, 0, 1, &aSig, &rem1 );
   3911   1.1     bjh21         ++zExp;
   3912   1.1     bjh21     }
   3913   1.1     bjh21     zSig0 = estimateDiv128To64( aSig, rem1, bSig );
   3914   1.1     bjh21     mul64To128( bSig, zSig0, &term0, &term1 );
   3915   1.1     bjh21     sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
   3916   1.1     bjh21     while ( (sbits64) rem0 < 0 ) {
   3917   1.1     bjh21         --zSig0;
   3918   1.1     bjh21         add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
   3919   1.1     bjh21     }
   3920   1.1     bjh21     zSig1 = estimateDiv128To64( rem1, 0, bSig );
   3921   1.1     bjh21     if ( (bits64) ( zSig1<<1 ) <= 8 ) {
   3922   1.1     bjh21         mul64To128( bSig, zSig1, &term1, &term2 );
   3923   1.1     bjh21         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
   3924   1.1     bjh21         while ( (sbits64) rem1 < 0 ) {
   3925   1.1     bjh21             --zSig1;
   3926   1.1     bjh21             add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
   3927   1.1     bjh21         }
   3928   1.1     bjh21         zSig1 |= ( ( rem1 | rem2 ) != 0 );
   3929   1.1     bjh21     }
   3930   1.1     bjh21     return
   3931   1.1     bjh21         roundAndPackFloatx80(
   3932   1.1     bjh21             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
   3933   1.1     bjh21 
   3934   1.1     bjh21 }
   3935   1.1     bjh21 
   3936   1.1     bjh21 /*
   3937   1.1     bjh21 -------------------------------------------------------------------------------
   3938   1.1     bjh21 Returns the remainder of the extended double-precision floating-point value
   3939   1.1     bjh21 `a' with respect to the corresponding value `b'.  The operation is performed
   3940   1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   3941   1.1     bjh21 -------------------------------------------------------------------------------
   3942   1.1     bjh21 */
   3943   1.1     bjh21 floatx80 floatx80_rem( floatx80 a, floatx80 b )
   3944   1.1     bjh21 {
   3945  1.14    martin     flag aSign, zSign;
   3946   1.1     bjh21     int32 aExp, bExp, expDiff;
   3947   1.1     bjh21     bits64 aSig0, aSig1, bSig;
   3948   1.1     bjh21     bits64 q, term0, term1, alternateASig0, alternateASig1;
   3949   1.1     bjh21     floatx80 z;
   3950   1.1     bjh21 
   3951   1.1     bjh21     aSig0 = extractFloatx80Frac( a );
   3952   1.1     bjh21     aExp = extractFloatx80Exp( a );
   3953   1.1     bjh21     aSign = extractFloatx80Sign( a );
   3954   1.1     bjh21     bSig = extractFloatx80Frac( b );
   3955   1.1     bjh21     bExp = extractFloatx80Exp( b );
   3956  1.14    martin 
   3957   1.1     bjh21     if ( aExp == 0x7FFF ) {
   3958   1.1     bjh21         if (    (bits64) ( aSig0<<1 )
   3959   1.1     bjh21              || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
   3960   1.1     bjh21             return propagateFloatx80NaN( a, b );
   3961   1.1     bjh21         }
   3962   1.1     bjh21         goto invalid;
   3963   1.1     bjh21     }
   3964   1.1     bjh21     if ( bExp == 0x7FFF ) {
   3965   1.1     bjh21         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
   3966   1.1     bjh21         return a;
   3967   1.1     bjh21     }
   3968   1.1     bjh21     if ( bExp == 0 ) {
   3969   1.1     bjh21         if ( bSig == 0 ) {
   3970   1.1     bjh21  invalid:
   3971   1.1     bjh21             float_raise( float_flag_invalid );
   3972   1.1     bjh21             z.low = floatx80_default_nan_low;
   3973  1.16       nat             z.high = floatx80_default_nan_high<<X80SHIFT;
   3974   1.1     bjh21             return z;
   3975   1.1     bjh21         }
   3976   1.1     bjh21         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
   3977   1.1     bjh21     }
   3978   1.1     bjh21     if ( aExp == 0 ) {
   3979   1.1     bjh21         if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
   3980   1.1     bjh21         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
   3981   1.1     bjh21     }
   3982   1.1     bjh21     bSig |= LIT64( 0x8000000000000000 );
   3983   1.1     bjh21     zSign = aSign;
   3984   1.1     bjh21     expDiff = aExp - bExp;
   3985   1.1     bjh21     aSig1 = 0;
   3986   1.1     bjh21     if ( expDiff < 0 ) {
   3987   1.1     bjh21         if ( expDiff < -1 ) return a;
   3988   1.1     bjh21         shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
   3989   1.1     bjh21         expDiff = 0;
   3990   1.1     bjh21     }
   3991   1.1     bjh21     q = ( bSig <= aSig0 );
   3992   1.1     bjh21     if ( q ) aSig0 -= bSig;
   3993   1.1     bjh21     expDiff -= 64;
   3994   1.1     bjh21     while ( 0 < expDiff ) {
   3995   1.1     bjh21         q = estimateDiv128To64( aSig0, aSig1, bSig );
   3996   1.1     bjh21         q = ( 2 < q ) ? q - 2 : 0;
   3997   1.1     bjh21         mul64To128( bSig, q, &term0, &term1 );
   3998   1.1     bjh21         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
   3999   1.1     bjh21         shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
   4000   1.1     bjh21         expDiff -= 62;
   4001   1.1     bjh21     }
   4002   1.1     bjh21     expDiff += 64;
   4003   1.1     bjh21     if ( 0 < expDiff ) {
   4004   1.1     bjh21         q = estimateDiv128To64( aSig0, aSig1, bSig );
   4005   1.1     bjh21         q = ( 2 < q ) ? q - 2 : 0;
   4006   1.1     bjh21         q >>= 64 - expDiff;
   4007   1.1     bjh21         mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
   4008   1.1     bjh21         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
   4009   1.1     bjh21         shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
   4010   1.1     bjh21         while ( le128( term0, term1, aSig0, aSig1 ) ) {
   4011   1.1     bjh21             ++q;
   4012   1.1     bjh21             sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
   4013   1.1     bjh21         }
   4014   1.1     bjh21     }
   4015   1.1     bjh21     else {
   4016   1.1     bjh21         term1 = 0;
   4017   1.1     bjh21         term0 = bSig;
   4018   1.1     bjh21     }
   4019   1.1     bjh21     sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
   4020   1.1     bjh21     if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
   4021   1.1     bjh21          || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
   4022   1.1     bjh21               && ( q & 1 ) )
   4023   1.1     bjh21        ) {
   4024   1.1     bjh21         aSig0 = alternateASig0;
   4025   1.1     bjh21         aSig1 = alternateASig1;
   4026   1.1     bjh21         zSign = ! zSign;
   4027   1.1     bjh21     }
   4028   1.1     bjh21     return
   4029   1.1     bjh21         normalizeRoundAndPackFloatx80(
   4030   1.1     bjh21             80, zSign, bExp + expDiff, aSig0, aSig1 );
   4031   1.1     bjh21 
   4032   1.1     bjh21 }
   4033   1.1     bjh21 
   4034   1.1     bjh21 /*
   4035   1.1     bjh21 -------------------------------------------------------------------------------
   4036   1.1     bjh21 Returns the square root of the extended double-precision floating-point
   4037   1.1     bjh21 value `a'.  The operation is performed according to the IEC/IEEE Standard
   4038   1.1     bjh21 for Binary Floating-Point Arithmetic.
   4039   1.1     bjh21 -------------------------------------------------------------------------------
   4040   1.1     bjh21 */
   4041   1.1     bjh21 floatx80 floatx80_sqrt( floatx80 a )
   4042   1.1     bjh21 {
   4043   1.1     bjh21     flag aSign;
   4044   1.1     bjh21     int32 aExp, zExp;
   4045   1.1     bjh21     bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
   4046   1.1     bjh21     bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
   4047   1.1     bjh21     floatx80 z;
   4048   1.1     bjh21 
   4049   1.1     bjh21     aSig0 = extractFloatx80Frac( a );
   4050   1.1     bjh21     aExp = extractFloatx80Exp( a );
   4051   1.1     bjh21     aSign = extractFloatx80Sign( a );
   4052   1.1     bjh21     if ( aExp == 0x7FFF ) {
   4053   1.1     bjh21         if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
   4054   1.1     bjh21         if ( ! aSign ) return a;
   4055   1.1     bjh21         goto invalid;
   4056   1.1     bjh21     }
   4057   1.1     bjh21     if ( aSign ) {
   4058   1.1     bjh21         if ( ( aExp | aSig0 ) == 0 ) return a;
   4059   1.1     bjh21  invalid:
   4060   1.1     bjh21         float_raise( float_flag_invalid );
   4061   1.1     bjh21         z.low = floatx80_default_nan_low;
   4062  1.16       nat         z.high = floatx80_default_nan_high<<X80SHIFT;
   4063   1.1     bjh21         return z;
   4064   1.1     bjh21     }
   4065   1.1     bjh21     if ( aExp == 0 ) {
   4066   1.1     bjh21         if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
   4067   1.1     bjh21         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
   4068   1.1     bjh21     }
   4069   1.1     bjh21     zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
   4070   1.1     bjh21     zSig0 = estimateSqrt32( aExp, aSig0>>32 );
   4071   1.1     bjh21     shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
   4072   1.1     bjh21     zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
   4073   1.1     bjh21     doubleZSig0 = zSig0<<1;
   4074   1.1     bjh21     mul64To128( zSig0, zSig0, &term0, &term1 );
   4075   1.1     bjh21     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
   4076   1.1     bjh21     while ( (sbits64) rem0 < 0 ) {
   4077   1.1     bjh21         --zSig0;
   4078   1.1     bjh21         doubleZSig0 -= 2;
   4079   1.1     bjh21         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
   4080   1.1     bjh21     }
   4081   1.1     bjh21     zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
   4082   1.1     bjh21     if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
   4083   1.1     bjh21         if ( zSig1 == 0 ) zSig1 = 1;
   4084   1.1     bjh21         mul64To128( doubleZSig0, zSig1, &term1, &term2 );
   4085   1.1     bjh21         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
   4086   1.1     bjh21         mul64To128( zSig1, zSig1, &term2, &term3 );
   4087   1.1     bjh21         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
   4088   1.1     bjh21         while ( (sbits64) rem1 < 0 ) {
   4089   1.1     bjh21             --zSig1;
   4090   1.1     bjh21             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
   4091   1.1     bjh21             term3 |= 1;
   4092   1.1     bjh21             term2 |= doubleZSig0;
   4093   1.1     bjh21             add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
   4094   1.1     bjh21         }
   4095   1.1     bjh21         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
   4096   1.1     bjh21     }
   4097   1.1     bjh21     shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
   4098   1.1     bjh21     zSig0 |= doubleZSig0;
   4099   1.1     bjh21     return
   4100   1.1     bjh21         roundAndPackFloatx80(
   4101   1.1     bjh21             floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
   4102   1.1     bjh21 
   4103   1.1     bjh21 }
   4104   1.1     bjh21 
   4105   1.1     bjh21 /*
   4106   1.1     bjh21 -------------------------------------------------------------------------------
   4107   1.1     bjh21 Returns 1 if the extended double-precision floating-point value `a' is
   4108   1.1     bjh21 equal to the corresponding value `b', and 0 otherwise.  The comparison is
   4109   1.1     bjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
   4110   1.1     bjh21 Arithmetic.
   4111   1.1     bjh21 -------------------------------------------------------------------------------
   4112   1.1     bjh21 */
   4113   1.1     bjh21 flag floatx80_eq( floatx80 a, floatx80 b )
   4114   1.1     bjh21 {
   4115   1.1     bjh21 
   4116   1.1     bjh21     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4117   1.1     bjh21               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4118   1.1     bjh21          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4119   1.1     bjh21               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4120   1.1     bjh21        ) {
   4121   1.1     bjh21         if (    floatx80_is_signaling_nan( a )
   4122   1.1     bjh21              || floatx80_is_signaling_nan( b ) ) {
   4123   1.1     bjh21             float_raise( float_flag_invalid );
   4124   1.1     bjh21         }
   4125   1.1     bjh21         return 0;
   4126   1.1     bjh21     }
   4127   1.1     bjh21     return
   4128   1.1     bjh21            ( a.low == b.low )
   4129   1.1     bjh21         && (    ( a.high == b.high )
   4130   1.1     bjh21              || (    ( a.low == 0 )
   4131  1.16       nat                   && ( (bits16) ( ((bits32)( a.high | b.high )>>X80SHIFT)<<1 )
   4132  1.16       nat 	   == 0 ) ) );
   4133   1.1     bjh21 
   4134   1.1     bjh21 }
   4135   1.1     bjh21 
   4136   1.1     bjh21 /*
   4137   1.1     bjh21 -------------------------------------------------------------------------------
   4138   1.1     bjh21 Returns 1 if the extended double-precision floating-point value `a' is
   4139   1.1     bjh21 less than or equal to the corresponding value `b', and 0 otherwise.  The
   4140   1.1     bjh21 comparison is performed according to the IEC/IEEE Standard for Binary
   4141   1.1     bjh21 Floating-Point Arithmetic.
   4142   1.1     bjh21 -------------------------------------------------------------------------------
   4143   1.1     bjh21 */
   4144   1.1     bjh21 flag floatx80_le( floatx80 a, floatx80 b )
   4145   1.1     bjh21 {
   4146   1.1     bjh21     flag aSign, bSign;
   4147   1.1     bjh21 
   4148   1.1     bjh21     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4149   1.1     bjh21               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4150   1.1     bjh21          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4151   1.1     bjh21               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4152   1.1     bjh21        ) {
   4153   1.1     bjh21         float_raise( float_flag_invalid );
   4154   1.1     bjh21         return 0;
   4155   1.1     bjh21     }
   4156   1.1     bjh21     aSign = extractFloatx80Sign( a );
   4157   1.1     bjh21     bSign = extractFloatx80Sign( b );
   4158   1.1     bjh21     if ( aSign != bSign ) {
   4159   1.1     bjh21         return
   4160   1.1     bjh21                aSign
   4161  1.16       nat             || (    ( ( (bits16) ( ((bits32)( a.high | b.high )>>X80SHIFT)
   4162  1.16       nat 	         <<1 ) ) | a.low | b.low ) == 0 );
   4163   1.1     bjh21     }
   4164   1.1     bjh21     return
   4165  1.16       nat           aSign ? le128( b.high>>X80SHIFT, b.low, a.high>>X80SHIFT, a.low )
   4166  1.16       nat         : le128( a.high>>X80SHIFT, a.low, b.high>>X80SHIFT, b.low );
   4167   1.1     bjh21 
   4168   1.1     bjh21 }
   4169   1.1     bjh21 
   4170   1.1     bjh21 /*
   4171   1.1     bjh21 -------------------------------------------------------------------------------
   4172   1.1     bjh21 Returns 1 if the extended double-precision floating-point value `a' is
   4173   1.1     bjh21 less than the corresponding value `b', and 0 otherwise.  The comparison
   4174   1.1     bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4175   1.1     bjh21 Arithmetic.
   4176   1.1     bjh21 -------------------------------------------------------------------------------
   4177   1.1     bjh21 */
   4178   1.1     bjh21 flag floatx80_lt( floatx80 a, floatx80 b )
   4179   1.1     bjh21 {
   4180   1.1     bjh21     flag aSign, bSign;
   4181   1.1     bjh21 
   4182   1.1     bjh21     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4183   1.1     bjh21               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4184   1.1     bjh21          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4185   1.1     bjh21               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4186   1.1     bjh21        ) {
   4187   1.1     bjh21         float_raise( float_flag_invalid );
   4188   1.1     bjh21         return 0;
   4189   1.1     bjh21     }
   4190   1.1     bjh21     aSign = extractFloatx80Sign( a );
   4191   1.1     bjh21     bSign = extractFloatx80Sign( b );
   4192   1.1     bjh21     if ( aSign != bSign ) {
   4193   1.1     bjh21         return
   4194   1.1     bjh21                aSign
   4195  1.16       nat             && (    ( ( (bits16) ( ((bits32)( a.high | b.high )>>X80SHIFT)
   4196  1.16       nat 		 <<1 ) ) | a.low | b.low ) != 0 );
   4197   1.1     bjh21     }
   4198   1.1     bjh21     return
   4199  1.16       nat           aSign ? lt128( b.high>>X80SHIFT, b.low, a.high>>X80SHIFT, a.low )
   4200  1.16       nat         : lt128( a.high>>X80SHIFT, a.low, b.high>>X80SHIFT, b.low );
   4201   1.1     bjh21 
   4202   1.1     bjh21 }
   4203   1.1     bjh21 
   4204   1.1     bjh21 /*
   4205   1.1     bjh21 -------------------------------------------------------------------------------
   4206   1.1     bjh21 Returns 1 if the extended double-precision floating-point value `a' is equal
   4207   1.1     bjh21 to the corresponding value `b', and 0 otherwise.  The invalid exception is
   4208   1.1     bjh21 raised if either operand is a NaN.  Otherwise, the comparison is performed
   4209   1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   4210   1.1     bjh21 -------------------------------------------------------------------------------
   4211   1.1     bjh21 */
   4212   1.1     bjh21 flag floatx80_eq_signaling( floatx80 a, floatx80 b )
   4213   1.1     bjh21 {
   4214   1.1     bjh21 
   4215   1.1     bjh21     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4216   1.1     bjh21               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4217   1.1     bjh21          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4218   1.1     bjh21               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4219   1.1     bjh21        ) {
   4220   1.1     bjh21         float_raise( float_flag_invalid );
   4221   1.1     bjh21         return 0;
   4222   1.1     bjh21     }
   4223   1.1     bjh21     return
   4224   1.1     bjh21            ( a.low == b.low )
   4225  1.16       nat         && (    ( (a.high>>X80SHIFT) == (b.high>>X80SHIFT) )
   4226   1.1     bjh21              || (    ( a.low == 0 )
   4227  1.16       nat                   && ( (bits16) ( ((bits32)( a.high | b.high )>>X80SHIFT)<<1 )
   4228  1.16       nat 	    == 0 ) ) );
   4229   1.1     bjh21 
   4230   1.1     bjh21 }
   4231   1.1     bjh21 
   4232   1.1     bjh21 /*
   4233   1.1     bjh21 -------------------------------------------------------------------------------
   4234   1.1     bjh21 Returns 1 if the extended double-precision floating-point value `a' is less
   4235   1.1     bjh21 than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
   4236   1.1     bjh21 do not cause an exception.  Otherwise, the comparison is performed according
   4237   1.1     bjh21 to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   4238   1.1     bjh21 -------------------------------------------------------------------------------
   4239   1.1     bjh21 */
   4240   1.1     bjh21 flag floatx80_le_quiet( floatx80 a, floatx80 b )
   4241   1.1     bjh21 {
   4242   1.1     bjh21     flag aSign, bSign;
   4243   1.1     bjh21 
   4244   1.1     bjh21     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4245   1.1     bjh21               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4246   1.1     bjh21          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4247   1.1     bjh21               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4248   1.1     bjh21        ) {
   4249   1.1     bjh21         if (    floatx80_is_signaling_nan( a )
   4250   1.1     bjh21              || floatx80_is_signaling_nan( b ) ) {
   4251   1.1     bjh21             float_raise( float_flag_invalid );
   4252   1.1     bjh21         }
   4253   1.1     bjh21         return 0;
   4254   1.1     bjh21     }
   4255   1.1     bjh21     aSign = extractFloatx80Sign( a );
   4256   1.1     bjh21     bSign = extractFloatx80Sign( b );
   4257   1.1     bjh21     if ( aSign != bSign ) {
   4258   1.1     bjh21         return
   4259   1.1     bjh21                aSign
   4260  1.16       nat             || (    ( ( (bits16) ( ((bits32)( a.high | b.high )>>X80SHIFT)<<1
   4261  1.16       nat 		 ) ) | a.low | b.low ) == 0 );
   4262   1.1     bjh21     }
   4263   1.1     bjh21     return
   4264  1.16       nat           aSign ? le128( b.high>>X80SHIFT, b.low, a.high>>X80SHIFT, a.low )
   4265  1.16       nat         : le128( a.high>>X80SHIFT, a.low, b.high>>X80SHIFT, b.low );
   4266   1.1     bjh21 
   4267   1.1     bjh21 }
   4268   1.1     bjh21 
   4269   1.1     bjh21 /*
   4270   1.1     bjh21 -------------------------------------------------------------------------------
   4271   1.1     bjh21 Returns 1 if the extended double-precision floating-point value `a' is less
   4272   1.1     bjh21 than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
   4273   1.1     bjh21 an exception.  Otherwise, the comparison is performed according to the
   4274   1.1     bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   4275   1.1     bjh21 -------------------------------------------------------------------------------
   4276   1.1     bjh21 */
   4277   1.1     bjh21 flag floatx80_lt_quiet( floatx80 a, floatx80 b )
   4278   1.1     bjh21 {
   4279   1.1     bjh21     flag aSign, bSign;
   4280   1.1     bjh21 
   4281   1.1     bjh21     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
   4282   1.1     bjh21               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
   4283   1.1     bjh21          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
   4284   1.1     bjh21               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
   4285   1.1     bjh21        ) {
   4286   1.1     bjh21         if (    floatx80_is_signaling_nan( a )
   4287   1.1     bjh21              || floatx80_is_signaling_nan( b ) ) {
   4288   1.1     bjh21             float_raise( float_flag_invalid );
   4289   1.1     bjh21         }
   4290   1.1     bjh21         return 0;
   4291   1.1     bjh21     }
   4292   1.1     bjh21     aSign = extractFloatx80Sign( a );
   4293   1.1     bjh21     bSign = extractFloatx80Sign( b );
   4294   1.1     bjh21     if ( aSign != bSign ) {
   4295   1.1     bjh21         return
   4296   1.1     bjh21                aSign
   4297  1.16       nat             && (    ( ( (bits16) ( ((bits32)( a.high | b.high )>>X80SHIFT)<<1 )
   4298  1.16       nat 		 ) | a.low | b.low ) != 0 );
   4299   1.1     bjh21     }
   4300   1.1     bjh21     return
   4301  1.16       nat           aSign ? lt128( b.high>>X80SHIFT, b.low, a.high>>X80SHIFT, a.low )
   4302  1.16       nat         : lt128( a.high>>X80SHIFT, a.low, b.high>>X80SHIFT, b.low );
   4303   1.1     bjh21 
   4304   1.1     bjh21 }
   4305   1.1     bjh21 
   4306   1.1     bjh21 #endif
   4307   1.1     bjh21 
   4308   1.1     bjh21 #ifdef FLOAT128
   4309   1.1     bjh21 
   4310   1.1     bjh21 /*
   4311   1.1     bjh21 -------------------------------------------------------------------------------
   4312   1.1     bjh21 Returns the result of converting the quadruple-precision floating-point
   4313   1.1     bjh21 value `a' to the 32-bit two's complement integer format.  The conversion
   4314   1.1     bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4315   1.1     bjh21 Arithmetic---which means in particular that the conversion is rounded
   4316   1.1     bjh21 according to the current rounding mode.  If `a' is a NaN, the largest
   4317   1.1     bjh21 positive integer is returned.  Otherwise, if the conversion overflows, the
   4318   1.1     bjh21 largest integer with the same sign as `a' is returned.
   4319   1.1     bjh21 -------------------------------------------------------------------------------
   4320   1.1     bjh21 */
   4321   1.1     bjh21 int32 float128_to_int32( float128 a )
   4322   1.1     bjh21 {
   4323   1.1     bjh21     flag aSign;
   4324   1.1     bjh21     int32 aExp, shiftCount;
   4325   1.1     bjh21     bits64 aSig0, aSig1;
   4326   1.1     bjh21 
   4327   1.1     bjh21     aSig1 = extractFloat128Frac1( a );
   4328   1.1     bjh21     aSig0 = extractFloat128Frac0( a );
   4329   1.1     bjh21     aExp = extractFloat128Exp( a );
   4330   1.1     bjh21     aSign = extractFloat128Sign( a );
   4331   1.1     bjh21     if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
   4332   1.1     bjh21     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
   4333   1.1     bjh21     aSig0 |= ( aSig1 != 0 );
   4334   1.1     bjh21     shiftCount = 0x4028 - aExp;
   4335   1.1     bjh21     if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
   4336   1.1     bjh21     return roundAndPackInt32( aSign, aSig0 );
   4337   1.1     bjh21 
   4338   1.1     bjh21 }
   4339   1.1     bjh21 
   4340   1.1     bjh21 /*
   4341   1.1     bjh21 -------------------------------------------------------------------------------
   4342   1.1     bjh21 Returns the result of converting the quadruple-precision floating-point
   4343   1.1     bjh21 value `a' to the 32-bit two's complement integer format.  The conversion
   4344   1.1     bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4345   1.1     bjh21 Arithmetic, except that the conversion is always rounded toward zero.  If
   4346   1.1     bjh21 `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
   4347   1.1     bjh21 conversion overflows, the largest integer with the same sign as `a' is
   4348   1.1     bjh21 returned.
   4349   1.1     bjh21 -------------------------------------------------------------------------------
   4350   1.1     bjh21 */
   4351   1.1     bjh21 int32 float128_to_int32_round_to_zero( float128 a )
   4352   1.1     bjh21 {
   4353   1.1     bjh21     flag aSign;
   4354   1.1     bjh21     int32 aExp, shiftCount;
   4355   1.1     bjh21     bits64 aSig0, aSig1, savedASig;
   4356   1.1     bjh21     int32 z;
   4357   1.1     bjh21 
   4358   1.1     bjh21     aSig1 = extractFloat128Frac1( a );
   4359   1.1     bjh21     aSig0 = extractFloat128Frac0( a );
   4360   1.1     bjh21     aExp = extractFloat128Exp( a );
   4361   1.1     bjh21     aSign = extractFloat128Sign( a );
   4362   1.1     bjh21     aSig0 |= ( aSig1 != 0 );
   4363   1.1     bjh21     if ( 0x401E < aExp ) {
   4364   1.1     bjh21         if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
   4365   1.1     bjh21         goto invalid;
   4366   1.1     bjh21     }
   4367   1.1     bjh21     else if ( aExp < 0x3FFF ) {
   4368  1.12      matt         if ( aExp || aSig0 ) set_float_exception_inexact_flag();
   4369   1.1     bjh21         return 0;
   4370   1.1     bjh21     }
   4371   1.1     bjh21     aSig0 |= LIT64( 0x0001000000000000 );
   4372   1.1     bjh21     shiftCount = 0x402F - aExp;
   4373   1.1     bjh21     savedASig = aSig0;
   4374   1.1     bjh21     aSig0 >>= shiftCount;
   4375  1.10  christos     z = (int32)aSig0;
   4376   1.1     bjh21     if ( aSign ) z = - z;
   4377   1.1     bjh21     if ( ( z < 0 ) ^ aSign ) {
   4378   1.1     bjh21  invalid:
   4379   1.1     bjh21         float_raise( float_flag_invalid );
   4380   1.1     bjh21         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
   4381   1.1     bjh21     }
   4382   1.1     bjh21     if ( ( aSig0<<shiftCount ) != savedASig ) {
   4383  1.12      matt         set_float_exception_inexact_flag();
   4384   1.1     bjh21     }
   4385   1.1     bjh21     return z;
   4386   1.1     bjh21 
   4387   1.1     bjh21 }
   4388   1.1     bjh21 
   4389   1.1     bjh21 /*
   4390   1.1     bjh21 -------------------------------------------------------------------------------
   4391   1.1     bjh21 Returns the result of converting the quadruple-precision floating-point
   4392   1.1     bjh21 value `a' to the 64-bit two's complement integer format.  The conversion
   4393   1.1     bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4394   1.1     bjh21 Arithmetic---which means in particular that the conversion is rounded
   4395   1.1     bjh21 according to the current rounding mode.  If `a' is a NaN, the largest
   4396   1.1     bjh21 positive integer is returned.  Otherwise, if the conversion overflows, the
   4397   1.1     bjh21 largest integer with the same sign as `a' is returned.
   4398   1.1     bjh21 -------------------------------------------------------------------------------
   4399   1.1     bjh21 */
   4400   1.1     bjh21 int64 float128_to_int64( float128 a )
   4401   1.1     bjh21 {
   4402   1.1     bjh21     flag aSign;
   4403   1.1     bjh21     int32 aExp, shiftCount;
   4404   1.1     bjh21     bits64 aSig0, aSig1;
   4405   1.1     bjh21 
   4406   1.1     bjh21     aSig1 = extractFloat128Frac1( a );
   4407   1.1     bjh21     aSig0 = extractFloat128Frac0( a );
   4408   1.1     bjh21     aExp = extractFloat128Exp( a );
   4409   1.1     bjh21     aSign = extractFloat128Sign( a );
   4410   1.1     bjh21     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
   4411   1.1     bjh21     shiftCount = 0x402F - aExp;
   4412   1.1     bjh21     if ( shiftCount <= 0 ) {
   4413   1.1     bjh21         if ( 0x403E < aExp ) {
   4414   1.1     bjh21             float_raise( float_flag_invalid );
   4415   1.1     bjh21             if (    ! aSign
   4416   1.1     bjh21                  || (    ( aExp == 0x7FFF )
   4417   1.1     bjh21                       && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
   4418   1.1     bjh21                     )
   4419   1.1     bjh21                ) {
   4420   1.1     bjh21                 return LIT64( 0x7FFFFFFFFFFFFFFF );
   4421   1.1     bjh21             }
   4422   1.1     bjh21             return (sbits64) LIT64( 0x8000000000000000 );
   4423   1.1     bjh21         }
   4424   1.1     bjh21         shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
   4425   1.1     bjh21     }
   4426   1.1     bjh21     else {
   4427   1.1     bjh21         shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
   4428   1.1     bjh21     }
   4429   1.1     bjh21     return roundAndPackInt64( aSign, aSig0, aSig1 );
   4430   1.1     bjh21 
   4431   1.1     bjh21 }
   4432   1.1     bjh21 
   4433   1.1     bjh21 /*
   4434   1.1     bjh21 -------------------------------------------------------------------------------
   4435   1.1     bjh21 Returns the result of converting the quadruple-precision floating-point
   4436   1.1     bjh21 value `a' to the 64-bit two's complement integer format.  The conversion
   4437   1.1     bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4438   1.1     bjh21 Arithmetic, except that the conversion is always rounded toward zero.
   4439   1.1     bjh21 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
   4440   1.1     bjh21 the conversion overflows, the largest integer with the same sign as `a' is
   4441   1.1     bjh21 returned.
   4442   1.1     bjh21 -------------------------------------------------------------------------------
   4443   1.1     bjh21 */
   4444   1.1     bjh21 int64 float128_to_int64_round_to_zero( float128 a )
   4445   1.1     bjh21 {
   4446   1.1     bjh21     flag aSign;
   4447   1.1     bjh21     int32 aExp, shiftCount;
   4448   1.1     bjh21     bits64 aSig0, aSig1;
   4449   1.1     bjh21     int64 z;
   4450   1.1     bjh21 
   4451   1.1     bjh21     aSig1 = extractFloat128Frac1( a );
   4452   1.1     bjh21     aSig0 = extractFloat128Frac0( a );
   4453   1.1     bjh21     aExp = extractFloat128Exp( a );
   4454   1.1     bjh21     aSign = extractFloat128Sign( a );
   4455   1.1     bjh21     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
   4456   1.1     bjh21     shiftCount = aExp - 0x402F;
   4457   1.1     bjh21     if ( 0 < shiftCount ) {
   4458   1.1     bjh21         if ( 0x403E <= aExp ) {
   4459   1.1     bjh21             aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
   4460   1.1     bjh21             if (    ( a.high == LIT64( 0xC03E000000000000 ) )
   4461   1.1     bjh21                  && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
   4462  1.12      matt                 if ( aSig1 ) set_float_exception_inexact_flag();
   4463   1.1     bjh21             }
   4464   1.1     bjh21             else {
   4465   1.1     bjh21                 float_raise( float_flag_invalid );
   4466   1.1     bjh21                 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
   4467   1.1     bjh21                     return LIT64( 0x7FFFFFFFFFFFFFFF );
   4468   1.1     bjh21                 }
   4469   1.1     bjh21             }
   4470   1.1     bjh21             return (sbits64) LIT64( 0x8000000000000000 );
   4471   1.1     bjh21         }
   4472   1.1     bjh21         z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
   4473   1.1     bjh21         if ( (bits64) ( aSig1<<shiftCount ) ) {
   4474  1.12      matt             set_float_exception_inexact_flag();
   4475   1.1     bjh21         }
   4476   1.1     bjh21     }
   4477   1.1     bjh21     else {
   4478   1.1     bjh21         if ( aExp < 0x3FFF ) {
   4479   1.1     bjh21             if ( aExp | aSig0 | aSig1 ) {
   4480  1.12      matt                 set_float_exception_inexact_flag();
   4481   1.1     bjh21             }
   4482   1.1     bjh21             return 0;
   4483   1.1     bjh21         }
   4484   1.1     bjh21         z = aSig0>>( - shiftCount );
   4485   1.1     bjh21         if (    aSig1
   4486   1.1     bjh21              || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
   4487  1.12      matt             set_float_exception_inexact_flag();
   4488   1.1     bjh21         }
   4489   1.1     bjh21     }
   4490   1.1     bjh21     if ( aSign ) z = - z;
   4491   1.1     bjh21     return z;
   4492   1.1     bjh21 
   4493   1.1     bjh21 }
   4494   1.1     bjh21 
   4495   1.8      matt #if (defined(SOFTFLOATSPARC64_FOR_GCC) || defined(SOFTFLOAT_FOR_GCC)) \
   4496   1.8      matt     && defined(SOFTFLOAT_NEED_FIXUNS)
   4497   1.1     bjh21 /*
   4498   1.4    martin  * just like above - but do not care for overflow of signed results
   4499   1.4    martin  */
   4500   1.4    martin uint64 float128_to_uint64_round_to_zero( float128 a )
   4501   1.4    martin {
   4502   1.4    martin     flag aSign;
   4503   1.4    martin     int32 aExp, shiftCount;
   4504   1.4    martin     bits64 aSig0, aSig1;
   4505   1.4    martin     uint64 z;
   4506   1.4    martin 
   4507   1.4    martin     aSig1 = extractFloat128Frac1( a );
   4508   1.4    martin     aSig0 = extractFloat128Frac0( a );
   4509   1.4    martin     aExp = extractFloat128Exp( a );
   4510   1.4    martin     aSign = extractFloat128Sign( a );
   4511   1.4    martin     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
   4512   1.4    martin     shiftCount = aExp - 0x402F;
   4513   1.4    martin     if ( 0 < shiftCount ) {
   4514   1.4    martin         if ( 0x403F <= aExp ) {
   4515   1.4    martin             aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
   4516   1.4    martin             if (    ( a.high == LIT64( 0xC03E000000000000 ) )
   4517   1.4    martin                  && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
   4518  1.12      matt                 if ( aSig1 ) set_float_exception_inexact_flag();
   4519   1.4    martin             }
   4520   1.4    martin             else {
   4521   1.4    martin                 float_raise( float_flag_invalid );
   4522   1.4    martin             }
   4523   1.5    martin             return LIT64( 0xFFFFFFFFFFFFFFFF );
   4524   1.4    martin         }
   4525   1.4    martin         z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
   4526   1.4    martin         if ( (bits64) ( aSig1<<shiftCount ) ) {
   4527  1.12      matt             set_float_exception_inexact_flag();
   4528   1.4    martin         }
   4529   1.4    martin     }
   4530   1.4    martin     else {
   4531   1.4    martin         if ( aExp < 0x3FFF ) {
   4532   1.4    martin             if ( aExp | aSig0 | aSig1 ) {
   4533  1.12      matt                 set_float_exception_inexact_flag();
   4534   1.4    martin             }
   4535   1.4    martin             return 0;
   4536   1.4    martin         }
   4537   1.4    martin         z = aSig0>>( - shiftCount );
   4538   1.4    martin         if (aSig1 || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
   4539  1.12      matt             set_float_exception_inexact_flag();
   4540   1.4    martin         }
   4541   1.4    martin     }
   4542   1.4    martin     if ( aSign ) z = - z;
   4543   1.4    martin     return z;
   4544   1.4    martin 
   4545   1.4    martin }
   4546   1.8      matt #endif /* (SOFTFLOATSPARC64_FOR_GCC || SOFTFLOAT_FOR_GCC) && SOFTFLOAT_NEED_FIXUNS */
   4547   1.4    martin 
   4548   1.4    martin /*
   4549   1.1     bjh21 -------------------------------------------------------------------------------
   4550   1.1     bjh21 Returns the result of converting the quadruple-precision floating-point
   4551   1.1     bjh21 value `a' to the single-precision floating-point format.  The conversion
   4552   1.1     bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4553   1.1     bjh21 Arithmetic.
   4554   1.1     bjh21 -------------------------------------------------------------------------------
   4555   1.1     bjh21 */
   4556   1.1     bjh21 float32 float128_to_float32( float128 a )
   4557   1.1     bjh21 {
   4558   1.1     bjh21     flag aSign;
   4559   1.1     bjh21     int32 aExp;
   4560   1.1     bjh21     bits64 aSig0, aSig1;
   4561   1.1     bjh21     bits32 zSig;
   4562   1.1     bjh21 
   4563   1.1     bjh21     aSig1 = extractFloat128Frac1( a );
   4564   1.1     bjh21     aSig0 = extractFloat128Frac0( a );
   4565   1.1     bjh21     aExp = extractFloat128Exp( a );
   4566   1.1     bjh21     aSign = extractFloat128Sign( a );
   4567   1.1     bjh21     if ( aExp == 0x7FFF ) {
   4568   1.1     bjh21         if ( aSig0 | aSig1 ) {
   4569   1.1     bjh21             return commonNaNToFloat32( float128ToCommonNaN( a ) );
   4570   1.1     bjh21         }
   4571   1.1     bjh21         return packFloat32( aSign, 0xFF, 0 );
   4572   1.1     bjh21     }
   4573   1.1     bjh21     aSig0 |= ( aSig1 != 0 );
   4574   1.1     bjh21     shift64RightJamming( aSig0, 18, &aSig0 );
   4575  1.10  christos     zSig = (bits32)aSig0;
   4576   1.1     bjh21     if ( aExp || zSig ) {
   4577   1.1     bjh21         zSig |= 0x40000000;
   4578   1.1     bjh21         aExp -= 0x3F81;
   4579   1.1     bjh21     }
   4580   1.1     bjh21     return roundAndPackFloat32( aSign, aExp, zSig );
   4581   1.1     bjh21 
   4582   1.1     bjh21 }
   4583   1.1     bjh21 
   4584   1.1     bjh21 /*
   4585   1.1     bjh21 -------------------------------------------------------------------------------
   4586   1.1     bjh21 Returns the result of converting the quadruple-precision floating-point
   4587   1.1     bjh21 value `a' to the double-precision floating-point format.  The conversion
   4588   1.1     bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   4589   1.1     bjh21 Arithmetic.
   4590   1.1     bjh21 -------------------------------------------------------------------------------
   4591   1.1     bjh21 */
   4592   1.1     bjh21 float64 float128_to_float64( float128 a )
   4593   1.1     bjh21 {
   4594   1.1     bjh21     flag aSign;
   4595   1.1     bjh21     int32 aExp;
   4596   1.1     bjh21     bits64 aSig0, aSig1;
   4597   1.1     bjh21 
   4598   1.1     bjh21     aSig1 = extractFloat128Frac1( a );
   4599   1.1     bjh21     aSig0 = extractFloat128Frac0( a );
   4600   1.1     bjh21     aExp = extractFloat128Exp( a );
   4601   1.1     bjh21     aSign = extractFloat128Sign( a );
   4602   1.1     bjh21     if ( aExp == 0x7FFF ) {
   4603   1.1     bjh21         if ( aSig0 | aSig1 ) {
   4604   1.1     bjh21             return commonNaNToFloat64( float128ToCommonNaN( a ) );
   4605   1.1     bjh21         }
   4606   1.1     bjh21         return packFloat64( aSign, 0x7FF, 0 );
   4607   1.1     bjh21     }
   4608   1.1     bjh21     shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
   4609   1.1     bjh21     aSig0 |= ( aSig1 != 0 );
   4610   1.1     bjh21     if ( aExp || aSig0 ) {
   4611   1.1     bjh21         aSig0 |= LIT64( 0x4000000000000000 );
   4612   1.1     bjh21         aExp -= 0x3C01;
   4613   1.1     bjh21     }
   4614   1.1     bjh21     return roundAndPackFloat64( aSign, aExp, aSig0 );
   4615   1.1     bjh21 
   4616   1.1     bjh21 }
   4617   1.1     bjh21 
   4618   1.1     bjh21 #ifdef FLOATX80
   4619   1.1     bjh21 
   4620   1.1     bjh21 /*
   4621   1.1     bjh21 -------------------------------------------------------------------------------
   4622   1.1     bjh21 Returns the result of converting the quadruple-precision floating-point
   4623   1.1     bjh21 value `a' to the extended double-precision floating-point format.  The
   4624   1.1     bjh21 conversion is performed according to the IEC/IEEE Standard for Binary
   4625   1.1     bjh21 Floating-Point Arithmetic.
   4626   1.1     bjh21 -------------------------------------------------------------------------------
   4627   1.1     bjh21 */
   4628   1.1     bjh21 floatx80 float128_to_floatx80( float128 a )
   4629   1.1     bjh21 {
   4630   1.1     bjh21     flag aSign;
   4631   1.1     bjh21     int32 aExp;
   4632   1.1     bjh21     bits64 aSig0, aSig1;
   4633   1.1     bjh21 
   4634   1.1     bjh21     aSig1 = extractFloat128Frac1( a );
   4635   1.1     bjh21     aSig0 = extractFloat128Frac0( a );
   4636   1.1     bjh21     aExp = extractFloat128Exp( a );
   4637   1.1     bjh21     aSign = extractFloat128Sign( a );
   4638   1.1     bjh21     if ( aExp == 0x7FFF ) {
   4639   1.1     bjh21         if ( aSig0 | aSig1 ) {
   4640   1.1     bjh21             return commonNaNToFloatx80( float128ToCommonNaN( a ) );
   4641   1.1     bjh21         }
   4642   1.1     bjh21         return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
   4643   1.1     bjh21     }
   4644   1.1     bjh21     if ( aExp == 0 ) {
   4645   1.1     bjh21         if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
   4646   1.1     bjh21         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   4647   1.1     bjh21     }
   4648   1.1     bjh21     else {
   4649   1.1     bjh21         aSig0 |= LIT64( 0x0001000000000000 );
   4650   1.1     bjh21     }
   4651   1.1     bjh21     shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
   4652   1.1     bjh21     return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
   4653   1.1     bjh21 
   4654   1.1     bjh21 }
   4655   1.1     bjh21 
   4656   1.1     bjh21 #endif
   4657   1.1     bjh21 
   4658   1.1     bjh21 /*
   4659   1.1     bjh21 -------------------------------------------------------------------------------
   4660   1.1     bjh21 Rounds the quadruple-precision floating-point value `a' to an integer, and
   4661   1.1     bjh21 returns the result as a quadruple-precision floating-point value.  The
   4662   1.1     bjh21 operation is performed according to the IEC/IEEE Standard for Binary
   4663   1.1     bjh21 Floating-Point Arithmetic.
   4664   1.1     bjh21 -------------------------------------------------------------------------------
   4665   1.1     bjh21 */
   4666   1.1     bjh21 float128 float128_round_to_int( float128 a )
   4667   1.1     bjh21 {
   4668   1.1     bjh21     flag aSign;
   4669   1.1     bjh21     int32 aExp;
   4670   1.1     bjh21     bits64 lastBitMask, roundBitsMask;
   4671   1.1     bjh21     int8 roundingMode;
   4672   1.1     bjh21     float128 z;
   4673   1.1     bjh21 
   4674   1.1     bjh21     aExp = extractFloat128Exp( a );
   4675   1.1     bjh21     if ( 0x402F <= aExp ) {
   4676   1.1     bjh21         if ( 0x406F <= aExp ) {
   4677   1.1     bjh21             if (    ( aExp == 0x7FFF )
   4678   1.1     bjh21                  && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
   4679   1.1     bjh21                ) {
   4680   1.1     bjh21                 return propagateFloat128NaN( a, a );
   4681   1.1     bjh21             }
   4682   1.1     bjh21             return a;
   4683   1.1     bjh21         }
   4684   1.1     bjh21         lastBitMask = 1;
   4685   1.1     bjh21         lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
   4686   1.1     bjh21         roundBitsMask = lastBitMask - 1;
   4687   1.1     bjh21         z = a;
   4688   1.1     bjh21         roundingMode = float_rounding_mode;
   4689   1.1     bjh21         if ( roundingMode == float_round_nearest_even ) {
   4690   1.1     bjh21             if ( lastBitMask ) {
   4691   1.1     bjh21                 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
   4692   1.1     bjh21                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
   4693   1.1     bjh21             }
   4694   1.1     bjh21             else {
   4695   1.1     bjh21                 if ( (sbits64) z.low < 0 ) {
   4696   1.1     bjh21                     ++z.high;
   4697   1.1     bjh21                     if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
   4698   1.1     bjh21                 }
   4699   1.1     bjh21             }
   4700   1.1     bjh21         }
   4701   1.1     bjh21         else if ( roundingMode != float_round_to_zero ) {
   4702   1.1     bjh21             if (   extractFloat128Sign( z )
   4703   1.1     bjh21                  ^ ( roundingMode == float_round_up ) ) {
   4704   1.1     bjh21                 add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
   4705   1.1     bjh21             }
   4706   1.1     bjh21         }
   4707   1.1     bjh21         z.low &= ~ roundBitsMask;
   4708   1.1     bjh21     }
   4709   1.1     bjh21     else {
   4710   1.1     bjh21         if ( aExp < 0x3FFF ) {
   4711   1.1     bjh21             if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
   4712  1.12      matt             set_float_exception_inexact_flag();
   4713   1.1     bjh21             aSign = extractFloat128Sign( a );
   4714   1.1     bjh21             switch ( float_rounding_mode ) {
   4715   1.1     bjh21              case float_round_nearest_even:
   4716   1.1     bjh21                 if (    ( aExp == 0x3FFE )
   4717   1.1     bjh21                      && (   extractFloat128Frac0( a )
   4718   1.1     bjh21                           | extractFloat128Frac1( a ) )
   4719   1.1     bjh21                    ) {
   4720   1.1     bjh21                     return packFloat128( aSign, 0x3FFF, 0, 0 );
   4721   1.1     bjh21                 }
   4722   1.1     bjh21                 break;
   4723   1.1     bjh21 	     case float_round_to_zero:
   4724   1.1     bjh21 		break;
   4725   1.1     bjh21              case float_round_down:
   4726   1.1     bjh21                 return
   4727   1.1     bjh21                       aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
   4728   1.1     bjh21                     : packFloat128( 0, 0, 0, 0 );
   4729   1.1     bjh21              case float_round_up:
   4730   1.1     bjh21                 return
   4731   1.1     bjh21                       aSign ? packFloat128( 1, 0, 0, 0 )
   4732   1.1     bjh21                     : packFloat128( 0, 0x3FFF, 0, 0 );
   4733   1.1     bjh21             }
   4734   1.1     bjh21             return packFloat128( aSign, 0, 0, 0 );
   4735   1.1     bjh21         }
   4736   1.1     bjh21         lastBitMask = 1;
   4737   1.1     bjh21         lastBitMask <<= 0x402F - aExp;
   4738   1.1     bjh21         roundBitsMask = lastBitMask - 1;
   4739   1.1     bjh21         z.low = 0;
   4740   1.1     bjh21         z.high = a.high;
   4741   1.1     bjh21         roundingMode = float_rounding_mode;
   4742   1.1     bjh21         if ( roundingMode == float_round_nearest_even ) {
   4743   1.1     bjh21             z.high += lastBitMask>>1;
   4744   1.1     bjh21             if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
   4745   1.1     bjh21                 z.high &= ~ lastBitMask;
   4746   1.1     bjh21             }
   4747   1.1     bjh21         }
   4748   1.1     bjh21         else if ( roundingMode != float_round_to_zero ) {
   4749   1.1     bjh21             if (   extractFloat128Sign( z )
   4750   1.1     bjh21                  ^ ( roundingMode == float_round_up ) ) {
   4751   1.1     bjh21                 z.high |= ( a.low != 0 );
   4752   1.1     bjh21                 z.high += roundBitsMask;
   4753   1.1     bjh21             }
   4754   1.1     bjh21         }
   4755   1.1     bjh21         z.high &= ~ roundBitsMask;
   4756   1.1     bjh21     }
   4757   1.1     bjh21     if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
   4758  1.12      matt         set_float_exception_inexact_flag();
   4759   1.1     bjh21     }
   4760   1.1     bjh21     return z;
   4761   1.1     bjh21 
   4762   1.1     bjh21 }
   4763   1.1     bjh21 
   4764   1.1     bjh21 /*
   4765   1.1     bjh21 -------------------------------------------------------------------------------
   4766   1.1     bjh21 Returns the result of adding the absolute values of the quadruple-precision
   4767   1.1     bjh21 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
   4768   1.1     bjh21 before being returned.  `zSign' is ignored if the result is a NaN.
   4769   1.1     bjh21 The addition is performed according to the IEC/IEEE Standard for Binary
   4770   1.1     bjh21 Floating-Point Arithmetic.
   4771   1.1     bjh21 -------------------------------------------------------------------------------
   4772   1.1     bjh21 */
   4773   1.1     bjh21 static float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
   4774   1.1     bjh21 {
   4775   1.1     bjh21     int32 aExp, bExp, zExp;
   4776   1.1     bjh21     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
   4777   1.1     bjh21     int32 expDiff;
   4778   1.1     bjh21 
   4779   1.1     bjh21     aSig1 = extractFloat128Frac1( a );
   4780   1.1     bjh21     aSig0 = extractFloat128Frac0( a );
   4781   1.1     bjh21     aExp = extractFloat128Exp( a );
   4782   1.1     bjh21     bSig1 = extractFloat128Frac1( b );
   4783   1.1     bjh21     bSig0 = extractFloat128Frac0( b );
   4784   1.1     bjh21     bExp = extractFloat128Exp( b );
   4785   1.1     bjh21     expDiff = aExp - bExp;
   4786   1.1     bjh21     if ( 0 < expDiff ) {
   4787   1.1     bjh21         if ( aExp == 0x7FFF ) {
   4788   1.1     bjh21             if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
   4789   1.1     bjh21             return a;
   4790   1.1     bjh21         }
   4791   1.1     bjh21         if ( bExp == 0 ) {
   4792   1.1     bjh21             --expDiff;
   4793   1.1     bjh21         }
   4794   1.1     bjh21         else {
   4795   1.1     bjh21             bSig0 |= LIT64( 0x0001000000000000 );
   4796   1.1     bjh21         }
   4797   1.1     bjh21         shift128ExtraRightJamming(
   4798   1.1     bjh21             bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
   4799   1.1     bjh21         zExp = aExp;
   4800   1.1     bjh21     }
   4801   1.1     bjh21     else if ( expDiff < 0 ) {
   4802   1.1     bjh21         if ( bExp == 0x7FFF ) {
   4803   1.1     bjh21             if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   4804   1.1     bjh21             return packFloat128( zSign, 0x7FFF, 0, 0 );
   4805   1.1     bjh21         }
   4806   1.1     bjh21         if ( aExp == 0 ) {
   4807   1.1     bjh21             ++expDiff;
   4808   1.1     bjh21         }
   4809   1.1     bjh21         else {
   4810   1.1     bjh21             aSig0 |= LIT64( 0x0001000000000000 );
   4811   1.1     bjh21         }
   4812   1.1     bjh21         shift128ExtraRightJamming(
   4813   1.1     bjh21             aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
   4814   1.1     bjh21         zExp = bExp;
   4815   1.1     bjh21     }
   4816   1.1     bjh21     else {
   4817   1.1     bjh21         if ( aExp == 0x7FFF ) {
   4818   1.1     bjh21             if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
   4819   1.1     bjh21                 return propagateFloat128NaN( a, b );
   4820   1.1     bjh21             }
   4821   1.1     bjh21             return a;
   4822   1.1     bjh21         }
   4823   1.1     bjh21         add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
   4824   1.1     bjh21         if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
   4825   1.1     bjh21         zSig2 = 0;
   4826   1.1     bjh21         zSig0 |= LIT64( 0x0002000000000000 );
   4827   1.1     bjh21         zExp = aExp;
   4828   1.1     bjh21         goto shiftRight1;
   4829   1.1     bjh21     }
   4830   1.1     bjh21     aSig0 |= LIT64( 0x0001000000000000 );
   4831   1.1     bjh21     add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
   4832   1.1     bjh21     --zExp;
   4833   1.1     bjh21     if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
   4834   1.1     bjh21     ++zExp;
   4835   1.1     bjh21  shiftRight1:
   4836   1.1     bjh21     shift128ExtraRightJamming(
   4837   1.1     bjh21         zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
   4838   1.1     bjh21  roundAndPack:
   4839   1.1     bjh21     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
   4840   1.1     bjh21 
   4841   1.1     bjh21 }
   4842   1.1     bjh21 
   4843   1.1     bjh21 /*
   4844   1.1     bjh21 -------------------------------------------------------------------------------
   4845   1.1     bjh21 Returns the result of subtracting the absolute values of the quadruple-
   4846   1.1     bjh21 precision floating-point values `a' and `b'.  If `zSign' is 1, the
   4847   1.1     bjh21 difference is negated before being returned.  `zSign' is ignored if the
   4848   1.1     bjh21 result is a NaN.  The subtraction is performed according to the IEC/IEEE
   4849   1.1     bjh21 Standard for Binary Floating-Point Arithmetic.
   4850   1.1     bjh21 -------------------------------------------------------------------------------
   4851   1.1     bjh21 */
   4852   1.1     bjh21 static float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
   4853   1.1     bjh21 {
   4854   1.1     bjh21     int32 aExp, bExp, zExp;
   4855   1.1     bjh21     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
   4856   1.1     bjh21     int32 expDiff;
   4857   1.1     bjh21     float128 z;
   4858   1.1     bjh21 
   4859   1.1     bjh21     aSig1 = extractFloat128Frac1( a );
   4860   1.1     bjh21     aSig0 = extractFloat128Frac0( a );
   4861   1.1     bjh21     aExp = extractFloat128Exp( a );
   4862   1.1     bjh21     bSig1 = extractFloat128Frac1( b );
   4863   1.1     bjh21     bSig0 = extractFloat128Frac0( b );
   4864   1.1     bjh21     bExp = extractFloat128Exp( b );
   4865   1.1     bjh21     expDiff = aExp - bExp;
   4866   1.1     bjh21     shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
   4867   1.1     bjh21     shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
   4868   1.1     bjh21     if ( 0 < expDiff ) goto aExpBigger;
   4869   1.1     bjh21     if ( expDiff < 0 ) goto bExpBigger;
   4870   1.1     bjh21     if ( aExp == 0x7FFF ) {
   4871   1.1     bjh21         if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
   4872   1.1     bjh21             return propagateFloat128NaN( a, b );
   4873   1.1     bjh21         }
   4874   1.1     bjh21         float_raise( float_flag_invalid );
   4875   1.1     bjh21         z.low = float128_default_nan_low;
   4876   1.1     bjh21         z.high = float128_default_nan_high;
   4877   1.1     bjh21         return z;
   4878   1.1     bjh21     }
   4879   1.1     bjh21     if ( aExp == 0 ) {
   4880   1.1     bjh21         aExp = 1;
   4881   1.1     bjh21         bExp = 1;
   4882   1.1     bjh21     }
   4883   1.1     bjh21     if ( bSig0 < aSig0 ) goto aBigger;
   4884   1.1     bjh21     if ( aSig0 < bSig0 ) goto bBigger;
   4885   1.1     bjh21     if ( bSig1 < aSig1 ) goto aBigger;
   4886   1.1     bjh21     if ( aSig1 < bSig1 ) goto bBigger;
   4887   1.1     bjh21     return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 );
   4888   1.1     bjh21  bExpBigger:
   4889   1.1     bjh21     if ( bExp == 0x7FFF ) {
   4890   1.1     bjh21         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   4891   1.1     bjh21         return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
   4892   1.1     bjh21     }
   4893   1.1     bjh21     if ( aExp == 0 ) {
   4894   1.1     bjh21         ++expDiff;
   4895   1.1     bjh21     }
   4896   1.1     bjh21     else {
   4897   1.1     bjh21         aSig0 |= LIT64( 0x4000000000000000 );
   4898   1.1     bjh21     }
   4899   1.1     bjh21     shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
   4900   1.1     bjh21     bSig0 |= LIT64( 0x4000000000000000 );
   4901   1.1     bjh21  bBigger:
   4902   1.1     bjh21     sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
   4903   1.1     bjh21     zExp = bExp;
   4904   1.1     bjh21     zSign ^= 1;
   4905   1.1     bjh21     goto normalizeRoundAndPack;
   4906   1.1     bjh21  aExpBigger:
   4907   1.1     bjh21     if ( aExp == 0x7FFF ) {
   4908   1.1     bjh21         if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
   4909   1.1     bjh21         return a;
   4910   1.1     bjh21     }
   4911   1.1     bjh21     if ( bExp == 0 ) {
   4912   1.1     bjh21         --expDiff;
   4913   1.1     bjh21     }
   4914   1.1     bjh21     else {
   4915   1.1     bjh21         bSig0 |= LIT64( 0x4000000000000000 );
   4916   1.1     bjh21     }
   4917   1.1     bjh21     shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
   4918   1.1     bjh21     aSig0 |= LIT64( 0x4000000000000000 );
   4919   1.1     bjh21  aBigger:
   4920   1.1     bjh21     sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
   4921   1.1     bjh21     zExp = aExp;
   4922   1.1     bjh21  normalizeRoundAndPack:
   4923   1.1     bjh21     --zExp;
   4924   1.1     bjh21     return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
   4925   1.1     bjh21 
   4926   1.1     bjh21 }
   4927   1.1     bjh21 
   4928   1.1     bjh21 /*
   4929   1.1     bjh21 -------------------------------------------------------------------------------
   4930   1.1     bjh21 Returns the result of adding the quadruple-precision floating-point values
   4931   1.1     bjh21 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   4932   1.1     bjh21 for Binary Floating-Point Arithmetic.
   4933   1.1     bjh21 -------------------------------------------------------------------------------
   4934   1.1     bjh21 */
   4935   1.1     bjh21 float128 float128_add( float128 a, float128 b )
   4936   1.1     bjh21 {
   4937   1.1     bjh21     flag aSign, bSign;
   4938   1.1     bjh21 
   4939   1.1     bjh21     aSign = extractFloat128Sign( a );
   4940   1.1     bjh21     bSign = extractFloat128Sign( b );
   4941   1.1     bjh21     if ( aSign == bSign ) {
   4942   1.1     bjh21         return addFloat128Sigs( a, b, aSign );
   4943   1.1     bjh21     }
   4944   1.1     bjh21     else {
   4945   1.1     bjh21         return subFloat128Sigs( a, b, aSign );
   4946   1.1     bjh21     }
   4947   1.1     bjh21 
   4948   1.1     bjh21 }
   4949   1.1     bjh21 
   4950   1.1     bjh21 /*
   4951   1.1     bjh21 -------------------------------------------------------------------------------
   4952   1.1     bjh21 Returns the result of subtracting the quadruple-precision floating-point
   4953   1.1     bjh21 values `a' and `b'.  The operation is performed according to the IEC/IEEE
   4954   1.1     bjh21 Standard for Binary Floating-Point Arithmetic.
   4955   1.1     bjh21 -------------------------------------------------------------------------------
   4956   1.1     bjh21 */
   4957   1.1     bjh21 float128 float128_sub( float128 a, float128 b )
   4958   1.1     bjh21 {
   4959   1.1     bjh21     flag aSign, bSign;
   4960   1.1     bjh21 
   4961   1.1     bjh21     aSign = extractFloat128Sign( a );
   4962   1.1     bjh21     bSign = extractFloat128Sign( b );
   4963   1.1     bjh21     if ( aSign == bSign ) {
   4964   1.1     bjh21         return subFloat128Sigs( a, b, aSign );
   4965   1.1     bjh21     }
   4966   1.1     bjh21     else {
   4967   1.1     bjh21         return addFloat128Sigs( a, b, aSign );
   4968   1.1     bjh21     }
   4969   1.1     bjh21 
   4970   1.1     bjh21 }
   4971   1.1     bjh21 
   4972   1.1     bjh21 /*
   4973   1.1     bjh21 -------------------------------------------------------------------------------
   4974   1.1     bjh21 Returns the result of multiplying the quadruple-precision floating-point
   4975   1.1     bjh21 values `a' and `b'.  The operation is performed according to the IEC/IEEE
   4976   1.1     bjh21 Standard for Binary Floating-Point Arithmetic.
   4977   1.1     bjh21 -------------------------------------------------------------------------------
   4978   1.1     bjh21 */
   4979   1.1     bjh21 float128 float128_mul( float128 a, float128 b )
   4980   1.1     bjh21 {
   4981   1.1     bjh21     flag aSign, bSign, zSign;
   4982   1.1     bjh21     int32 aExp, bExp, zExp;
   4983   1.1     bjh21     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
   4984   1.1     bjh21     float128 z;
   4985   1.1     bjh21 
   4986   1.1     bjh21     aSig1 = extractFloat128Frac1( a );
   4987   1.1     bjh21     aSig0 = extractFloat128Frac0( a );
   4988   1.1     bjh21     aExp = extractFloat128Exp( a );
   4989   1.1     bjh21     aSign = extractFloat128Sign( a );
   4990   1.1     bjh21     bSig1 = extractFloat128Frac1( b );
   4991   1.1     bjh21     bSig0 = extractFloat128Frac0( b );
   4992   1.1     bjh21     bExp = extractFloat128Exp( b );
   4993   1.1     bjh21     bSign = extractFloat128Sign( b );
   4994   1.1     bjh21     zSign = aSign ^ bSign;
   4995   1.1     bjh21     if ( aExp == 0x7FFF ) {
   4996   1.1     bjh21         if (    ( aSig0 | aSig1 )
   4997   1.1     bjh21              || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
   4998   1.1     bjh21             return propagateFloat128NaN( a, b );
   4999   1.1     bjh21         }
   5000   1.1     bjh21         if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
   5001   1.1     bjh21         return packFloat128( zSign, 0x7FFF, 0, 0 );
   5002   1.1     bjh21     }
   5003   1.1     bjh21     if ( bExp == 0x7FFF ) {
   5004   1.1     bjh21         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   5005   1.1     bjh21         if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
   5006   1.1     bjh21  invalid:
   5007   1.1     bjh21             float_raise( float_flag_invalid );
   5008   1.1     bjh21             z.low = float128_default_nan_low;
   5009   1.1     bjh21             z.high = float128_default_nan_high;
   5010   1.1     bjh21             return z;
   5011   1.1     bjh21         }
   5012   1.1     bjh21         return packFloat128( zSign, 0x7FFF, 0, 0 );
   5013   1.1     bjh21     }
   5014   1.1     bjh21     if ( aExp == 0 ) {
   5015   1.1     bjh21         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
   5016   1.1     bjh21         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   5017   1.1     bjh21     }
   5018   1.1     bjh21     if ( bExp == 0 ) {
   5019   1.1     bjh21         if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
   5020   1.1     bjh21         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
   5021   1.1     bjh21     }
   5022   1.1     bjh21     zExp = aExp + bExp - 0x4000;
   5023   1.1     bjh21     aSig0 |= LIT64( 0x0001000000000000 );
   5024   1.1     bjh21     shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
   5025   1.1     bjh21     mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
   5026   1.1     bjh21     add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
   5027   1.1     bjh21     zSig2 |= ( zSig3 != 0 );
   5028   1.1     bjh21     if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
   5029   1.1     bjh21         shift128ExtraRightJamming(
   5030   1.1     bjh21             zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
   5031   1.1     bjh21         ++zExp;
   5032   1.1     bjh21     }
   5033   1.1     bjh21     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
   5034   1.1     bjh21 
   5035   1.1     bjh21 }
   5036   1.1     bjh21 
   5037   1.1     bjh21 /*
   5038   1.1     bjh21 -------------------------------------------------------------------------------
   5039   1.1     bjh21 Returns the result of dividing the quadruple-precision floating-point value
   5040   1.1     bjh21 `a' by the corresponding value `b'.  The operation is performed according to
   5041   1.1     bjh21 the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5042   1.1     bjh21 -------------------------------------------------------------------------------
   5043   1.1     bjh21 */
   5044   1.1     bjh21 float128 float128_div( float128 a, float128 b )
   5045   1.1     bjh21 {
   5046   1.1     bjh21     flag aSign, bSign, zSign;
   5047   1.1     bjh21     int32 aExp, bExp, zExp;
   5048   1.1     bjh21     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
   5049   1.1     bjh21     bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
   5050   1.1     bjh21     float128 z;
   5051   1.1     bjh21 
   5052   1.1     bjh21     aSig1 = extractFloat128Frac1( a );
   5053   1.1     bjh21     aSig0 = extractFloat128Frac0( a );
   5054   1.1     bjh21     aExp = extractFloat128Exp( a );
   5055   1.1     bjh21     aSign = extractFloat128Sign( a );
   5056   1.1     bjh21     bSig1 = extractFloat128Frac1( b );
   5057   1.1     bjh21     bSig0 = extractFloat128Frac0( b );
   5058   1.1     bjh21     bExp = extractFloat128Exp( b );
   5059   1.1     bjh21     bSign = extractFloat128Sign( b );
   5060   1.1     bjh21     zSign = aSign ^ bSign;
   5061   1.1     bjh21     if ( aExp == 0x7FFF ) {
   5062   1.1     bjh21         if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
   5063   1.1     bjh21         if ( bExp == 0x7FFF ) {
   5064   1.1     bjh21             if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   5065   1.1     bjh21             goto invalid;
   5066   1.1     bjh21         }
   5067   1.1     bjh21         return packFloat128( zSign, 0x7FFF, 0, 0 );
   5068   1.1     bjh21     }
   5069   1.1     bjh21     if ( bExp == 0x7FFF ) {
   5070   1.1     bjh21         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   5071   1.1     bjh21         return packFloat128( zSign, 0, 0, 0 );
   5072   1.1     bjh21     }
   5073   1.1     bjh21     if ( bExp == 0 ) {
   5074   1.1     bjh21         if ( ( bSig0 | bSig1 ) == 0 ) {
   5075   1.1     bjh21             if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
   5076   1.1     bjh21  invalid:
   5077   1.1     bjh21                 float_raise( float_flag_invalid );
   5078   1.1     bjh21                 z.low = float128_default_nan_low;
   5079   1.1     bjh21                 z.high = float128_default_nan_high;
   5080   1.1     bjh21                 return z;
   5081   1.1     bjh21             }
   5082   1.1     bjh21             float_raise( float_flag_divbyzero );
   5083   1.1     bjh21             return packFloat128( zSign, 0x7FFF, 0, 0 );
   5084   1.1     bjh21         }
   5085   1.1     bjh21         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
   5086   1.1     bjh21     }
   5087   1.1     bjh21     if ( aExp == 0 ) {
   5088   1.1     bjh21         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
   5089   1.1     bjh21         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   5090   1.1     bjh21     }
   5091   1.1     bjh21     zExp = aExp - bExp + 0x3FFD;
   5092   1.1     bjh21     shortShift128Left(
   5093   1.1     bjh21         aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
   5094   1.1     bjh21     shortShift128Left(
   5095   1.1     bjh21         bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
   5096   1.1     bjh21     if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
   5097   1.1     bjh21         shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
   5098   1.1     bjh21         ++zExp;
   5099   1.1     bjh21     }
   5100   1.1     bjh21     zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
   5101   1.1     bjh21     mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
   5102   1.1     bjh21     sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
   5103   1.1     bjh21     while ( (sbits64) rem0 < 0 ) {
   5104   1.1     bjh21         --zSig0;
   5105   1.1     bjh21         add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
   5106   1.1     bjh21     }
   5107   1.1     bjh21     zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
   5108   1.1     bjh21     if ( ( zSig1 & 0x3FFF ) <= 4 ) {
   5109   1.1     bjh21         mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
   5110   1.1     bjh21         sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
   5111   1.1     bjh21         while ( (sbits64) rem1 < 0 ) {
   5112   1.1     bjh21             --zSig1;
   5113   1.1     bjh21             add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
   5114   1.1     bjh21         }
   5115   1.1     bjh21         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
   5116   1.1     bjh21     }
   5117   1.1     bjh21     shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
   5118   1.1     bjh21     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
   5119   1.1     bjh21 
   5120   1.1     bjh21 }
   5121   1.1     bjh21 
   5122   1.1     bjh21 /*
   5123   1.1     bjh21 -------------------------------------------------------------------------------
   5124   1.1     bjh21 Returns the remainder of the quadruple-precision floating-point value `a'
   5125   1.1     bjh21 with respect to the corresponding value `b'.  The operation is performed
   5126   1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5127   1.1     bjh21 -------------------------------------------------------------------------------
   5128   1.1     bjh21 */
   5129   1.1     bjh21 float128 float128_rem( float128 a, float128 b )
   5130   1.1     bjh21 {
   5131   1.9    martin     flag aSign, zSign;
   5132   1.1     bjh21     int32 aExp, bExp, expDiff;
   5133   1.1     bjh21     bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
   5134   1.1     bjh21     bits64 allZero, alternateASig0, alternateASig1, sigMean1;
   5135   1.1     bjh21     sbits64 sigMean0;
   5136   1.1     bjh21     float128 z;
   5137   1.1     bjh21 
   5138   1.1     bjh21     aSig1 = extractFloat128Frac1( a );
   5139   1.1     bjh21     aSig0 = extractFloat128Frac0( a );
   5140   1.1     bjh21     aExp = extractFloat128Exp( a );
   5141   1.1     bjh21     aSign = extractFloat128Sign( a );
   5142   1.1     bjh21     bSig1 = extractFloat128Frac1( b );
   5143   1.1     bjh21     bSig0 = extractFloat128Frac0( b );
   5144   1.1     bjh21     bExp = extractFloat128Exp( b );
   5145   1.1     bjh21     if ( aExp == 0x7FFF ) {
   5146   1.1     bjh21         if (    ( aSig0 | aSig1 )
   5147   1.1     bjh21              || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
   5148   1.1     bjh21             return propagateFloat128NaN( a, b );
   5149   1.1     bjh21         }
   5150   1.1     bjh21         goto invalid;
   5151   1.1     bjh21     }
   5152   1.1     bjh21     if ( bExp == 0x7FFF ) {
   5153   1.1     bjh21         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
   5154   1.1     bjh21         return a;
   5155   1.1     bjh21     }
   5156   1.1     bjh21     if ( bExp == 0 ) {
   5157   1.1     bjh21         if ( ( bSig0 | bSig1 ) == 0 ) {
   5158   1.1     bjh21  invalid:
   5159   1.1     bjh21             float_raise( float_flag_invalid );
   5160   1.1     bjh21             z.low = float128_default_nan_low;
   5161   1.1     bjh21             z.high = float128_default_nan_high;
   5162   1.1     bjh21             return z;
   5163   1.1     bjh21         }
   5164   1.1     bjh21         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
   5165   1.1     bjh21     }
   5166   1.1     bjh21     if ( aExp == 0 ) {
   5167   1.1     bjh21         if ( ( aSig0 | aSig1 ) == 0 ) return a;
   5168   1.1     bjh21         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   5169   1.1     bjh21     }
   5170   1.1     bjh21     expDiff = aExp - bExp;
   5171   1.1     bjh21     if ( expDiff < -1 ) return a;
   5172   1.1     bjh21     shortShift128Left(
   5173   1.1     bjh21         aSig0 | LIT64( 0x0001000000000000 ),
   5174   1.1     bjh21         aSig1,
   5175   1.1     bjh21         15 - ( expDiff < 0 ),
   5176   1.1     bjh21         &aSig0,
   5177   1.1     bjh21         &aSig1
   5178   1.1     bjh21     );
   5179   1.1     bjh21     shortShift128Left(
   5180   1.1     bjh21         bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
   5181   1.1     bjh21     q = le128( bSig0, bSig1, aSig0, aSig1 );
   5182   1.1     bjh21     if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
   5183   1.1     bjh21     expDiff -= 64;
   5184   1.1     bjh21     while ( 0 < expDiff ) {
   5185   1.1     bjh21         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
   5186   1.1     bjh21         q = ( 4 < q ) ? q - 4 : 0;
   5187   1.1     bjh21         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
   5188   1.1     bjh21         shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
   5189   1.1     bjh21         shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
   5190   1.1     bjh21         sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
   5191   1.1     bjh21         expDiff -= 61;
   5192   1.1     bjh21     }
   5193   1.1     bjh21     if ( -64 < expDiff ) {
   5194   1.1     bjh21         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
   5195   1.1     bjh21         q = ( 4 < q ) ? q - 4 : 0;
   5196   1.1     bjh21         q >>= - expDiff;
   5197   1.1     bjh21         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
   5198   1.1     bjh21         expDiff += 52;
   5199   1.1     bjh21         if ( expDiff < 0 ) {
   5200   1.1     bjh21             shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
   5201   1.1     bjh21         }
   5202   1.1     bjh21         else {
   5203   1.1     bjh21             shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
   5204   1.1     bjh21         }
   5205   1.1     bjh21         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
   5206   1.1     bjh21         sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
   5207   1.1     bjh21     }
   5208   1.1     bjh21     else {
   5209   1.1     bjh21         shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
   5210   1.1     bjh21         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
   5211   1.1     bjh21     }
   5212   1.1     bjh21     do {
   5213   1.1     bjh21         alternateASig0 = aSig0;
   5214   1.1     bjh21         alternateASig1 = aSig1;
   5215   1.1     bjh21         ++q;
   5216   1.1     bjh21         sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
   5217   1.1     bjh21     } while ( 0 <= (sbits64) aSig0 );
   5218   1.1     bjh21     add128(
   5219   1.3       mrg         aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 );
   5220   1.1     bjh21     if (    ( sigMean0 < 0 )
   5221   1.1     bjh21          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
   5222   1.1     bjh21         aSig0 = alternateASig0;
   5223   1.1     bjh21         aSig1 = alternateASig1;
   5224   1.1     bjh21     }
   5225   1.1     bjh21     zSign = ( (sbits64) aSig0 < 0 );
   5226   1.1     bjh21     if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
   5227   1.1     bjh21     return
   5228   1.1     bjh21         normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
   5229   1.1     bjh21 
   5230   1.1     bjh21 }
   5231   1.1     bjh21 
   5232   1.1     bjh21 /*
   5233   1.1     bjh21 -------------------------------------------------------------------------------
   5234   1.1     bjh21 Returns the square root of the quadruple-precision floating-point value `a'.
   5235   1.1     bjh21 The operation is performed according to the IEC/IEEE Standard for Binary
   5236   1.1     bjh21 Floating-Point Arithmetic.
   5237   1.1     bjh21 -------------------------------------------------------------------------------
   5238   1.1     bjh21 */
   5239   1.1     bjh21 float128 float128_sqrt( float128 a )
   5240   1.1     bjh21 {
   5241   1.1     bjh21     flag aSign;
   5242   1.1     bjh21     int32 aExp, zExp;
   5243   1.1     bjh21     bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
   5244   1.1     bjh21     bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
   5245   1.1     bjh21     float128 z;
   5246   1.1     bjh21 
   5247   1.1     bjh21     aSig1 = extractFloat128Frac1( a );
   5248   1.1     bjh21     aSig0 = extractFloat128Frac0( a );
   5249   1.1     bjh21     aExp = extractFloat128Exp( a );
   5250   1.1     bjh21     aSign = extractFloat128Sign( a );
   5251   1.1     bjh21     if ( aExp == 0x7FFF ) {
   5252   1.1     bjh21         if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a );
   5253   1.1     bjh21         if ( ! aSign ) return a;
   5254   1.1     bjh21         goto invalid;
   5255   1.1     bjh21     }
   5256   1.1     bjh21     if ( aSign ) {
   5257   1.1     bjh21         if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
   5258   1.1     bjh21  invalid:
   5259   1.1     bjh21         float_raise( float_flag_invalid );
   5260   1.1     bjh21         z.low = float128_default_nan_low;
   5261   1.1     bjh21         z.high = float128_default_nan_high;
   5262   1.1     bjh21         return z;
   5263   1.1     bjh21     }
   5264   1.1     bjh21     if ( aExp == 0 ) {
   5265   1.1     bjh21         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
   5266   1.1     bjh21         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   5267   1.1     bjh21     }
   5268  1.15  christos     zExp = (int32) ( (bits32)(aExp - 0x3FFF) >> 1) + 0x3FFE;
   5269   1.1     bjh21     aSig0 |= LIT64( 0x0001000000000000 );
   5270  1.10  christos     zSig0 = estimateSqrt32((int16)aExp, (bits32)(aSig0>>17));
   5271   1.1     bjh21     shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
   5272   1.1     bjh21     zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
   5273   1.1     bjh21     doubleZSig0 = zSig0<<1;
   5274   1.1     bjh21     mul64To128( zSig0, zSig0, &term0, &term1 );
   5275   1.1     bjh21     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
   5276   1.1     bjh21     while ( (sbits64) rem0 < 0 ) {
   5277   1.1     bjh21         --zSig0;
   5278   1.1     bjh21         doubleZSig0 -= 2;
   5279   1.1     bjh21         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
   5280   1.1     bjh21     }
   5281   1.1     bjh21     zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
   5282   1.1     bjh21     if ( ( zSig1 & 0x1FFF ) <= 5 ) {
   5283   1.1     bjh21         if ( zSig1 == 0 ) zSig1 = 1;
   5284   1.1     bjh21         mul64To128( doubleZSig0, zSig1, &term1, &term2 );
   5285   1.1     bjh21         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
   5286   1.1     bjh21         mul64To128( zSig1, zSig1, &term2, &term3 );
   5287   1.1     bjh21         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
   5288   1.1     bjh21         while ( (sbits64) rem1 < 0 ) {
   5289   1.1     bjh21             --zSig1;
   5290   1.1     bjh21             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
   5291   1.1     bjh21             term3 |= 1;
   5292   1.1     bjh21             term2 |= doubleZSig0;
   5293   1.1     bjh21             add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
   5294   1.1     bjh21         }
   5295   1.1     bjh21         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
   5296   1.1     bjh21     }
   5297   1.1     bjh21     shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
   5298   1.1     bjh21     return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 );
   5299   1.1     bjh21 
   5300   1.1     bjh21 }
   5301   1.1     bjh21 
   5302   1.1     bjh21 /*
   5303   1.1     bjh21 -------------------------------------------------------------------------------
   5304   1.1     bjh21 Returns 1 if the quadruple-precision floating-point value `a' is equal to
   5305   1.1     bjh21 the corresponding value `b', and 0 otherwise.  The comparison is performed
   5306   1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5307   1.1     bjh21 -------------------------------------------------------------------------------
   5308   1.1     bjh21 */
   5309   1.1     bjh21 flag float128_eq( float128 a, float128 b )
   5310   1.1     bjh21 {
   5311   1.1     bjh21 
   5312   1.1     bjh21     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5313   1.1     bjh21               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5314   1.1     bjh21          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5315   1.1     bjh21               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5316   1.1     bjh21        ) {
   5317   1.1     bjh21         if (    float128_is_signaling_nan( a )
   5318   1.1     bjh21              || float128_is_signaling_nan( b ) ) {
   5319   1.1     bjh21             float_raise( float_flag_invalid );
   5320   1.1     bjh21         }
   5321   1.1     bjh21         return 0;
   5322   1.1     bjh21     }
   5323   1.1     bjh21     return
   5324   1.1     bjh21            ( a.low == b.low )
   5325   1.1     bjh21         && (    ( a.high == b.high )
   5326   1.1     bjh21              || (    ( a.low == 0 )
   5327   1.1     bjh21                   && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
   5328   1.1     bjh21            );
   5329   1.1     bjh21 
   5330   1.1     bjh21 }
   5331   1.1     bjh21 
   5332   1.1     bjh21 /*
   5333   1.1     bjh21 -------------------------------------------------------------------------------
   5334   1.1     bjh21 Returns 1 if the quadruple-precision floating-point value `a' is less than
   5335   1.1     bjh21 or equal to the corresponding value `b', and 0 otherwise.  The comparison
   5336   1.1     bjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   5337   1.1     bjh21 Arithmetic.
   5338   1.1     bjh21 -------------------------------------------------------------------------------
   5339   1.1     bjh21 */
   5340   1.1     bjh21 flag float128_le( float128 a, float128 b )
   5341   1.1     bjh21 {
   5342   1.1     bjh21     flag aSign, bSign;
   5343   1.1     bjh21 
   5344   1.1     bjh21     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5345   1.1     bjh21               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5346   1.1     bjh21          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5347   1.1     bjh21               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5348   1.1     bjh21        ) {
   5349   1.1     bjh21         float_raise( float_flag_invalid );
   5350   1.1     bjh21         return 0;
   5351   1.1     bjh21     }
   5352   1.1     bjh21     aSign = extractFloat128Sign( a );
   5353   1.1     bjh21     bSign = extractFloat128Sign( b );
   5354   1.1     bjh21     if ( aSign != bSign ) {
   5355   1.1     bjh21         return
   5356   1.1     bjh21                aSign
   5357   1.1     bjh21             || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   5358   1.1     bjh21                  == 0 );
   5359   1.1     bjh21     }
   5360   1.1     bjh21     return
   5361   1.1     bjh21           aSign ? le128( b.high, b.low, a.high, a.low )
   5362   1.1     bjh21         : le128( a.high, a.low, b.high, b.low );
   5363   1.1     bjh21 
   5364   1.1     bjh21 }
   5365   1.1     bjh21 
   5366   1.1     bjh21 /*
   5367   1.1     bjh21 -------------------------------------------------------------------------------
   5368   1.1     bjh21 Returns 1 if the quadruple-precision floating-point value `a' is less than
   5369   1.1     bjh21 the corresponding value `b', and 0 otherwise.  The comparison is performed
   5370   1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5371   1.1     bjh21 -------------------------------------------------------------------------------
   5372   1.1     bjh21 */
   5373   1.1     bjh21 flag float128_lt( float128 a, float128 b )
   5374   1.1     bjh21 {
   5375   1.1     bjh21     flag aSign, bSign;
   5376   1.1     bjh21 
   5377   1.1     bjh21     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5378   1.1     bjh21               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5379   1.1     bjh21          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5380   1.1     bjh21               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5381   1.1     bjh21        ) {
   5382   1.1     bjh21         float_raise( float_flag_invalid );
   5383   1.1     bjh21         return 0;
   5384   1.1     bjh21     }
   5385   1.1     bjh21     aSign = extractFloat128Sign( a );
   5386   1.1     bjh21     bSign = extractFloat128Sign( b );
   5387   1.1     bjh21     if ( aSign != bSign ) {
   5388   1.1     bjh21         return
   5389   1.1     bjh21                aSign
   5390   1.1     bjh21             && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   5391   1.1     bjh21                  != 0 );
   5392   1.1     bjh21     }
   5393   1.1     bjh21     return
   5394   1.1     bjh21           aSign ? lt128( b.high, b.low, a.high, a.low )
   5395   1.1     bjh21         : lt128( a.high, a.low, b.high, b.low );
   5396   1.1     bjh21 
   5397   1.1     bjh21 }
   5398   1.1     bjh21 
   5399   1.1     bjh21 /*
   5400   1.1     bjh21 -------------------------------------------------------------------------------
   5401   1.1     bjh21 Returns 1 if the quadruple-precision floating-point value `a' is equal to
   5402   1.1     bjh21 the corresponding value `b', and 0 otherwise.  The invalid exception is
   5403   1.1     bjh21 raised if either operand is a NaN.  Otherwise, the comparison is performed
   5404   1.1     bjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5405   1.1     bjh21 -------------------------------------------------------------------------------
   5406   1.1     bjh21 */
   5407   1.1     bjh21 flag float128_eq_signaling( float128 a, float128 b )
   5408   1.1     bjh21 {
   5409   1.1     bjh21 
   5410   1.1     bjh21     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5411   1.1     bjh21               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5412   1.1     bjh21          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5413   1.1     bjh21               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5414   1.1     bjh21        ) {
   5415   1.1     bjh21         float_raise( float_flag_invalid );
   5416   1.1     bjh21         return 0;
   5417   1.1     bjh21     }
   5418   1.1     bjh21     return
   5419   1.1     bjh21            ( a.low == b.low )
   5420   1.1     bjh21         && (    ( a.high == b.high )
   5421   1.1     bjh21              || (    ( a.low == 0 )
   5422   1.1     bjh21                   && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
   5423   1.1     bjh21            );
   5424   1.1     bjh21 
   5425   1.1     bjh21 }
   5426   1.1     bjh21 
   5427   1.1     bjh21 /*
   5428   1.1     bjh21 -------------------------------------------------------------------------------
   5429   1.1     bjh21 Returns 1 if the quadruple-precision floating-point value `a' is less than
   5430   1.1     bjh21 or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
   5431   1.1     bjh21 cause an exception.  Otherwise, the comparison is performed according to the
   5432   1.1     bjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   5433   1.1     bjh21 -------------------------------------------------------------------------------
   5434   1.1     bjh21 */
   5435   1.1     bjh21 flag float128_le_quiet( float128 a, float128 b )
   5436   1.1     bjh21 {
   5437   1.1     bjh21     flag aSign, bSign;
   5438   1.1     bjh21 
   5439   1.1     bjh21     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5440   1.1     bjh21               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5441   1.1     bjh21          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5442   1.1     bjh21               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5443   1.1     bjh21        ) {
   5444   1.1     bjh21         if (    float128_is_signaling_nan( a )
   5445   1.1     bjh21              || float128_is_signaling_nan( b ) ) {
   5446   1.1     bjh21             float_raise( float_flag_invalid );
   5447   1.1     bjh21         }
   5448   1.1     bjh21         return 0;
   5449   1.1     bjh21     }
   5450   1.1     bjh21     aSign = extractFloat128Sign( a );
   5451   1.1     bjh21     bSign = extractFloat128Sign( b );
   5452   1.1     bjh21     if ( aSign != bSign ) {
   5453   1.1     bjh21         return
   5454   1.1     bjh21                aSign
   5455   1.1     bjh21             || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   5456   1.1     bjh21                  == 0 );
   5457   1.1     bjh21     }
   5458   1.1     bjh21     return
   5459   1.1     bjh21           aSign ? le128( b.high, b.low, a.high, a.low )
   5460   1.1     bjh21         : le128( a.high, a.low, b.high, b.low );
   5461   1.1     bjh21 
   5462   1.1     bjh21 }
   5463   1.1     bjh21 
   5464   1.1     bjh21 /*
   5465   1.1     bjh21 -------------------------------------------------------------------------------
   5466   1.1     bjh21 Returns 1 if the quadruple-precision floating-point value `a' is less than
   5467   1.1     bjh21 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
   5468   1.1     bjh21 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
   5469   1.1     bjh21 Standard for Binary Floating-Point Arithmetic.
   5470   1.1     bjh21 -------------------------------------------------------------------------------
   5471   1.1     bjh21 */
   5472   1.1     bjh21 flag float128_lt_quiet( float128 a, float128 b )
   5473   1.1     bjh21 {
   5474   1.1     bjh21     flag aSign, bSign;
   5475   1.1     bjh21 
   5476   1.1     bjh21     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
   5477   1.1     bjh21               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
   5478   1.1     bjh21          || (    ( extractFloat128Exp( b ) == 0x7FFF )
   5479   1.1     bjh21               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
   5480   1.1     bjh21        ) {
   5481   1.1     bjh21         if (    float128_is_signaling_nan( a )
   5482   1.1     bjh21              || float128_is_signaling_nan( b ) ) {
   5483   1.1     bjh21             float_raise( float_flag_invalid );
   5484   1.1     bjh21         }
   5485   1.1     bjh21         return 0;
   5486   1.1     bjh21     }
   5487   1.1     bjh21     aSign = extractFloat128Sign( a );
   5488   1.1     bjh21     bSign = extractFloat128Sign( b );
   5489   1.1     bjh21     if ( aSign != bSign ) {
   5490   1.1     bjh21         return
   5491   1.1     bjh21                aSign
   5492   1.1     bjh21             && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
   5493   1.1     bjh21                  != 0 );
   5494   1.1     bjh21     }
   5495   1.1     bjh21     return
   5496   1.1     bjh21           aSign ? lt128( b.high, b.low, a.high, a.low )
   5497   1.1     bjh21         : lt128( a.high, a.low, b.high, b.low );
   5498   1.1     bjh21 
   5499   1.1     bjh21 }
   5500   1.1     bjh21 
   5501   1.1     bjh21 #endif
   5502   1.1     bjh21 
   5503   1.1     bjh21 
   5504   1.1     bjh21 #if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS)
   5505   1.1     bjh21 
   5506   1.1     bjh21 /*
   5507   1.1     bjh21  * These two routines are not part of the original softfloat distribution.
   5508   1.1     bjh21  *
   5509   1.1     bjh21  * They are based on the corresponding conversions to integer but return
   5510   1.1     bjh21  * unsigned numbers instead since these functions are required by GCC.
   5511   1.1     bjh21  *
   5512   1.2      salo  * Added by Mark Brinicombe <mark (at) NetBSD.org>	27/09/97
   5513   1.1     bjh21  *
   5514   1.1     bjh21  * float64 version overhauled for SoftFloat 2a [bjh21 2000-07-15]
   5515   1.1     bjh21  */
   5516   1.1     bjh21 
   5517   1.1     bjh21 /*
   5518   1.1     bjh21 -------------------------------------------------------------------------------
   5519   1.1     bjh21 Returns the result of converting the double-precision floating-point value
   5520   1.1     bjh21 `a' to the 32-bit unsigned integer format.  The conversion is
   5521   1.1     bjh21 performed according to the IEC/IEEE Standard for Binary Floating-point
   5522   1.1     bjh21 Arithmetic, except that the conversion is always rounded toward zero.  If
   5523   1.1     bjh21 `a' is a NaN, the largest positive integer is returned.  If the conversion
   5524   1.1     bjh21 overflows, the largest integer positive is returned.
   5525   1.1     bjh21 -------------------------------------------------------------------------------
   5526   1.1     bjh21 */
   5527   1.1     bjh21 uint32 float64_to_uint32_round_to_zero( float64 a )
   5528   1.1     bjh21 {
   5529   1.1     bjh21     flag aSign;
   5530   1.1     bjh21     int16 aExp, shiftCount;
   5531   1.1     bjh21     bits64 aSig, savedASig;
   5532   1.1     bjh21     uint32 z;
   5533   1.1     bjh21 
   5534   1.1     bjh21     aSig = extractFloat64Frac( a );
   5535   1.1     bjh21     aExp = extractFloat64Exp( a );
   5536   1.1     bjh21     aSign = extractFloat64Sign( a );
   5537   1.1     bjh21 
   5538   1.1     bjh21     if (aSign) {
   5539   1.1     bjh21         float_raise( float_flag_invalid );
   5540   1.1     bjh21     	return(0);
   5541   1.1     bjh21     }
   5542   1.1     bjh21 
   5543   1.1     bjh21     if ( 0x41E < aExp ) {
   5544   1.1     bjh21         float_raise( float_flag_invalid );
   5545   1.1     bjh21         return 0xffffffff;
   5546   1.1     bjh21     }
   5547   1.1     bjh21     else if ( aExp < 0x3FF ) {
   5548  1.12      matt         if ( aExp || aSig ) set_float_exception_inexact_flag();
   5549   1.1     bjh21         return 0;
   5550   1.1     bjh21     }
   5551   1.1     bjh21     aSig |= LIT64( 0x0010000000000000 );
   5552   1.1     bjh21     shiftCount = 0x433 - aExp;
   5553   1.1     bjh21     savedASig = aSig;
   5554   1.1     bjh21     aSig >>= shiftCount;
   5555  1.10  christos     z = (uint32)aSig;
   5556   1.1     bjh21     if ( ( aSig<<shiftCount ) != savedASig ) {
   5557  1.12      matt         set_float_exception_inexact_flag();
   5558   1.1     bjh21     }
   5559   1.1     bjh21     return z;
   5560   1.1     bjh21 
   5561   1.1     bjh21 }
   5562   1.1     bjh21 
   5563   1.1     bjh21 /*
   5564   1.1     bjh21 -------------------------------------------------------------------------------
   5565   1.1     bjh21 Returns the result of converting the single-precision floating-point value
   5566   1.1     bjh21 `a' to the 32-bit unsigned integer format.  The conversion is
   5567   1.1     bjh21 performed according to the IEC/IEEE Standard for Binary Floating-point
   5568   1.1     bjh21 Arithmetic, except that the conversion is always rounded toward zero.  If
   5569   1.1     bjh21 `a' is a NaN, the largest positive integer is returned.  If the conversion
   5570   1.1     bjh21 overflows, the largest positive integer is returned.
   5571   1.1     bjh21 -------------------------------------------------------------------------------
   5572   1.1     bjh21 */
   5573   1.1     bjh21 uint32 float32_to_uint32_round_to_zero( float32 a )
   5574   1.1     bjh21 {
   5575   1.1     bjh21     flag aSign;
   5576   1.1     bjh21     int16 aExp, shiftCount;
   5577   1.1     bjh21     bits32 aSig;
   5578   1.1     bjh21     uint32 z;
   5579   1.1     bjh21 
   5580   1.1     bjh21     aSig = extractFloat32Frac( a );
   5581   1.1     bjh21     aExp = extractFloat32Exp( a );
   5582   1.1     bjh21     aSign = extractFloat32Sign( a );
   5583   1.1     bjh21     shiftCount = aExp - 0x9E;
   5584   1.1     bjh21 
   5585   1.1     bjh21     if (aSign) {
   5586   1.1     bjh21         float_raise( float_flag_invalid );
   5587   1.1     bjh21     	return(0);
   5588   1.1     bjh21     }
   5589   1.1     bjh21     if ( 0 < shiftCount ) {
   5590   1.1     bjh21         float_raise( float_flag_invalid );
   5591   1.1     bjh21         return 0xFFFFFFFF;
   5592   1.1     bjh21     }
   5593   1.1     bjh21     else if ( aExp <= 0x7E ) {
   5594  1.12      matt         if ( aExp | aSig ) set_float_exception_inexact_flag();
   5595   1.1     bjh21         return 0;
   5596   1.1     bjh21     }
   5597   1.1     bjh21     aSig = ( aSig | 0x800000 )<<8;
   5598   1.1     bjh21     z = aSig>>( - shiftCount );
   5599   1.1     bjh21     if ( aSig<<( shiftCount & 31 ) ) {
   5600  1.12      matt         set_float_exception_inexact_flag();
   5601   1.1     bjh21     }
   5602   1.1     bjh21     return z;
   5603   1.1     bjh21 
   5604   1.1     bjh21 }
   5605   1.1     bjh21 
   5606   1.1     bjh21 #endif
   5607