Home | History | Annotate | Line # | Download | only in bits64
      1  1.3  christos /* $NetBSD: softfloat-macros,v 1.3 2012/03/21 02:32:26 christos Exp $ */
      2  1.1     bjh21 
      3  1.1     bjh21 /*
      4  1.1     bjh21 ===============================================================================
      5  1.1     bjh21 
      6  1.1     bjh21 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
      7  1.1     bjh21 Arithmetic Package, Release 2a.
      8  1.1     bjh21 
      9  1.1     bjh21 Written by John R. Hauser.  This work was made possible in part by the
     10  1.1     bjh21 International Computer Science Institute, located at Suite 600, 1947 Center
     11  1.1     bjh21 Street, Berkeley, California 94704.  Funding was partially provided by the
     12  1.1     bjh21 National Science Foundation under grant MIP-9311980.  The original version
     13  1.1     bjh21 of this code was written as part of a project to build a fixed-point vector
     14  1.1     bjh21 processor in collaboration with the University of California at Berkeley,
     15  1.1     bjh21 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
     16  1.1     bjh21 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
     17  1.1     bjh21 arithmetic/SoftFloat.html'.
     18  1.1     bjh21 
     19  1.1     bjh21 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
     20  1.1     bjh21 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
     21  1.1     bjh21 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
     22  1.1     bjh21 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
     23  1.1     bjh21 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
     24  1.1     bjh21 
     25  1.1     bjh21 Derivative works are acceptable, even for commercial purposes, so long as
     26  1.1     bjh21 (1) they include prominent notice that the work is derivative, and (2) they
     27  1.1     bjh21 include prominent notice akin to these four paragraphs for those parts of
     28  1.1     bjh21 this code that are retained.
     29  1.1     bjh21 
     30  1.1     bjh21 ===============================================================================
     31  1.1     bjh21 */
     32  1.1     bjh21 
     33  1.1     bjh21 /*
     34  1.1     bjh21 -------------------------------------------------------------------------------
     35  1.1     bjh21 Shifts `a' right by the number of bits given in `count'.  If any nonzero
     36  1.1     bjh21 bits are shifted off, they are ``jammed'' into the least significant bit of
     37  1.1     bjh21 the result by setting the least significant bit to 1.  The value of `count'
     38  1.1     bjh21 can be arbitrarily large; in particular, if `count' is greater than 32, the
     39  1.1     bjh21 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
     40  1.1     bjh21 The result is stored in the location pointed to by `zPtr'.
     41  1.1     bjh21 -------------------------------------------------------------------------------
     42  1.1     bjh21 */
     43  1.1     bjh21 INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
     44  1.1     bjh21 {
     45  1.1     bjh21     bits32 z;
     46  1.1     bjh21 
     47  1.1     bjh21     if ( count == 0 ) {
     48  1.1     bjh21         z = a;
     49  1.1     bjh21     }
     50  1.1     bjh21     else if ( count < 32 ) {
     51  1.1     bjh21         z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
     52  1.1     bjh21     }
     53  1.1     bjh21     else {
     54  1.1     bjh21         z = ( a != 0 );
     55  1.1     bjh21     }
     56  1.1     bjh21     *zPtr = z;
     57  1.1     bjh21 
     58  1.1     bjh21 }
     59  1.1     bjh21 
     60  1.1     bjh21 /*
     61  1.1     bjh21 -------------------------------------------------------------------------------
     62  1.1     bjh21 Shifts `a' right by the number of bits given in `count'.  If any nonzero
     63  1.1     bjh21 bits are shifted off, they are ``jammed'' into the least significant bit of
     64  1.1     bjh21 the result by setting the least significant bit to 1.  The value of `count'
     65  1.1     bjh21 can be arbitrarily large; in particular, if `count' is greater than 64, the
     66  1.1     bjh21 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
     67  1.1     bjh21 The result is stored in the location pointed to by `zPtr'.
     68  1.1     bjh21 -------------------------------------------------------------------------------
     69  1.1     bjh21 */
     70  1.1     bjh21 INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
     71  1.1     bjh21 {
     72  1.1     bjh21     bits64 z;
     73  1.1     bjh21 
     74  1.1     bjh21     if ( count == 0 ) {
     75  1.1     bjh21         z = a;
     76  1.1     bjh21     }
     77  1.1     bjh21     else if ( count < 64 ) {
     78  1.1     bjh21         z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
     79  1.1     bjh21     }
     80  1.1     bjh21     else {
     81  1.1     bjh21         z = ( a != 0 );
     82  1.1     bjh21     }
     83  1.1     bjh21     *zPtr = z;
     84  1.1     bjh21 
     85  1.1     bjh21 }
     86  1.1     bjh21 
     87  1.1     bjh21 /*
     88  1.1     bjh21 -------------------------------------------------------------------------------
     89  1.1     bjh21 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
     90  1.1     bjh21 _plus_ the number of bits given in `count'.  The shifted result is at most
     91  1.1     bjh21 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
     92  1.1     bjh21 bits shifted off form a second 64-bit result as follows:  The _last_ bit
     93  1.1     bjh21 shifted off is the most-significant bit of the extra result, and the other
     94  1.1     bjh21 63 bits of the extra result are all zero if and only if _all_but_the_last_
     95  1.1     bjh21 bits shifted off were all zero.  This extra result is stored in the location
     96  1.1     bjh21 pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
     97  1.1     bjh21     (This routine makes more sense if `a0' and `a1' are considered to form a
     98  1.1     bjh21 fixed-point value with binary point between `a0' and `a1'.  This fixed-point
     99  1.1     bjh21 value is shifted right by the number of bits given in `count', and the
    100  1.1     bjh21 integer part of the result is returned at the location pointed to by
    101  1.1     bjh21 `z0Ptr'.  The fractional part of the result may be slightly corrupted as
    102  1.1     bjh21 described above, and is returned at the location pointed to by `z1Ptr'.)
    103  1.1     bjh21 -------------------------------------------------------------------------------
    104  1.1     bjh21 */
    105  1.1     bjh21 INLINE void
    106  1.1     bjh21  shift64ExtraRightJamming(
    107  1.1     bjh21      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
    108  1.1     bjh21 {
    109  1.1     bjh21     bits64 z0, z1;
    110  1.1     bjh21     int8 negCount = ( - count ) & 63;
    111  1.1     bjh21 
    112  1.1     bjh21     if ( count == 0 ) {
    113  1.1     bjh21         z1 = a1;
    114  1.1     bjh21         z0 = a0;
    115  1.1     bjh21     }
    116  1.1     bjh21     else if ( count < 64 ) {
    117  1.1     bjh21         z1 = ( a0<<negCount ) | ( a1 != 0 );
    118  1.1     bjh21         z0 = a0>>count;
    119  1.1     bjh21     }
    120  1.1     bjh21     else {
    121  1.1     bjh21         if ( count == 64 ) {
    122  1.1     bjh21             z1 = a0 | ( a1 != 0 );
    123  1.1     bjh21         }
    124  1.1     bjh21         else {
    125  1.1     bjh21             z1 = ( ( a0 | a1 ) != 0 );
    126  1.1     bjh21         }
    127  1.1     bjh21         z0 = 0;
    128  1.1     bjh21     }
    129  1.1     bjh21     *z1Ptr = z1;
    130  1.1     bjh21     *z0Ptr = z0;
    131  1.1     bjh21 
    132  1.1     bjh21 }
    133  1.1     bjh21 
    134  1.1     bjh21 /*
    135  1.1     bjh21 -------------------------------------------------------------------------------
    136  1.1     bjh21 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
    137  1.1     bjh21 number of bits given in `count'.  Any bits shifted off are lost.  The value
    138  1.1     bjh21 of `count' can be arbitrarily large; in particular, if `count' is greater
    139  1.1     bjh21 than 128, the result will be 0.  The result is broken into two 64-bit pieces
    140  1.1     bjh21 which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
    141  1.1     bjh21 -------------------------------------------------------------------------------
    142  1.1     bjh21 */
    143  1.1     bjh21 INLINE void
    144  1.1     bjh21  shift128Right(
    145  1.1     bjh21      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
    146  1.1     bjh21 {
    147  1.1     bjh21     bits64 z0, z1;
    148  1.1     bjh21     int8 negCount = ( - count ) & 63;
    149  1.1     bjh21 
    150  1.1     bjh21     if ( count == 0 ) {
    151  1.1     bjh21         z1 = a1;
    152  1.1     bjh21         z0 = a0;
    153  1.1     bjh21     }
    154  1.1     bjh21     else if ( count < 64 ) {
    155  1.1     bjh21         z1 = ( a0<<negCount ) | ( a1>>count );
    156  1.1     bjh21         z0 = a0>>count;
    157  1.1     bjh21     }
    158  1.1     bjh21     else {
    159  1.1     bjh21         z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
    160  1.1     bjh21         z0 = 0;
    161  1.1     bjh21     }
    162  1.1     bjh21     *z1Ptr = z1;
    163  1.1     bjh21     *z0Ptr = z0;
    164  1.1     bjh21 
    165  1.1     bjh21 }
    166  1.1     bjh21 
    167  1.1     bjh21 /*
    168  1.1     bjh21 -------------------------------------------------------------------------------
    169  1.1     bjh21 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
    170  1.1     bjh21 number of bits given in `count'.  If any nonzero bits are shifted off, they
    171  1.1     bjh21 are ``jammed'' into the least significant bit of the result by setting the
    172  1.1     bjh21 least significant bit to 1.  The value of `count' can be arbitrarily large;
    173  1.1     bjh21 in particular, if `count' is greater than 128, the result will be either
    174  1.1     bjh21 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
    175  1.1     bjh21 nonzero.  The result is broken into two 64-bit pieces which are stored at
    176  1.1     bjh21 the locations pointed to by `z0Ptr' and `z1Ptr'.
    177  1.1     bjh21 -------------------------------------------------------------------------------
    178  1.1     bjh21 */
    179  1.1     bjh21 INLINE void
    180  1.1     bjh21  shift128RightJamming(
    181  1.1     bjh21      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
    182  1.1     bjh21 {
    183  1.1     bjh21     bits64 z0, z1;
    184  1.1     bjh21     int8 negCount = ( - count ) & 63;
    185  1.1     bjh21 
    186  1.1     bjh21     if ( count == 0 ) {
    187  1.1     bjh21         z1 = a1;
    188  1.1     bjh21         z0 = a0;
    189  1.1     bjh21     }
    190  1.1     bjh21     else if ( count < 64 ) {
    191  1.1     bjh21         z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
    192  1.1     bjh21         z0 = a0>>count;
    193  1.1     bjh21     }
    194  1.1     bjh21     else {
    195  1.1     bjh21         if ( count == 64 ) {
    196  1.1     bjh21             z1 = a0 | ( a1 != 0 );
    197  1.1     bjh21         }
    198  1.1     bjh21         else if ( count < 128 ) {
    199  1.1     bjh21             z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
    200  1.1     bjh21         }
    201  1.1     bjh21         else {
    202  1.1     bjh21             z1 = ( ( a0 | a1 ) != 0 );
    203  1.1     bjh21         }
    204  1.1     bjh21         z0 = 0;
    205  1.1     bjh21     }
    206  1.1     bjh21     *z1Ptr = z1;
    207  1.1     bjh21     *z0Ptr = z0;
    208  1.1     bjh21 
    209  1.1     bjh21 }
    210  1.1     bjh21 
    211  1.1     bjh21 /*
    212  1.1     bjh21 -------------------------------------------------------------------------------
    213  1.1     bjh21 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
    214  1.1     bjh21 by 64 _plus_ the number of bits given in `count'.  The shifted result is
    215  1.1     bjh21 at most 128 nonzero bits; these are broken into two 64-bit pieces which are
    216  1.1     bjh21 stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
    217  1.1     bjh21 off form a third 64-bit result as follows:  The _last_ bit shifted off is
    218  1.1     bjh21 the most-significant bit of the extra result, and the other 63 bits of the
    219  1.1     bjh21 extra result are all zero if and only if _all_but_the_last_ bits shifted off
    220  1.1     bjh21 were all zero.  This extra result is stored in the location pointed to by
    221  1.1     bjh21 `z2Ptr'.  The value of `count' can be arbitrarily large.
    222  1.1     bjh21     (This routine makes more sense if `a0', `a1', and `a2' are considered
    223  1.1     bjh21 to form a fixed-point value with binary point between `a1' and `a2'.  This
    224  1.1     bjh21 fixed-point value is shifted right by the number of bits given in `count',
    225  1.1     bjh21 and the integer part of the result is returned at the locations pointed to
    226  1.1     bjh21 by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
    227  1.1     bjh21 corrupted as described above, and is returned at the location pointed to by
    228  1.1     bjh21 `z2Ptr'.)
    229  1.1     bjh21 -------------------------------------------------------------------------------
    230  1.1     bjh21 */
    231  1.1     bjh21 INLINE void
    232  1.1     bjh21  shift128ExtraRightJamming(
    233  1.1     bjh21      bits64 a0,
    234  1.1     bjh21      bits64 a1,
    235  1.1     bjh21      bits64 a2,
    236  1.1     bjh21      int16 count,
    237  1.1     bjh21      bits64 *z0Ptr,
    238  1.1     bjh21      bits64 *z1Ptr,
    239  1.1     bjh21      bits64 *z2Ptr
    240  1.1     bjh21  )
    241  1.1     bjh21 {
    242  1.1     bjh21     bits64 z0, z1, z2;
    243  1.1     bjh21     int8 negCount = ( - count ) & 63;
    244  1.1     bjh21 
    245  1.1     bjh21     if ( count == 0 ) {
    246  1.1     bjh21         z2 = a2;
    247  1.1     bjh21         z1 = a1;
    248  1.1     bjh21         z0 = a0;
    249  1.1     bjh21     }
    250  1.1     bjh21     else {
    251  1.1     bjh21         if ( count < 64 ) {
    252  1.1     bjh21             z2 = a1<<negCount;
    253  1.1     bjh21             z1 = ( a0<<negCount ) | ( a1>>count );
    254  1.1     bjh21             z0 = a0>>count;
    255  1.1     bjh21         }
    256  1.1     bjh21         else {
    257  1.1     bjh21             if ( count == 64 ) {
    258  1.1     bjh21                 z2 = a1;
    259  1.1     bjh21                 z1 = a0;
    260  1.1     bjh21             }
    261  1.1     bjh21             else {
    262  1.1     bjh21                 a2 |= a1;
    263  1.1     bjh21                 if ( count < 128 ) {
    264  1.1     bjh21                     z2 = a0<<negCount;
    265  1.1     bjh21                     z1 = a0>>( count & 63 );
    266  1.1     bjh21                 }
    267  1.1     bjh21                 else {
    268  1.1     bjh21                     z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
    269  1.1     bjh21                     z1 = 0;
    270  1.1     bjh21                 }
    271  1.1     bjh21             }
    272  1.1     bjh21             z0 = 0;
    273  1.1     bjh21         }
    274  1.1     bjh21         z2 |= ( a2 != 0 );
    275  1.1     bjh21     }
    276  1.1     bjh21     *z2Ptr = z2;
    277  1.1     bjh21     *z1Ptr = z1;
    278  1.1     bjh21     *z0Ptr = z0;
    279  1.1     bjh21 
    280  1.1     bjh21 }
    281  1.1     bjh21 
    282  1.1     bjh21 /*
    283  1.1     bjh21 -------------------------------------------------------------------------------
    284  1.1     bjh21 Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
    285  1.1     bjh21 number of bits given in `count'.  Any bits shifted off are lost.  The value
    286  1.1     bjh21 of `count' must be less than 64.  The result is broken into two 64-bit
    287  1.1     bjh21 pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
    288  1.1     bjh21 -------------------------------------------------------------------------------
    289  1.1     bjh21 */
    290  1.1     bjh21 INLINE void
    291  1.1     bjh21  shortShift128Left(
    292  1.1     bjh21      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
    293  1.1     bjh21 {
    294  1.1     bjh21 
    295  1.1     bjh21     *z1Ptr = a1<<count;
    296  1.1     bjh21     *z0Ptr =
    297  1.1     bjh21         ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
    298  1.1     bjh21 
    299  1.1     bjh21 }
    300  1.1     bjh21 
    301  1.1     bjh21 /*
    302  1.1     bjh21 -------------------------------------------------------------------------------
    303  1.1     bjh21 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
    304  1.1     bjh21 by the number of bits given in `count'.  Any bits shifted off are lost.
    305  1.1     bjh21 The value of `count' must be less than 64.  The result is broken into three
    306  1.1     bjh21 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
    307  1.1     bjh21 `z1Ptr', and `z2Ptr'.
    308  1.1     bjh21 -------------------------------------------------------------------------------
    309  1.1     bjh21 */
    310  1.1     bjh21 INLINE void
    311  1.1     bjh21  shortShift192Left(
    312  1.1     bjh21      bits64 a0,
    313  1.1     bjh21      bits64 a1,
    314  1.1     bjh21      bits64 a2,
    315  1.1     bjh21      int16 count,
    316  1.1     bjh21      bits64 *z0Ptr,
    317  1.1     bjh21      bits64 *z1Ptr,
    318  1.1     bjh21      bits64 *z2Ptr
    319  1.1     bjh21  )
    320  1.1     bjh21 {
    321  1.1     bjh21     bits64 z0, z1, z2;
    322  1.1     bjh21     int8 negCount;
    323  1.1     bjh21 
    324  1.1     bjh21     z2 = a2<<count;
    325  1.1     bjh21     z1 = a1<<count;
    326  1.1     bjh21     z0 = a0<<count;
    327  1.1     bjh21     if ( 0 < count ) {
    328  1.1     bjh21         negCount = ( ( - count ) & 63 );
    329  1.1     bjh21         z1 |= a2>>negCount;
    330  1.1     bjh21         z0 |= a1>>negCount;
    331  1.1     bjh21     }
    332  1.1     bjh21     *z2Ptr = z2;
    333  1.1     bjh21     *z1Ptr = z1;
    334  1.1     bjh21     *z0Ptr = z0;
    335  1.1     bjh21 
    336  1.1     bjh21 }
    337  1.1     bjh21 
    338  1.1     bjh21 /*
    339  1.1     bjh21 -------------------------------------------------------------------------------
    340  1.1     bjh21 Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
    341  1.1     bjh21 value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
    342  1.1     bjh21 any carry out is lost.  The result is broken into two 64-bit pieces which
    343  1.1     bjh21 are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
    344  1.1     bjh21 -------------------------------------------------------------------------------
    345  1.1     bjh21 */
    346  1.1     bjh21 INLINE void
    347  1.1     bjh21  add128(
    348  1.1     bjh21      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
    349  1.1     bjh21 {
    350  1.1     bjh21     bits64 z1;
    351  1.1     bjh21 
    352  1.1     bjh21     z1 = a1 + b1;
    353  1.1     bjh21     *z1Ptr = z1;
    354  1.1     bjh21     *z0Ptr = a0 + b0 + ( z1 < a1 );
    355  1.1     bjh21 
    356  1.1     bjh21 }
    357  1.1     bjh21 
    358  1.1     bjh21 /*
    359  1.1     bjh21 -------------------------------------------------------------------------------
    360  1.1     bjh21 Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
    361  1.1     bjh21 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
    362  1.1     bjh21 modulo 2^192, so any carry out is lost.  The result is broken into three
    363  1.1     bjh21 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
    364  1.1     bjh21 `z1Ptr', and `z2Ptr'.
    365  1.1     bjh21 -------------------------------------------------------------------------------
    366  1.1     bjh21 */
    367  1.1     bjh21 INLINE void
    368  1.1     bjh21  add192(
    369  1.1     bjh21      bits64 a0,
    370  1.1     bjh21      bits64 a1,
    371  1.1     bjh21      bits64 a2,
    372  1.1     bjh21      bits64 b0,
    373  1.1     bjh21      bits64 b1,
    374  1.1     bjh21      bits64 b2,
    375  1.1     bjh21      bits64 *z0Ptr,
    376  1.1     bjh21      bits64 *z1Ptr,
    377  1.1     bjh21      bits64 *z2Ptr
    378  1.1     bjh21  )
    379  1.1     bjh21 {
    380  1.1     bjh21     bits64 z0, z1, z2;
    381  1.1     bjh21     int8 carry0, carry1;
    382  1.1     bjh21 
    383  1.1     bjh21     z2 = a2 + b2;
    384  1.1     bjh21     carry1 = ( z2 < a2 );
    385  1.1     bjh21     z1 = a1 + b1;
    386  1.1     bjh21     carry0 = ( z1 < a1 );
    387  1.1     bjh21     z0 = a0 + b0;
    388  1.1     bjh21     z1 += carry1;
    389  1.2      tron     z0 += ( z1 < (bits64)carry1 );
    390  1.1     bjh21     z0 += carry0;
    391  1.1     bjh21     *z2Ptr = z2;
    392  1.1     bjh21     *z1Ptr = z1;
    393  1.1     bjh21     *z0Ptr = z0;
    394  1.1     bjh21 
    395  1.1     bjh21 }
    396  1.1     bjh21 
    397  1.1     bjh21 /*
    398  1.1     bjh21 -------------------------------------------------------------------------------
    399  1.1     bjh21 Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
    400  1.1     bjh21 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
    401  1.1     bjh21 2^128, so any borrow out (carry out) is lost.  The result is broken into two
    402  1.1     bjh21 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
    403  1.1     bjh21 `z1Ptr'.
    404  1.1     bjh21 -------------------------------------------------------------------------------
    405  1.1     bjh21 */
    406  1.1     bjh21 INLINE void
    407  1.1     bjh21  sub128(
    408  1.1     bjh21      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
    409  1.1     bjh21 {
    410  1.1     bjh21 
    411  1.1     bjh21     *z1Ptr = a1 - b1;
    412  1.1     bjh21     *z0Ptr = a0 - b0 - ( a1 < b1 );
    413  1.1     bjh21 
    414  1.1     bjh21 }
    415  1.1     bjh21 
    416  1.1     bjh21 /*
    417  1.1     bjh21 -------------------------------------------------------------------------------
    418  1.1     bjh21 Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
    419  1.1     bjh21 from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
    420  1.1     bjh21 Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
    421  1.1     bjh21 result is broken into three 64-bit pieces which are stored at the locations
    422  1.1     bjh21 pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
    423  1.1     bjh21 -------------------------------------------------------------------------------
    424  1.1     bjh21 */
    425  1.1     bjh21 INLINE void
    426  1.1     bjh21  sub192(
    427  1.1     bjh21      bits64 a0,
    428  1.1     bjh21      bits64 a1,
    429  1.1     bjh21      bits64 a2,
    430  1.1     bjh21      bits64 b0,
    431  1.1     bjh21      bits64 b1,
    432  1.1     bjh21      bits64 b2,
    433  1.1     bjh21      bits64 *z0Ptr,
    434  1.1     bjh21      bits64 *z1Ptr,
    435  1.1     bjh21      bits64 *z2Ptr
    436  1.1     bjh21  )
    437  1.1     bjh21 {
    438  1.1     bjh21     bits64 z0, z1, z2;
    439  1.1     bjh21     int8 borrow0, borrow1;
    440  1.1     bjh21 
    441  1.1     bjh21     z2 = a2 - b2;
    442  1.1     bjh21     borrow1 = ( a2 < b2 );
    443  1.1     bjh21     z1 = a1 - b1;
    444  1.1     bjh21     borrow0 = ( a1 < b1 );
    445  1.1     bjh21     z0 = a0 - b0;
    446  1.2      tron     z0 -= ( z1 < (bits64)borrow1 );
    447  1.1     bjh21     z1 -= borrow1;
    448  1.1     bjh21     z0 -= borrow0;
    449  1.1     bjh21     *z2Ptr = z2;
    450  1.1     bjh21     *z1Ptr = z1;
    451  1.1     bjh21     *z0Ptr = z0;
    452  1.1     bjh21 
    453  1.1     bjh21 }
    454  1.1     bjh21 
    455  1.1     bjh21 /*
    456  1.1     bjh21 -------------------------------------------------------------------------------
    457  1.1     bjh21 Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
    458  1.1     bjh21 into two 64-bit pieces which are stored at the locations pointed to by
    459  1.1     bjh21 `z0Ptr' and `z1Ptr'.
    460  1.1     bjh21 -------------------------------------------------------------------------------
    461  1.1     bjh21 */
    462  1.1     bjh21 INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
    463  1.1     bjh21 {
    464  1.1     bjh21     bits32 aHigh, aLow, bHigh, bLow;
    465  1.1     bjh21     bits64 z0, zMiddleA, zMiddleB, z1;
    466  1.1     bjh21 
    467  1.3  christos     aLow = (bits32)a;
    468  1.3  christos     aHigh = (bits32)(a>>32);
    469  1.3  christos     bLow = (bits32)b;
    470  1.3  christos     bHigh = (bits32)(b>>32);
    471  1.1     bjh21     z1 = ( (bits64) aLow ) * bLow;
    472  1.1     bjh21     zMiddleA = ( (bits64) aLow ) * bHigh;
    473  1.1     bjh21     zMiddleB = ( (bits64) aHigh ) * bLow;
    474  1.1     bjh21     z0 = ( (bits64) aHigh ) * bHigh;
    475  1.1     bjh21     zMiddleA += zMiddleB;
    476  1.1     bjh21     z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
    477  1.1     bjh21     zMiddleA <<= 32;
    478  1.1     bjh21     z1 += zMiddleA;
    479  1.1     bjh21     z0 += ( z1 < zMiddleA );
    480  1.1     bjh21     *z1Ptr = z1;
    481  1.1     bjh21     *z0Ptr = z0;
    482  1.1     bjh21 
    483  1.1     bjh21 }
    484  1.1     bjh21 
    485  1.1     bjh21 /*
    486  1.1     bjh21 -------------------------------------------------------------------------------
    487  1.1     bjh21 Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
    488  1.1     bjh21 `b' to obtain a 192-bit product.  The product is broken into three 64-bit
    489  1.1     bjh21 pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
    490  1.1     bjh21 `z2Ptr'.
    491  1.1     bjh21 -------------------------------------------------------------------------------
    492  1.1     bjh21 */
    493  1.1     bjh21 INLINE void
    494  1.1     bjh21  mul128By64To192(
    495  1.1     bjh21      bits64 a0,
    496  1.1     bjh21      bits64 a1,
    497  1.1     bjh21      bits64 b,
    498  1.1     bjh21      bits64 *z0Ptr,
    499  1.1     bjh21      bits64 *z1Ptr,
    500  1.1     bjh21      bits64 *z2Ptr
    501  1.1     bjh21  )
    502  1.1     bjh21 {
    503  1.1     bjh21     bits64 z0, z1, z2, more1;
    504  1.1     bjh21 
    505  1.1     bjh21     mul64To128( a1, b, &z1, &z2 );
    506  1.1     bjh21     mul64To128( a0, b, &z0, &more1 );
    507  1.1     bjh21     add128( z0, more1, 0, z1, &z0, &z1 );
    508  1.1     bjh21     *z2Ptr = z2;
    509  1.1     bjh21     *z1Ptr = z1;
    510  1.1     bjh21     *z0Ptr = z0;
    511  1.1     bjh21 
    512  1.1     bjh21 }
    513  1.1     bjh21 
    514  1.1     bjh21 /*
    515  1.1     bjh21 -------------------------------------------------------------------------------
    516  1.1     bjh21 Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
    517  1.1     bjh21 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
    518  1.1     bjh21 product.  The product is broken into four 64-bit pieces which are stored at
    519  1.1     bjh21 the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
    520  1.1     bjh21 -------------------------------------------------------------------------------
    521  1.1     bjh21 */
    522  1.1     bjh21 INLINE void
    523  1.1     bjh21  mul128To256(
    524  1.1     bjh21      bits64 a0,
    525  1.1     bjh21      bits64 a1,
    526  1.1     bjh21      bits64 b0,
    527  1.1     bjh21      bits64 b1,
    528  1.1     bjh21      bits64 *z0Ptr,
    529  1.1     bjh21      bits64 *z1Ptr,
    530  1.1     bjh21      bits64 *z2Ptr,
    531  1.1     bjh21      bits64 *z3Ptr
    532  1.1     bjh21  )
    533  1.1     bjh21 {
    534  1.1     bjh21     bits64 z0, z1, z2, z3;
    535  1.1     bjh21     bits64 more1, more2;
    536  1.1     bjh21 
    537  1.1     bjh21     mul64To128( a1, b1, &z2, &z3 );
    538  1.1     bjh21     mul64To128( a1, b0, &z1, &more2 );
    539  1.1     bjh21     add128( z1, more2, 0, z2, &z1, &z2 );
    540  1.1     bjh21     mul64To128( a0, b0, &z0, &more1 );
    541  1.1     bjh21     add128( z0, more1, 0, z1, &z0, &z1 );
    542  1.1     bjh21     mul64To128( a0, b1, &more1, &more2 );
    543  1.1     bjh21     add128( more1, more2, 0, z2, &more1, &z2 );
    544  1.1     bjh21     add128( z0, z1, 0, more1, &z0, &z1 );
    545  1.1     bjh21     *z3Ptr = z3;
    546  1.1     bjh21     *z2Ptr = z2;
    547  1.1     bjh21     *z1Ptr = z1;
    548  1.1     bjh21     *z0Ptr = z0;
    549  1.1     bjh21 
    550  1.1     bjh21 }
    551  1.1     bjh21 
    552  1.1     bjh21 /*
    553  1.1     bjh21 -------------------------------------------------------------------------------
    554  1.1     bjh21 Returns an approximation to the 64-bit integer quotient obtained by dividing
    555  1.1     bjh21 `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
    556  1.1     bjh21 divisor `b' must be at least 2^63.  If q is the exact quotient truncated
    557  1.1     bjh21 toward zero, the approximation returned lies between q and q + 2 inclusive.
    558  1.1     bjh21 If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
    559  1.1     bjh21 unsigned integer is returned.
    560  1.1     bjh21 -------------------------------------------------------------------------------
    561  1.1     bjh21 */
    562  1.1     bjh21 static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
    563  1.1     bjh21 {
    564  1.1     bjh21     bits64 b0, b1;
    565  1.1     bjh21     bits64 rem0, rem1, term0, term1;
    566  1.1     bjh21     bits64 z;
    567  1.1     bjh21 
    568  1.1     bjh21     if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
    569  1.1     bjh21     b0 = b>>32;
    570  1.1     bjh21     z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
    571  1.1     bjh21     mul64To128( b, z, &term0, &term1 );
    572  1.1     bjh21     sub128( a0, a1, term0, term1, &rem0, &rem1 );
    573  1.1     bjh21     while ( ( (sbits64) rem0 ) < 0 ) {
    574  1.1     bjh21         z -= LIT64( 0x100000000 );
    575  1.1     bjh21         b1 = b<<32;
    576  1.1     bjh21         add128( rem0, rem1, b0, b1, &rem0, &rem1 );
    577  1.1     bjh21     }
    578  1.1     bjh21     rem0 = ( rem0<<32 ) | ( rem1>>32 );
    579  1.1     bjh21     z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
    580  1.1     bjh21     return z;
    581  1.1     bjh21 
    582  1.1     bjh21 }
    583  1.1     bjh21 
    584  1.1     bjh21 #if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128)
    585  1.1     bjh21 /*
    586  1.1     bjh21 -------------------------------------------------------------------------------
    587  1.1     bjh21 Returns an approximation to the square root of the 32-bit significand given
    588  1.1     bjh21 by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
    589  1.1     bjh21 `aExp' (the least significant bit) is 1, the integer returned approximates
    590  1.1     bjh21 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
    591  1.1     bjh21 is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
    592  1.1     bjh21 case, the approximation returned lies strictly within +/-2 of the exact
    593  1.1     bjh21 value.
    594  1.1     bjh21 -------------------------------------------------------------------------------
    595  1.1     bjh21 */
    596  1.1     bjh21 static bits32 estimateSqrt32( int16 aExp, bits32 a )
    597  1.1     bjh21 {
    598  1.1     bjh21     static const bits16 sqrtOddAdjustments[] = {
    599  1.1     bjh21         0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
    600  1.1     bjh21         0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
    601  1.1     bjh21     };
    602  1.1     bjh21     static const bits16 sqrtEvenAdjustments[] = {
    603  1.1     bjh21         0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
    604  1.1     bjh21         0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
    605  1.1     bjh21     };
    606  1.1     bjh21     int8 idx;
    607  1.1     bjh21     bits32 z;
    608  1.1     bjh21 
    609  1.1     bjh21     idx = ( a>>27 ) & 15;
    610  1.1     bjh21     if ( aExp & 1 ) {
    611  1.1     bjh21         z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ idx ];
    612  1.1     bjh21         z = ( ( a / z )<<14 ) + ( z<<15 );
    613  1.1     bjh21         a >>= 1;
    614  1.1     bjh21     }
    615  1.1     bjh21     else {
    616  1.1     bjh21         z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ idx ];
    617  1.1     bjh21         z = a / z + z;
    618  1.1     bjh21         z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
    619  1.3  christos         if ( z <= a ) return (bits32) ( ( (bits32) a )>>1 );
    620  1.1     bjh21     }
    621  1.1     bjh21     return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
    622  1.1     bjh21 
    623  1.1     bjh21 }
    624  1.1     bjh21 #endif
    625  1.1     bjh21 
    626  1.1     bjh21 /*
    627  1.1     bjh21 -------------------------------------------------------------------------------
    628  1.1     bjh21 Returns the number of leading 0 bits before the most-significant 1 bit of
    629  1.1     bjh21 `a'.  If `a' is zero, 32 is returned.
    630  1.1     bjh21 -------------------------------------------------------------------------------
    631  1.1     bjh21 */
    632  1.1     bjh21 static int8 countLeadingZeros32( bits32 a )
    633  1.1     bjh21 {
    634  1.1     bjh21     static const int8 countLeadingZerosHigh[] = {
    635  1.1     bjh21         8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
    636  1.1     bjh21         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    637  1.1     bjh21         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    638  1.1     bjh21         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    639  1.1     bjh21         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    640  1.1     bjh21         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    641  1.1     bjh21         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    642  1.1     bjh21         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    643  1.1     bjh21         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    644  1.1     bjh21         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    645  1.1     bjh21         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    646  1.1     bjh21         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    647  1.1     bjh21         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    648  1.1     bjh21         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    649  1.1     bjh21         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    650  1.1     bjh21         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    651  1.1     bjh21     };
    652  1.1     bjh21     int8 shiftCount;
    653  1.1     bjh21 
    654  1.1     bjh21     shiftCount = 0;
    655  1.1     bjh21     if ( a < 0x10000 ) {
    656  1.1     bjh21         shiftCount += 16;
    657  1.1     bjh21         a <<= 16;
    658  1.1     bjh21     }
    659  1.1     bjh21     if ( a < 0x1000000 ) {
    660  1.1     bjh21         shiftCount += 8;
    661  1.1     bjh21         a <<= 8;
    662  1.1     bjh21     }
    663  1.1     bjh21     shiftCount += countLeadingZerosHigh[ a>>24 ];
    664  1.1     bjh21     return shiftCount;
    665  1.1     bjh21 
    666  1.1     bjh21 }
    667  1.1     bjh21 
    668  1.1     bjh21 /*
    669  1.1     bjh21 -------------------------------------------------------------------------------
    670  1.1     bjh21 Returns the number of leading 0 bits before the most-significant 1 bit of
    671  1.1     bjh21 `a'.  If `a' is zero, 64 is returned.
    672  1.1     bjh21 -------------------------------------------------------------------------------
    673  1.1     bjh21 */
    674  1.1     bjh21 static int8 countLeadingZeros64( bits64 a )
    675  1.1     bjh21 {
    676  1.1     bjh21     int8 shiftCount;
    677  1.1     bjh21 
    678  1.1     bjh21     shiftCount = 0;
    679  1.1     bjh21     if ( a < ( (bits64) 1 )<<32 ) {
    680  1.1     bjh21         shiftCount += 32;
    681  1.1     bjh21     }
    682  1.1     bjh21     else {
    683  1.1     bjh21         a >>= 32;
    684  1.1     bjh21     }
    685  1.3  christos     shiftCount += (int8)countLeadingZeros32( (bits32)a );
    686  1.1     bjh21     return shiftCount;
    687  1.1     bjh21 
    688  1.1     bjh21 }
    689  1.1     bjh21 
    690  1.1     bjh21 /*
    691  1.1     bjh21 -------------------------------------------------------------------------------
    692  1.1     bjh21 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
    693  1.1     bjh21 is equal to the 128-bit value formed by concatenating `b0' and `b1'.
    694  1.1     bjh21 Otherwise, returns 0.
    695  1.1     bjh21 -------------------------------------------------------------------------------
    696  1.1     bjh21 */
    697  1.1     bjh21 INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
    698  1.1     bjh21 {
    699  1.1     bjh21 
    700  1.1     bjh21     return ( a0 == b0 ) && ( a1 == b1 );
    701  1.1     bjh21 
    702  1.1     bjh21 }
    703  1.1     bjh21 
    704  1.1     bjh21 /*
    705  1.1     bjh21 -------------------------------------------------------------------------------
    706  1.1     bjh21 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
    707  1.1     bjh21 than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
    708  1.1     bjh21 Otherwise, returns 0.
    709  1.1     bjh21 -------------------------------------------------------------------------------
    710  1.1     bjh21 */
    711  1.1     bjh21 INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
    712  1.1     bjh21 {
    713  1.1     bjh21 
    714  1.1     bjh21     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
    715  1.1     bjh21 
    716  1.1     bjh21 }
    717  1.1     bjh21 
    718  1.1     bjh21 /*
    719  1.1     bjh21 -------------------------------------------------------------------------------
    720  1.1     bjh21 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
    721  1.1     bjh21 than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
    722  1.1     bjh21 returns 0.
    723  1.1     bjh21 -------------------------------------------------------------------------------
    724  1.1     bjh21 */
    725  1.1     bjh21 INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
    726  1.1     bjh21 {
    727  1.1     bjh21 
    728  1.1     bjh21     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
    729  1.1     bjh21 
    730  1.1     bjh21 }
    731  1.1     bjh21 
    732  1.1     bjh21 /*
    733  1.1     bjh21 -------------------------------------------------------------------------------
    734  1.1     bjh21 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
    735  1.1     bjh21 not equal to the 128-bit value formed by concatenating `b0' and `b1'.
    736  1.1     bjh21 Otherwise, returns 0.
    737  1.1     bjh21 -------------------------------------------------------------------------------
    738  1.1     bjh21 */
    739  1.1     bjh21 INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
    740  1.1     bjh21 {
    741  1.1     bjh21 
    742  1.1     bjh21     return ( a0 != b0 ) || ( a1 != b1 );
    743  1.1     bjh21 
    744  1.1     bjh21 }
    745  1.1     bjh21 
    746