Home | History | Annotate | Line # | Download | only in libkern
softfloat-macros.h revision 1.1
      1  1.1  ross /* $NetBSD: softfloat-macros.h,v 1.1 2001/04/26 03:10:47 ross Exp $ */
      2  1.1  ross 
      3  1.1  ross /*
      4  1.1  ross ===============================================================================
      5  1.1  ross 
      6  1.1  ross This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
      7  1.1  ross Arithmetic Package, Release 2a.
      8  1.1  ross 
      9  1.1  ross Written by John R. Hauser.  This work was made possible in part by the
     10  1.1  ross International Computer Science Institute, located at Suite 600, 1947 Center
     11  1.1  ross Street, Berkeley, California 94704.  Funding was partially provided by the
     12  1.1  ross National Science Foundation under grant MIP-9311980.  The original version
     13  1.1  ross of this code was written as part of a project to build a fixed-point vector
     14  1.1  ross processor in collaboration with the University of California at Berkeley,
     15  1.1  ross overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
     16  1.1  ross is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
     17  1.1  ross arithmetic/SoftFloat.html'.
     18  1.1  ross 
     19  1.1  ross THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
     20  1.1  ross has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
     21  1.1  ross TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
     22  1.1  ross PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
     23  1.1  ross AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
     24  1.1  ross 
     25  1.1  ross Derivative works are acceptable, even for commercial purposes, so long as
     26  1.1  ross (1) they include prominent notice that the work is derivative, and (2) they
     27  1.1  ross include prominent notice akin to these four paragraphs for those parts of
     28  1.1  ross this code that are retained.
     29  1.1  ross 
     30  1.1  ross ===============================================================================
     31  1.1  ross */
     32  1.1  ross 
     33  1.1  ross /*
     34  1.1  ross -------------------------------------------------------------------------------
     35  1.1  ross Shifts `a' right by the number of bits given in `count'.  If any nonzero
     36  1.1  ross bits are shifted off, they are ``jammed'' into the least significant bit of
     37  1.1  ross the result by setting the least significant bit to 1.  The value of `count'
     38  1.1  ross can be arbitrarily large; in particular, if `count' is greater than 32, the
     39  1.1  ross result will be either 0 or 1, depending on whether `a' is zero or nonzero.
     40  1.1  ross The result is stored in the location pointed to by `zPtr'.
     41  1.1  ross -------------------------------------------------------------------------------
     42  1.1  ross */
     43  1.1  ross INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
     44  1.1  ross {
     45  1.1  ross     bits32 z;
     46  1.1  ross 
     47  1.1  ross     if ( count == 0 ) {
     48  1.1  ross         z = a;
     49  1.1  ross     }
     50  1.1  ross     else if ( count < 32 ) {
     51  1.1  ross         z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
     52  1.1  ross     }
     53  1.1  ross     else {
     54  1.1  ross         z = ( a != 0 );
     55  1.1  ross     }
     56  1.1  ross     *zPtr = z;
     57  1.1  ross 
     58  1.1  ross }
     59  1.1  ross 
     60  1.1  ross /*
     61  1.1  ross -------------------------------------------------------------------------------
     62  1.1  ross Shifts `a' right by the number of bits given in `count'.  If any nonzero
     63  1.1  ross bits are shifted off, they are ``jammed'' into the least significant bit of
     64  1.1  ross the result by setting the least significant bit to 1.  The value of `count'
     65  1.1  ross can be arbitrarily large; in particular, if `count' is greater than 64, the
     66  1.1  ross result will be either 0 or 1, depending on whether `a' is zero or nonzero.
     67  1.1  ross The result is stored in the location pointed to by `zPtr'.
     68  1.1  ross -------------------------------------------------------------------------------
     69  1.1  ross */
     70  1.1  ross INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
     71  1.1  ross {
     72  1.1  ross     bits64 z;
     73  1.1  ross 
     74  1.1  ross     if ( count == 0 ) {
     75  1.1  ross         z = a;
     76  1.1  ross     }
     77  1.1  ross     else if ( count < 64 ) {
     78  1.1  ross         z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
     79  1.1  ross     }
     80  1.1  ross     else {
     81  1.1  ross         z = ( a != 0 );
     82  1.1  ross     }
     83  1.1  ross     *zPtr = z;
     84  1.1  ross 
     85  1.1  ross }
     86  1.1  ross 
     87  1.1  ross /*
     88  1.1  ross -------------------------------------------------------------------------------
     89  1.1  ross Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
     90  1.1  ross _plus_ the number of bits given in `count'.  The shifted result is at most
     91  1.1  ross 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
     92  1.1  ross bits shifted off form a second 64-bit result as follows:  The _last_ bit
     93  1.1  ross shifted off is the most-significant bit of the extra result, and the other
     94  1.1  ross 63 bits of the extra result are all zero if and only if _all_but_the_last_
     95  1.1  ross bits shifted off were all zero.  This extra result is stored in the location
     96  1.1  ross pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
     97  1.1  ross     (This routine makes more sense if `a0' and `a1' are considered to form a
     98  1.1  ross fixed-point value with binary point between `a0' and `a1'.  This fixed-point
     99  1.1  ross value is shifted right by the number of bits given in `count', and the
    100  1.1  ross integer part of the result is returned at the location pointed to by
    101  1.1  ross `z0Ptr'.  The fractional part of the result may be slightly corrupted as
    102  1.1  ross described above, and is returned at the location pointed to by `z1Ptr'.)
    103  1.1  ross -------------------------------------------------------------------------------
    104  1.1  ross */
    105  1.1  ross INLINE void
    106  1.1  ross  shift64ExtraRightJamming(
    107  1.1  ross      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
    108  1.1  ross {
    109  1.1  ross     bits64 z0, z1;
    110  1.1  ross     int8 negCount = ( - count ) & 63;
    111  1.1  ross 
    112  1.1  ross     if ( count == 0 ) {
    113  1.1  ross         z1 = a1;
    114  1.1  ross         z0 = a0;
    115  1.1  ross     }
    116  1.1  ross     else if ( count < 64 ) {
    117  1.1  ross         z1 = ( a0<<negCount ) | ( a1 != 0 );
    118  1.1  ross         z0 = a0>>count;
    119  1.1  ross     }
    120  1.1  ross     else {
    121  1.1  ross         if ( count == 64 ) {
    122  1.1  ross             z1 = a0 | ( a1 != 0 );
    123  1.1  ross         }
    124  1.1  ross         else {
    125  1.1  ross             z1 = ( ( a0 | a1 ) != 0 );
    126  1.1  ross         }
    127  1.1  ross         z0 = 0;
    128  1.1  ross     }
    129  1.1  ross     *z1Ptr = z1;
    130  1.1  ross     *z0Ptr = z0;
    131  1.1  ross 
    132  1.1  ross }
    133  1.1  ross 
    134  1.1  ross /*
    135  1.1  ross -------------------------------------------------------------------------------
    136  1.1  ross Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
    137  1.1  ross number of bits given in `count'.  Any bits shifted off are lost.  The value
    138  1.1  ross of `count' can be arbitrarily large; in particular, if `count' is greater
    139  1.1  ross than 128, the result will be 0.  The result is broken into two 64-bit pieces
    140  1.1  ross which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
    141  1.1  ross -------------------------------------------------------------------------------
    142  1.1  ross */
    143  1.1  ross INLINE void
    144  1.1  ross  shift128Right(
    145  1.1  ross      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
    146  1.1  ross {
    147  1.1  ross     bits64 z0, z1;
    148  1.1  ross     int8 negCount = ( - count ) & 63;
    149  1.1  ross 
    150  1.1  ross     if ( count == 0 ) {
    151  1.1  ross         z1 = a1;
    152  1.1  ross         z0 = a0;
    153  1.1  ross     }
    154  1.1  ross     else if ( count < 64 ) {
    155  1.1  ross         z1 = ( a0<<negCount ) | ( a1>>count );
    156  1.1  ross         z0 = a0>>count;
    157  1.1  ross     }
    158  1.1  ross     else {
    159  1.1  ross         z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
    160  1.1  ross         z0 = 0;
    161  1.1  ross     }
    162  1.1  ross     *z1Ptr = z1;
    163  1.1  ross     *z0Ptr = z0;
    164  1.1  ross 
    165  1.1  ross }
    166  1.1  ross 
    167  1.1  ross /*
    168  1.1  ross -------------------------------------------------------------------------------
    169  1.1  ross Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
    170  1.1  ross number of bits given in `count'.  If any nonzero bits are shifted off, they
    171  1.1  ross are ``jammed'' into the least significant bit of the result by setting the
    172  1.1  ross least significant bit to 1.  The value of `count' can be arbitrarily large;
    173  1.1  ross in particular, if `count' is greater than 128, the result will be either
    174  1.1  ross 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
    175  1.1  ross nonzero.  The result is broken into two 64-bit pieces which are stored at
    176  1.1  ross the locations pointed to by `z0Ptr' and `z1Ptr'.
    177  1.1  ross -------------------------------------------------------------------------------
    178  1.1  ross */
    179  1.1  ross INLINE void
    180  1.1  ross  shift128RightJamming(
    181  1.1  ross      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
    182  1.1  ross {
    183  1.1  ross     bits64 z0, z1;
    184  1.1  ross     int8 negCount = ( - count ) & 63;
    185  1.1  ross 
    186  1.1  ross     if ( count == 0 ) {
    187  1.1  ross         z1 = a1;
    188  1.1  ross         z0 = a0;
    189  1.1  ross     }
    190  1.1  ross     else if ( count < 64 ) {
    191  1.1  ross         z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
    192  1.1  ross         z0 = a0>>count;
    193  1.1  ross     }
    194  1.1  ross     else {
    195  1.1  ross         if ( count == 64 ) {
    196  1.1  ross             z1 = a0 | ( a1 != 0 );
    197  1.1  ross         }
    198  1.1  ross         else if ( count < 128 ) {
    199  1.1  ross             z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
    200  1.1  ross         }
    201  1.1  ross         else {
    202  1.1  ross             z1 = ( ( a0 | a1 ) != 0 );
    203  1.1  ross         }
    204  1.1  ross         z0 = 0;
    205  1.1  ross     }
    206  1.1  ross     *z1Ptr = z1;
    207  1.1  ross     *z0Ptr = z0;
    208  1.1  ross 
    209  1.1  ross }
    210  1.1  ross 
    211  1.1  ross /*
    212  1.1  ross -------------------------------------------------------------------------------
    213  1.1  ross Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
    214  1.1  ross by 64 _plus_ the number of bits given in `count'.  The shifted result is
    215  1.1  ross at most 128 nonzero bits; these are broken into two 64-bit pieces which are
    216  1.1  ross stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
    217  1.1  ross off form a third 64-bit result as follows:  The _last_ bit shifted off is
    218  1.1  ross the most-significant bit of the extra result, and the other 63 bits of the
    219  1.1  ross extra result are all zero if and only if _all_but_the_last_ bits shifted off
    220  1.1  ross were all zero.  This extra result is stored in the location pointed to by
    221  1.1  ross `z2Ptr'.  The value of `count' can be arbitrarily large.
    222  1.1  ross     (This routine makes more sense if `a0', `a1', and `a2' are considered
    223  1.1  ross to form a fixed-point value with binary point between `a1' and `a2'.  This
    224  1.1  ross fixed-point value is shifted right by the number of bits given in `count',
    225  1.1  ross and the integer part of the result is returned at the locations pointed to
    226  1.1  ross by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
    227  1.1  ross corrupted as described above, and is returned at the location pointed to by
    228  1.1  ross `z2Ptr'.)
    229  1.1  ross -------------------------------------------------------------------------------
    230  1.1  ross */
    231  1.1  ross INLINE void
    232  1.1  ross  shift128ExtraRightJamming(
    233  1.1  ross      bits64 a0,
    234  1.1  ross      bits64 a1,
    235  1.1  ross      bits64 a2,
    236  1.1  ross      int16 count,
    237  1.1  ross      bits64 *z0Ptr,
    238  1.1  ross      bits64 *z1Ptr,
    239  1.1  ross      bits64 *z2Ptr
    240  1.1  ross  )
    241  1.1  ross {
    242  1.1  ross     bits64 z0, z1, z2;
    243  1.1  ross     int8 negCount = ( - count ) & 63;
    244  1.1  ross 
    245  1.1  ross     if ( count == 0 ) {
    246  1.1  ross         z2 = a2;
    247  1.1  ross         z1 = a1;
    248  1.1  ross         z0 = a0;
    249  1.1  ross     }
    250  1.1  ross     else {
    251  1.1  ross         if ( count < 64 ) {
    252  1.1  ross             z2 = a1<<negCount;
    253  1.1  ross             z1 = ( a0<<negCount ) | ( a1>>count );
    254  1.1  ross             z0 = a0>>count;
    255  1.1  ross         }
    256  1.1  ross         else {
    257  1.1  ross             if ( count == 64 ) {
    258  1.1  ross                 z2 = a1;
    259  1.1  ross                 z1 = a0;
    260  1.1  ross             }
    261  1.1  ross             else {
    262  1.1  ross                 a2 |= a1;
    263  1.1  ross                 if ( count < 128 ) {
    264  1.1  ross                     z2 = a0<<negCount;
    265  1.1  ross                     z1 = a0>>( count & 63 );
    266  1.1  ross                 }
    267  1.1  ross                 else {
    268  1.1  ross                     z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
    269  1.1  ross                     z1 = 0;
    270  1.1  ross                 }
    271  1.1  ross             }
    272  1.1  ross             z0 = 0;
    273  1.1  ross         }
    274  1.1  ross         z2 |= ( a2 != 0 );
    275  1.1  ross     }
    276  1.1  ross     *z2Ptr = z2;
    277  1.1  ross     *z1Ptr = z1;
    278  1.1  ross     *z0Ptr = z0;
    279  1.1  ross 
    280  1.1  ross }
    281  1.1  ross 
    282  1.1  ross /*
    283  1.1  ross -------------------------------------------------------------------------------
    284  1.1  ross Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
    285  1.1  ross number of bits given in `count'.  Any bits shifted off are lost.  The value
    286  1.1  ross of `count' must be less than 64.  The result is broken into two 64-bit
    287  1.1  ross pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
    288  1.1  ross -------------------------------------------------------------------------------
    289  1.1  ross */
    290  1.1  ross INLINE void
    291  1.1  ross  shortShift128Left(
    292  1.1  ross      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
    293  1.1  ross {
    294  1.1  ross 
    295  1.1  ross     *z1Ptr = a1<<count;
    296  1.1  ross     *z0Ptr =
    297  1.1  ross         ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
    298  1.1  ross 
    299  1.1  ross }
    300  1.1  ross 
    301  1.1  ross /*
    302  1.1  ross -------------------------------------------------------------------------------
    303  1.1  ross Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
    304  1.1  ross by the number of bits given in `count'.  Any bits shifted off are lost.
    305  1.1  ross The value of `count' must be less than 64.  The result is broken into three
    306  1.1  ross 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
    307  1.1  ross `z1Ptr', and `z2Ptr'.
    308  1.1  ross -------------------------------------------------------------------------------
    309  1.1  ross */
    310  1.1  ross INLINE void
    311  1.1  ross  shortShift192Left(
    312  1.1  ross      bits64 a0,
    313  1.1  ross      bits64 a1,
    314  1.1  ross      bits64 a2,
    315  1.1  ross      int16 count,
    316  1.1  ross      bits64 *z0Ptr,
    317  1.1  ross      bits64 *z1Ptr,
    318  1.1  ross      bits64 *z2Ptr
    319  1.1  ross  )
    320  1.1  ross {
    321  1.1  ross     bits64 z0, z1, z2;
    322  1.1  ross     int8 negCount;
    323  1.1  ross 
    324  1.1  ross     z2 = a2<<count;
    325  1.1  ross     z1 = a1<<count;
    326  1.1  ross     z0 = a0<<count;
    327  1.1  ross     if ( 0 < count ) {
    328  1.1  ross         negCount = ( ( - count ) & 63 );
    329  1.1  ross         z1 |= a2>>negCount;
    330  1.1  ross         z0 |= a1>>negCount;
    331  1.1  ross     }
    332  1.1  ross     *z2Ptr = z2;
    333  1.1  ross     *z1Ptr = z1;
    334  1.1  ross     *z0Ptr = z0;
    335  1.1  ross 
    336  1.1  ross }
    337  1.1  ross 
    338  1.1  ross /*
    339  1.1  ross -------------------------------------------------------------------------------
    340  1.1  ross Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
    341  1.1  ross value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
    342  1.1  ross any carry out is lost.  The result is broken into two 64-bit pieces which
    343  1.1  ross are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
    344  1.1  ross -------------------------------------------------------------------------------
    345  1.1  ross */
    346  1.1  ross INLINE void
    347  1.1  ross  add128(
    348  1.1  ross      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
    349  1.1  ross {
    350  1.1  ross     bits64 z1;
    351  1.1  ross 
    352  1.1  ross     z1 = a1 + b1;
    353  1.1  ross     *z1Ptr = z1;
    354  1.1  ross     *z0Ptr = a0 + b0 + ( z1 < a1 );
    355  1.1  ross 
    356  1.1  ross }
    357  1.1  ross 
    358  1.1  ross /*
    359  1.1  ross -------------------------------------------------------------------------------
    360  1.1  ross Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
    361  1.1  ross 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
    362  1.1  ross modulo 2^192, so any carry out is lost.  The result is broken into three
    363  1.1  ross 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
    364  1.1  ross `z1Ptr', and `z2Ptr'.
    365  1.1  ross -------------------------------------------------------------------------------
    366  1.1  ross */
    367  1.1  ross INLINE void
    368  1.1  ross  add192(
    369  1.1  ross      bits64 a0,
    370  1.1  ross      bits64 a1,
    371  1.1  ross      bits64 a2,
    372  1.1  ross      bits64 b0,
    373  1.1  ross      bits64 b1,
    374  1.1  ross      bits64 b2,
    375  1.1  ross      bits64 *z0Ptr,
    376  1.1  ross      bits64 *z1Ptr,
    377  1.1  ross      bits64 *z2Ptr
    378  1.1  ross  )
    379  1.1  ross {
    380  1.1  ross     bits64 z0, z1, z2;
    381  1.1  ross     int8 carry0, carry1;
    382  1.1  ross 
    383  1.1  ross     z2 = a2 + b2;
    384  1.1  ross     carry1 = ( z2 < a2 );
    385  1.1  ross     z1 = a1 + b1;
    386  1.1  ross     carry0 = ( z1 < a1 );
    387  1.1  ross     z0 = a0 + b0;
    388  1.1  ross     z1 += carry1;
    389  1.1  ross     z0 += ( z1 < carry1 );
    390  1.1  ross     z0 += carry0;
    391  1.1  ross     *z2Ptr = z2;
    392  1.1  ross     *z1Ptr = z1;
    393  1.1  ross     *z0Ptr = z0;
    394  1.1  ross 
    395  1.1  ross }
    396  1.1  ross 
    397  1.1  ross /*
    398  1.1  ross -------------------------------------------------------------------------------
    399  1.1  ross Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
    400  1.1  ross 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
    401  1.1  ross 2^128, so any borrow out (carry out) is lost.  The result is broken into two
    402  1.1  ross 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
    403  1.1  ross `z1Ptr'.
    404  1.1  ross -------------------------------------------------------------------------------
    405  1.1  ross */
    406  1.1  ross INLINE void
    407  1.1  ross  sub128(
    408  1.1  ross      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
    409  1.1  ross {
    410  1.1  ross 
    411  1.1  ross     *z1Ptr = a1 - b1;
    412  1.1  ross     *z0Ptr = a0 - b0 - ( a1 < b1 );
    413  1.1  ross 
    414  1.1  ross }
    415  1.1  ross 
    416  1.1  ross /*
    417  1.1  ross -------------------------------------------------------------------------------
    418  1.1  ross Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
    419  1.1  ross from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
    420  1.1  ross Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
    421  1.1  ross result is broken into three 64-bit pieces which are stored at the locations
    422  1.1  ross pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
    423  1.1  ross -------------------------------------------------------------------------------
    424  1.1  ross */
    425  1.1  ross INLINE void
    426  1.1  ross  sub192(
    427  1.1  ross      bits64 a0,
    428  1.1  ross      bits64 a1,
    429  1.1  ross      bits64 a2,
    430  1.1  ross      bits64 b0,
    431  1.1  ross      bits64 b1,
    432  1.1  ross      bits64 b2,
    433  1.1  ross      bits64 *z0Ptr,
    434  1.1  ross      bits64 *z1Ptr,
    435  1.1  ross      bits64 *z2Ptr
    436  1.1  ross  )
    437  1.1  ross {
    438  1.1  ross     bits64 z0, z1, z2;
    439  1.1  ross     int8 borrow0, borrow1;
    440  1.1  ross 
    441  1.1  ross     z2 = a2 - b2;
    442  1.1  ross     borrow1 = ( a2 < b2 );
    443  1.1  ross     z1 = a1 - b1;
    444  1.1  ross     borrow0 = ( a1 < b1 );
    445  1.1  ross     z0 = a0 - b0;
    446  1.1  ross     z0 -= ( z1 < borrow1 );
    447  1.1  ross     z1 -= borrow1;
    448  1.1  ross     z0 -= borrow0;
    449  1.1  ross     *z2Ptr = z2;
    450  1.1  ross     *z1Ptr = z1;
    451  1.1  ross     *z0Ptr = z0;
    452  1.1  ross 
    453  1.1  ross }
    454  1.1  ross 
    455  1.1  ross /*
    456  1.1  ross -------------------------------------------------------------------------------
    457  1.1  ross Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
    458  1.1  ross into two 64-bit pieces which are stored at the locations pointed to by
    459  1.1  ross `z0Ptr' and `z1Ptr'.
    460  1.1  ross -------------------------------------------------------------------------------
    461  1.1  ross */
    462  1.1  ross INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
    463  1.1  ross {
    464  1.1  ross     bits32 aHigh, aLow, bHigh, bLow;
    465  1.1  ross     bits64 z0, zMiddleA, zMiddleB, z1;
    466  1.1  ross 
    467  1.1  ross     aLow = a;
    468  1.1  ross     aHigh = a>>32;
    469  1.1  ross     bLow = b;
    470  1.1  ross     bHigh = b>>32;
    471  1.1  ross     z1 = ( (bits64) aLow ) * bLow;
    472  1.1  ross     zMiddleA = ( (bits64) aLow ) * bHigh;
    473  1.1  ross     zMiddleB = ( (bits64) aHigh ) * bLow;
    474  1.1  ross     z0 = ( (bits64) aHigh ) * bHigh;
    475  1.1  ross     zMiddleA += zMiddleB;
    476  1.1  ross     z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
    477  1.1  ross     zMiddleA <<= 32;
    478  1.1  ross     z1 += zMiddleA;
    479  1.1  ross     z0 += ( z1 < zMiddleA );
    480  1.1  ross     *z1Ptr = z1;
    481  1.1  ross     *z0Ptr = z0;
    482  1.1  ross 
    483  1.1  ross }
    484  1.1  ross 
    485  1.1  ross /*
    486  1.1  ross -------------------------------------------------------------------------------
    487  1.1  ross Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
    488  1.1  ross `b' to obtain a 192-bit product.  The product is broken into three 64-bit
    489  1.1  ross pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
    490  1.1  ross `z2Ptr'.
    491  1.1  ross -------------------------------------------------------------------------------
    492  1.1  ross */
    493  1.1  ross INLINE void
    494  1.1  ross  mul128By64To192(
    495  1.1  ross      bits64 a0,
    496  1.1  ross      bits64 a1,
    497  1.1  ross      bits64 b,
    498  1.1  ross      bits64 *z0Ptr,
    499  1.1  ross      bits64 *z1Ptr,
    500  1.1  ross      bits64 *z2Ptr
    501  1.1  ross  )
    502  1.1  ross {
    503  1.1  ross     bits64 z0, z1, z2, more1;
    504  1.1  ross 
    505  1.1  ross     mul64To128( a1, b, &z1, &z2 );
    506  1.1  ross     mul64To128( a0, b, &z0, &more1 );
    507  1.1  ross     add128( z0, more1, 0, z1, &z0, &z1 );
    508  1.1  ross     *z2Ptr = z2;
    509  1.1  ross     *z1Ptr = z1;
    510  1.1  ross     *z0Ptr = z0;
    511  1.1  ross 
    512  1.1  ross }
    513  1.1  ross 
    514  1.1  ross /*
    515  1.1  ross -------------------------------------------------------------------------------
    516  1.1  ross Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
    517  1.1  ross 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
    518  1.1  ross product.  The product is broken into four 64-bit pieces which are stored at
    519  1.1  ross the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
    520  1.1  ross -------------------------------------------------------------------------------
    521  1.1  ross */
    522  1.1  ross INLINE void
    523  1.1  ross  mul128To256(
    524  1.1  ross      bits64 a0,
    525  1.1  ross      bits64 a1,
    526  1.1  ross      bits64 b0,
    527  1.1  ross      bits64 b1,
    528  1.1  ross      bits64 *z0Ptr,
    529  1.1  ross      bits64 *z1Ptr,
    530  1.1  ross      bits64 *z2Ptr,
    531  1.1  ross      bits64 *z3Ptr
    532  1.1  ross  )
    533  1.1  ross {
    534  1.1  ross     bits64 z0, z1, z2, z3;
    535  1.1  ross     bits64 more1, more2;
    536  1.1  ross 
    537  1.1  ross     mul64To128( a1, b1, &z2, &z3 );
    538  1.1  ross     mul64To128( a1, b0, &z1, &more2 );
    539  1.1  ross     add128( z1, more2, 0, z2, &z1, &z2 );
    540  1.1  ross     mul64To128( a0, b0, &z0, &more1 );
    541  1.1  ross     add128( z0, more1, 0, z1, &z0, &z1 );
    542  1.1  ross     mul64To128( a0, b1, &more1, &more2 );
    543  1.1  ross     add128( more1, more2, 0, z2, &more1, &z2 );
    544  1.1  ross     add128( z0, z1, 0, more1, &z0, &z1 );
    545  1.1  ross     *z3Ptr = z3;
    546  1.1  ross     *z2Ptr = z2;
    547  1.1  ross     *z1Ptr = z1;
    548  1.1  ross     *z0Ptr = z0;
    549  1.1  ross 
    550  1.1  ross }
    551  1.1  ross 
    552  1.1  ross /*
    553  1.1  ross -------------------------------------------------------------------------------
    554  1.1  ross Returns an approximation to the 64-bit integer quotient obtained by dividing
    555  1.1  ross `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
    556  1.1  ross divisor `b' must be at least 2^63.  If q is the exact quotient truncated
    557  1.1  ross toward zero, the approximation returned lies between q and q + 2 inclusive.
    558  1.1  ross If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
    559  1.1  ross unsigned integer is returned.
    560  1.1  ross -------------------------------------------------------------------------------
    561  1.1  ross */
    562  1.1  ross static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
    563  1.1  ross {
    564  1.1  ross     bits64 b0, b1;
    565  1.1  ross     bits64 rem0, rem1, term0, term1;
    566  1.1  ross     bits64 z;
    567  1.1  ross 
    568  1.1  ross     if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
    569  1.1  ross     b0 = b>>32;
    570  1.1  ross     z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
    571  1.1  ross     mul64To128( b, z, &term0, &term1 );
    572  1.1  ross     sub128( a0, a1, term0, term1, &rem0, &rem1 );
    573  1.1  ross     while ( ( (sbits64) rem0 ) < 0 ) {
    574  1.1  ross         z -= LIT64( 0x100000000 );
    575  1.1  ross         b1 = b<<32;
    576  1.1  ross         add128( rem0, rem1, b0, b1, &rem0, &rem1 );
    577  1.1  ross     }
    578  1.1  ross     rem0 = ( rem0<<32 ) | ( rem1>>32 );
    579  1.1  ross     z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
    580  1.1  ross     return z;
    581  1.1  ross 
    582  1.1  ross }
    583  1.1  ross 
    584  1.1  ross #ifndef SOFTFLOAT_FOR_GCC /* Not used */
    585  1.1  ross /*
    586  1.1  ross -------------------------------------------------------------------------------
    587  1.1  ross Returns an approximation to the square root of the 32-bit significand given
    588  1.1  ross by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
    589  1.1  ross `aExp' (the least significant bit) is 1, the integer returned approximates
    590  1.1  ross 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
    591  1.1  ross is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
    592  1.1  ross case, the approximation returned lies strictly within +/-2 of the exact
    593  1.1  ross value.
    594  1.1  ross -------------------------------------------------------------------------------
    595  1.1  ross */
    596  1.1  ross static bits32 estimateSqrt32( int16 aExp, bits32 a )
    597  1.1  ross {
    598  1.1  ross     static const bits16 sqrtOddAdjustments[] = {
    599  1.1  ross         0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
    600  1.1  ross         0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
    601  1.1  ross     };
    602  1.1  ross     static const bits16 sqrtEvenAdjustments[] = {
    603  1.1  ross         0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
    604  1.1  ross         0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
    605  1.1  ross     };
    606  1.1  ross     int8 index;
    607  1.1  ross     bits32 z;
    608  1.1  ross 
    609  1.1  ross     index = ( a>>27 ) & 15;
    610  1.1  ross     if ( aExp & 1 ) {
    611  1.1  ross         z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
    612  1.1  ross         z = ( ( a / z )<<14 ) + ( z<<15 );
    613  1.1  ross         a >>= 1;
    614  1.1  ross     }
    615  1.1  ross     else {
    616  1.1  ross         z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
    617  1.1  ross         z = a / z + z;
    618  1.1  ross         z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
    619  1.1  ross         if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
    620  1.1  ross     }
    621  1.1  ross     return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
    622  1.1  ross 
    623  1.1  ross }
    624  1.1  ross #endif
    625  1.1  ross 
    626  1.1  ross /*
    627  1.1  ross -------------------------------------------------------------------------------
    628  1.1  ross Returns the number of leading 0 bits before the most-significant 1 bit of
    629  1.1  ross `a'.  If `a' is zero, 32 is returned.
    630  1.1  ross -------------------------------------------------------------------------------
    631  1.1  ross */
    632  1.1  ross static int8 countLeadingZeros32( bits32 a )
    633  1.1  ross {
    634  1.1  ross     static const int8 countLeadingZerosHigh[] = {
    635  1.1  ross         8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
    636  1.1  ross         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    637  1.1  ross         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    638  1.1  ross         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    639  1.1  ross         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    640  1.1  ross         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    641  1.1  ross         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    642  1.1  ross         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    643  1.1  ross         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    644  1.1  ross         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    645  1.1  ross         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    646  1.1  ross         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    647  1.1  ross         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    648  1.1  ross         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    649  1.1  ross         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    650  1.1  ross         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    651  1.1  ross     };
    652  1.1  ross     int8 shiftCount;
    653  1.1  ross 
    654  1.1  ross     shiftCount = 0;
    655  1.1  ross     if ( a < 0x10000 ) {
    656  1.1  ross         shiftCount += 16;
    657  1.1  ross         a <<= 16;
    658  1.1  ross     }
    659  1.1  ross     if ( a < 0x1000000 ) {
    660  1.1  ross         shiftCount += 8;
    661  1.1  ross         a <<= 8;
    662  1.1  ross     }
    663  1.1  ross     shiftCount += countLeadingZerosHigh[ a>>24 ];
    664  1.1  ross     return shiftCount;
    665  1.1  ross 
    666  1.1  ross }
    667  1.1  ross 
    668  1.1  ross /*
    669  1.1  ross -------------------------------------------------------------------------------
    670  1.1  ross Returns the number of leading 0 bits before the most-significant 1 bit of
    671  1.1  ross `a'.  If `a' is zero, 64 is returned.
    672  1.1  ross -------------------------------------------------------------------------------
    673  1.1  ross */
    674  1.1  ross static int8 countLeadingZeros64( bits64 a )
    675  1.1  ross {
    676  1.1  ross     int8 shiftCount;
    677  1.1  ross 
    678  1.1  ross     shiftCount = 0;
    679  1.1  ross     if ( a < ( (bits64) 1 )<<32 ) {
    680  1.1  ross         shiftCount += 32;
    681  1.1  ross     }
    682  1.1  ross     else {
    683  1.1  ross         a >>= 32;
    684  1.1  ross     }
    685  1.1  ross     shiftCount += countLeadingZeros32( a );
    686  1.1  ross     return shiftCount;
    687  1.1  ross 
    688  1.1  ross }
    689  1.1  ross 
    690  1.1  ross /*
    691  1.1  ross -------------------------------------------------------------------------------
    692  1.1  ross Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
    693  1.1  ross is equal to the 128-bit value formed by concatenating `b0' and `b1'.
    694  1.1  ross Otherwise, returns 0.
    695  1.1  ross -------------------------------------------------------------------------------
    696  1.1  ross */
    697  1.1  ross INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
    698  1.1  ross {
    699  1.1  ross 
    700  1.1  ross     return ( a0 == b0 ) && ( a1 == b1 );
    701  1.1  ross 
    702  1.1  ross }
    703  1.1  ross 
    704  1.1  ross /*
    705  1.1  ross -------------------------------------------------------------------------------
    706  1.1  ross Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
    707  1.1  ross than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
    708  1.1  ross Otherwise, returns 0.
    709  1.1  ross -------------------------------------------------------------------------------
    710  1.1  ross */
    711  1.1  ross INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
    712  1.1  ross {
    713  1.1  ross 
    714  1.1  ross     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
    715  1.1  ross 
    716  1.1  ross }
    717  1.1  ross 
    718  1.1  ross /*
    719  1.1  ross -------------------------------------------------------------------------------
    720  1.1  ross Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
    721  1.1  ross than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
    722  1.1  ross returns 0.
    723  1.1  ross -------------------------------------------------------------------------------
    724  1.1  ross */
    725  1.1  ross INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
    726  1.1  ross {
    727  1.1  ross 
    728  1.1  ross     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
    729  1.1  ross 
    730  1.1  ross }
    731  1.1  ross 
    732  1.1  ross /*
    733  1.1  ross -------------------------------------------------------------------------------
    734  1.1  ross Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
    735  1.1  ross not equal to the 128-bit value formed by concatenating `b0' and `b1'.
    736  1.1  ross Otherwise, returns 0.
    737  1.1  ross -------------------------------------------------------------------------------
    738  1.1  ross */
    739  1.1  ross INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
    740  1.1  ross {
    741  1.1  ross 
    742  1.1  ross     return ( a0 != b0 ) || ( a1 != b1 );
    743  1.1  ross 
    744  1.1  ross }
    745  1.1  ross 
    746