Home | History | Annotate | Line # | Download | only in bits32
      1  1.1  bjh21 
      2  1.1  bjh21 /*
      3  1.1  bjh21 ===============================================================================
      4  1.1  bjh21 
      5  1.1  bjh21 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
      6  1.1  bjh21 Arithmetic Package, Release 2a.
      7  1.1  bjh21 
      8  1.1  bjh21 Written by John R. Hauser.  This work was made possible in part by the
      9  1.1  bjh21 International Computer Science Institute, located at Suite 600, 1947 Center
     10  1.1  bjh21 Street, Berkeley, California 94704.  Funding was partially provided by the
     11  1.1  bjh21 National Science Foundation under grant MIP-9311980.  The original version
     12  1.1  bjh21 of this code was written as part of a project to build a fixed-point vector
     13  1.1  bjh21 processor in collaboration with the University of California at Berkeley,
     14  1.1  bjh21 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
     15  1.1  bjh21 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
     16  1.1  bjh21 arithmetic/SoftFloat.html'.
     17  1.1  bjh21 
     18  1.1  bjh21 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
     19  1.1  bjh21 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
     20  1.1  bjh21 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
     21  1.1  bjh21 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
     22  1.1  bjh21 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
     23  1.1  bjh21 
     24  1.1  bjh21 Derivative works are acceptable, even for commercial purposes, so long as
     25  1.1  bjh21 (1) they include prominent notice that the work is derivative, and (2) they
     26  1.1  bjh21 include prominent notice akin to these four paragraphs for those parts of
     27  1.1  bjh21 this code that are retained.
     28  1.1  bjh21 
     29  1.1  bjh21 ===============================================================================
     30  1.1  bjh21 */
     31  1.1  bjh21 
     32  1.1  bjh21 /*
     33  1.1  bjh21 -------------------------------------------------------------------------------
     34  1.1  bjh21 Shifts `a' right by the number of bits given in `count'.  If any nonzero
     35  1.1  bjh21 bits are shifted off, they are ``jammed'' into the least significant bit of
     36  1.1  bjh21 the result by setting the least significant bit to 1.  The value of `count'
     37  1.1  bjh21 can be arbitrarily large; in particular, if `count' is greater than 32, the
     38  1.1  bjh21 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
     39  1.1  bjh21 The result is stored in the location pointed to by `zPtr'.
     40  1.1  bjh21 -------------------------------------------------------------------------------
     41  1.1  bjh21 */
     42  1.1  bjh21 INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
     43  1.1  bjh21 {
     44  1.1  bjh21     bits32 z;
     45  1.1  bjh21 
     46  1.1  bjh21     if ( count == 0 ) {
     47  1.1  bjh21         z = a;
     48  1.1  bjh21     }
     49  1.1  bjh21     else if ( count < 32 ) {
     50  1.1  bjh21         z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
     51  1.1  bjh21     }
     52  1.1  bjh21     else {
     53  1.1  bjh21         z = ( a != 0 );
     54  1.1  bjh21     }
     55  1.1  bjh21     *zPtr = z;
     56  1.1  bjh21 
     57  1.1  bjh21 }
     58  1.1  bjh21 
     59  1.1  bjh21 /*
     60  1.1  bjh21 -------------------------------------------------------------------------------
     61  1.1  bjh21 Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
     62  1.1  bjh21 number of bits given in `count'.  Any bits shifted off are lost.  The value
     63  1.1  bjh21 of `count' can be arbitrarily large; in particular, if `count' is greater
     64  1.1  bjh21 than 64, the result will be 0.  The result is broken into two 32-bit pieces
     65  1.1  bjh21 which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
     66  1.1  bjh21 -------------------------------------------------------------------------------
     67  1.1  bjh21 */
     68  1.1  bjh21 INLINE void
     69  1.1  bjh21  shift64Right(
     70  1.1  bjh21      bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
     71  1.1  bjh21 {
     72  1.1  bjh21     bits32 z0, z1;
     73  1.1  bjh21     int8 negCount = ( - count ) & 31;
     74  1.1  bjh21 
     75  1.1  bjh21     if ( count == 0 ) {
     76  1.1  bjh21         z1 = a1;
     77  1.1  bjh21         z0 = a0;
     78  1.1  bjh21     }
     79  1.1  bjh21     else if ( count < 32 ) {
     80  1.1  bjh21         z1 = ( a0<<negCount ) | ( a1>>count );
     81  1.1  bjh21         z0 = a0>>count;
     82  1.1  bjh21     }
     83  1.1  bjh21     else {
     84  1.1  bjh21         z1 = ( count < 64 ) ? ( a0>>( count & 31 ) ) : 0;
     85  1.1  bjh21         z0 = 0;
     86  1.1  bjh21     }
     87  1.1  bjh21     *z1Ptr = z1;
     88  1.1  bjh21     *z0Ptr = z0;
     89  1.1  bjh21 
     90  1.1  bjh21 }
     91  1.1  bjh21 
     92  1.1  bjh21 /*
     93  1.1  bjh21 -------------------------------------------------------------------------------
     94  1.1  bjh21 Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
     95  1.1  bjh21 number of bits given in `count'.  If any nonzero bits are shifted off, they
     96  1.1  bjh21 are ``jammed'' into the least significant bit of the result by setting the
     97  1.1  bjh21 least significant bit to 1.  The value of `count' can be arbitrarily large;
     98  1.1  bjh21 in particular, if `count' is greater than 64, the result will be either 0
     99  1.1  bjh21 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
    100  1.1  bjh21 nonzero.  The result is broken into two 32-bit pieces which are stored at
    101  1.1  bjh21 the locations pointed to by `z0Ptr' and `z1Ptr'.
    102  1.1  bjh21 -------------------------------------------------------------------------------
    103  1.1  bjh21 */
    104  1.1  bjh21 INLINE void
    105  1.1  bjh21  shift64RightJamming(
    106  1.1  bjh21      bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
    107  1.1  bjh21 {
    108  1.1  bjh21     bits32 z0, z1;
    109  1.1  bjh21     int8 negCount = ( - count ) & 31;
    110  1.1  bjh21 
    111  1.1  bjh21     if ( count == 0 ) {
    112  1.1  bjh21         z1 = a1;
    113  1.1  bjh21         z0 = a0;
    114  1.1  bjh21     }
    115  1.1  bjh21     else if ( count < 32 ) {
    116  1.1  bjh21         z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
    117  1.1  bjh21         z0 = a0>>count;
    118  1.1  bjh21     }
    119  1.1  bjh21     else {
    120  1.1  bjh21         if ( count == 32 ) {
    121  1.1  bjh21             z1 = a0 | ( a1 != 0 );
    122  1.1  bjh21         }
    123  1.1  bjh21         else if ( count < 64 ) {
    124  1.1  bjh21             z1 = ( a0>>( count & 31 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
    125  1.1  bjh21         }
    126  1.1  bjh21         else {
    127  1.1  bjh21             z1 = ( ( a0 | a1 ) != 0 );
    128  1.1  bjh21         }
    129  1.1  bjh21         z0 = 0;
    130  1.1  bjh21     }
    131  1.1  bjh21     *z1Ptr = z1;
    132  1.1  bjh21     *z0Ptr = z0;
    133  1.1  bjh21 
    134  1.1  bjh21 }
    135  1.1  bjh21 
    136  1.1  bjh21 /*
    137  1.1  bjh21 -------------------------------------------------------------------------------
    138  1.1  bjh21 Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right
    139  1.1  bjh21 by 32 _plus_ the number of bits given in `count'.  The shifted result is
    140  1.1  bjh21 at most 64 nonzero bits; these are broken into two 32-bit pieces which are
    141  1.1  bjh21 stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
    142  1.1  bjh21 off form a third 32-bit result as follows:  The _last_ bit shifted off is
    143  1.1  bjh21 the most-significant bit of the extra result, and the other 31 bits of the
    144  1.1  bjh21 extra result are all zero if and only if _all_but_the_last_ bits shifted off
    145  1.1  bjh21 were all zero.  This extra result is stored in the location pointed to by
    146  1.1  bjh21 `z2Ptr'.  The value of `count' can be arbitrarily large.
    147  1.1  bjh21     (This routine makes more sense if `a0', `a1', and `a2' are considered
    148  1.1  bjh21 to form a fixed-point value with binary point between `a1' and `a2'.  This
    149  1.1  bjh21 fixed-point value is shifted right by the number of bits given in `count',
    150  1.1  bjh21 and the integer part of the result is returned at the locations pointed to
    151  1.1  bjh21 by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
    152  1.1  bjh21 corrupted as described above, and is returned at the location pointed to by
    153  1.1  bjh21 `z2Ptr'.)
    154  1.1  bjh21 -------------------------------------------------------------------------------
    155  1.1  bjh21 */
    156  1.1  bjh21 INLINE void
    157  1.1  bjh21  shift64ExtraRightJamming(
    158  1.1  bjh21      bits32 a0,
    159  1.1  bjh21      bits32 a1,
    160  1.1  bjh21      bits32 a2,
    161  1.1  bjh21      int16 count,
    162  1.1  bjh21      bits32 *z0Ptr,
    163  1.1  bjh21      bits32 *z1Ptr,
    164  1.1  bjh21      bits32 *z2Ptr
    165  1.1  bjh21  )
    166  1.1  bjh21 {
    167  1.1  bjh21     bits32 z0, z1, z2;
    168  1.1  bjh21     int8 negCount = ( - count ) & 31;
    169  1.1  bjh21 
    170  1.1  bjh21     if ( count == 0 ) {
    171  1.1  bjh21         z2 = a2;
    172  1.1  bjh21         z1 = a1;
    173  1.1  bjh21         z0 = a0;
    174  1.1  bjh21     }
    175  1.1  bjh21     else {
    176  1.1  bjh21         if ( count < 32 ) {
    177  1.1  bjh21             z2 = a1<<negCount;
    178  1.1  bjh21             z1 = ( a0<<negCount ) | ( a1>>count );
    179  1.1  bjh21             z0 = a0>>count;
    180  1.1  bjh21         }
    181  1.1  bjh21         else {
    182  1.1  bjh21             if ( count == 32 ) {
    183  1.1  bjh21                 z2 = a1;
    184  1.1  bjh21                 z1 = a0;
    185  1.1  bjh21             }
    186  1.1  bjh21             else {
    187  1.1  bjh21                 a2 |= a1;
    188  1.1  bjh21                 if ( count < 64 ) {
    189  1.1  bjh21                     z2 = a0<<negCount;
    190  1.1  bjh21                     z1 = a0>>( count & 31 );
    191  1.1  bjh21                 }
    192  1.1  bjh21                 else {
    193  1.1  bjh21                     z2 = ( count == 64 ) ? a0 : ( a0 != 0 );
    194  1.1  bjh21                     z1 = 0;
    195  1.1  bjh21                 }
    196  1.1  bjh21             }
    197  1.1  bjh21             z0 = 0;
    198  1.1  bjh21         }
    199  1.1  bjh21         z2 |= ( a2 != 0 );
    200  1.1  bjh21     }
    201  1.1  bjh21     *z2Ptr = z2;
    202  1.1  bjh21     *z1Ptr = z1;
    203  1.1  bjh21     *z0Ptr = z0;
    204  1.1  bjh21 
    205  1.1  bjh21 }
    206  1.1  bjh21 
    207  1.1  bjh21 /*
    208  1.1  bjh21 -------------------------------------------------------------------------------
    209  1.1  bjh21 Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the
    210  1.1  bjh21 number of bits given in `count'.  Any bits shifted off are lost.  The value
    211  1.1  bjh21 of `count' must be less than 32.  The result is broken into two 32-bit
    212  1.1  bjh21 pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
    213  1.1  bjh21 -------------------------------------------------------------------------------
    214  1.1  bjh21 */
    215  1.1  bjh21 INLINE void
    216  1.1  bjh21  shortShift64Left(
    217  1.1  bjh21      bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
    218  1.1  bjh21 {
    219  1.1  bjh21 
    220  1.1  bjh21     *z1Ptr = a1<<count;
    221  1.1  bjh21     *z0Ptr =
    222  1.1  bjh21         ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 31 ) );
    223  1.1  bjh21 
    224  1.1  bjh21 }
    225  1.1  bjh21 
    226  1.1  bjh21 /*
    227  1.1  bjh21 -------------------------------------------------------------------------------
    228  1.1  bjh21 Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' left
    229  1.1  bjh21 by the number of bits given in `count'.  Any bits shifted off are lost.
    230  1.1  bjh21 The value of `count' must be less than 32.  The result is broken into three
    231  1.1  bjh21 32-bit pieces which are stored at the locations pointed to by `z0Ptr',
    232  1.1  bjh21 `z1Ptr', and `z2Ptr'.
    233  1.1  bjh21 -------------------------------------------------------------------------------
    234  1.1  bjh21 */
    235  1.1  bjh21 INLINE void
    236  1.1  bjh21  shortShift96Left(
    237  1.1  bjh21      bits32 a0,
    238  1.1  bjh21      bits32 a1,
    239  1.1  bjh21      bits32 a2,
    240  1.1  bjh21      int16 count,
    241  1.1  bjh21      bits32 *z0Ptr,
    242  1.1  bjh21      bits32 *z1Ptr,
    243  1.1  bjh21      bits32 *z2Ptr
    244  1.1  bjh21  )
    245  1.1  bjh21 {
    246  1.1  bjh21     bits32 z0, z1, z2;
    247  1.1  bjh21     int8 negCount;
    248  1.1  bjh21 
    249  1.1  bjh21     z2 = a2<<count;
    250  1.1  bjh21     z1 = a1<<count;
    251  1.1  bjh21     z0 = a0<<count;
    252  1.1  bjh21     if ( 0 < count ) {
    253  1.1  bjh21         negCount = ( ( - count ) & 31 );
    254  1.1  bjh21         z1 |= a2>>negCount;
    255  1.1  bjh21         z0 |= a1>>negCount;
    256  1.1  bjh21     }
    257  1.1  bjh21     *z2Ptr = z2;
    258  1.1  bjh21     *z1Ptr = z1;
    259  1.1  bjh21     *z0Ptr = z0;
    260  1.1  bjh21 
    261  1.1  bjh21 }
    262  1.1  bjh21 
    263  1.1  bjh21 /*
    264  1.1  bjh21 -------------------------------------------------------------------------------
    265  1.1  bjh21 Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit
    266  1.1  bjh21 value formed by concatenating `b0' and `b1'.  Addition is modulo 2^64, so
    267  1.1  bjh21 any carry out is lost.  The result is broken into two 32-bit pieces which
    268  1.1  bjh21 are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
    269  1.1  bjh21 -------------------------------------------------------------------------------
    270  1.1  bjh21 */
    271  1.1  bjh21 INLINE void
    272  1.1  bjh21  add64(
    273  1.1  bjh21      bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )
    274  1.1  bjh21 {
    275  1.1  bjh21     bits32 z1;
    276  1.1  bjh21 
    277  1.1  bjh21     z1 = a1 + b1;
    278  1.1  bjh21     *z1Ptr = z1;
    279  1.1  bjh21     *z0Ptr = a0 + b0 + ( z1 < a1 );
    280  1.1  bjh21 
    281  1.1  bjh21 }
    282  1.1  bjh21 
    283  1.1  bjh21 /*
    284  1.1  bjh21 -------------------------------------------------------------------------------
    285  1.1  bjh21 Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the
    286  1.1  bjh21 96-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
    287  1.1  bjh21 modulo 2^96, so any carry out is lost.  The result is broken into three
    288  1.1  bjh21 32-bit pieces which are stored at the locations pointed to by `z0Ptr',
    289  1.1  bjh21 `z1Ptr', and `z2Ptr'.
    290  1.1  bjh21 -------------------------------------------------------------------------------
    291  1.1  bjh21 */
    292  1.1  bjh21 INLINE void
    293  1.1  bjh21  add96(
    294  1.1  bjh21      bits32 a0,
    295  1.1  bjh21      bits32 a1,
    296  1.1  bjh21      bits32 a2,
    297  1.1  bjh21      bits32 b0,
    298  1.1  bjh21      bits32 b1,
    299  1.1  bjh21      bits32 b2,
    300  1.1  bjh21      bits32 *z0Ptr,
    301  1.1  bjh21      bits32 *z1Ptr,
    302  1.1  bjh21      bits32 *z2Ptr
    303  1.1  bjh21  )
    304  1.1  bjh21 {
    305  1.1  bjh21     bits32 z0, z1, z2;
    306  1.1  bjh21     int8 carry0, carry1;
    307  1.1  bjh21 
    308  1.1  bjh21     z2 = a2 + b2;
    309  1.1  bjh21     carry1 = ( z2 < a2 );
    310  1.1  bjh21     z1 = a1 + b1;
    311  1.1  bjh21     carry0 = ( z1 < a1 );
    312  1.1  bjh21     z0 = a0 + b0;
    313  1.1  bjh21     z1 += carry1;
    314  1.2  lukem     z0 += ( z1 < (bits32)carry1 );
    315  1.1  bjh21     z0 += carry0;
    316  1.1  bjh21     *z2Ptr = z2;
    317  1.1  bjh21     *z1Ptr = z1;
    318  1.1  bjh21     *z0Ptr = z0;
    319  1.1  bjh21 
    320  1.1  bjh21 }
    321  1.1  bjh21 
    322  1.1  bjh21 /*
    323  1.1  bjh21 -------------------------------------------------------------------------------
    324  1.1  bjh21 Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the
    325  1.1  bjh21 64-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
    326  1.1  bjh21 2^64, so any borrow out (carry out) is lost.  The result is broken into two
    327  1.1  bjh21 32-bit pieces which are stored at the locations pointed to by `z0Ptr' and
    328  1.1  bjh21 `z1Ptr'.
    329  1.1  bjh21 -------------------------------------------------------------------------------
    330  1.1  bjh21 */
    331  1.1  bjh21 INLINE void
    332  1.1  bjh21  sub64(
    333  1.1  bjh21      bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )
    334  1.1  bjh21 {
    335  1.1  bjh21 
    336  1.1  bjh21     *z1Ptr = a1 - b1;
    337  1.1  bjh21     *z0Ptr = a0 - b0 - ( a1 < b1 );
    338  1.1  bjh21 
    339  1.1  bjh21 }
    340  1.1  bjh21 
    341  1.1  bjh21 /*
    342  1.1  bjh21 -------------------------------------------------------------------------------
    343  1.1  bjh21 Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from
    344  1.1  bjh21 the 96-bit value formed by concatenating `a0', `a1', and `a2'.  Subtraction
    345  1.1  bjh21 is modulo 2^96, so any borrow out (carry out) is lost.  The result is broken
    346  1.1  bjh21 into three 32-bit pieces which are stored at the locations pointed to by
    347  1.1  bjh21 `z0Ptr', `z1Ptr', and `z2Ptr'.
    348  1.1  bjh21 -------------------------------------------------------------------------------
    349  1.1  bjh21 */
    350  1.1  bjh21 INLINE void
    351  1.1  bjh21  sub96(
    352  1.1  bjh21      bits32 a0,
    353  1.1  bjh21      bits32 a1,
    354  1.1  bjh21      bits32 a2,
    355  1.1  bjh21      bits32 b0,
    356  1.1  bjh21      bits32 b1,
    357  1.1  bjh21      bits32 b2,
    358  1.1  bjh21      bits32 *z0Ptr,
    359  1.1  bjh21      bits32 *z1Ptr,
    360  1.1  bjh21      bits32 *z2Ptr
    361  1.1  bjh21  )
    362  1.1  bjh21 {
    363  1.1  bjh21     bits32 z0, z1, z2;
    364  1.1  bjh21     int8 borrow0, borrow1;
    365  1.1  bjh21 
    366  1.1  bjh21     z2 = a2 - b2;
    367  1.1  bjh21     borrow1 = ( a2 < b2 );
    368  1.1  bjh21     z1 = a1 - b1;
    369  1.1  bjh21     borrow0 = ( a1 < b1 );
    370  1.1  bjh21     z0 = a0 - b0;
    371  1.2  lukem     z0 -= ( z1 < (bits32)borrow1 );
    372  1.1  bjh21     z1 -= borrow1;
    373  1.1  bjh21     z0 -= borrow0;
    374  1.1  bjh21     *z2Ptr = z2;
    375  1.1  bjh21     *z1Ptr = z1;
    376  1.1  bjh21     *z0Ptr = z0;
    377  1.1  bjh21 
    378  1.1  bjh21 }
    379  1.1  bjh21 
    380  1.1  bjh21 /*
    381  1.1  bjh21 -------------------------------------------------------------------------------
    382  1.1  bjh21 Multiplies `a' by `b' to obtain a 64-bit product.  The product is broken
    383  1.1  bjh21 into two 32-bit pieces which are stored at the locations pointed to by
    384  1.1  bjh21 `z0Ptr' and `z1Ptr'.
    385  1.1  bjh21 -------------------------------------------------------------------------------
    386  1.1  bjh21 */
    387  1.1  bjh21 INLINE void mul32To64( bits32 a, bits32 b, bits32 *z0Ptr, bits32 *z1Ptr )
    388  1.1  bjh21 {
    389  1.1  bjh21     bits16 aHigh, aLow, bHigh, bLow;
    390  1.1  bjh21     bits32 z0, zMiddleA, zMiddleB, z1;
    391  1.1  bjh21 
    392  1.1  bjh21     aLow = a;
    393  1.1  bjh21     aHigh = a>>16;
    394  1.1  bjh21     bLow = b;
    395  1.1  bjh21     bHigh = b>>16;
    396  1.1  bjh21     z1 = ( (bits32) aLow ) * bLow;
    397  1.1  bjh21     zMiddleA = ( (bits32) aLow ) * bHigh;
    398  1.1  bjh21     zMiddleB = ( (bits32) aHigh ) * bLow;
    399  1.1  bjh21     z0 = ( (bits32) aHigh ) * bHigh;
    400  1.1  bjh21     zMiddleA += zMiddleB;
    401  1.1  bjh21     z0 += ( ( (bits32) ( zMiddleA < zMiddleB ) )<<16 ) + ( zMiddleA>>16 );
    402  1.1  bjh21     zMiddleA <<= 16;
    403  1.1  bjh21     z1 += zMiddleA;
    404  1.1  bjh21     z0 += ( z1 < zMiddleA );
    405  1.1  bjh21     *z1Ptr = z1;
    406  1.1  bjh21     *z0Ptr = z0;
    407  1.1  bjh21 
    408  1.1  bjh21 }
    409  1.1  bjh21 
    410  1.1  bjh21 /*
    411  1.1  bjh21 -------------------------------------------------------------------------------
    412  1.1  bjh21 Multiplies the 64-bit value formed by concatenating `a0' and `a1' by `b'
    413  1.1  bjh21 to obtain a 96-bit product.  The product is broken into three 32-bit pieces
    414  1.1  bjh21 which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
    415  1.1  bjh21 `z2Ptr'.
    416  1.1  bjh21 -------------------------------------------------------------------------------
    417  1.1  bjh21 */
    418  1.1  bjh21 INLINE void
    419  1.1  bjh21  mul64By32To96(
    420  1.1  bjh21      bits32 a0,
    421  1.1  bjh21      bits32 a1,
    422  1.1  bjh21      bits32 b,
    423  1.1  bjh21      bits32 *z0Ptr,
    424  1.1  bjh21      bits32 *z1Ptr,
    425  1.1  bjh21      bits32 *z2Ptr
    426  1.1  bjh21  )
    427  1.1  bjh21 {
    428  1.1  bjh21     bits32 z0, z1, z2, more1;
    429  1.1  bjh21 
    430  1.1  bjh21     mul32To64( a1, b, &z1, &z2 );
    431  1.1  bjh21     mul32To64( a0, b, &z0, &more1 );
    432  1.1  bjh21     add64( z0, more1, 0, z1, &z0, &z1 );
    433  1.1  bjh21     *z2Ptr = z2;
    434  1.1  bjh21     *z1Ptr = z1;
    435  1.1  bjh21     *z0Ptr = z0;
    436  1.1  bjh21 
    437  1.1  bjh21 }
    438  1.1  bjh21 
    439  1.1  bjh21 /*
    440  1.1  bjh21 -------------------------------------------------------------------------------
    441  1.1  bjh21 Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the
    442  1.1  bjh21 64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit
    443  1.1  bjh21 product.  The product is broken into four 32-bit pieces which are stored at
    444  1.1  bjh21 the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
    445  1.1  bjh21 -------------------------------------------------------------------------------
    446  1.1  bjh21 */
    447  1.1  bjh21 INLINE void
    448  1.1  bjh21  mul64To128(
    449  1.1  bjh21      bits32 a0,
    450  1.1  bjh21      bits32 a1,
    451  1.1  bjh21      bits32 b0,
    452  1.1  bjh21      bits32 b1,
    453  1.1  bjh21      bits32 *z0Ptr,
    454  1.1  bjh21      bits32 *z1Ptr,
    455  1.1  bjh21      bits32 *z2Ptr,
    456  1.1  bjh21      bits32 *z3Ptr
    457  1.1  bjh21  )
    458  1.1  bjh21 {
    459  1.1  bjh21     bits32 z0, z1, z2, z3;
    460  1.1  bjh21     bits32 more1, more2;
    461  1.1  bjh21 
    462  1.1  bjh21     mul32To64( a1, b1, &z2, &z3 );
    463  1.1  bjh21     mul32To64( a1, b0, &z1, &more2 );
    464  1.1  bjh21     add64( z1, more2, 0, z2, &z1, &z2 );
    465  1.1  bjh21     mul32To64( a0, b0, &z0, &more1 );
    466  1.1  bjh21     add64( z0, more1, 0, z1, &z0, &z1 );
    467  1.1  bjh21     mul32To64( a0, b1, &more1, &more2 );
    468  1.1  bjh21     add64( more1, more2, 0, z2, &more1, &z2 );
    469  1.1  bjh21     add64( z0, z1, 0, more1, &z0, &z1 );
    470  1.1  bjh21     *z3Ptr = z3;
    471  1.1  bjh21     *z2Ptr = z2;
    472  1.1  bjh21     *z1Ptr = z1;
    473  1.1  bjh21     *z0Ptr = z0;
    474  1.1  bjh21 
    475  1.1  bjh21 }
    476  1.1  bjh21 
    477  1.1  bjh21 /*
    478  1.1  bjh21 -------------------------------------------------------------------------------
    479  1.1  bjh21 Returns an approximation to the 32-bit integer quotient obtained by dividing
    480  1.1  bjh21 `b' into the 64-bit value formed by concatenating `a0' and `a1'.  The
    481  1.1  bjh21 divisor `b' must be at least 2^31.  If q is the exact quotient truncated
    482  1.1  bjh21 toward zero, the approximation returned lies between q and q + 2 inclusive.
    483  1.1  bjh21 If the exact quotient q is larger than 32 bits, the maximum positive 32-bit
    484  1.1  bjh21 unsigned integer is returned.
    485  1.1  bjh21 -------------------------------------------------------------------------------
    486  1.1  bjh21 */
    487  1.1  bjh21 static bits32 estimateDiv64To32( bits32 a0, bits32 a1, bits32 b )
    488  1.1  bjh21 {
    489  1.1  bjh21     bits32 b0, b1;
    490  1.1  bjh21     bits32 rem0, rem1, term0, term1;
    491  1.1  bjh21     bits32 z;
    492  1.1  bjh21 
    493  1.1  bjh21     if ( b <= a0 ) return 0xFFFFFFFF;
    494  1.1  bjh21     b0 = b>>16;
    495  1.1  bjh21     z = ( b0<<16 <= a0 ) ? 0xFFFF0000 : ( a0 / b0 )<<16;
    496  1.1  bjh21     mul32To64( b, z, &term0, &term1 );
    497  1.1  bjh21     sub64( a0, a1, term0, term1, &rem0, &rem1 );
    498  1.1  bjh21     while ( ( (sbits32) rem0 ) < 0 ) {
    499  1.1  bjh21         z -= 0x10000;
    500  1.1  bjh21         b1 = b<<16;
    501  1.1  bjh21         add64( rem0, rem1, b0, b1, &rem0, &rem1 );
    502  1.1  bjh21     }
    503  1.1  bjh21     rem0 = ( rem0<<16 ) | ( rem1>>16 );
    504  1.1  bjh21     z |= ( b0<<16 <= rem0 ) ? 0xFFFF : rem0 / b0;
    505  1.1  bjh21     return z;
    506  1.1  bjh21 
    507  1.1  bjh21 }
    508  1.1  bjh21 
    509  1.1  bjh21 #ifndef SOFTFLOAT_FOR_GCC
    510  1.1  bjh21 /*
    511  1.1  bjh21 -------------------------------------------------------------------------------
    512  1.1  bjh21 Returns an approximation to the square root of the 32-bit significand given
    513  1.1  bjh21 by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
    514  1.1  bjh21 `aExp' (the least significant bit) is 1, the integer returned approximates
    515  1.1  bjh21 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
    516  1.1  bjh21 is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
    517  1.1  bjh21 case, the approximation returned lies strictly within +/-2 of the exact
    518  1.1  bjh21 value.
    519  1.1  bjh21 -------------------------------------------------------------------------------
    520  1.1  bjh21 */
    521  1.1  bjh21 static bits32 estimateSqrt32( int16 aExp, bits32 a )
    522  1.1  bjh21 {
    523  1.1  bjh21     static const bits16 sqrtOddAdjustments[] = {
    524  1.1  bjh21         0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
    525  1.1  bjh21         0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
    526  1.1  bjh21     };
    527  1.1  bjh21     static const bits16 sqrtEvenAdjustments[] = {
    528  1.1  bjh21         0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
    529  1.1  bjh21         0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
    530  1.1  bjh21     };
    531  1.1  bjh21     int8 index;
    532  1.1  bjh21     bits32 z;
    533  1.1  bjh21 
    534  1.1  bjh21     index = ( a>>27 ) & 15;
    535  1.1  bjh21     if ( aExp & 1 ) {
    536  1.1  bjh21         z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
    537  1.1  bjh21         z = ( ( a / z )<<14 ) + ( z<<15 );
    538  1.1  bjh21         a >>= 1;
    539  1.1  bjh21     }
    540  1.1  bjh21     else {
    541  1.1  bjh21         z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
    542  1.1  bjh21         z = a / z + z;
    543  1.1  bjh21         z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
    544  1.1  bjh21         if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
    545  1.1  bjh21     }
    546  1.1  bjh21     return ( ( estimateDiv64To32( a, 0, z ) )>>1 ) + ( z>>1 );
    547  1.1  bjh21 
    548  1.1  bjh21 }
    549  1.1  bjh21 #endif
    550  1.1  bjh21 
    551  1.1  bjh21 /*
    552  1.1  bjh21 -------------------------------------------------------------------------------
    553  1.1  bjh21 Returns the number of leading 0 bits before the most-significant 1 bit of
    554  1.1  bjh21 `a'.  If `a' is zero, 32 is returned.
    555  1.1  bjh21 -------------------------------------------------------------------------------
    556  1.1  bjh21 */
    557  1.1  bjh21 static int8 countLeadingZeros32( bits32 a )
    558  1.1  bjh21 {
    559  1.1  bjh21     static const int8 countLeadingZerosHigh[] = {
    560  1.1  bjh21         8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
    561  1.1  bjh21         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    562  1.1  bjh21         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    563  1.1  bjh21         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    564  1.1  bjh21         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    565  1.1  bjh21         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    566  1.1  bjh21         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    567  1.1  bjh21         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    568  1.1  bjh21         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    569  1.1  bjh21         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    570  1.1  bjh21         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    571  1.1  bjh21         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    572  1.1  bjh21         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    573  1.1  bjh21         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    574  1.1  bjh21         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    575  1.1  bjh21         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    576  1.1  bjh21     };
    577  1.1  bjh21     int8 shiftCount;
    578  1.1  bjh21 
    579  1.1  bjh21     shiftCount = 0;
    580  1.1  bjh21     if ( a < 0x10000 ) {
    581  1.1  bjh21         shiftCount += 16;
    582  1.1  bjh21         a <<= 16;
    583  1.1  bjh21     }
    584  1.1  bjh21     if ( a < 0x1000000 ) {
    585  1.1  bjh21         shiftCount += 8;
    586  1.1  bjh21         a <<= 8;
    587  1.1  bjh21     }
    588  1.1  bjh21     shiftCount += countLeadingZerosHigh[ a>>24 ];
    589  1.1  bjh21     return shiftCount;
    590  1.1  bjh21 
    591  1.1  bjh21 }
    592  1.1  bjh21 
    593  1.1  bjh21 /*
    594  1.1  bjh21 -------------------------------------------------------------------------------
    595  1.1  bjh21 Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is
    596  1.1  bjh21 equal to the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
    597  1.1  bjh21 returns 0.
    598  1.1  bjh21 -------------------------------------------------------------------------------
    599  1.1  bjh21 */
    600  1.1  bjh21 INLINE flag eq64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
    601  1.1  bjh21 {
    602  1.1  bjh21 
    603  1.1  bjh21     return ( a0 == b0 ) && ( a1 == b1 );
    604  1.1  bjh21 
    605  1.1  bjh21 }
    606  1.1  bjh21 
    607  1.1  bjh21 /*
    608  1.1  bjh21 -------------------------------------------------------------------------------
    609  1.1  bjh21 Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
    610  1.1  bjh21 than or equal to the 64-bit value formed by concatenating `b0' and `b1'.
    611  1.1  bjh21 Otherwise, returns 0.
    612  1.1  bjh21 -------------------------------------------------------------------------------
    613  1.1  bjh21 */
    614  1.1  bjh21 INLINE flag le64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
    615  1.1  bjh21 {
    616  1.1  bjh21 
    617  1.1  bjh21     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
    618  1.1  bjh21 
    619  1.1  bjh21 }
    620  1.1  bjh21 
    621  1.1  bjh21 /*
    622  1.1  bjh21 -------------------------------------------------------------------------------
    623  1.1  bjh21 Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
    624  1.1  bjh21 than the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
    625  1.1  bjh21 returns 0.
    626  1.1  bjh21 -------------------------------------------------------------------------------
    627  1.1  bjh21 */
    628  1.1  bjh21 INLINE flag lt64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
    629  1.1  bjh21 {
    630  1.1  bjh21 
    631  1.1  bjh21     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
    632  1.1  bjh21 
    633  1.1  bjh21 }
    634  1.1  bjh21 
    635  1.1  bjh21 /*
    636  1.1  bjh21 -------------------------------------------------------------------------------
    637  1.1  bjh21 Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is not
    638  1.1  bjh21 equal to the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
    639  1.1  bjh21 returns 0.
    640  1.1  bjh21 -------------------------------------------------------------------------------
    641  1.1  bjh21 */
    642  1.1  bjh21 INLINE flag ne64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
    643  1.1  bjh21 {
    644  1.1  bjh21 
    645  1.1  bjh21     return ( a0 != b0 ) || ( a1 != b1 );
    646  1.1  bjh21 
    647  1.1  bjh21 }
    648  1.1  bjh21 
    649