Home | History | Annotate | Line # | Download | only in bits64
softfloat-macros revision 1.1.4.2
      1     /* $NetBSD: softfloat-macros,v 1.1.4.2 2002/05/21 23:51:09 bjh21 Exp $ */
      2     
      3     /*
      4     ===============================================================================
      5     
      6     This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
      7     Arithmetic Package, Release 2a.
      8     
      9     Written by John R. Hauser.  This work was made possible in part by the
     10     International Computer Science Institute, located at Suite 600, 1947 Center
     11     Street, Berkeley, California 94704.  Funding was partially provided by the
     12     National Science Foundation under grant MIP-9311980.  The original version
     13     of this code was written as part of a project to build a fixed-point vector
     14     processor in collaboration with the University of California at Berkeley,
     15     overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
     16     is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
     17     arithmetic/SoftFloat.html'.
     18     
     19     THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
     20     has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
     21     TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
     22     PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
     23     AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
     24     
     25     Derivative works are acceptable, even for commercial purposes, so long as
     26     (1) they include prominent notice that the work is derivative, and (2) they
     27     include prominent notice akin to these four paragraphs for those parts of
     28     this code that are retained.
     29     
     30     ===============================================================================
     31     */
     32     
     33     /*
     34     -------------------------------------------------------------------------------
     35     Shifts `a' right by the number of bits given in `count'.  If any nonzero
     36     bits are shifted off, they are ``jammed'' into the least significant bit of
     37     the result by setting the least significant bit to 1.  The value of `count'
     38     can be arbitrarily large; in particular, if `count' is greater than 32, the
     39     result will be either 0 or 1, depending on whether `a' is zero or nonzero.
     40     The result is stored in the location pointed to by `zPtr'.
     41     -------------------------------------------------------------------------------
     42     */
     43     INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
     44     {
     45         bits32 z;
     46     
     47         if ( count == 0 ) {
     48             z = a;
     49         }
     50         else if ( count < 32 ) {
     51             z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
     52         }
     53         else {
     54             z = ( a != 0 );
     55         }
     56         *zPtr = z;
     57     
     58     }
     59     
     60     /*
     61     -------------------------------------------------------------------------------
     62     Shifts `a' right by the number of bits given in `count'.  If any nonzero
     63     bits are shifted off, they are ``jammed'' into the least significant bit of
     64     the result by setting the least significant bit to 1.  The value of `count'
     65     can be arbitrarily large; in particular, if `count' is greater than 64, the
     66     result will be either 0 or 1, depending on whether `a' is zero or nonzero.
     67     The result is stored in the location pointed to by `zPtr'.
     68     -------------------------------------------------------------------------------
     69     */
     70     INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
     71     {
     72         bits64 z;
     73     
     74         if ( count == 0 ) {
     75             z = a;
     76         }
     77         else if ( count < 64 ) {
     78             z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
     79         }
     80         else {
     81             z = ( a != 0 );
     82         }
     83         *zPtr = z;
     84     
     85     }
     86     
     87     /*
     88     -------------------------------------------------------------------------------
     89     Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
     90     _plus_ the number of bits given in `count'.  The shifted result is at most
     91     64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
     92     bits shifted off form a second 64-bit result as follows:  The _last_ bit
     93     shifted off is the most-significant bit of the extra result, and the other
     94     63 bits of the extra result are all zero if and only if _all_but_the_last_
     95     bits shifted off were all zero.  This extra result is stored in the location
     96     pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
     97         (This routine makes more sense if `a0' and `a1' are considered to form a
     98     fixed-point value with binary point between `a0' and `a1'.  This fixed-point
     99     value is shifted right by the number of bits given in `count', and the
    100     integer part of the result is returned at the location pointed to by
    101     `z0Ptr'.  The fractional part of the result may be slightly corrupted as
    102     described above, and is returned at the location pointed to by `z1Ptr'.)
    103     -------------------------------------------------------------------------------
    104     */
    105     INLINE void
    106      shift64ExtraRightJamming(
    107          bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
    108     {
    109         bits64 z0, z1;
    110         int8 negCount = ( - count ) & 63;
    111     
    112         if ( count == 0 ) {
    113             z1 = a1;
    114             z0 = a0;
    115         }
    116         else if ( count < 64 ) {
    117             z1 = ( a0<<negCount ) | ( a1 != 0 );
    118             z0 = a0>>count;
    119         }
    120         else {
    121             if ( count == 64 ) {
    122                 z1 = a0 | ( a1 != 0 );
    123             }
    124             else {
    125                 z1 = ( ( a0 | a1 ) != 0 );
    126             }
    127             z0 = 0;
    128         }
    129         *z1Ptr = z1;
    130         *z0Ptr = z0;
    131     
    132     }
    133     
    134     /*
    135     -------------------------------------------------------------------------------
    136     Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
    137     number of bits given in `count'.  Any bits shifted off are lost.  The value
    138     of `count' can be arbitrarily large; in particular, if `count' is greater
    139     than 128, the result will be 0.  The result is broken into two 64-bit pieces
    140     which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
    141     -------------------------------------------------------------------------------
    142     */
    143     INLINE void
    144      shift128Right(
    145          bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
    146     {
    147         bits64 z0, z1;
    148         int8 negCount = ( - count ) & 63;
    149     
    150         if ( count == 0 ) {
    151             z1 = a1;
    152             z0 = a0;
    153         }
    154         else if ( count < 64 ) {
    155             z1 = ( a0<<negCount ) | ( a1>>count );
    156             z0 = a0>>count;
    157         }
    158         else {
    159             z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
    160             z0 = 0;
    161         }
    162         *z1Ptr = z1;
    163         *z0Ptr = z0;
    164     
    165     }
    166     
    167     /*
    168     -------------------------------------------------------------------------------
    169     Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
    170     number of bits given in `count'.  If any nonzero bits are shifted off, they
    171     are ``jammed'' into the least significant bit of the result by setting the
    172     least significant bit to 1.  The value of `count' can be arbitrarily large;
    173     in particular, if `count' is greater than 128, the result will be either
    174     0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
    175     nonzero.  The result is broken into two 64-bit pieces which are stored at
    176     the locations pointed to by `z0Ptr' and `z1Ptr'.
    177     -------------------------------------------------------------------------------
    178     */
    179     INLINE void
    180      shift128RightJamming(
    181          bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
    182     {
    183         bits64 z0, z1;
    184         int8 negCount = ( - count ) & 63;
    185     
    186         if ( count == 0 ) {
    187             z1 = a1;
    188             z0 = a0;
    189         }
    190         else if ( count < 64 ) {
    191             z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
    192             z0 = a0>>count;
    193         }
    194         else {
    195             if ( count == 64 ) {
    196                 z1 = a0 | ( a1 != 0 );
    197             }
    198             else if ( count < 128 ) {
    199                 z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
    200             }
    201             else {
    202                 z1 = ( ( a0 | a1 ) != 0 );
    203             }
    204             z0 = 0;
    205         }
    206         *z1Ptr = z1;
    207         *z0Ptr = z0;
    208     
    209     }
    210     
    211     /*
    212     -------------------------------------------------------------------------------
    213     Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
    214     by 64 _plus_ the number of bits given in `count'.  The shifted result is
    215     at most 128 nonzero bits; these are broken into two 64-bit pieces which are
    216     stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
    217     off form a third 64-bit result as follows:  The _last_ bit shifted off is
    218     the most-significant bit of the extra result, and the other 63 bits of the
    219     extra result are all zero if and only if _all_but_the_last_ bits shifted off
    220     were all zero.  This extra result is stored in the location pointed to by
    221     `z2Ptr'.  The value of `count' can be arbitrarily large.
    222         (This routine makes more sense if `a0', `a1', and `a2' are considered
    223     to form a fixed-point value with binary point between `a1' and `a2'.  This
    224     fixed-point value is shifted right by the number of bits given in `count',
    225     and the integer part of the result is returned at the locations pointed to
    226     by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
    227     corrupted as described above, and is returned at the location pointed to by
    228     `z2Ptr'.)
    229     -------------------------------------------------------------------------------
    230     */
    231     INLINE void
    232      shift128ExtraRightJamming(
    233          bits64 a0,
    234          bits64 a1,
    235          bits64 a2,
    236          int16 count,
    237          bits64 *z0Ptr,
    238          bits64 *z1Ptr,
    239          bits64 *z2Ptr
    240      )
    241     {
    242         bits64 z0, z1, z2;
    243         int8 negCount = ( - count ) & 63;
    244     
    245         if ( count == 0 ) {
    246             z2 = a2;
    247             z1 = a1;
    248             z0 = a0;
    249         }
    250         else {
    251             if ( count < 64 ) {
    252                 z2 = a1<<negCount;
    253                 z1 = ( a0<<negCount ) | ( a1>>count );
    254                 z0 = a0>>count;
    255             }
    256             else {
    257                 if ( count == 64 ) {
    258                     z2 = a1;
    259                     z1 = a0;
    260                 }
    261                 else {
    262                     a2 |= a1;
    263                     if ( count < 128 ) {
    264                         z2 = a0<<negCount;
    265                         z1 = a0>>( count & 63 );
    266                     }
    267                     else {
    268                         z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
    269                         z1 = 0;
    270                     }
    271                 }
    272                 z0 = 0;
    273             }
    274             z2 |= ( a2 != 0 );
    275         }
    276         *z2Ptr = z2;
    277         *z1Ptr = z1;
    278         *z0Ptr = z0;
    279     
    280     }
    281     
    282     /*
    283     -------------------------------------------------------------------------------
    284     Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
    285     number of bits given in `count'.  Any bits shifted off are lost.  The value
    286     of `count' must be less than 64.  The result is broken into two 64-bit
    287     pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
    288     -------------------------------------------------------------------------------
    289     */
    290     INLINE void
    291      shortShift128Left(
    292          bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
    293     {
    294     
    295         *z1Ptr = a1<<count;
    296         *z0Ptr =
    297             ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
    298     
    299     }
    300     
    301     /*
    302     -------------------------------------------------------------------------------
    303     Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
    304     by the number of bits given in `count'.  Any bits shifted off are lost.
    305     The value of `count' must be less than 64.  The result is broken into three
    306     64-bit pieces which are stored at the locations pointed to by `z0Ptr',
    307     `z1Ptr', and `z2Ptr'.
    308     -------------------------------------------------------------------------------
    309     */
    310     INLINE void
    311      shortShift192Left(
    312          bits64 a0,
    313          bits64 a1,
    314          bits64 a2,
    315          int16 count,
    316          bits64 *z0Ptr,
    317          bits64 *z1Ptr,
    318          bits64 *z2Ptr
    319      )
    320     {
    321         bits64 z0, z1, z2;
    322         int8 negCount;
    323     
    324         z2 = a2<<count;
    325         z1 = a1<<count;
    326         z0 = a0<<count;
    327         if ( 0 < count ) {
    328             negCount = ( ( - count ) & 63 );
    329             z1 |= a2>>negCount;
    330             z0 |= a1>>negCount;
    331         }
    332         *z2Ptr = z2;
    333         *z1Ptr = z1;
    334         *z0Ptr = z0;
    335     
    336     }
    337     
    338     /*
    339     -------------------------------------------------------------------------------
    340     Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
    341     value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
    342     any carry out is lost.  The result is broken into two 64-bit pieces which
    343     are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
    344     -------------------------------------------------------------------------------
    345     */
    346     INLINE void
    347      add128(
    348          bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
    349     {
    350         bits64 z1;
    351     
    352         z1 = a1 + b1;
    353         *z1Ptr = z1;
    354         *z0Ptr = a0 + b0 + ( z1 < a1 );
    355     
    356     }
    357     
    358     /*
    359     -------------------------------------------------------------------------------
    360     Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
    361     192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
    362     modulo 2^192, so any carry out is lost.  The result is broken into three
    363     64-bit pieces which are stored at the locations pointed to by `z0Ptr',
    364     `z1Ptr', and `z2Ptr'.
    365     -------------------------------------------------------------------------------
    366     */
    367     INLINE void
    368      add192(
    369          bits64 a0,
    370          bits64 a1,
    371          bits64 a2,
    372          bits64 b0,
    373          bits64 b1,
    374          bits64 b2,
    375          bits64 *z0Ptr,
    376          bits64 *z1Ptr,
    377          bits64 *z2Ptr
    378      )
    379     {
    380         bits64 z0, z1, z2;
    381         int8 carry0, carry1;
    382     
    383         z2 = a2 + b2;
    384         carry1 = ( z2 < a2 );
    385         z1 = a1 + b1;
    386         carry0 = ( z1 < a1 );
    387         z0 = a0 + b0;
    388         z1 += carry1;
    389         z0 += ( z1 < carry1 );
    390         z0 += carry0;
    391         *z2Ptr = z2;
    392         *z1Ptr = z1;
    393         *z0Ptr = z0;
    394     
    395     }
    396     
    397     /*
    398     -------------------------------------------------------------------------------
    399     Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
    400     128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
    401     2^128, so any borrow out (carry out) is lost.  The result is broken into two
    402     64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
    403     `z1Ptr'.
    404     -------------------------------------------------------------------------------
    405     */
    406     INLINE void
    407      sub128(
    408          bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
    409     {
    410     
    411         *z1Ptr = a1 - b1;
    412         *z0Ptr = a0 - b0 - ( a1 < b1 );
    413     
    414     }
    415     
    416     /*
    417     -------------------------------------------------------------------------------
    418     Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
    419     from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
    420     Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
    421     result is broken into three 64-bit pieces which are stored at the locations
    422     pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
    423     -------------------------------------------------------------------------------
    424     */
    425     INLINE void
    426      sub192(
    427          bits64 a0,
    428          bits64 a1,
    429          bits64 a2,
    430          bits64 b0,
    431          bits64 b1,
    432          bits64 b2,
    433          bits64 *z0Ptr,
    434          bits64 *z1Ptr,
    435          bits64 *z2Ptr
    436      )
    437     {
    438         bits64 z0, z1, z2;
    439         int8 borrow0, borrow1;
    440     
    441         z2 = a2 - b2;
    442         borrow1 = ( a2 < b2 );
    443         z1 = a1 - b1;
    444         borrow0 = ( a1 < b1 );
    445         z0 = a0 - b0;
    446         z0 -= ( z1 < borrow1 );
    447         z1 -= borrow1;
    448         z0 -= borrow0;
    449         *z2Ptr = z2;
    450         *z1Ptr = z1;
    451         *z0Ptr = z0;
    452     
    453     }
    454     
    455     /*
    456     -------------------------------------------------------------------------------
    457     Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
    458     into two 64-bit pieces which are stored at the locations pointed to by
    459     `z0Ptr' and `z1Ptr'.
    460     -------------------------------------------------------------------------------
    461     */
    462     INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
    463     {
    464         bits32 aHigh, aLow, bHigh, bLow;
    465         bits64 z0, zMiddleA, zMiddleB, z1;
    466     
    467         aLow = a;
    468         aHigh = a>>32;
    469         bLow = b;
    470         bHigh = b>>32;
    471         z1 = ( (bits64) aLow ) * bLow;
    472         zMiddleA = ( (bits64) aLow ) * bHigh;
    473         zMiddleB = ( (bits64) aHigh ) * bLow;
    474         z0 = ( (bits64) aHigh ) * bHigh;
    475         zMiddleA += zMiddleB;
    476         z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
    477         zMiddleA <<= 32;
    478         z1 += zMiddleA;
    479         z0 += ( z1 < zMiddleA );
    480         *z1Ptr = z1;
    481         *z0Ptr = z0;
    482     
    483     }
    484     
    485     /*
    486     -------------------------------------------------------------------------------
    487     Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
    488     `b' to obtain a 192-bit product.  The product is broken into three 64-bit
    489     pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
    490     `z2Ptr'.
    491     -------------------------------------------------------------------------------
    492     */
    493     INLINE void
    494      mul128By64To192(
    495          bits64 a0,
    496          bits64 a1,
    497          bits64 b,
    498          bits64 *z0Ptr,
    499          bits64 *z1Ptr,
    500          bits64 *z2Ptr
    501      )
    502     {
    503         bits64 z0, z1, z2, more1;
    504     
    505         mul64To128( a1, b, &z1, &z2 );
    506         mul64To128( a0, b, &z0, &more1 );
    507         add128( z0, more1, 0, z1, &z0, &z1 );
    508         *z2Ptr = z2;
    509         *z1Ptr = z1;
    510         *z0Ptr = z0;
    511     
    512     }
    513     
    514     /*
    515     -------------------------------------------------------------------------------
    516     Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
    517     128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
    518     product.  The product is broken into four 64-bit pieces which are stored at
    519     the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
    520     -------------------------------------------------------------------------------
    521     */
    522     INLINE void
    523      mul128To256(
    524          bits64 a0,
    525          bits64 a1,
    526          bits64 b0,
    527          bits64 b1,
    528          bits64 *z0Ptr,
    529          bits64 *z1Ptr,
    530          bits64 *z2Ptr,
    531          bits64 *z3Ptr
    532      )
    533     {
    534         bits64 z0, z1, z2, z3;
    535         bits64 more1, more2;
    536     
    537         mul64To128( a1, b1, &z2, &z3 );
    538         mul64To128( a1, b0, &z1, &more2 );
    539         add128( z1, more2, 0, z2, &z1, &z2 );
    540         mul64To128( a0, b0, &z0, &more1 );
    541         add128( z0, more1, 0, z1, &z0, &z1 );
    542         mul64To128( a0, b1, &more1, &more2 );
    543         add128( more1, more2, 0, z2, &more1, &z2 );
    544         add128( z0, z1, 0, more1, &z0, &z1 );
    545         *z3Ptr = z3;
    546         *z2Ptr = z2;
    547         *z1Ptr = z1;
    548         *z0Ptr = z0;
    549     
    550     }
    551     
    552     /*
    553     -------------------------------------------------------------------------------
    554     Returns an approximation to the 64-bit integer quotient obtained by dividing
    555     `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
    556     divisor `b' must be at least 2^63.  If q is the exact quotient truncated
    557     toward zero, the approximation returned lies between q and q + 2 inclusive.
    558     If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
    559     unsigned integer is returned.
    560     -------------------------------------------------------------------------------
    561     */
    562     static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
    563     {
    564         bits64 b0, b1;
    565         bits64 rem0, rem1, term0, term1;
    566         bits64 z;
    567     
    568         if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
    569         b0 = b>>32;
    570         z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
    571         mul64To128( b, z, &term0, &term1 );
    572         sub128( a0, a1, term0, term1, &rem0, &rem1 );
    573         while ( ( (sbits64) rem0 ) < 0 ) {
    574             z -= LIT64( 0x100000000 );
    575             b1 = b<<32;
    576             add128( rem0, rem1, b0, b1, &rem0, &rem1 );
    577         }
    578         rem0 = ( rem0<<32 ) | ( rem1>>32 );
    579         z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
    580         return z;
    581     
    582     }
    583     
    584     #if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128)
    585     /*
    586     -------------------------------------------------------------------------------
    587     Returns an approximation to the square root of the 32-bit significand given
    588     by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
    589     `aExp' (the least significant bit) is 1, the integer returned approximates
    590     2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
    591     is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
    592     case, the approximation returned lies strictly within +/-2 of the exact
    593     value.
    594     -------------------------------------------------------------------------------
    595     */
    596     static bits32 estimateSqrt32( int16 aExp, bits32 a )
    597     {
    598         static const bits16 sqrtOddAdjustments[] = {
    599             0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
    600             0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
    601         };
    602         static const bits16 sqrtEvenAdjustments[] = {
    603             0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
    604             0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
    605         };
    606         int8 idx;
    607         bits32 z;
    608     
    609         idx = ( a>>27 ) & 15;
    610         if ( aExp & 1 ) {
    611             z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ idx ];
    612             z = ( ( a / z )<<14 ) + ( z<<15 );
    613             a >>= 1;
    614         }
    615         else {
    616             z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ idx ];
    617             z = a / z + z;
    618             z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
    619             if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
    620         }
    621         return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
    622     
    623     }
    624     #endif
    625     
    626     /*
    627     -------------------------------------------------------------------------------
    628     Returns the number of leading 0 bits before the most-significant 1 bit of
    629     `a'.  If `a' is zero, 32 is returned.
    630     -------------------------------------------------------------------------------
    631     */
    632     static int8 countLeadingZeros32( bits32 a )
    633     {
    634         static const int8 countLeadingZerosHigh[] = {
    635             8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
    636             3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    637             2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    638             2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    639             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    640             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    641             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    642             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    643             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    644             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    645             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    646             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    647             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    648             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    649             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    650             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    651         };
    652         int8 shiftCount;
    653     
    654         shiftCount = 0;
    655         if ( a < 0x10000 ) {
    656             shiftCount += 16;
    657             a <<= 16;
    658         }
    659         if ( a < 0x1000000 ) {
    660             shiftCount += 8;
    661             a <<= 8;
    662         }
    663         shiftCount += countLeadingZerosHigh[ a>>24 ];
    664         return shiftCount;
    665     
    666     }
    667     
    668     /*
    669     -------------------------------------------------------------------------------
    670     Returns the number of leading 0 bits before the most-significant 1 bit of
    671     `a'.  If `a' is zero, 64 is returned.
    672     -------------------------------------------------------------------------------
    673     */
    674     static int8 countLeadingZeros64( bits64 a )
    675     {
    676         int8 shiftCount;
    677     
    678         shiftCount = 0;
    679         if ( a < ( (bits64) 1 )<<32 ) {
    680             shiftCount += 32;
    681         }
    682         else {
    683             a >>= 32;
    684         }
    685         shiftCount += countLeadingZeros32( a );
    686         return shiftCount;
    687     
    688     }
    689     
    690     /*
    691     -------------------------------------------------------------------------------
    692     Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
    693     is equal to the 128-bit value formed by concatenating `b0' and `b1'.
    694     Otherwise, returns 0.
    695     -------------------------------------------------------------------------------
    696     */
    697     INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
    698     {
    699     
    700         return ( a0 == b0 ) && ( a1 == b1 );
    701     
    702     }
    703     
    704     /*
    705     -------------------------------------------------------------------------------
    706     Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
    707     than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
    708     Otherwise, returns 0.
    709     -------------------------------------------------------------------------------
    710     */
    711     INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
    712     {
    713     
    714         return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
    715     
    716     }
    717     
    718     /*
    719     -------------------------------------------------------------------------------
    720     Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
    721     than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
    722     returns 0.
    723     -------------------------------------------------------------------------------
    724     */
    725     INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
    726     {
    727     
    728         return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
    729     
    730     }
    731     
    732     /*
    733     -------------------------------------------------------------------------------
    734     Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
    735     not equal to the 128-bit value formed by concatenating `b0' and `b1'.
    736     Otherwise, returns 0.
    737     -------------------------------------------------------------------------------
    738     */
    739     INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
    740     {
    741     
    742         return ( a0 != b0 ) || ( a1 != b1 );
    743     
    744     }
    745     
    746