Home | History | Annotate | Line # | Download | only in libkern
      1 /* $NetBSD: softfloat-macros.h,v 1.3 2020/09/01 15:45:20 thorpej Exp $ */
      2 
      3 /*============================================================================
      4 
      5 This C source fragment is part of the Berkeley SoftFloat IEEE Floating-Point
      6 Arithmetic Package, Release 2c, by John R. Hauser.
      7 
      8 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
      9 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
     10 RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
     11 AND ORGANIZATIONS WHO CAN AND WILL TOLERATE ALL LOSSES, COSTS, OR OTHER
     12 PROBLEMS THEY INCUR DUE TO THE SOFTWARE WITHOUT RECOMPENSE FROM JOHN HAUSER OR
     13 THE INTERNATIONAL COMPUTER SCIENCE INSTITUTE, AND WHO FURTHERMORE EFFECTIVELY
     14 INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE INSTITUTE
     15 (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR OTHER
     16 PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE, OR
     17 INCURRED BY ANYONE DUE TO A DERIVATIVE WORK THEY CREATE USING ANY PART OF THE
     18 SOFTWARE.
     19 
     20 Derivative works require also that (1) the source code for the derivative work
     21 includes prominent notice that the work is derivative, and (2) the source code
     22 includes prominent notice of these three paragraphs for those parts of this
     23 code that are retained.
     24 
     25 =============================================================================*/
     26 
     27 /*----------------------------------------------------------------------------
     28 | Shifts `a' right by the number of bits given in `count'.  If any nonzero
     29 | bits are shifted off, they are "jammed" into the least significant bit of
     30 | the result by setting the least significant bit to 1.  The value of `count'
     31 | can be arbitrarily large; in particular, if `count' is greater than 32, the
     32 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
     33 | The result is stored in the location pointed to by `zPtr'.
     34 *----------------------------------------------------------------------------*/
     35 
     36 INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
     37 {
     38     bits32 z;
     39 
     40     if ( count == 0 ) {
     41         z = a;
     42     }
     43     else if ( count < 32 ) {
     44         z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
     45     }
     46     else {
     47         z = ( a != 0 );
     48     }
     49     *zPtr = z;
     50 
     51 }
     52 
     53 /*----------------------------------------------------------------------------
     54 | Shifts `a' right by the number of bits given in `count'.  If any nonzero
     55 | bits are shifted off, they are "jammed" into the least significant bit of
     56 | the result by setting the least significant bit to 1.  The value of `count'
     57 | can be arbitrarily large; in particular, if `count' is greater than 64, the
     58 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
     59 | The result is stored in the location pointed to by `zPtr'.
     60 *----------------------------------------------------------------------------*/
     61 
     62 INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
     63 {
     64     bits64 z;
     65 
     66     if ( count == 0 ) {
     67         z = a;
     68     }
     69     else if ( count < 64 ) {
     70         z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
     71     }
     72     else {
     73         z = ( a != 0 );
     74     }
     75     *zPtr = z;
     76 
     77 }
     78 
     79 /*----------------------------------------------------------------------------
     80 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
     81 | _plus_ the number of bits given in `count'.  The shifted result is at most
     82 | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
     83 | bits shifted off form a second 64-bit result as follows:  The _last_ bit
     84 | shifted off is the most-significant bit of the extra result, and the other
     85 | 63 bits of the extra result are all zero if and only if _all_but_the_last_
     86 | bits shifted off were all zero.  This extra result is stored in the location
     87 | pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
     88 |     (This routine makes more sense if `a0' and `a1' are considered to form
     89 | a fixed-point value with binary point between `a0' and `a1'.  This fixed-
     90 | point value is shifted right by the number of bits given in `count', and
     91 | the integer part of the result is returned at the location pointed to by
     92 | `z0Ptr'.  The fractional part of the result may be slightly corrupted as
     93 | described above, and is returned at the location pointed to by `z1Ptr'.)
     94 *----------------------------------------------------------------------------*/
     95 
     96 INLINE void
     97  shift64ExtraRightJamming(
     98      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
     99 {
    100     bits64 z0, z1;
    101     int8 negCount = ( - count ) & 63;
    102 
    103     if ( count == 0 ) {
    104         z1 = a1;
    105         z0 = a0;
    106     }
    107     else if ( count < 64 ) {
    108         z1 = ( a0<<negCount ) | ( a1 != 0 );
    109         z0 = a0>>count;
    110     }
    111     else {
    112         if ( count == 64 ) {
    113             z1 = a0 | ( a1 != 0 );
    114         }
    115         else {
    116             z1 = ( ( a0 | a1 ) != 0 );
    117         }
    118         z0 = 0;
    119     }
    120     *z1Ptr = z1;
    121     *z0Ptr = z0;
    122 
    123 }
    124 
    125 /*----------------------------------------------------------------------------
    126 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
    127 | number of bits given in `count'.  Any bits shifted off are lost.  The value
    128 | of `count' can be arbitrarily large; in particular, if `count' is greater
    129 | than 128, the result will be 0.  The result is broken into two 64-bit pieces
    130 | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
    131 *----------------------------------------------------------------------------*/
    132 
    133 INLINE void
    134  shift128Right(
    135      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
    136 {
    137     bits64 z0, z1;
    138     int8 negCount = ( - count ) & 63;
    139 
    140     if ( count == 0 ) {
    141         z1 = a1;
    142         z0 = a0;
    143     }
    144     else if ( count < 64 ) {
    145         z1 = ( a0<<negCount ) | ( a1>>count );
    146         z0 = a0>>count;
    147     }
    148     else {
    149         z1 = ( count < 128 ) ? ( a0>>( count & 63 ) ) : 0;
    150         z0 = 0;
    151     }
    152     *z1Ptr = z1;
    153     *z0Ptr = z0;
    154 
    155 }
    156 
    157 /*----------------------------------------------------------------------------
    158 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
    159 | number of bits given in `count'.  If any nonzero bits are shifted off, they
    160 | are "jammed" into the least significant bit of the result by setting the
    161 | least significant bit to 1.  The value of `count' can be arbitrarily large;
    162 | in particular, if `count' is greater than 128, the result will be either
    163 | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
    164 | nonzero.  The result is broken into two 64-bit pieces which are stored at
    165 | the locations pointed to by `z0Ptr' and `z1Ptr'.
    166 *----------------------------------------------------------------------------*/
    167 
    168 INLINE void
    169  shift128RightJamming(
    170      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
    171 {
    172     bits64 z0, z1;
    173     int8 negCount = ( - count ) & 63;
    174 
    175     if ( count == 0 ) {
    176         z1 = a1;
    177         z0 = a0;
    178     }
    179     else if ( count < 64 ) {
    180         z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
    181         z0 = a0>>count;
    182     }
    183     else {
    184         if ( count == 64 ) {
    185             z1 = a0 | ( a1 != 0 );
    186         }
    187         else if ( count < 128 ) {
    188             z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
    189         }
    190         else {
    191             z1 = ( ( a0 | a1 ) != 0 );
    192         }
    193         z0 = 0;
    194     }
    195     *z1Ptr = z1;
    196     *z0Ptr = z0;
    197 
    198 }
    199 
    200 /*----------------------------------------------------------------------------
    201 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
    202 | by 64 _plus_ the number of bits given in `count'.  The shifted result is
    203 | at most 128 nonzero bits; these are broken into two 64-bit pieces which are
    204 | stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
    205 | off form a third 64-bit result as follows:  The _last_ bit shifted off is
    206 | the most-significant bit of the extra result, and the other 63 bits of the
    207 | extra result are all zero if and only if _all_but_the_last_ bits shifted off
    208 | were all zero.  This extra result is stored in the location pointed to by
    209 | `z2Ptr'.  The value of `count' can be arbitrarily large.
    210 |     (This routine makes more sense if `a0', `a1', and `a2' are considered
    211 | to form a fixed-point value with binary point between `a1' and `a2'.  This
    212 | fixed-point value is shifted right by the number of bits given in `count',
    213 | and the integer part of the result is returned at the locations pointed to
    214 | by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
    215 | corrupted as described above, and is returned at the location pointed to by
    216 | `z2Ptr'.)
    217 *----------------------------------------------------------------------------*/
    218 
    219 INLINE void
    220  shift128ExtraRightJamming(
    221      bits64 a0,
    222      bits64 a1,
    223      bits64 a2,
    224      int16 count,
    225      bits64 *z0Ptr,
    226      bits64 *z1Ptr,
    227      bits64 *z2Ptr
    228  )
    229 {
    230     bits64 z0, z1, z2;
    231     int8 negCount = ( - count ) & 63;
    232 
    233     if ( count == 0 ) {
    234         z2 = a2;
    235         z1 = a1;
    236         z0 = a0;
    237     }
    238     else {
    239         if ( count < 64 ) {
    240             z2 = a1<<negCount;
    241             z1 = ( a0<<negCount ) | ( a1>>count );
    242             z0 = a0>>count;
    243         }
    244         else {
    245             if ( count == 64 ) {
    246                 z2 = a1;
    247                 z1 = a0;
    248             }
    249             else {
    250                 a2 |= a1;
    251                 if ( count < 128 ) {
    252                     z2 = a0<<negCount;
    253                     z1 = a0>>( count & 63 );
    254                 }
    255                 else {
    256                     z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
    257                     z1 = 0;
    258                 }
    259             }
    260             z0 = 0;
    261         }
    262         z2 |= ( a2 != 0 );
    263     }
    264     *z2Ptr = z2;
    265     *z1Ptr = z1;
    266     *z0Ptr = z0;
    267 
    268 }
    269 
    270 /*----------------------------------------------------------------------------
    271 | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
    272 | number of bits given in `count'.  Any bits shifted off are lost.  The value
    273 | of `count' must be less than 64.  The result is broken into two 64-bit
    274 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
    275 *----------------------------------------------------------------------------*/
    276 
    277 INLINE void
    278  shortShift128Left(
    279      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
    280 {
    281 
    282     *z1Ptr = a1<<count;
    283     *z0Ptr =
    284         ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
    285 
    286 }
    287 
    288 /*----------------------------------------------------------------------------
    289 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
    290 | by the number of bits given in `count'.  Any bits shifted off are lost.
    291 | The value of `count' must be less than 64.  The result is broken into three
    292 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
    293 | `z1Ptr', and `z2Ptr'.
    294 *----------------------------------------------------------------------------*/
    295 
    296 INLINE void
    297  shortShift192Left(
    298      bits64 a0,
    299      bits64 a1,
    300      bits64 a2,
    301      int16 count,
    302      bits64 *z0Ptr,
    303      bits64 *z1Ptr,
    304      bits64 *z2Ptr
    305  )
    306 {
    307     bits64 z0, z1, z2;
    308     int8 negCount;
    309 
    310     z2 = a2<<count;
    311     z1 = a1<<count;
    312     z0 = a0<<count;
    313     if ( 0 < count ) {
    314         negCount = ( ( - count ) & 63 );
    315         z1 |= a2>>negCount;
    316         z0 |= a1>>negCount;
    317     }
    318     *z2Ptr = z2;
    319     *z1Ptr = z1;
    320     *z0Ptr = z0;
    321 
    322 }
    323 
    324 /*----------------------------------------------------------------------------
    325 | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
    326 | value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
    327 | any carry out is lost.  The result is broken into two 64-bit pieces which
    328 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
    329 *----------------------------------------------------------------------------*/
    330 
    331 INLINE void
    332  add128(
    333      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
    334 {
    335     bits64 z1;
    336 
    337     z1 = a1 + b1;
    338     *z1Ptr = z1;
    339     *z0Ptr = a0 + b0 + ( z1 < a1 );
    340 
    341 }
    342 
    343 /*----------------------------------------------------------------------------
    344 | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
    345 | 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
    346 | modulo 2^192, so any carry out is lost.  The result is broken into three
    347 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
    348 | `z1Ptr', and `z2Ptr'.
    349 *----------------------------------------------------------------------------*/
    350 
    351 INLINE void
    352  add192(
    353      bits64 a0,
    354      bits64 a1,
    355      bits64 a2,
    356      bits64 b0,
    357      bits64 b1,
    358      bits64 b2,
    359      bits64 *z0Ptr,
    360      bits64 *z1Ptr,
    361      bits64 *z2Ptr
    362  )
    363 {
    364     bits64 z0, z1, z2;
    365     int8 carry0, carry1;
    366 
    367     z2 = a2 + b2;
    368     carry1 = ( z2 < a2 );
    369     z1 = a1 + b1;
    370     carry0 = ( z1 < a1 );
    371     z0 = a0 + b0;
    372     z1 += carry1;
    373     z0 += ( z1 < carry1 );
    374     z0 += carry0;
    375     *z2Ptr = z2;
    376     *z1Ptr = z1;
    377     *z0Ptr = z0;
    378 
    379 }
    380 
    381 /*----------------------------------------------------------------------------
    382 | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
    383 | 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
    384 | 2^128, so any borrow out (carry out) is lost.  The result is broken into two
    385 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
    386 | `z1Ptr'.
    387 *----------------------------------------------------------------------------*/
    388 
    389 INLINE void
    390  sub128(
    391      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
    392 {
    393 
    394     *z1Ptr = a1 - b1;
    395     *z0Ptr = a0 - b0 - ( a1 < b1 );
    396 
    397 }
    398 
    399 /*----------------------------------------------------------------------------
    400 | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
    401 | from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
    402 | Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
    403 | result is broken into three 64-bit pieces which are stored at the locations
    404 | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
    405 *----------------------------------------------------------------------------*/
    406 
    407 INLINE void
    408  sub192(
    409      bits64 a0,
    410      bits64 a1,
    411      bits64 a2,
    412      bits64 b0,
    413      bits64 b1,
    414      bits64 b2,
    415      bits64 *z0Ptr,
    416      bits64 *z1Ptr,
    417      bits64 *z2Ptr
    418  )
    419 {
    420     bits64 z0, z1, z2;
    421     int8 borrow0, borrow1;
    422 
    423     z2 = a2 - b2;
    424     borrow1 = ( a2 < b2 );
    425     z1 = a1 - b1;
    426     borrow0 = ( a1 < b1 );
    427     z0 = a0 - b0;
    428     z0 -= ( z1 < borrow1 );
    429     z1 -= borrow1;
    430     z0 -= borrow0;
    431     *z2Ptr = z2;
    432     *z1Ptr = z1;
    433     *z0Ptr = z0;
    434 
    435 }
    436 
    437 /*----------------------------------------------------------------------------
    438 | Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
    439 | into two 64-bit pieces which are stored at the locations pointed to by
    440 | `z0Ptr' and `z1Ptr'.
    441 *----------------------------------------------------------------------------*/
    442 
    443 INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
    444 {
    445     bits32 aHigh, aLow, bHigh, bLow;
    446     bits64 z0, zMiddleA, zMiddleB, z1;
    447 
    448     aLow = a;
    449     aHigh = a>>32;
    450     bLow = b;
    451     bHigh = b>>32;
    452     z1 = ( (bits64) aLow ) * bLow;
    453     zMiddleA = ( (bits64) aLow ) * bHigh;
    454     zMiddleB = ( (bits64) aHigh ) * bLow;
    455     z0 = ( (bits64) aHigh ) * bHigh;
    456     zMiddleA += zMiddleB;
    457     z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
    458     zMiddleA <<= 32;
    459     z1 += zMiddleA;
    460     z0 += ( z1 < zMiddleA );
    461     *z1Ptr = z1;
    462     *z0Ptr = z0;
    463 
    464 }
    465 
    466 /*----------------------------------------------------------------------------
    467 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
    468 | `b' to obtain a 192-bit product.  The product is broken into three 64-bit
    469 | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
    470 | `z2Ptr'.
    471 *----------------------------------------------------------------------------*/
    472 
    473 INLINE void
    474  mul128By64To192(
    475      bits64 a0,
    476      bits64 a1,
    477      bits64 b,
    478      bits64 *z0Ptr,
    479      bits64 *z1Ptr,
    480      bits64 *z2Ptr
    481  )
    482 {
    483     bits64 z0, z1, z2, more1;
    484 
    485     mul64To128( a1, b, &z1, &z2 );
    486     mul64To128( a0, b, &z0, &more1 );
    487     add128( z0, more1, 0, z1, &z0, &z1 );
    488     *z2Ptr = z2;
    489     *z1Ptr = z1;
    490     *z0Ptr = z0;
    491 
    492 }
    493 
    494 /*----------------------------------------------------------------------------
    495 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
    496 | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
    497 | product.  The product is broken into four 64-bit pieces which are stored at
    498 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
    499 *----------------------------------------------------------------------------*/
    500 
    501 INLINE void
    502  mul128To256(
    503      bits64 a0,
    504      bits64 a1,
    505      bits64 b0,
    506      bits64 b1,
    507      bits64 *z0Ptr,
    508      bits64 *z1Ptr,
    509      bits64 *z2Ptr,
    510      bits64 *z3Ptr
    511  )
    512 {
    513     bits64 z0, z1, z2, z3;
    514     bits64 more1, more2;
    515 
    516     mul64To128( a1, b1, &z2, &z3 );
    517     mul64To128( a1, b0, &z1, &more2 );
    518     add128( z1, more2, 0, z2, &z1, &z2 );
    519     mul64To128( a0, b0, &z0, &more1 );
    520     add128( z0, more1, 0, z1, &z0, &z1 );
    521     mul64To128( a0, b1, &more1, &more2 );
    522     add128( more1, more2, 0, z2, &more1, &z2 );
    523     add128( z0, z1, 0, more1, &z0, &z1 );
    524     *z3Ptr = z3;
    525     *z2Ptr = z2;
    526     *z1Ptr = z1;
    527     *z0Ptr = z0;
    528 
    529 }
    530 
    531 /*----------------------------------------------------------------------------
    532 | Returns an approximation to the 64-bit integer quotient obtained by dividing
    533 | `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
    534 | divisor `b' must be at least 2^63.  If q is the exact quotient truncated
    535 | toward zero, the approximation returned lies between q and q + 2 inclusive.
    536 | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
    537 | unsigned integer is returned.
    538 *----------------------------------------------------------------------------*/
    539 
    540 static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
    541 {
    542     bits64 b0, b1;
    543     bits64 rem0, rem1, term0, term1;
    544     bits64 z;
    545 
    546     if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
    547     b0 = b>>32;
    548     z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
    549     mul64To128( b, z, &term0, &term1 );
    550     sub128( a0, a1, term0, term1, &rem0, &rem1 );
    551     while ( ( (sbits64) rem0 ) < 0 ) {
    552         z -= LIT64( 0x100000000 );
    553         b1 = b<<32;
    554         add128( rem0, rem1, b0, b1, &rem0, &rem1 );
    555     }
    556     rem0 = ( rem0<<32 ) | ( rem1>>32 );
    557     z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
    558     return z;
    559 
    560 }
    561 
    562 #ifndef SOFTFLOAT_FOR_GCC /* Not used */
    563 /*----------------------------------------------------------------------------
    564 | Returns an approximation to the square root of the 32-bit significand given
    565 | by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
    566 | `aExp' (the least significant bit) is 1, the integer returned approximates
    567 | 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
    568 | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
    569 | case, the approximation returned lies strictly within +/-2 of the exact
    570 | value.
    571 *----------------------------------------------------------------------------*/
    572 
    573 static bits32 estimateSqrt32( int16 aExp, bits32 a )
    574 {
    575     static const bits16 sqrtOddAdjustments[] = {
    576         0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
    577         0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
    578     };
    579     static const bits16 sqrtEvenAdjustments[] = {
    580         0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
    581         0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
    582     };
    583     int8 index;
    584     bits32 z;
    585 
    586     index = ( a>>27 ) & 15;
    587     if ( aExp & 1 ) {
    588         z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
    589         z = ( ( a / z )<<14 ) + ( z<<15 );
    590         a >>= 1;
    591     }
    592     else {
    593         z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
    594         z = a / z + z;
    595         z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
    596         if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
    597     }
    598     return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
    599 
    600 }
    601 #endif
    602 
    603 /*----------------------------------------------------------------------------
    604 | Returns the number of leading 0 bits before the most-significant 1 bit of
    605 | `a'.  If `a' is zero, 32 is returned.
    606 *----------------------------------------------------------------------------*/
    607 
    608 static int8 countLeadingZeros32( bits32 a )
    609 {
    610     static const int8 countLeadingZerosHigh[] = {
    611         8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
    612         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    613         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    614         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    615         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    616         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    617         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    618         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    619         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    620         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    621         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    622         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    623         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    624         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    625         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    626         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    627     };
    628     int8 shiftCount;
    629 
    630     shiftCount = 0;
    631     if ( a < 0x10000 ) {
    632         shiftCount += 16;
    633         a <<= 16;
    634     }
    635     if ( a < 0x1000000 ) {
    636         shiftCount += 8;
    637         a <<= 8;
    638     }
    639     shiftCount += countLeadingZerosHigh[ a>>24 ];
    640     return shiftCount;
    641 
    642 }
    643 
    644 /*----------------------------------------------------------------------------
    645 | Returns the number of leading 0 bits before the most-significant 1 bit of
    646 | `a'.  If `a' is zero, 64 is returned.
    647 *----------------------------------------------------------------------------*/
    648 
    649 static int8 countLeadingZeros64( bits64 a )
    650 {
    651     int8 shiftCount;
    652 
    653     shiftCount = 0;
    654     if ( a < ( (bits64) 1 )<<32 ) {
    655         shiftCount += 32;
    656     }
    657     else {
    658         a >>= 32;
    659     }
    660     shiftCount += countLeadingZeros32( a );
    661     return shiftCount;
    662 
    663 }
    664 
    665 /*----------------------------------------------------------------------------
    666 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
    667 | is equal to the 128-bit value formed by concatenating `b0' and `b1'.
    668 | Otherwise, returns 0.
    669 *----------------------------------------------------------------------------*/
    670 
    671 INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
    672 {
    673 
    674     return ( a0 == b0 ) && ( a1 == b1 );
    675 
    676 }
    677 
    678 /*----------------------------------------------------------------------------
    679 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
    680 | than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
    681 | Otherwise, returns 0.
    682 *----------------------------------------------------------------------------*/
    683 
    684 INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
    685 {
    686 
    687     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
    688 
    689 }
    690 
    691 /*----------------------------------------------------------------------------
    692 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
    693 | than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
    694 | returns 0.
    695 *----------------------------------------------------------------------------*/
    696 
    697 INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
    698 {
    699 
    700     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
    701 
    702 }
    703 
    704 /*----------------------------------------------------------------------------
    705 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
    706 | not equal to the 128-bit value formed by concatenating `b0' and `b1'.
    707 | Otherwise, returns 0.
    708 *----------------------------------------------------------------------------*/
    709 
    710 INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
    711 {
    712 
    713     return ( a0 != b0 ) || ( a1 != b1 );
    714 
    715 }
    716 
    717