/*
===============================================================================

This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.

===============================================================================
*/

321.1Sbjh21/*
331.1Sbjh21-------------------------------------------------------------------------------
341.1Sbjh21Shifts `a' right by the number of bits given in `count'.  If any nonzero
351.1Sbjh21bits are shifted off, they are ``jammed'' into the least significant bit of
361.1Sbjh21the result by setting the least significant bit to 1.  The value of `count'
371.1Sbjh21can be arbitrarily large; in particular, if `count' is greater than 32, the
381.1Sbjh21result will be either 0 or 1, depending on whether `a' is zero or nonzero.
391.1Sbjh21The result is stored in the location pointed to by `zPtr'.
401.1Sbjh21-------------------------------------------------------------------------------
411.1Sbjh21*/
421.1Sbjh21INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
431.1Sbjh21{
441.1Sbjh21    bits32 z;
451.1Sbjh21
461.1Sbjh21    if ( count == 0 ) {
471.1Sbjh21        z = a;
481.1Sbjh21    }
491.1Sbjh21    else if ( count < 32 ) {
501.1Sbjh21        z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
511.1Sbjh21    }
521.1Sbjh21    else {
531.1Sbjh21        z = ( a != 0 );
541.1Sbjh21    }
551.1Sbjh21    *zPtr = z;
561.1Sbjh21
571.1Sbjh21}
581.1Sbjh21
591.1Sbjh21/*
601.1Sbjh21-------------------------------------------------------------------------------
611.1Sbjh21Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
621.1Sbjh21number of bits given in `count'.  Any bits shifted off are lost.  The value
631.1Sbjh21of `count' can be arbitrarily large; in particular, if `count' is greater
641.1Sbjh21than 64, the result will be 0.  The result is broken into two 32-bit pieces
651.1Sbjh21which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
661.1Sbjh21-------------------------------------------------------------------------------
671.1Sbjh21*/
681.1Sbjh21INLINE void
691.1Sbjh21 shift64Right(
701.1Sbjh21     bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
711.1Sbjh21{
721.1Sbjh21    bits32 z0, z1;
731.1Sbjh21    int8 negCount = ( - count ) & 31;
741.1Sbjh21
751.1Sbjh21    if ( count == 0 ) {
761.1Sbjh21        z1 = a1;
771.1Sbjh21        z0 = a0;
781.1Sbjh21    }
791.1Sbjh21    else if ( count < 32 ) {
801.1Sbjh21        z1 = ( a0<<negCount ) | ( a1>>count );
811.1Sbjh21        z0 = a0>>count;
821.1Sbjh21    }
831.1Sbjh21    else {
841.1Sbjh21        z1 = ( count < 64 ) ? ( a0>>( count & 31 ) ) : 0;
851.1Sbjh21        z0 = 0;
861.1Sbjh21    }
871.1Sbjh21    *z1Ptr = z1;
881.1Sbjh21    *z0Ptr = z0;
891.1Sbjh21
901.1Sbjh21}
911.1Sbjh21
921.1Sbjh21/*
931.1Sbjh21-------------------------------------------------------------------------------
941.1Sbjh21Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
951.1Sbjh21number of bits given in `count'.  If any nonzero bits are shifted off, they
961.1Sbjh21are ``jammed'' into the least significant bit of the result by setting the
971.1Sbjh21least significant bit to 1.  The value of `count' can be arbitrarily large;
981.1Sbjh21in particular, if `count' is greater than 64, the result will be either 0
991.1Sbjh21or 1, depending on whether the concatenation of `a0' and `a1' is zero or
1001.1Sbjh21nonzero.  The result is broken into two 32-bit pieces which are stored at
1011.1Sbjh21the locations pointed to by `z0Ptr' and `z1Ptr'.
1021.1Sbjh21-------------------------------------------------------------------------------
1031.1Sbjh21*/
1041.1Sbjh21INLINE void
1051.1Sbjh21 shift64RightJamming(
1061.1Sbjh21     bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
1071.1Sbjh21{
1081.1Sbjh21    bits32 z0, z1;
1091.1Sbjh21    int8 negCount = ( - count ) & 31;
1101.1Sbjh21
1111.1Sbjh21    if ( count == 0 ) {
1121.1Sbjh21        z1 = a1;
1131.1Sbjh21        z0 = a0;
1141.1Sbjh21    }
1151.1Sbjh21    else if ( count < 32 ) {
1161.1Sbjh21        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
1171.1Sbjh21        z0 = a0>>count;
1181.1Sbjh21    }
1191.1Sbjh21    else {
1201.1Sbjh21        if ( count == 32 ) {
1211.1Sbjh21            z1 = a0 | ( a1 != 0 );
1221.1Sbjh21        }
1231.1Sbjh21        else if ( count < 64 ) {
1241.1Sbjh21            z1 = ( a0>>( count & 31 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
1251.1Sbjh21        }
1261.1Sbjh21        else {
1271.1Sbjh21            z1 = ( ( a0 | a1 ) != 0 );
1281.1Sbjh21        }
1291.1Sbjh21        z0 = 0;
1301.1Sbjh21    }
1311.1Sbjh21    *z1Ptr = z1;
1321.1Sbjh21    *z0Ptr = z0;
1331.1Sbjh21
1341.1Sbjh21}
1351.1Sbjh21
1361.1Sbjh21/*
1371.1Sbjh21-------------------------------------------------------------------------------
1381.1Sbjh21Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right
1391.1Sbjh21by 32 _plus_ the number of bits given in `count'.  The shifted result is
1401.1Sbjh21at most 64 nonzero bits; these are broken into two 32-bit pieces which are
1411.1Sbjh21stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
1421.1Sbjh21off form a third 32-bit result as follows:  The _last_ bit shifted off is
1431.1Sbjh21the most-significant bit of the extra result, and the other 31 bits of the
1441.1Sbjh21extra result are all zero if and only if _all_but_the_last_ bits shifted off
1451.1Sbjh21were all zero.  This extra result is stored in the location pointed to by
1461.1Sbjh21`z2Ptr'.  The value of `count' can be arbitrarily large.
1471.1Sbjh21    (This routine makes more sense if `a0', `a1', and `a2' are considered
1481.1Sbjh21to form a fixed-point value with binary point between `a1' and `a2'.  This
1491.1Sbjh21fixed-point value is shifted right by the number of bits given in `count',
1501.1Sbjh21and the integer part of the result is returned at the locations pointed to
1511.1Sbjh21by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
1521.1Sbjh21corrupted as described above, and is returned at the location pointed to by
1531.1Sbjh21`z2Ptr'.)
1541.1Sbjh21-------------------------------------------------------------------------------
1551.1Sbjh21*/
1561.1Sbjh21INLINE void
1571.1Sbjh21 shift64ExtraRightJamming(
1581.1Sbjh21     bits32 a0,
1591.1Sbjh21     bits32 a1,
1601.1Sbjh21     bits32 a2,
1611.1Sbjh21     int16 count,
1621.1Sbjh21     bits32 *z0Ptr,
1631.1Sbjh21     bits32 *z1Ptr,
1641.1Sbjh21     bits32 *z2Ptr
1651.1Sbjh21 )
1661.1Sbjh21{
1671.1Sbjh21    bits32 z0, z1, z2;
1681.1Sbjh21    int8 negCount = ( - count ) & 31;
1691.1Sbjh21
1701.1Sbjh21    if ( count == 0 ) {
1711.1Sbjh21        z2 = a2;
1721.1Sbjh21        z1 = a1;
1731.1Sbjh21        z0 = a0;
1741.1Sbjh21    }
1751.1Sbjh21    else {
1761.1Sbjh21        if ( count < 32 ) {
1771.1Sbjh21            z2 = a1<<negCount;
1781.1Sbjh21            z1 = ( a0<<negCount ) | ( a1>>count );
1791.1Sbjh21            z0 = a0>>count;
1801.1Sbjh21        }
1811.1Sbjh21        else {
1821.1Sbjh21            if ( count == 32 ) {
1831.1Sbjh21                z2 = a1;
1841.1Sbjh21                z1 = a0;
1851.1Sbjh21            }
1861.1Sbjh21            else {
1871.1Sbjh21                a2 |= a1;
1881.1Sbjh21                if ( count < 64 ) {
1891.1Sbjh21                    z2 = a0<<negCount;
1901.1Sbjh21                    z1 = a0>>( count & 31 );
1911.1Sbjh21                }
1921.1Sbjh21                else {
1931.1Sbjh21                    z2 = ( count == 64 ) ? a0 : ( a0 != 0 );
1941.1Sbjh21                    z1 = 0;
1951.1Sbjh21                }
1961.1Sbjh21            }
1971.1Sbjh21            z0 = 0;
1981.1Sbjh21        }
1991.1Sbjh21        z2 |= ( a2 != 0 );
2001.1Sbjh21    }
2011.1Sbjh21    *z2Ptr = z2;
2021.1Sbjh21    *z1Ptr = z1;
2031.1Sbjh21    *z0Ptr = z0;
2041.1Sbjh21
2051.1Sbjh21}
2061.1Sbjh21
2071.1Sbjh21/*
2081.1Sbjh21-------------------------------------------------------------------------------
2091.1Sbjh21Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the
2101.1Sbjh21number of bits given in `count'.  Any bits shifted off are lost.  The value
2111.1Sbjh21of `count' must be less than 32.  The result is broken into two 32-bit
2121.1Sbjh21pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
2131.1Sbjh21-------------------------------------------------------------------------------
2141.1Sbjh21*/
2151.1Sbjh21INLINE void
2161.1Sbjh21 shortShift64Left(
2171.1Sbjh21     bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
2181.1Sbjh21{
2191.1Sbjh21
2201.1Sbjh21    *z1Ptr = a1<<count;
2211.1Sbjh21    *z0Ptr =
2221.1Sbjh21        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 31 ) );
2231.1Sbjh21
2241.1Sbjh21}
2251.1Sbjh21
2261.1Sbjh21/*
2271.1Sbjh21-------------------------------------------------------------------------------
2281.1Sbjh21Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' left
2291.1Sbjh21by the number of bits given in `count'.  Any bits shifted off are lost.
2301.1Sbjh21The value of `count' must be less than 32.  The result is broken into three
2311.1Sbjh2132-bit pieces which are stored at the locations pointed to by `z0Ptr',
2321.1Sbjh21`z1Ptr', and `z2Ptr'.
2331.1Sbjh21-------------------------------------------------------------------------------
2341.1Sbjh21*/
2351.1Sbjh21INLINE void
2361.1Sbjh21 shortShift96Left(
2371.1Sbjh21     bits32 a0,
2381.1Sbjh21     bits32 a1,
2391.1Sbjh21     bits32 a2,
2401.1Sbjh21     int16 count,
2411.1Sbjh21     bits32 *z0Ptr,
2421.1Sbjh21     bits32 *z1Ptr,
2431.1Sbjh21     bits32 *z2Ptr
2441.1Sbjh21 )
2451.1Sbjh21{
2461.1Sbjh21    bits32 z0, z1, z2;
2471.1Sbjh21    int8 negCount;
2481.1Sbjh21
2491.1Sbjh21    z2 = a2<<count;
2501.1Sbjh21    z1 = a1<<count;
2511.1Sbjh21    z0 = a0<<count;
2521.1Sbjh21    if ( 0 < count ) {
2531.1Sbjh21        negCount = ( ( - count ) & 31 );
2541.1Sbjh21        z1 |= a2>>negCount;
2551.1Sbjh21        z0 |= a1>>negCount;
2561.1Sbjh21    }
2571.1Sbjh21    *z2Ptr = z2;
2581.1Sbjh21    *z1Ptr = z1;
2591.1Sbjh21    *z0Ptr = z0;
2601.1Sbjh21
2611.1Sbjh21}
2621.1Sbjh21
2631.1Sbjh21/*
2641.1Sbjh21-------------------------------------------------------------------------------
2651.1Sbjh21Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit
2661.1Sbjh21value formed by concatenating `b0' and `b1'.  Addition is modulo 2^64, so
2671.1Sbjh21any carry out is lost.  The result is broken into two 32-bit pieces which
2681.1Sbjh21are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
2691.1Sbjh21-------------------------------------------------------------------------------
2701.1Sbjh21*/
2711.1Sbjh21INLINE void
2721.1Sbjh21 add64(
2731.1Sbjh21     bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )
2741.1Sbjh21{
2751.1Sbjh21    bits32 z1;
2761.1Sbjh21
2771.1Sbjh21    z1 = a1 + b1;
2781.1Sbjh21    *z1Ptr = z1;
2791.1Sbjh21    *z0Ptr = a0 + b0 + ( z1 < a1 );
2801.1Sbjh21
2811.1Sbjh21}
2821.1Sbjh21
2831.1Sbjh21/*
2841.1Sbjh21-------------------------------------------------------------------------------
2851.1Sbjh21Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the
2861.1Sbjh2196-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
2871.1Sbjh21modulo 2^96, so any carry out is lost.  The result is broken into three
2881.1Sbjh2132-bit pieces which are stored at the locations pointed to by `z0Ptr',
2891.1Sbjh21`z1Ptr', and `z2Ptr'.
2901.1Sbjh21-------------------------------------------------------------------------------
2911.1Sbjh21*/
2921.1Sbjh21INLINE void
2931.1Sbjh21 add96(
2941.1Sbjh21     bits32 a0,
2951.1Sbjh21     bits32 a1,
2961.1Sbjh21     bits32 a2,
2971.1Sbjh21     bits32 b0,
2981.1Sbjh21     bits32 b1,
2991.1Sbjh21     bits32 b2,
3001.1Sbjh21     bits32 *z0Ptr,
3011.1Sbjh21     bits32 *z1Ptr,
3021.1Sbjh21     bits32 *z2Ptr
3031.1Sbjh21 )
3041.1Sbjh21{
3051.1Sbjh21    bits32 z0, z1, z2;
3061.1Sbjh21    int8 carry0, carry1;
3071.1Sbjh21
3081.1Sbjh21    z2 = a2 + b2;
3091.1Sbjh21    carry1 = ( z2 < a2 );
3101.1Sbjh21    z1 = a1 + b1;
3111.1Sbjh21    carry0 = ( z1 < a1 );
3121.1Sbjh21    z0 = a0 + b0;
3131.1Sbjh21    z1 += carry1;
3141.2Slukem    z0 += ( z1 < (bits32)carry1 );
3151.1Sbjh21    z0 += carry0;
3161.1Sbjh21    *z2Ptr = z2;
3171.1Sbjh21    *z1Ptr = z1;
3181.1Sbjh21    *z0Ptr = z0;
3191.1Sbjh21
3201.1Sbjh21}
3211.1Sbjh21
3221.1Sbjh21/*
3231.1Sbjh21-------------------------------------------------------------------------------
3241.1Sbjh21Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the
3251.1Sbjh2164-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
3261.1Sbjh212^64, so any borrow out (carry out) is lost.  The result is broken into two
3271.1Sbjh2132-bit pieces which are stored at the locations pointed to by `z0Ptr' and
3281.1Sbjh21`z1Ptr'.
3291.1Sbjh21-------------------------------------------------------------------------------
3301.1Sbjh21*/
3311.1Sbjh21INLINE void
3321.1Sbjh21 sub64(
3331.1Sbjh21     bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )
3341.1Sbjh21{
3351.1Sbjh21
3361.1Sbjh21    *z1Ptr = a1 - b1;
3371.1Sbjh21    *z0Ptr = a0 - b0 - ( a1 < b1 );
3381.1Sbjh21
3391.1Sbjh21}
3401.1Sbjh21
3411.1Sbjh21/*
3421.1Sbjh21-------------------------------------------------------------------------------
3431.1Sbjh21Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from
3441.1Sbjh21the 96-bit value formed by concatenating `a0', `a1', and `a2'.  Subtraction
3451.1Sbjh21is modulo 2^96, so any borrow out (carry out) is lost.  The result is broken
3461.1Sbjh21into three 32-bit pieces which are stored at the locations pointed to by
3471.1Sbjh21`z0Ptr', `z1Ptr', and `z2Ptr'.
3481.1Sbjh21-------------------------------------------------------------------------------
3491.1Sbjh21*/
3501.1Sbjh21INLINE void
3511.1Sbjh21 sub96(
3521.1Sbjh21     bits32 a0,
3531.1Sbjh21     bits32 a1,
3541.1Sbjh21     bits32 a2,
3551.1Sbjh21     bits32 b0,
3561.1Sbjh21     bits32 b1,
3571.1Sbjh21     bits32 b2,
3581.1Sbjh21     bits32 *z0Ptr,
3591.1Sbjh21     bits32 *z1Ptr,
3601.1Sbjh21     bits32 *z2Ptr
3611.1Sbjh21 )
3621.1Sbjh21{
3631.1Sbjh21    bits32 z0, z1, z2;
3641.1Sbjh21    int8 borrow0, borrow1;
3651.1Sbjh21
3661.1Sbjh21    z2 = a2 - b2;
3671.1Sbjh21    borrow1 = ( a2 < b2 );
3681.1Sbjh21    z1 = a1 - b1;
3691.1Sbjh21    borrow0 = ( a1 < b1 );
3701.1Sbjh21    z0 = a0 - b0;
3711.2Slukem    z0 -= ( z1 < (bits32)borrow1 );
3721.1Sbjh21    z1 -= borrow1;
3731.1Sbjh21    z0 -= borrow0;
3741.1Sbjh21    *z2Ptr = z2;
3751.1Sbjh21    *z1Ptr = z1;
3761.1Sbjh21    *z0Ptr = z0;
3771.1Sbjh21
3781.1Sbjh21}
3791.1Sbjh21
3801.1Sbjh21/*
3811.1Sbjh21-------------------------------------------------------------------------------
3821.1Sbjh21Multiplies `a' by `b' to obtain a 64-bit product.  The product is broken
3831.1Sbjh21into two 32-bit pieces which are stored at the locations pointed to by
3841.1Sbjh21`z0Ptr' and `z1Ptr'.
3851.1Sbjh21-------------------------------------------------------------------------------
3861.1Sbjh21*/
3871.1Sbjh21INLINE void mul32To64( bits32 a, bits32 b, bits32 *z0Ptr, bits32 *z1Ptr )
3881.1Sbjh21{
3891.1Sbjh21    bits16 aHigh, aLow, bHigh, bLow;
3901.1Sbjh21    bits32 z0, zMiddleA, zMiddleB, z1;
3911.1Sbjh21
3921.1Sbjh21    aLow = a;
3931.1Sbjh21    aHigh = a>>16;
3941.1Sbjh21    bLow = b;
3951.1Sbjh21    bHigh = b>>16;
3961.1Sbjh21    z1 = ( (bits32) aLow ) * bLow;
3971.1Sbjh21    zMiddleA = ( (bits32) aLow ) * bHigh;
3981.1Sbjh21    zMiddleB = ( (bits32) aHigh ) * bLow;
3991.1Sbjh21    z0 = ( (bits32) aHigh ) * bHigh;
4001.1Sbjh21    zMiddleA += zMiddleB;
4011.1Sbjh21    z0 += ( ( (bits32) ( zMiddleA < zMiddleB ) )<<16 ) + ( zMiddleA>>16 );
4021.1Sbjh21    zMiddleA <<= 16;
4031.1Sbjh21    z1 += zMiddleA;
4041.1Sbjh21    z0 += ( z1 < zMiddleA );
4051.1Sbjh21    *z1Ptr = z1;
4061.1Sbjh21    *z0Ptr = z0;
4071.1Sbjh21
4081.1Sbjh21}
4091.1Sbjh21
4101.1Sbjh21/*
4111.1Sbjh21-------------------------------------------------------------------------------
4121.1Sbjh21Multiplies the 64-bit value formed by concatenating `a0' and `a1' by `b'
4131.1Sbjh21to obtain a 96-bit product.  The product is broken into three 32-bit pieces
4141.1Sbjh21which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
4151.1Sbjh21`z2Ptr'.
4161.1Sbjh21-------------------------------------------------------------------------------
4171.1Sbjh21*/
4181.1Sbjh21INLINE void
4191.1Sbjh21 mul64By32To96(
4201.1Sbjh21     bits32 a0,
4211.1Sbjh21     bits32 a1,
4221.1Sbjh21     bits32 b,
4231.1Sbjh21     bits32 *z0Ptr,
4241.1Sbjh21     bits32 *z1Ptr,
4251.1Sbjh21     bits32 *z2Ptr
4261.1Sbjh21 )
4271.1Sbjh21{
4281.1Sbjh21    bits32 z0, z1, z2, more1;
4291.1Sbjh21
4301.1Sbjh21    mul32To64( a1, b, &z1, &z2 );
4311.1Sbjh21    mul32To64( a0, b, &z0, &more1 );
4321.1Sbjh21    add64( z0, more1, 0, z1, &z0, &z1 );
4331.1Sbjh21    *z2Ptr = z2;
4341.1Sbjh21    *z1Ptr = z1;
4351.1Sbjh21    *z0Ptr = z0;
4361.1Sbjh21
4371.1Sbjh21}
4381.1Sbjh21
4391.1Sbjh21/*
4401.1Sbjh21-------------------------------------------------------------------------------
4411.1Sbjh21Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the
4421.1Sbjh2164-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit
4431.1Sbjh21product.  The product is broken into four 32-bit pieces which are stored at
4441.1Sbjh21the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
4451.1Sbjh21-------------------------------------------------------------------------------
4461.1Sbjh21*/
4471.1Sbjh21INLINE void
4481.1Sbjh21 mul64To128(
4491.1Sbjh21     bits32 a0,
4501.1Sbjh21     bits32 a1,
4511.1Sbjh21     bits32 b0,
4521.1Sbjh21     bits32 b1,
4531.1Sbjh21     bits32 *z0Ptr,
4541.1Sbjh21     bits32 *z1Ptr,
4551.1Sbjh21     bits32 *z2Ptr,
4561.1Sbjh21     bits32 *z3Ptr
4571.1Sbjh21 )
4581.1Sbjh21{
4591.1Sbjh21    bits32 z0, z1, z2, z3;
4601.1Sbjh21    bits32 more1, more2;
4611.1Sbjh21
4621.1Sbjh21    mul32To64( a1, b1, &z2, &z3 );
4631.1Sbjh21    mul32To64( a1, b0, &z1, &more2 );
4641.1Sbjh21    add64( z1, more2, 0, z2, &z1, &z2 );
4651.1Sbjh21    mul32To64( a0, b0, &z0, &more1 );
4661.1Sbjh21    add64( z0, more1, 0, z1, &z0, &z1 );
4671.1Sbjh21    mul32To64( a0, b1, &more1, &more2 );
4681.1Sbjh21    add64( more1, more2, 0, z2, &more1, &z2 );
4691.1Sbjh21    add64( z0, z1, 0, more1, &z0, &z1 );
4701.1Sbjh21    *z3Ptr = z3;
4711.1Sbjh21    *z2Ptr = z2;
4721.1Sbjh21    *z1Ptr = z1;
4731.1Sbjh21    *z0Ptr = z0;
4741.1Sbjh21
4751.1Sbjh21}
4761.1Sbjh21
4771.1Sbjh21/*
4781.1Sbjh21-------------------------------------------------------------------------------
4791.1Sbjh21Returns an approximation to the 32-bit integer quotient obtained by dividing
4801.1Sbjh21`b' into the 64-bit value formed by concatenating `a0' and `a1'.  The
4811.1Sbjh21divisor `b' must be at least 2^31.  If q is the exact quotient truncated
4821.1Sbjh21toward zero, the approximation returned lies between q and q + 2 inclusive.
4831.1Sbjh21If the exact quotient q is larger than 32 bits, the maximum positive 32-bit
4841.1Sbjh21unsigned integer is returned.
4851.1Sbjh21-------------------------------------------------------------------------------
4861.1Sbjh21*/
4871.1Sbjh21static bits32 estimateDiv64To32( bits32 a0, bits32 a1, bits32 b )
4881.1Sbjh21{
4891.1Sbjh21    bits32 b0, b1;
4901.1Sbjh21    bits32 rem0, rem1, term0, term1;
4911.1Sbjh21    bits32 z;
4921.1Sbjh21
4931.1Sbjh21    if ( b <= a0 ) return 0xFFFFFFFF;
4941.1Sbjh21    b0 = b>>16;
4951.1Sbjh21    z = ( b0<<16 <= a0 ) ? 0xFFFF0000 : ( a0 / b0 )<<16;
4961.1Sbjh21    mul32To64( b, z, &term0, &term1 );
4971.1Sbjh21    sub64( a0, a1, term0, term1, &rem0, &rem1 );
4981.1Sbjh21    while ( ( (sbits32) rem0 ) < 0 ) {
4991.1Sbjh21        z -= 0x10000;
5001.1Sbjh21        b1 = b<<16;
5011.1Sbjh21        add64( rem0, rem1, b0, b1, &rem0, &rem1 );
5021.1Sbjh21    }
5031.1Sbjh21    rem0 = ( rem0<<16 ) | ( rem1>>16 );
5041.1Sbjh21    z |= ( b0<<16 <= rem0 ) ? 0xFFFF : rem0 / b0;
5051.1Sbjh21    return z;
5061.1Sbjh21
5071.1Sbjh21}
5081.1Sbjh21
5091.1Sbjh21#ifndef SOFTFLOAT_FOR_GCC
5101.1Sbjh21/*
5111.1Sbjh21-------------------------------------------------------------------------------
5121.1Sbjh21Returns an approximation to the square root of the 32-bit significand given
5131.1Sbjh21by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
5141.1Sbjh21`aExp' (the least significant bit) is 1, the integer returned approximates
5151.1Sbjh212^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
5161.1Sbjh21is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
5171.1Sbjh21case, the approximation returned lies strictly within +/-2 of the exact
5181.1Sbjh21value.
5191.1Sbjh21-------------------------------------------------------------------------------
5201.1Sbjh21*/
5211.1Sbjh21static bits32 estimateSqrt32( int16 aExp, bits32 a )
5221.1Sbjh21{
5231.1Sbjh21    static const bits16 sqrtOddAdjustments[] = {
5241.1Sbjh21        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
5251.1Sbjh21        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
5261.1Sbjh21    };
5271.1Sbjh21    static const bits16 sqrtEvenAdjustments[] = {
5281.1Sbjh21        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
5291.1Sbjh21        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
5301.1Sbjh21    };
5311.1Sbjh21    int8 index;
5321.1Sbjh21    bits32 z;
5331.1Sbjh21
5341.1Sbjh21    index = ( a>>27 ) & 15;
5351.1Sbjh21    if ( aExp & 1 ) {
5361.1Sbjh21        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
5371.1Sbjh21        z = ( ( a / z )<<14 ) + ( z<<15 );
5381.1Sbjh21        a >>= 1;
5391.1Sbjh21    }
5401.1Sbjh21    else {
5411.1Sbjh21        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
5421.1Sbjh21        z = a / z + z;
5431.1Sbjh21        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
5441.1Sbjh21        if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
5451.1Sbjh21    }
5461.1Sbjh21    return ( ( estimateDiv64To32( a, 0, z ) )>>1 ) + ( z>>1 );
5471.1Sbjh21
5481.1Sbjh21}
5491.1Sbjh21#endif
5501.1Sbjh21
5511.1Sbjh21/*
5521.1Sbjh21-------------------------------------------------------------------------------
5531.1Sbjh21Returns the number of leading 0 bits before the most-significant 1 bit of
5541.1Sbjh21`a'.  If `a' is zero, 32 is returned.
5551.1Sbjh21-------------------------------------------------------------------------------
5561.1Sbjh21*/
5571.1Sbjh21static int8 countLeadingZeros32( bits32 a )
5581.1Sbjh21{
5591.1Sbjh21    static const int8 countLeadingZerosHigh[] = {
5601.1Sbjh21        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
5611.1Sbjh21        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
5621.1Sbjh21        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5631.1Sbjh21        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5641.1Sbjh21        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5651.1Sbjh21        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5661.1Sbjh21        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5671.1Sbjh21        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5681.1Sbjh21        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5691.1Sbjh21        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5701.1Sbjh21        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5711.1Sbjh21        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5721.1Sbjh21        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5731.1Sbjh21        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5741.1Sbjh21        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5751.1Sbjh21        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
5761.1Sbjh21    };
5771.1Sbjh21    int8 shiftCount;
5781.1Sbjh21
5791.1Sbjh21    shiftCount = 0;
5801.1Sbjh21    if ( a < 0x10000 ) {
5811.1Sbjh21        shiftCount += 16;
5821.1Sbjh21        a <<= 16;
5831.1Sbjh21    }
5841.1Sbjh21    if ( a < 0x1000000 ) {
5851.1Sbjh21        shiftCount += 8;
5861.1Sbjh21        a <<= 8;
5871.1Sbjh21    }
5881.1Sbjh21    shiftCount += countLeadingZerosHigh[ a>>24 ];
5891.1Sbjh21    return shiftCount;
5901.1Sbjh21
5911.1Sbjh21}
5921.1Sbjh21
5931.1Sbjh21/*
5941.1Sbjh21-------------------------------------------------------------------------------
5951.1Sbjh21Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is
5961.1Sbjh21equal to the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
5971.1Sbjh21returns 0.
5981.1Sbjh21-------------------------------------------------------------------------------
5991.1Sbjh21*/
6001.1Sbjh21INLINE flag eq64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
6011.1Sbjh21{
6021.1Sbjh21
6031.1Sbjh21    return ( a0 == b0 ) && ( a1 == b1 );
6041.1Sbjh21
6051.1Sbjh21}
6061.1Sbjh21
6071.1Sbjh21/*
6081.1Sbjh21-------------------------------------------------------------------------------
6091.1Sbjh21Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
6101.1Sbjh21than or equal to the 64-bit value formed by concatenating `b0' and `b1'.
6111.1Sbjh21Otherwise, returns 0.
6121.1Sbjh21-------------------------------------------------------------------------------
6131.1Sbjh21*/
6141.1Sbjh21INLINE flag le64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
6151.1Sbjh21{
6161.1Sbjh21
6171.1Sbjh21    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
6181.1Sbjh21
6191.1Sbjh21}
6201.1Sbjh21
6211.1Sbjh21/*
6221.1Sbjh21-------------------------------------------------------------------------------
6231.1Sbjh21Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
6241.1Sbjh21than the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
6251.1Sbjh21returns 0.
6261.1Sbjh21-------------------------------------------------------------------------------
6271.1Sbjh21*/
6281.1Sbjh21INLINE flag lt64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
6291.1Sbjh21{
6301.1Sbjh21
6311.1Sbjh21    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
6321.1Sbjh21
6331.1Sbjh21}
6341.1Sbjh21
6351.1Sbjh21/*
6361.1Sbjh21-------------------------------------------------------------------------------
6371.1Sbjh21Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is not
6381.1Sbjh21equal to the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
6391.1Sbjh21returns 0.
6401.1Sbjh21-------------------------------------------------------------------------------
6411.1Sbjh21*/
6421.1Sbjh21INLINE flag ne64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
6431.1Sbjh21{
6441.1Sbjh21
6451.1Sbjh21    return ( a0 != b0 ) || ( a1 != b1 );
6461.1Sbjh21
6471.1Sbjh21}
6481.1Sbjh21
649