softfloat-macros revision 1.1
11.1Sbjh21 21.1Sbjh21/* 31.1Sbjh21=============================================================================== 41.1Sbjh21 51.1Sbjh21This C source fragment is part of the SoftFloat IEC/IEEE Floating-point 61.1Sbjh21Arithmetic Package, Release 2a. 71.1Sbjh21 81.1Sbjh21Written by John R. Hauser. This work was made possible in part by the 91.1Sbjh21International Computer Science Institute, located at Suite 600, 1947 Center 101.1Sbjh21Street, Berkeley, California 94704. Funding was partially provided by the 111.1Sbjh21National Science Foundation under grant MIP-9311980. The original version 121.1Sbjh21of this code was written as part of a project to build a fixed-point vector 131.1Sbjh21processor in collaboration with the University of California at Berkeley, 141.1Sbjh21overseen by Profs. Nelson Morgan and John Wawrzynek. More information 151.1Sbjh21is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ 161.1Sbjh21arithmetic/SoftFloat.html'. 171.1Sbjh21 181.1Sbjh21THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort 191.1Sbjh21has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT 201.1Sbjh21TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO 211.1Sbjh21PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY 221.1Sbjh21AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 231.1Sbjh21 241.1Sbjh21Derivative works are acceptable, even for commercial purposes, so long as 251.1Sbjh21(1) they include prominent notice that the work is derivative, and (2) they 261.1Sbjh21include prominent notice akin to these four paragraphs for those parts of 271.1Sbjh21this code that are retained. 281.1Sbjh21 291.1Sbjh21=============================================================================== 301.1Sbjh21*/ 311.1Sbjh21 321.1Sbjh21/* 331.1Sbjh21------------------------------------------------------------------------------- 341.1Sbjh21Shifts `a' right by the number of bits given in `count'. If any nonzero 351.1Sbjh21bits are shifted off, they are ``jammed'' into the least significant bit of 361.1Sbjh21the result by setting the least significant bit to 1. The value of `count' 371.1Sbjh21can be arbitrarily large; in particular, if `count' is greater than 32, the 381.1Sbjh21result will be either 0 or 1, depending on whether `a' is zero or nonzero. 391.1Sbjh21The result is stored in the location pointed to by `zPtr'. 401.1Sbjh21------------------------------------------------------------------------------- 411.1Sbjh21*/ 421.1Sbjh21INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) 431.1Sbjh21{ 441.1Sbjh21 bits32 z; 451.1Sbjh21 461.1Sbjh21 if ( count == 0 ) { 471.1Sbjh21 z = a; 481.1Sbjh21 } 491.1Sbjh21 else if ( count < 32 ) { 501.1Sbjh21 z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); 511.1Sbjh21 } 521.1Sbjh21 else { 531.1Sbjh21 z = ( a != 0 ); 541.1Sbjh21 } 551.1Sbjh21 *zPtr = z; 561.1Sbjh21 571.1Sbjh21} 581.1Sbjh21 591.1Sbjh21/* 601.1Sbjh21------------------------------------------------------------------------------- 611.1Sbjh21Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the 621.1Sbjh21number of bits given in `count'. Any bits shifted off are lost. The value 631.1Sbjh21of `count' can be arbitrarily large; in particular, if `count' is greater 641.1Sbjh21than 64, the result will be 0. The result is broken into two 32-bit pieces 651.1Sbjh21which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 661.1Sbjh21------------------------------------------------------------------------------- 671.1Sbjh21*/ 681.1Sbjh21INLINE void 691.1Sbjh21 shift64Right( 701.1Sbjh21 bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr ) 711.1Sbjh21{ 721.1Sbjh21 bits32 z0, z1; 731.1Sbjh21 int8 negCount = ( - count ) & 31; 741.1Sbjh21 751.1Sbjh21 if ( count == 0 ) { 761.1Sbjh21 z1 = a1; 771.1Sbjh21 z0 = a0; 781.1Sbjh21 } 791.1Sbjh21 else if ( count < 32 ) { 801.1Sbjh21 z1 = ( a0<<negCount ) | ( a1>>count ); 811.1Sbjh21 z0 = a0>>count; 821.1Sbjh21 } 831.1Sbjh21 else { 841.1Sbjh21 z1 = ( count < 64 ) ? ( a0>>( count & 31 ) ) : 0; 851.1Sbjh21 z0 = 0; 861.1Sbjh21 } 871.1Sbjh21 *z1Ptr = z1; 881.1Sbjh21 *z0Ptr = z0; 891.1Sbjh21 901.1Sbjh21} 911.1Sbjh21 921.1Sbjh21/* 931.1Sbjh21------------------------------------------------------------------------------- 941.1Sbjh21Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the 951.1Sbjh21number of bits given in `count'. If any nonzero bits are shifted off, they 961.1Sbjh21are ``jammed'' into the least significant bit of the result by setting the 971.1Sbjh21least significant bit to 1. The value of `count' can be arbitrarily large; 981.1Sbjh21in particular, if `count' is greater than 64, the result will be either 0 991.1Sbjh21or 1, depending on whether the concatenation of `a0' and `a1' is zero or 1001.1Sbjh21nonzero. The result is broken into two 32-bit pieces which are stored at 1011.1Sbjh21the locations pointed to by `z0Ptr' and `z1Ptr'. 1021.1Sbjh21------------------------------------------------------------------------------- 1031.1Sbjh21*/ 1041.1Sbjh21INLINE void 1051.1Sbjh21 shift64RightJamming( 1061.1Sbjh21 bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr ) 1071.1Sbjh21{ 1081.1Sbjh21 bits32 z0, z1; 1091.1Sbjh21 int8 negCount = ( - count ) & 31; 1101.1Sbjh21 1111.1Sbjh21 if ( count == 0 ) { 1121.1Sbjh21 z1 = a1; 1131.1Sbjh21 z0 = a0; 1141.1Sbjh21 } 1151.1Sbjh21 else if ( count < 32 ) { 1161.1Sbjh21 z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 ); 1171.1Sbjh21 z0 = a0>>count; 1181.1Sbjh21 } 1191.1Sbjh21 else { 1201.1Sbjh21 if ( count == 32 ) { 1211.1Sbjh21 z1 = a0 | ( a1 != 0 ); 1221.1Sbjh21 } 1231.1Sbjh21 else if ( count < 64 ) { 1241.1Sbjh21 z1 = ( a0>>( count & 31 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 ); 1251.1Sbjh21 } 1261.1Sbjh21 else { 1271.1Sbjh21 z1 = ( ( a0 | a1 ) != 0 ); 1281.1Sbjh21 } 1291.1Sbjh21 z0 = 0; 1301.1Sbjh21 } 1311.1Sbjh21 *z1Ptr = z1; 1321.1Sbjh21 *z0Ptr = z0; 1331.1Sbjh21 1341.1Sbjh21} 1351.1Sbjh21 1361.1Sbjh21/* 1371.1Sbjh21------------------------------------------------------------------------------- 1381.1Sbjh21Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right 1391.1Sbjh21by 32 _plus_ the number of bits given in `count'. The shifted result is 1401.1Sbjh21at most 64 nonzero bits; these are broken into two 32-bit pieces which are 1411.1Sbjh21stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted 1421.1Sbjh21off form a third 32-bit result as follows: The _last_ bit shifted off is 1431.1Sbjh21the most-significant bit of the extra result, and the other 31 bits of the 1441.1Sbjh21extra result are all zero if and only if _all_but_the_last_ bits shifted off 1451.1Sbjh21were all zero. This extra result is stored in the location pointed to by 1461.1Sbjh21`z2Ptr'. The value of `count' can be arbitrarily large. 1471.1Sbjh21 (This routine makes more sense if `a0', `a1', and `a2' are considered 1481.1Sbjh21to form a fixed-point value with binary point between `a1' and `a2'. This 1491.1Sbjh21fixed-point value is shifted right by the number of bits given in `count', 1501.1Sbjh21and the integer part of the result is returned at the locations pointed to 1511.1Sbjh21by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly 1521.1Sbjh21corrupted as described above, and is returned at the location pointed to by 1531.1Sbjh21`z2Ptr'.) 1541.1Sbjh21------------------------------------------------------------------------------- 1551.1Sbjh21*/ 1561.1Sbjh21INLINE void 1571.1Sbjh21 shift64ExtraRightJamming( 1581.1Sbjh21 bits32 a0, 1591.1Sbjh21 bits32 a1, 1601.1Sbjh21 bits32 a2, 1611.1Sbjh21 int16 count, 1621.1Sbjh21 bits32 *z0Ptr, 1631.1Sbjh21 bits32 *z1Ptr, 1641.1Sbjh21 bits32 *z2Ptr 1651.1Sbjh21 ) 1661.1Sbjh21{ 1671.1Sbjh21 bits32 z0, z1, z2; 1681.1Sbjh21 int8 negCount = ( - count ) & 31; 1691.1Sbjh21 1701.1Sbjh21 if ( count == 0 ) { 1711.1Sbjh21 z2 = a2; 1721.1Sbjh21 z1 = a1; 1731.1Sbjh21 z0 = a0; 1741.1Sbjh21 } 1751.1Sbjh21 else { 1761.1Sbjh21 if ( count < 32 ) { 1771.1Sbjh21 z2 = a1<<negCount; 1781.1Sbjh21 z1 = ( a0<<negCount ) | ( a1>>count ); 1791.1Sbjh21 z0 = a0>>count; 1801.1Sbjh21 } 1811.1Sbjh21 else { 1821.1Sbjh21 if ( count == 32 ) { 1831.1Sbjh21 z2 = a1; 1841.1Sbjh21 z1 = a0; 1851.1Sbjh21 } 1861.1Sbjh21 else { 1871.1Sbjh21 a2 |= a1; 1881.1Sbjh21 if ( count < 64 ) { 1891.1Sbjh21 z2 = a0<<negCount; 1901.1Sbjh21 z1 = a0>>( count & 31 ); 1911.1Sbjh21 } 1921.1Sbjh21 else { 1931.1Sbjh21 z2 = ( count == 64 ) ? a0 : ( a0 != 0 ); 1941.1Sbjh21 z1 = 0; 1951.1Sbjh21 } 1961.1Sbjh21 } 1971.1Sbjh21 z0 = 0; 1981.1Sbjh21 } 1991.1Sbjh21 z2 |= ( a2 != 0 ); 2001.1Sbjh21 } 2011.1Sbjh21 *z2Ptr = z2; 2021.1Sbjh21 *z1Ptr = z1; 2031.1Sbjh21 *z0Ptr = z0; 2041.1Sbjh21 2051.1Sbjh21} 2061.1Sbjh21 2071.1Sbjh21/* 2081.1Sbjh21------------------------------------------------------------------------------- 2091.1Sbjh21Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the 2101.1Sbjh21number of bits given in `count'. Any bits shifted off are lost. The value 2111.1Sbjh21of `count' must be less than 32. The result is broken into two 32-bit 2121.1Sbjh21pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 2131.1Sbjh21------------------------------------------------------------------------------- 2141.1Sbjh21*/ 2151.1Sbjh21INLINE void 2161.1Sbjh21 shortShift64Left( 2171.1Sbjh21 bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr ) 2181.1Sbjh21{ 2191.1Sbjh21 2201.1Sbjh21 *z1Ptr = a1<<count; 2211.1Sbjh21 *z0Ptr = 2221.1Sbjh21 ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 31 ) ); 2231.1Sbjh21 2241.1Sbjh21} 2251.1Sbjh21 2261.1Sbjh21/* 2271.1Sbjh21------------------------------------------------------------------------------- 2281.1Sbjh21Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' left 2291.1Sbjh21by the number of bits given in `count'. Any bits shifted off are lost. 2301.1Sbjh21The value of `count' must be less than 32. The result is broken into three 2311.1Sbjh2132-bit pieces which are stored at the locations pointed to by `z0Ptr', 2321.1Sbjh21`z1Ptr', and `z2Ptr'. 2331.1Sbjh21------------------------------------------------------------------------------- 2341.1Sbjh21*/ 2351.1Sbjh21INLINE void 2361.1Sbjh21 shortShift96Left( 2371.1Sbjh21 bits32 a0, 2381.1Sbjh21 bits32 a1, 2391.1Sbjh21 bits32 a2, 2401.1Sbjh21 int16 count, 2411.1Sbjh21 bits32 *z0Ptr, 2421.1Sbjh21 bits32 *z1Ptr, 2431.1Sbjh21 bits32 *z2Ptr 2441.1Sbjh21 ) 2451.1Sbjh21{ 2461.1Sbjh21 bits32 z0, z1, z2; 2471.1Sbjh21 int8 negCount; 2481.1Sbjh21 2491.1Sbjh21 z2 = a2<<count; 2501.1Sbjh21 z1 = a1<<count; 2511.1Sbjh21 z0 = a0<<count; 2521.1Sbjh21 if ( 0 < count ) { 2531.1Sbjh21 negCount = ( ( - count ) & 31 ); 2541.1Sbjh21 z1 |= a2>>negCount; 2551.1Sbjh21 z0 |= a1>>negCount; 2561.1Sbjh21 } 2571.1Sbjh21 *z2Ptr = z2; 2581.1Sbjh21 *z1Ptr = z1; 2591.1Sbjh21 *z0Ptr = z0; 2601.1Sbjh21 2611.1Sbjh21} 2621.1Sbjh21 2631.1Sbjh21/* 2641.1Sbjh21------------------------------------------------------------------------------- 2651.1Sbjh21Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit 2661.1Sbjh21value formed by concatenating `b0' and `b1'. Addition is modulo 2^64, so 2671.1Sbjh21any carry out is lost. The result is broken into two 32-bit pieces which 2681.1Sbjh21are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 2691.1Sbjh21------------------------------------------------------------------------------- 2701.1Sbjh21*/ 2711.1Sbjh21INLINE void 2721.1Sbjh21 add64( 2731.1Sbjh21 bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr ) 2741.1Sbjh21{ 2751.1Sbjh21 bits32 z1; 2761.1Sbjh21 2771.1Sbjh21 z1 = a1 + b1; 2781.1Sbjh21 *z1Ptr = z1; 2791.1Sbjh21 *z0Ptr = a0 + b0 + ( z1 < a1 ); 2801.1Sbjh21 2811.1Sbjh21} 2821.1Sbjh21 2831.1Sbjh21/* 2841.1Sbjh21------------------------------------------------------------------------------- 2851.1Sbjh21Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the 2861.1Sbjh2196-bit value formed by concatenating `b0', `b1', and `b2'. Addition is 2871.1Sbjh21modulo 2^96, so any carry out is lost. The result is broken into three 2881.1Sbjh2132-bit pieces which are stored at the locations pointed to by `z0Ptr', 2891.1Sbjh21`z1Ptr', and `z2Ptr'. 2901.1Sbjh21------------------------------------------------------------------------------- 2911.1Sbjh21*/ 2921.1Sbjh21INLINE void 2931.1Sbjh21 add96( 2941.1Sbjh21 bits32 a0, 2951.1Sbjh21 bits32 a1, 2961.1Sbjh21 bits32 a2, 2971.1Sbjh21 bits32 b0, 2981.1Sbjh21 bits32 b1, 2991.1Sbjh21 bits32 b2, 3001.1Sbjh21 bits32 *z0Ptr, 3011.1Sbjh21 bits32 *z1Ptr, 3021.1Sbjh21 bits32 *z2Ptr 3031.1Sbjh21 ) 3041.1Sbjh21{ 3051.1Sbjh21 bits32 z0, z1, z2; 3061.1Sbjh21 int8 carry0, carry1; 3071.1Sbjh21 3081.1Sbjh21 z2 = a2 + b2; 3091.1Sbjh21 carry1 = ( z2 < a2 ); 3101.1Sbjh21 z1 = a1 + b1; 3111.1Sbjh21 carry0 = ( z1 < a1 ); 3121.1Sbjh21 z0 = a0 + b0; 3131.1Sbjh21 z1 += carry1; 3141.1Sbjh21 z0 += ( z1 < carry1 ); 3151.1Sbjh21 z0 += carry0; 3161.1Sbjh21 *z2Ptr = z2; 3171.1Sbjh21 *z1Ptr = z1; 3181.1Sbjh21 *z0Ptr = z0; 3191.1Sbjh21 3201.1Sbjh21} 3211.1Sbjh21 3221.1Sbjh21/* 3231.1Sbjh21------------------------------------------------------------------------------- 3241.1Sbjh21Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the 3251.1Sbjh2164-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo 3261.1Sbjh212^64, so any borrow out (carry out) is lost. The result is broken into two 3271.1Sbjh2132-bit pieces which are stored at the locations pointed to by `z0Ptr' and 3281.1Sbjh21`z1Ptr'. 3291.1Sbjh21------------------------------------------------------------------------------- 3301.1Sbjh21*/ 3311.1Sbjh21INLINE void 3321.1Sbjh21 sub64( 3331.1Sbjh21 bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr ) 3341.1Sbjh21{ 3351.1Sbjh21 3361.1Sbjh21 *z1Ptr = a1 - b1; 3371.1Sbjh21 *z0Ptr = a0 - b0 - ( a1 < b1 ); 3381.1Sbjh21 3391.1Sbjh21} 3401.1Sbjh21 3411.1Sbjh21/* 3421.1Sbjh21------------------------------------------------------------------------------- 3431.1Sbjh21Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from 3441.1Sbjh21the 96-bit value formed by concatenating `a0', `a1', and `a2'. Subtraction 3451.1Sbjh21is modulo 2^96, so any borrow out (carry out) is lost. The result is broken 3461.1Sbjh21into three 32-bit pieces which are stored at the locations pointed to by 3471.1Sbjh21`z0Ptr', `z1Ptr', and `z2Ptr'. 3481.1Sbjh21------------------------------------------------------------------------------- 3491.1Sbjh21*/ 3501.1Sbjh21INLINE void 3511.1Sbjh21 sub96( 3521.1Sbjh21 bits32 a0, 3531.1Sbjh21 bits32 a1, 3541.1Sbjh21 bits32 a2, 3551.1Sbjh21 bits32 b0, 3561.1Sbjh21 bits32 b1, 3571.1Sbjh21 bits32 b2, 3581.1Sbjh21 bits32 *z0Ptr, 3591.1Sbjh21 bits32 *z1Ptr, 3601.1Sbjh21 bits32 *z2Ptr 3611.1Sbjh21 ) 3621.1Sbjh21{ 3631.1Sbjh21 bits32 z0, z1, z2; 3641.1Sbjh21 int8 borrow0, borrow1; 3651.1Sbjh21 3661.1Sbjh21 z2 = a2 - b2; 3671.1Sbjh21 borrow1 = ( a2 < b2 ); 3681.1Sbjh21 z1 = a1 - b1; 3691.1Sbjh21 borrow0 = ( a1 < b1 ); 3701.1Sbjh21 z0 = a0 - b0; 3711.1Sbjh21 z0 -= ( z1 < borrow1 ); 3721.1Sbjh21 z1 -= borrow1; 3731.1Sbjh21 z0 -= borrow0; 3741.1Sbjh21 *z2Ptr = z2; 3751.1Sbjh21 *z1Ptr = z1; 3761.1Sbjh21 *z0Ptr = z0; 3771.1Sbjh21 3781.1Sbjh21} 3791.1Sbjh21 3801.1Sbjh21/* 3811.1Sbjh21------------------------------------------------------------------------------- 3821.1Sbjh21Multiplies `a' by `b' to obtain a 64-bit product. The product is broken 3831.1Sbjh21into two 32-bit pieces which are stored at the locations pointed to by 3841.1Sbjh21`z0Ptr' and `z1Ptr'. 3851.1Sbjh21------------------------------------------------------------------------------- 3861.1Sbjh21*/ 3871.1Sbjh21INLINE void mul32To64( bits32 a, bits32 b, bits32 *z0Ptr, bits32 *z1Ptr ) 3881.1Sbjh21{ 3891.1Sbjh21 bits16 aHigh, aLow, bHigh, bLow; 3901.1Sbjh21 bits32 z0, zMiddleA, zMiddleB, z1; 3911.1Sbjh21 3921.1Sbjh21 aLow = a; 3931.1Sbjh21 aHigh = a>>16; 3941.1Sbjh21 bLow = b; 3951.1Sbjh21 bHigh = b>>16; 3961.1Sbjh21 z1 = ( (bits32) aLow ) * bLow; 3971.1Sbjh21 zMiddleA = ( (bits32) aLow ) * bHigh; 3981.1Sbjh21 zMiddleB = ( (bits32) aHigh ) * bLow; 3991.1Sbjh21 z0 = ( (bits32) aHigh ) * bHigh; 4001.1Sbjh21 zMiddleA += zMiddleB; 4011.1Sbjh21 z0 += ( ( (bits32) ( zMiddleA < zMiddleB ) )<<16 ) + ( zMiddleA>>16 ); 4021.1Sbjh21 zMiddleA <<= 16; 4031.1Sbjh21 z1 += zMiddleA; 4041.1Sbjh21 z0 += ( z1 < zMiddleA ); 4051.1Sbjh21 *z1Ptr = z1; 4061.1Sbjh21 *z0Ptr = z0; 4071.1Sbjh21 4081.1Sbjh21} 4091.1Sbjh21 4101.1Sbjh21/* 4111.1Sbjh21------------------------------------------------------------------------------- 4121.1Sbjh21Multiplies the 64-bit value formed by concatenating `a0' and `a1' by `b' 4131.1Sbjh21to obtain a 96-bit product. The product is broken into three 32-bit pieces 4141.1Sbjh21which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and 4151.1Sbjh21`z2Ptr'. 4161.1Sbjh21------------------------------------------------------------------------------- 4171.1Sbjh21*/ 4181.1Sbjh21INLINE void 4191.1Sbjh21 mul64By32To96( 4201.1Sbjh21 bits32 a0, 4211.1Sbjh21 bits32 a1, 4221.1Sbjh21 bits32 b, 4231.1Sbjh21 bits32 *z0Ptr, 4241.1Sbjh21 bits32 *z1Ptr, 4251.1Sbjh21 bits32 *z2Ptr 4261.1Sbjh21 ) 4271.1Sbjh21{ 4281.1Sbjh21 bits32 z0, z1, z2, more1; 4291.1Sbjh21 4301.1Sbjh21 mul32To64( a1, b, &z1, &z2 ); 4311.1Sbjh21 mul32To64( a0, b, &z0, &more1 ); 4321.1Sbjh21 add64( z0, more1, 0, z1, &z0, &z1 ); 4331.1Sbjh21 *z2Ptr = z2; 4341.1Sbjh21 *z1Ptr = z1; 4351.1Sbjh21 *z0Ptr = z0; 4361.1Sbjh21 4371.1Sbjh21} 4381.1Sbjh21 4391.1Sbjh21/* 4401.1Sbjh21------------------------------------------------------------------------------- 4411.1Sbjh21Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the 4421.1Sbjh2164-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit 4431.1Sbjh21product. The product is broken into four 32-bit pieces which are stored at 4441.1Sbjh21the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. 4451.1Sbjh21------------------------------------------------------------------------------- 4461.1Sbjh21*/ 4471.1Sbjh21INLINE void 4481.1Sbjh21 mul64To128( 4491.1Sbjh21 bits32 a0, 4501.1Sbjh21 bits32 a1, 4511.1Sbjh21 bits32 b0, 4521.1Sbjh21 bits32 b1, 4531.1Sbjh21 bits32 *z0Ptr, 4541.1Sbjh21 bits32 *z1Ptr, 4551.1Sbjh21 bits32 *z2Ptr, 4561.1Sbjh21 bits32 *z3Ptr 4571.1Sbjh21 ) 4581.1Sbjh21{ 4591.1Sbjh21 bits32 z0, z1, z2, z3; 4601.1Sbjh21 bits32 more1, more2; 4611.1Sbjh21 4621.1Sbjh21 mul32To64( a1, b1, &z2, &z3 ); 4631.1Sbjh21 mul32To64( a1, b0, &z1, &more2 ); 4641.1Sbjh21 add64( z1, more2, 0, z2, &z1, &z2 ); 4651.1Sbjh21 mul32To64( a0, b0, &z0, &more1 ); 4661.1Sbjh21 add64( z0, more1, 0, z1, &z0, &z1 ); 4671.1Sbjh21 mul32To64( a0, b1, &more1, &more2 ); 4681.1Sbjh21 add64( more1, more2, 0, z2, &more1, &z2 ); 4691.1Sbjh21 add64( z0, z1, 0, more1, &z0, &z1 ); 4701.1Sbjh21 *z3Ptr = z3; 4711.1Sbjh21 *z2Ptr = z2; 4721.1Sbjh21 *z1Ptr = z1; 4731.1Sbjh21 *z0Ptr = z0; 4741.1Sbjh21 4751.1Sbjh21} 4761.1Sbjh21 4771.1Sbjh21/* 4781.1Sbjh21------------------------------------------------------------------------------- 4791.1Sbjh21Returns an approximation to the 32-bit integer quotient obtained by dividing 4801.1Sbjh21`b' into the 64-bit value formed by concatenating `a0' and `a1'. The 4811.1Sbjh21divisor `b' must be at least 2^31. If q is the exact quotient truncated 4821.1Sbjh21toward zero, the approximation returned lies between q and q + 2 inclusive. 4831.1Sbjh21If the exact quotient q is larger than 32 bits, the maximum positive 32-bit 4841.1Sbjh21unsigned integer is returned. 4851.1Sbjh21------------------------------------------------------------------------------- 4861.1Sbjh21*/ 4871.1Sbjh21static bits32 estimateDiv64To32( bits32 a0, bits32 a1, bits32 b ) 4881.1Sbjh21{ 4891.1Sbjh21 bits32 b0, b1; 4901.1Sbjh21 bits32 rem0, rem1, term0, term1; 4911.1Sbjh21 bits32 z; 4921.1Sbjh21 4931.1Sbjh21 if ( b <= a0 ) return 0xFFFFFFFF; 4941.1Sbjh21 b0 = b>>16; 4951.1Sbjh21 z = ( b0<<16 <= a0 ) ? 0xFFFF0000 : ( a0 / b0 )<<16; 4961.1Sbjh21 mul32To64( b, z, &term0, &term1 ); 4971.1Sbjh21 sub64( a0, a1, term0, term1, &rem0, &rem1 ); 4981.1Sbjh21 while ( ( (sbits32) rem0 ) < 0 ) { 4991.1Sbjh21 z -= 0x10000; 5001.1Sbjh21 b1 = b<<16; 5011.1Sbjh21 add64( rem0, rem1, b0, b1, &rem0, &rem1 ); 5021.1Sbjh21 } 5031.1Sbjh21 rem0 = ( rem0<<16 ) | ( rem1>>16 ); 5041.1Sbjh21 z |= ( b0<<16 <= rem0 ) ? 0xFFFF : rem0 / b0; 5051.1Sbjh21 return z; 5061.1Sbjh21 5071.1Sbjh21} 5081.1Sbjh21 5091.1Sbjh21#ifndef SOFTFLOAT_FOR_GCC 5101.1Sbjh21/* 5111.1Sbjh21------------------------------------------------------------------------------- 5121.1Sbjh21Returns an approximation to the square root of the 32-bit significand given 5131.1Sbjh21by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of 5141.1Sbjh21`aExp' (the least significant bit) is 1, the integer returned approximates 5151.1Sbjh212^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' 5161.1Sbjh21is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either 5171.1Sbjh21case, the approximation returned lies strictly within +/-2 of the exact 5181.1Sbjh21value. 5191.1Sbjh21------------------------------------------------------------------------------- 5201.1Sbjh21*/ 5211.1Sbjh21static bits32 estimateSqrt32( int16 aExp, bits32 a ) 5221.1Sbjh21{ 5231.1Sbjh21 static const bits16 sqrtOddAdjustments[] = { 5241.1Sbjh21 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, 5251.1Sbjh21 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 5261.1Sbjh21 }; 5271.1Sbjh21 static const bits16 sqrtEvenAdjustments[] = { 5281.1Sbjh21 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, 5291.1Sbjh21 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 5301.1Sbjh21 }; 5311.1Sbjh21 int8 index; 5321.1Sbjh21 bits32 z; 5331.1Sbjh21 5341.1Sbjh21 index = ( a>>27 ) & 15; 5351.1Sbjh21 if ( aExp & 1 ) { 5361.1Sbjh21 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ]; 5371.1Sbjh21 z = ( ( a / z )<<14 ) + ( z<<15 ); 5381.1Sbjh21 a >>= 1; 5391.1Sbjh21 } 5401.1Sbjh21 else { 5411.1Sbjh21 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ]; 5421.1Sbjh21 z = a / z + z; 5431.1Sbjh21 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); 5441.1Sbjh21 if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 ); 5451.1Sbjh21 } 5461.1Sbjh21 return ( ( estimateDiv64To32( a, 0, z ) )>>1 ) + ( z>>1 ); 5471.1Sbjh21 5481.1Sbjh21} 5491.1Sbjh21#endif 5501.1Sbjh21 5511.1Sbjh21/* 5521.1Sbjh21------------------------------------------------------------------------------- 5531.1Sbjh21Returns the number of leading 0 bits before the most-significant 1 bit of 5541.1Sbjh21`a'. If `a' is zero, 32 is returned. 5551.1Sbjh21------------------------------------------------------------------------------- 5561.1Sbjh21*/ 5571.1Sbjh21static int8 countLeadingZeros32( bits32 a ) 5581.1Sbjh21{ 5591.1Sbjh21 static const int8 countLeadingZerosHigh[] = { 5601.1Sbjh21 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 5611.1Sbjh21 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5621.1Sbjh21 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5631.1Sbjh21 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5641.1Sbjh21 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5651.1Sbjh21 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5661.1Sbjh21 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5671.1Sbjh21 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5681.1Sbjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5691.1Sbjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5701.1Sbjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5711.1Sbjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5721.1Sbjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5731.1Sbjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5741.1Sbjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5751.1Sbjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 5761.1Sbjh21 }; 5771.1Sbjh21 int8 shiftCount; 5781.1Sbjh21 5791.1Sbjh21 shiftCount = 0; 5801.1Sbjh21 if ( a < 0x10000 ) { 5811.1Sbjh21 shiftCount += 16; 5821.1Sbjh21 a <<= 16; 5831.1Sbjh21 } 5841.1Sbjh21 if ( a < 0x1000000 ) { 5851.1Sbjh21 shiftCount += 8; 5861.1Sbjh21 a <<= 8; 5871.1Sbjh21 } 5881.1Sbjh21 shiftCount += countLeadingZerosHigh[ a>>24 ]; 5891.1Sbjh21 return shiftCount; 5901.1Sbjh21 5911.1Sbjh21} 5921.1Sbjh21 5931.1Sbjh21/* 5941.1Sbjh21------------------------------------------------------------------------------- 5951.1Sbjh21Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is 5961.1Sbjh21equal to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise, 5971.1Sbjh21returns 0. 5981.1Sbjh21------------------------------------------------------------------------------- 5991.1Sbjh21*/ 6001.1Sbjh21INLINE flag eq64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) 6011.1Sbjh21{ 6021.1Sbjh21 6031.1Sbjh21 return ( a0 == b0 ) && ( a1 == b1 ); 6041.1Sbjh21 6051.1Sbjh21} 6061.1Sbjh21 6071.1Sbjh21/* 6081.1Sbjh21------------------------------------------------------------------------------- 6091.1Sbjh21Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less 6101.1Sbjh21than or equal to the 64-bit value formed by concatenating `b0' and `b1'. 6111.1Sbjh21Otherwise, returns 0. 6121.1Sbjh21------------------------------------------------------------------------------- 6131.1Sbjh21*/ 6141.1Sbjh21INLINE flag le64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) 6151.1Sbjh21{ 6161.1Sbjh21 6171.1Sbjh21 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); 6181.1Sbjh21 6191.1Sbjh21} 6201.1Sbjh21 6211.1Sbjh21/* 6221.1Sbjh21------------------------------------------------------------------------------- 6231.1Sbjh21Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less 6241.1Sbjh21than the 64-bit value formed by concatenating `b0' and `b1'. Otherwise, 6251.1Sbjh21returns 0. 6261.1Sbjh21------------------------------------------------------------------------------- 6271.1Sbjh21*/ 6281.1Sbjh21INLINE flag lt64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) 6291.1Sbjh21{ 6301.1Sbjh21 6311.1Sbjh21 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); 6321.1Sbjh21 6331.1Sbjh21} 6341.1Sbjh21 6351.1Sbjh21/* 6361.1Sbjh21------------------------------------------------------------------------------- 6371.1Sbjh21Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is not 6381.1Sbjh21equal to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise, 6391.1Sbjh21returns 0. 6401.1Sbjh21------------------------------------------------------------------------------- 6411.1Sbjh21*/ 6421.1Sbjh21INLINE flag ne64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) 6431.1Sbjh21{ 6441.1Sbjh21 6451.1Sbjh21 return ( a0 != b0 ) || ( a1 != b1 ); 6461.1Sbjh21 6471.1Sbjh21} 6481.1Sbjh21 649