lib/libkern/softfloat-macros.h

1.1  ross /* $NetBSD: softfloat-macros.h,v 1.1 2001/04/26 03:10:47 ross Exp $ */
1.1  ross
1.1  ross /*
1.1  ross ===============================================================================
1.1  ross
1.1  ross This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
1.1  ross Arithmetic Package, Release 2a.
1.1  ross
1.1  ross Written by John R. Hauser.  This work was made possible in part by the
1.1  ross International Computer Science Institute, located at Suite 600, 1947 Center
1.1  ross Street, Berkeley, California 94704.  Funding was partially provided by the
1.1  ross National Science Foundation under grant MIP-9311980.  The original version
1.1  ross of this code was written as part of a project to build a fixed-point vector
1.1  ross processor in collaboration with the University of California at Berkeley,
1.1  ross overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
1.1  ross is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
1.1  ross arithmetic/SoftFloat.html'.
1.1  ross
1.1  ross THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
1.1  ross has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
1.1  ross TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
1.1  ross PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
1.1  ross AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
1.1  ross
1.1  ross Derivative works are acceptable, even for commercial purposes, so long as
1.1  ross (1) they include prominent notice that the work is derivative, and (2) they
1.1  ross include prominent notice akin to these four paragraphs for those parts of
1.1  ross this code that are retained.
1.1  ross
1.1  ross ===============================================================================
1.1  ross */
1.1  ross
1.1  ross /*
1.1  ross -------------------------------------------------------------------------------
1.1  ross Shifts `a' right by the number of bits given in `count'.  If any nonzero
1.1  ross bits are shifted off, they are ``jammed'' into the least significant bit of
1.1  ross the result by setting the least significant bit to 1.  The value of `count'
1.1  ross can be arbitrarily large; in particular, if `count' is greater than 32, the
1.1  ross result will be either 0 or 1, depending on whether `a' is zero or nonzero.
1.1  ross The result is stored in the location pointed to by `zPtr'.
1.1  ross -------------------------------------------------------------------------------
1.1  ross */
1.1  ross INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
1.1  ross {
1.1  ross     bits32 z;
1.1  ross
1.1  ross     if ( count == 0 ) {
1.1  ross         z = a;
1.1  ross     }
1.1  ross     else if ( count < 32 ) {
1.1  ross         z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
1.1  ross     }
1.1  ross     else {
1.1  ross         z = ( a != 0 );
1.1  ross     }
1.1  ross     *zPtr = z;
1.1  ross
1.1  ross }
1.1  ross
1.1  ross /*
1.1  ross -------------------------------------------------------------------------------
1.1  ross Shifts `a' right by the number of bits given in `count'.  If any nonzero
1.1  ross bits are shifted off, they are ``jammed'' into the least significant bit of
1.1  ross the result by setting the least significant bit to 1.  The value of `count'
1.1  ross can be arbitrarily large; in particular, if `count' is greater than 64, the
1.1  ross result will be either 0 or 1, depending on whether `a' is zero or nonzero.
1.1  ross The result is stored in the location pointed to by `zPtr'.
1.1  ross -------------------------------------------------------------------------------
1.1  ross */
1.1  ross INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
1.1  ross {
1.1  ross     bits64 z;
1.1  ross
1.1  ross     if ( count == 0 ) {
1.1  ross         z = a;
1.1  ross     }
1.1  ross     else if ( count < 64 ) {
1.1  ross         z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
1.1  ross     }
1.1  ross     else {
1.1  ross         z = ( a != 0 );
1.1  ross     }
1.1  ross     *zPtr = z;
1.1  ross
1.1  ross }
1.1  ross
1.1  ross /*
1.1  ross -------------------------------------------------------------------------------
1.1  ross Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
1.1  ross _plus_ the number of bits given in `count'.  The shifted result is at most
1.1  ross 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
1.1  ross bits shifted off form a second 64-bit result as follows:  The _last_ bit
1.1  ross shifted off is the most-significant bit of the extra result, and the other
1.1  ross 63 bits of the extra result are all zero if and only if _all_but_the_last_
1.1  ross bits shifted off were all zero.  This extra result is stored in the location
1.1  ross pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
1.1  ross     (This routine makes more sense if `a0' and `a1' are considered to form a
1.1  ross fixed-point value with binary point between `a0' and `a1'.  This fixed-point
1.1  ross value is shifted right by the number of bits given in `count', and the
1.1  ross integer part of the result is returned at the location pointed to by
1.1  ross `z0Ptr'.  The fractional part of the result may be slightly corrupted as
1.1  ross described above, and is returned at the location pointed to by `z1Ptr'.)
1.1  ross -------------------------------------------------------------------------------
1.1  ross */
1.1  ross INLINE void
1.1  ross  shift64ExtraRightJamming(
1.1  ross      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
1.1  ross {
1.1  ross     bits64 z0, z1;
1.1  ross     int8 negCount = ( - count ) & 63;
1.1  ross
1.1  ross     if ( count == 0 ) {
1.1  ross         z1 = a1;
1.1  ross         z0 = a0;
1.1  ross     }
1.1  ross     else if ( count < 64 ) {
1.1  ross         z1 = ( a0<<negCount ) | ( a1 != 0 );
1.1  ross         z0 = a0>>count;
1.1  ross     }
1.1  ross     else {
1.1  ross         if ( count == 64 ) {
1.1  ross             z1 = a0 | ( a1 != 0 );
1.1  ross         }
1.1  ross         else {
1.1  ross             z1 = ( ( a0 | a1 ) != 0 );
1.1  ross         }
1.1  ross         z0 = 0;
1.1  ross     }
1.1  ross     *z1Ptr = z1;
1.1  ross     *z0Ptr = z0;
1.1  ross
1.1  ross }
1.1  ross
1.1  ross /*
1.1  ross -------------------------------------------------------------------------------
1.1  ross Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
1.1  ross number of bits given in `count'.  Any bits shifted off are lost.  The value
1.1  ross of `count' can be arbitrarily large; in particular, if `count' is greater
1.1  ross than 128, the result will be 0.  The result is broken into two 64-bit pieces
1.1  ross which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
1.1  ross -------------------------------------------------------------------------------
1.1  ross */
1.1  ross INLINE void
1.1  ross  shift128Right(
1.1  ross      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
1.1  ross {
1.1  ross     bits64 z0, z1;
1.1  ross     int8 negCount = ( - count ) & 63;
1.1  ross
1.1  ross     if ( count == 0 ) {
1.1  ross         z1 = a1;
1.1  ross         z0 = a0;
1.1  ross     }
1.1  ross     else if ( count < 64 ) {
1.1  ross         z1 = ( a0<<negCount ) | ( a1>>count );
1.1  ross         z0 = a0>>count;
1.1  ross     }
1.1  ross     else {
1.1  ross         z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
1.1  ross         z0 = 0;
1.1  ross     }
1.1  ross     *z1Ptr = z1;
1.1  ross     *z0Ptr = z0;
1.1  ross
1.1  ross }
1.1  ross
1.1  ross /*
1.1  ross -------------------------------------------------------------------------------
1.1  ross Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
1.1  ross number of bits given in `count'.  If any nonzero bits are shifted off, they
1.1  ross are ``jammed'' into the least significant bit of the result by setting the
1.1  ross least significant bit to 1.  The value of `count' can be arbitrarily large;
1.1  ross in particular, if `count' is greater than 128, the result will be either
1.1  ross 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
1.1  ross nonzero.  The result is broken into two 64-bit pieces which are stored at
1.1  ross the locations pointed to by `z0Ptr' and `z1Ptr'.
1.1  ross -------------------------------------------------------------------------------
1.1  ross */
1.1  ross INLINE void
1.1  ross  shift128RightJamming(
1.1  ross      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
1.1  ross {
1.1  ross     bits64 z0, z1;
1.1  ross     int8 negCount = ( - count ) & 63;
1.1  ross
1.1  ross     if ( count == 0 ) {
1.1  ross         z1 = a1;
1.1  ross         z0 = a0;
1.1  ross     }
1.1  ross     else if ( count < 64 ) {
1.1  ross         z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
1.1  ross         z0 = a0>>count;
1.1  ross     }
1.1  ross     else {
1.1  ross         if ( count == 64 ) {
1.1  ross             z1 = a0 | ( a1 != 0 );
1.1  ross         }
1.1  ross         else if ( count < 128 ) {
1.1  ross             z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
1.1  ross         }
1.1  ross         else {
1.1  ross             z1 = ( ( a0 | a1 ) != 0 );
1.1  ross         }
1.1  ross         z0 = 0;
1.1  ross     }
1.1  ross     *z1Ptr = z1;
1.1  ross     *z0Ptr = z0;
1.1  ross
1.1  ross }
1.1  ross
1.1  ross /*
1.1  ross -------------------------------------------------------------------------------
1.1  ross Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
1.1  ross by 64 _plus_ the number of bits given in `count'.  The shifted result is
1.1  ross at most 128 nonzero bits; these are broken into two 64-bit pieces which are
1.1  ross stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
1.1  ross off form a third 64-bit result as follows:  The _last_ bit shifted off is
1.1  ross the most-significant bit of the extra result, and the other 63 bits of the
1.1  ross extra result are all zero if and only if _all_but_the_last_ bits shifted off
1.1  ross were all zero.  This extra result is stored in the location pointed to by
1.1  ross `z2Ptr'.  The value of `count' can be arbitrarily large.
1.1  ross     (This routine makes more sense if `a0', `a1', and `a2' are considered
1.1  ross to form a fixed-point value with binary point between `a1' and `a2'.  This
1.1  ross fixed-point value is shifted right by the number of bits given in `count',
1.1  ross and the integer part of the result is returned at the locations pointed to
1.1  ross by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
1.1  ross corrupted as described above, and is returned at the location pointed to by
1.1  ross `z2Ptr'.)
1.1  ross -------------------------------------------------------------------------------
1.1  ross */
1.1  ross INLINE void
1.1  ross  shift128ExtraRightJamming(
1.1  ross      bits64 a0,
1.1  ross      bits64 a1,
1.1  ross      bits64 a2,
1.1  ross      int16 count,
1.1  ross      bits64 *z0Ptr,
1.1  ross      bits64 *z1Ptr,
1.1  ross      bits64 *z2Ptr
1.1  ross  )
1.1  ross {
1.1  ross     bits64 z0, z1, z2;
1.1  ross     int8 negCount = ( - count ) & 63;
1.1  ross
1.1  ross     if ( count == 0 ) {
1.1  ross         z2 = a2;
1.1  ross         z1 = a1;
1.1  ross         z0 = a0;
1.1  ross     }
1.1  ross     else {
1.1  ross         if ( count < 64 ) {
1.1  ross             z2 = a1<<negCount;
1.1  ross             z1 = ( a0<<negCount ) | ( a1>>count );
1.1  ross             z0 = a0>>count;
1.1  ross         }
1.1  ross         else {
1.1  ross             if ( count == 64 ) {
1.1  ross                 z2 = a1;
1.1  ross                 z1 = a0;
1.1  ross             }
1.1  ross             else {
1.1  ross                 a2 |= a1;
1.1  ross                 if ( count < 128 ) {
1.1  ross                     z2 = a0<<negCount;
1.1  ross                     z1 = a0>>( count & 63 );
1.1  ross                 }
1.1  ross                 else {
1.1  ross                     z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
1.1  ross                     z1 = 0;
1.1  ross                 }
1.1  ross             }
1.1  ross             z0 = 0;
1.1  ross         }
1.1  ross         z2 |= ( a2 != 0 );
1.1  ross     }
1.1  ross     *z2Ptr = z2;
1.1  ross     *z1Ptr = z1;
1.1  ross     *z0Ptr = z0;
1.1  ross
1.1  ross }
1.1  ross
1.1  ross /*
1.1  ross -------------------------------------------------------------------------------
1.1  ross Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
1.1  ross number of bits given in `count'.  Any bits shifted off are lost.  The value
1.1  ross of `count' must be less than 64.  The result is broken into two 64-bit
1.1  ross pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
1.1  ross -------------------------------------------------------------------------------
1.1  ross */
1.1  ross INLINE void
1.1  ross  shortShift128Left(
1.1  ross      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
1.1  ross {
1.1  ross
1.1  ross     *z1Ptr = a1<<count;
1.1  ross     *z0Ptr =
1.1  ross         ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
1.1  ross
1.1  ross }
1.1  ross
1.1  ross /*
1.1  ross -------------------------------------------------------------------------------
1.1  ross Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
1.1  ross by the number of bits given in `count'.  Any bits shifted off are lost.
1.1  ross The value of `count' must be less than 64.  The result is broken into three
1.1  ross 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
1.1  ross `z1Ptr', and `z2Ptr'.
1.1  ross -------------------------------------------------------------------------------
1.1  ross */
1.1  ross INLINE void
1.1  ross  shortShift192Left(
1.1  ross      bits64 a0,
1.1  ross      bits64 a1,
1.1  ross      bits64 a2,
1.1  ross      int16 count,
1.1  ross      bits64 *z0Ptr,
1.1  ross      bits64 *z1Ptr,
1.1  ross      bits64 *z2Ptr
1.1  ross  )
1.1  ross {
1.1  ross     bits64 z0, z1, z2;
1.1  ross     int8 negCount;
1.1  ross
1.1  ross     z2 = a2<<count;
1.1  ross     z1 = a1<<count;
1.1  ross     z0 = a0<<count;
1.1  ross     if ( 0 < count ) {
1.1  ross         negCount = ( ( - count ) & 63 );
1.1  ross         z1 |= a2>>negCount;
1.1  ross         z0 |= a1>>negCount;
1.1  ross     }
1.1  ross     *z2Ptr = z2;
1.1  ross     *z1Ptr = z1;
1.1  ross     *z0Ptr = z0;
1.1  ross
1.1  ross }
1.1  ross
1.1  ross /*
1.1  ross -------------------------------------------------------------------------------
1.1  ross Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
1.1  ross value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
1.1  ross any carry out is lost.  The result is broken into two 64-bit pieces which
1.1  ross are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
1.1  ross -------------------------------------------------------------------------------
1.1  ross */
1.1  ross INLINE void
1.1  ross  add128(
1.1  ross      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
1.1  ross {
1.1  ross     bits64 z1;
1.1  ross
1.1  ross     z1 = a1 + b1;
1.1  ross     *z1Ptr = z1;
1.1  ross     *z0Ptr = a0 + b0 + ( z1 < a1 );
1.1  ross
1.1  ross }
1.1  ross
1.1  ross /*
1.1  ross -------------------------------------------------------------------------------
1.1  ross Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
1.1  ross 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
1.1  ross modulo 2^192, so any carry out is lost.  The result is broken into three
1.1  ross 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
1.1  ross `z1Ptr', and `z2Ptr'.
1.1  ross -------------------------------------------------------------------------------
1.1  ross */
1.1  ross INLINE void
1.1  ross  add192(
1.1  ross      bits64 a0,
1.1  ross      bits64 a1,
1.1  ross      bits64 a2,
1.1  ross      bits64 b0,
1.1  ross      bits64 b1,
1.1  ross      bits64 b2,
1.1  ross      bits64 *z0Ptr,
1.1  ross      bits64 *z1Ptr,
1.1  ross      bits64 *z2Ptr
1.1  ross  )
1.1  ross {
1.1  ross     bits64 z0, z1, z2;
1.1  ross     int8 carry0, carry1;
1.1  ross
1.1  ross     z2 = a2 + b2;
1.1  ross     carry1 = ( z2 < a2 );
1.1  ross     z1 = a1 + b1;
1.1  ross     carry0 = ( z1 < a1 );
1.1  ross     z0 = a0 + b0;
1.1  ross     z1 += carry1;
1.1  ross     z0 += ( z1 < carry1 );
1.1  ross     z0 += carry0;
1.1  ross     *z2Ptr = z2;
1.1  ross     *z1Ptr = z1;
1.1  ross     *z0Ptr = z0;
1.1  ross
1.1  ross }
1.1  ross
1.1  ross /*
1.1  ross -------------------------------------------------------------------------------
1.1  ross Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
1.1  ross 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
1.1  ross 2^128, so any borrow out (carry out) is lost.  The result is broken into two
1.1  ross 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
1.1  ross `z1Ptr'.
1.1  ross -------------------------------------------------------------------------------
1.1  ross */
1.1  ross INLINE void
1.1  ross  sub128(
1.1  ross      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
1.1  ross {
1.1  ross
1.1  ross     *z1Ptr = a1 - b1;
1.1  ross     *z0Ptr = a0 - b0 - ( a1 < b1 );
1.1  ross
1.1  ross }
1.1  ross
1.1  ross /*
1.1  ross -------------------------------------------------------------------------------
1.1  ross Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
1.1  ross from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
1.1  ross Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
1.1  ross result is broken into three 64-bit pieces which are stored at the locations
1.1  ross pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
1.1  ross -------------------------------------------------------------------------------
1.1  ross */
1.1  ross INLINE void
1.1  ross  sub192(
1.1  ross      bits64 a0,
1.1  ross      bits64 a1,
1.1  ross      bits64 a2,
1.1  ross      bits64 b0,
1.1  ross      bits64 b1,
1.1  ross      bits64 b2,
1.1  ross      bits64 *z0Ptr,
1.1  ross      bits64 *z1Ptr,
1.1  ross      bits64 *z2Ptr
1.1  ross  )
1.1  ross {
1.1  ross     bits64 z0, z1, z2;
1.1  ross     int8 borrow0, borrow1;
1.1  ross
1.1  ross     z2 = a2 - b2;
1.1  ross     borrow1 = ( a2 < b2 );
1.1  ross     z1 = a1 - b1;
1.1  ross     borrow0 = ( a1 < b1 );
1.1  ross     z0 = a0 - b0;
1.1  ross     z0 -= ( z1 < borrow1 );
1.1  ross     z1 -= borrow1;
1.1  ross     z0 -= borrow0;
1.1  ross     *z2Ptr = z2;
1.1  ross     *z1Ptr = z1;
1.1  ross     *z0Ptr = z0;
1.1  ross
1.1  ross }
1.1  ross
1.1  ross /*
1.1  ross -------------------------------------------------------------------------------
1.1  ross Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
1.1  ross into two 64-bit pieces which are stored at the locations pointed to by
1.1  ross `z0Ptr' and `z1Ptr'.
1.1  ross -------------------------------------------------------------------------------
1.1  ross */
1.1  ross INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
1.1  ross {
1.1  ross     bits32 aHigh, aLow, bHigh, bLow;
1.1  ross     bits64 z0, zMiddleA, zMiddleB, z1;
1.1  ross
1.1  ross     aLow = a;
1.1  ross     aHigh = a>>32;
1.1  ross     bLow = b;
1.1  ross     bHigh = b>>32;
1.1  ross     z1 = ( (bits64) aLow ) * bLow;
1.1  ross     zMiddleA = ( (bits64) aLow ) * bHigh;
1.1  ross     zMiddleB = ( (bits64) aHigh ) * bLow;
1.1  ross     z0 = ( (bits64) aHigh ) * bHigh;
1.1  ross     zMiddleA += zMiddleB;
1.1  ross     z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
1.1  ross     zMiddleA <<= 32;
1.1  ross     z1 += zMiddleA;
1.1  ross     z0 += ( z1 < zMiddleA );
1.1  ross     *z1Ptr = z1;
1.1  ross     *z0Ptr = z0;
1.1  ross
1.1  ross }
1.1  ross
1.1  ross /*
1.1  ross -------------------------------------------------------------------------------
1.1  ross Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
1.1  ross `b' to obtain a 192-bit product.  The product is broken into three 64-bit
1.1  ross pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
1.1  ross `z2Ptr'.
1.1  ross -------------------------------------------------------------------------------
1.1  ross */
1.1  ross INLINE void
1.1  ross  mul128By64To192(
1.1  ross      bits64 a0,
1.1  ross      bits64 a1,
1.1  ross      bits64 b,
1.1  ross      bits64 *z0Ptr,
1.1  ross      bits64 *z1Ptr,
1.1  ross      bits64 *z2Ptr
1.1  ross  )
1.1  ross {
1.1  ross     bits64 z0, z1, z2, more1;
1.1  ross
1.1  ross     mul64To128( a1, b, &z1, &z2 );
1.1  ross     mul64To128( a0, b, &z0, &more1 );
1.1  ross     add128( z0, more1, 0, z1, &z0, &z1 );
1.1  ross     *z2Ptr = z2;
1.1  ross     *z1Ptr = z1;
1.1  ross     *z0Ptr = z0;
1.1  ross
1.1  ross }
1.1  ross
1.1  ross /*
1.1  ross -------------------------------------------------------------------------------
1.1  ross Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
1.1  ross 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
1.1  ross product.  The product is broken into four 64-bit pieces which are stored at
1.1  ross the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
1.1  ross -------------------------------------------------------------------------------
1.1  ross */
1.1  ross INLINE void
1.1  ross  mul128To256(
1.1  ross      bits64 a0,
1.1  ross      bits64 a1,
1.1  ross      bits64 b0,
1.1  ross      bits64 b1,
1.1  ross      bits64 *z0Ptr,
1.1  ross      bits64 *z1Ptr,
1.1  ross      bits64 *z2Ptr,
1.1  ross      bits64 *z3Ptr
1.1  ross  )
1.1  ross {
1.1  ross     bits64 z0, z1, z2, z3;
1.1  ross     bits64 more1, more2;
1.1  ross
1.1  ross     mul64To128( a1, b1, &z2, &z3 );
1.1  ross     mul64To128( a1, b0, &z1, &more2 );
1.1  ross     add128( z1, more2, 0, z2, &z1, &z2 );
1.1  ross     mul64To128( a0, b0, &z0, &more1 );
1.1  ross     add128( z0, more1, 0, z1, &z0, &z1 );
1.1  ross     mul64To128( a0, b1, &more1, &more2 );
1.1  ross     add128( more1, more2, 0, z2, &more1, &z2 );
1.1  ross     add128( z0, z1, 0, more1, &z0, &z1 );
1.1  ross     *z3Ptr = z3;
1.1  ross     *z2Ptr = z2;
1.1  ross     *z1Ptr = z1;
1.1  ross     *z0Ptr = z0;
1.1  ross
1.1  ross }
1.1  ross
1.1  ross /*
1.1  ross -------------------------------------------------------------------------------
1.1  ross Returns an approximation to the 64-bit integer quotient obtained by dividing
1.1  ross `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
1.1  ross divisor `b' must be at least 2^63.  If q is the exact quotient truncated
1.1  ross toward zero, the approximation returned lies between q and q + 2 inclusive.
1.1  ross If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
1.1  ross unsigned integer is returned.
1.1  ross -------------------------------------------------------------------------------
1.1  ross */
1.1  ross static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
1.1  ross {
1.1  ross     bits64 b0, b1;
1.1  ross     bits64 rem0, rem1, term0, term1;
1.1  ross     bits64 z;
1.1  ross
1.1  ross     if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
1.1  ross     b0 = b>>32;
1.1  ross     z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
1.1  ross     mul64To128( b, z, &term0, &term1 );
1.1  ross     sub128( a0, a1, term0, term1, &rem0, &rem1 );
1.1  ross     while ( ( (sbits64) rem0 ) < 0 ) {
1.1  ross         z -= LIT64( 0x100000000 );
1.1  ross         b1 = b<<32;
1.1  ross         add128( rem0, rem1, b0, b1, &rem0, &rem1 );
1.1  ross     }
1.1  ross     rem0 = ( rem0<<32 ) | ( rem1>>32 );
1.1  ross     z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
1.1  ross     return z;
1.1  ross
1.1  ross }
1.1  ross
1.1  ross #ifndef SOFTFLOAT_FOR_GCC /* Not used */
1.1  ross /*
1.1  ross -------------------------------------------------------------------------------
1.1  ross Returns an approximation to the square root of the 32-bit significand given
1.1  ross by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
1.1  ross `aExp' (the least significant bit) is 1, the integer returned approximates
1.1  ross 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
1.1  ross is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
1.1  ross case, the approximation returned lies strictly within +/-2 of the exact
1.1  ross value.
1.1  ross -------------------------------------------------------------------------------
1.1  ross */
1.1  ross static bits32 estimateSqrt32( int16 aExp, bits32 a )
1.1  ross {
1.1  ross     static const bits16 sqrtOddAdjustments[] = {
1.1  ross         0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
1.1  ross         0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
1.1  ross     };
1.1  ross     static const bits16 sqrtEvenAdjustments[] = {
1.1  ross         0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
1.1  ross         0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
1.1  ross     };
1.1  ross     int8 index;
1.1  ross     bits32 z;
1.1  ross
1.1  ross     index = ( a>>27 ) & 15;
1.1  ross     if ( aExp & 1 ) {
1.1  ross         z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
1.1  ross         z = ( ( a / z )<<14 ) + ( z<<15 );
1.1  ross         a >>= 1;
1.1  ross     }
1.1  ross     else {
1.1  ross         z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
1.1  ross         z = a / z + z;
1.1  ross         z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
1.1  ross         if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
1.1  ross     }
1.1  ross     return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
1.1  ross
1.1  ross }
1.1  ross #endif
1.1  ross
1.1  ross /*
1.1  ross -------------------------------------------------------------------------------
1.1  ross Returns the number of leading 0 bits before the most-significant 1 bit of
1.1  ross `a'.  If `a' is zero, 32 is returned.
1.1  ross -------------------------------------------------------------------------------
1.1  ross */
1.1  ross static int8 countLeadingZeros32( bits32 a )
1.1  ross {
1.1  ross     static const int8 countLeadingZerosHigh[] = {
1.1  ross         8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
1.1  ross         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1.1  ross         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1.1  ross         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1.1  ross         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1.1  ross         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1.1  ross         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1.1  ross         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1.1  ross         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1.1  ross         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1.1  ross         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1.1  ross         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1.1  ross         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1.1  ross         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1.1  ross         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1.1  ross         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1.1  ross     };
1.1  ross     int8 shiftCount;
1.1  ross
1.1  ross     shiftCount = 0;
1.1  ross     if ( a < 0x10000 ) {
1.1  ross         shiftCount += 16;
1.1  ross         a <<= 16;
1.1  ross     }
1.1  ross     if ( a < 0x1000000 ) {
1.1  ross         shiftCount += 8;
1.1  ross         a <<= 8;
1.1  ross     }
1.1  ross     shiftCount += countLeadingZerosHigh[ a>>24 ];
1.1  ross     return shiftCount;
1.1  ross
1.1  ross }
1.1  ross
1.1  ross /*
1.1  ross -------------------------------------------------------------------------------
1.1  ross Returns the number of leading 0 bits before the most-significant 1 bit of
1.1  ross `a'.  If `a' is zero, 64 is returned.
1.1  ross -------------------------------------------------------------------------------
1.1  ross */
1.1  ross static int8 countLeadingZeros64( bits64 a )
1.1  ross {
1.1  ross     int8 shiftCount;
1.1  ross
1.1  ross     shiftCount = 0;
1.1  ross     if ( a < ( (bits64) 1 )<<32 ) {
1.1  ross         shiftCount += 32;
1.1  ross     }
1.1  ross     else {
1.1  ross         a >>= 32;
1.1  ross     }
1.1  ross     shiftCount += countLeadingZeros32( a );
1.1  ross     return shiftCount;
1.1  ross
1.1  ross }
1.1  ross
1.1  ross /*
1.1  ross -------------------------------------------------------------------------------
1.1  ross Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
1.1  ross is equal to the 128-bit value formed by concatenating `b0' and `b1'.
1.1  ross Otherwise, returns 0.
1.1  ross -------------------------------------------------------------------------------
1.1  ross */
1.1  ross INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
1.1  ross {
1.1  ross
1.1  ross     return ( a0 == b0 ) && ( a1 == b1 );
1.1  ross
1.1  ross }
1.1  ross
1.1  ross /*
1.1  ross -------------------------------------------------------------------------------
1.1  ross Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
1.1  ross than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
1.1  ross Otherwise, returns 0.
1.1  ross -------------------------------------------------------------------------------
1.1  ross */
1.1  ross INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
1.1  ross {
1.1  ross
1.1  ross     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
1.1  ross
1.1  ross }
1.1  ross
1.1  ross /*
1.1  ross -------------------------------------------------------------------------------
1.1  ross Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
1.1  ross than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
1.1  ross returns 0.
1.1  ross -------------------------------------------------------------------------------
1.1  ross */
1.1  ross INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
1.1  ross {
1.1  ross
1.1  ross     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
1.1  ross
1.1  ross }
1.1  ross
1.1  ross /*
1.1  ross -------------------------------------------------------------------------------
1.1  ross Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
1.1  ross not equal to the 128-bit value formed by concatenating `b0' and `b1'.
1.1  ross Otherwise, returns 0.
1.1  ross -------------------------------------------------------------------------------
1.1  ross */
1.1  ross INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
1.1  ross {
1.1  ross
1.1  ross     return ( a0 != b0 ) || ( a1 != b1 );
1.1  ross
1.1  ross }
1.1  ross