float64.glsl revision 7e102996
17e102996Smaya/* 27e102996Smaya * The implementations contained in this file are heavily based on the 37e102996Smaya * implementations found in the Berkeley SoftFloat library. As such, they are 47e102996Smaya * licensed under the same 3-clause BSD license: 57e102996Smaya * 67e102996Smaya * License for Berkeley SoftFloat Release 3e 77e102996Smaya * 87e102996Smaya * John R. Hauser 97e102996Smaya * 2018 January 20 107e102996Smaya * 117e102996Smaya * The following applies to the whole of SoftFloat Release 3e as well as to 127e102996Smaya * each source file individually. 137e102996Smaya * 147e102996Smaya * Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 The Regents of the 157e102996Smaya * University of California. All rights reserved. 167e102996Smaya * 177e102996Smaya * Redistribution and use in source and binary forms, with or without 187e102996Smaya * modification, are permitted provided that the following conditions are met: 197e102996Smaya * 207e102996Smaya * 1. Redistributions of source code must retain the above copyright notice, 217e102996Smaya * this list of conditions, and the following disclaimer. 227e102996Smaya * 237e102996Smaya * 2. Redistributions in binary form must reproduce the above copyright 247e102996Smaya * notice, this list of conditions, and the following disclaimer in the 257e102996Smaya * documentation and/or other materials provided with the distribution. 267e102996Smaya * 277e102996Smaya * 3. Neither the name of the University nor the names of its contributors 287e102996Smaya * may be used to endorse or promote products derived from this software 297e102996Smaya * without specific prior written permission. 307e102996Smaya * 317e102996Smaya * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY 327e102996Smaya * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 337e102996Smaya * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE 347e102996Smaya * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY 357e102996Smaya * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 367e102996Smaya * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 377e102996Smaya * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 387e102996Smaya * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 397e102996Smaya * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 407e102996Smaya * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 417e102996Smaya*/ 427e102996Smaya 437e102996Smaya#version 430 447e102996Smaya#extension GL_ARB_gpu_shader_int64 : enable 457e102996Smaya#extension GL_ARB_shader_bit_encoding : enable 467e102996Smaya#extension GL_EXT_shader_integer_mix : enable 477e102996Smaya#extension GL_MESA_shader_integer_functions : enable 487e102996Smaya 497e102996Smaya#pragma warning(off) 507e102996Smaya 517e102996Smaya/* Software IEEE floating-point rounding mode. 527e102996Smaya * GLSL spec section "4.7.1 Range and Precision": 537e102996Smaya * The rounding mode cannot be set and is undefined. 547e102996Smaya * But here, we are able to define the rounding mode at the compilation time. 557e102996Smaya */ 567e102996Smaya#define FLOAT_ROUND_NEAREST_EVEN 0 577e102996Smaya#define FLOAT_ROUND_TO_ZERO 1 587e102996Smaya#define FLOAT_ROUND_DOWN 2 597e102996Smaya#define FLOAT_ROUND_UP 3 607e102996Smaya#define FLOAT_ROUNDING_MODE FLOAT_ROUND_NEAREST_EVEN 617e102996Smaya 627e102996Smaya/* Absolute value of a Float64 : 637e102996Smaya * Clear the sign bit 647e102996Smaya */ 657e102996Smayauint64_t 667e102996Smaya__fabs64(uint64_t __a) 677e102996Smaya{ 687e102996Smaya uvec2 a = unpackUint2x32(__a); 697e102996Smaya a.y &= 0x7FFFFFFFu; 707e102996Smaya return packUint2x32(a); 717e102996Smaya} 727e102996Smaya 737e102996Smaya/* Returns 1 if the double-precision floating-point value `a' is a NaN; 747e102996Smaya * otherwise returns 0. 757e102996Smaya */ 767e102996Smayabool 777e102996Smaya__is_nan(uint64_t __a) 787e102996Smaya{ 797e102996Smaya uvec2 a = unpackUint2x32(__a); 807e102996Smaya return (0xFFE00000u <= (a.y<<1)) && 817e102996Smaya ((a.x != 0u) || ((a.y & 0x000FFFFFu) != 0u)); 827e102996Smaya} 837e102996Smaya 847e102996Smaya/* Negate value of a Float64 : 857e102996Smaya * Toggle the sign bit 867e102996Smaya */ 877e102996Smayauint64_t 887e102996Smaya__fneg64(uint64_t __a) 897e102996Smaya{ 907e102996Smaya uvec2 a = unpackUint2x32(__a); 917e102996Smaya uint t = a.y; 927e102996Smaya 937e102996Smaya t ^= (1u << 31); 947e102996Smaya a.y = mix(t, a.y, __is_nan(__a)); 957e102996Smaya return packUint2x32(a); 967e102996Smaya} 977e102996Smaya 987e102996Smayauint64_t 997e102996Smaya__fsign64(uint64_t __a) 1007e102996Smaya{ 1017e102996Smaya uvec2 a = unpackUint2x32(__a); 1027e102996Smaya uvec2 retval; 1037e102996Smaya retval.x = 0u; 1047e102996Smaya retval.y = mix((a.y & 0x80000000u) | 0x3FF00000u, 0u, (a.y << 1 | a.x) == 0u); 1057e102996Smaya return packUint2x32(retval); 1067e102996Smaya} 1077e102996Smaya 1087e102996Smaya/* Returns the fraction bits of the double-precision floating-point value `a'.*/ 1097e102996Smayauint 1107e102996Smaya__extractFloat64FracLo(uint64_t a) 1117e102996Smaya{ 1127e102996Smaya return unpackUint2x32(a).x; 1137e102996Smaya} 1147e102996Smaya 1157e102996Smayauint 1167e102996Smaya__extractFloat64FracHi(uint64_t a) 1177e102996Smaya{ 1187e102996Smaya return unpackUint2x32(a).y & 0x000FFFFFu; 1197e102996Smaya} 1207e102996Smaya 1217e102996Smaya/* Returns the exponent bits of the double-precision floating-point value `a'.*/ 1227e102996Smayaint 1237e102996Smaya__extractFloat64Exp(uint64_t __a) 1247e102996Smaya{ 1257e102996Smaya uvec2 a = unpackUint2x32(__a); 1267e102996Smaya return int((a.y>>20) & 0x7FFu); 1277e102996Smaya} 1287e102996Smaya 1297e102996Smayabool 1307e102996Smaya__feq64_nonnan(uint64_t __a, uint64_t __b) 1317e102996Smaya{ 1327e102996Smaya uvec2 a = unpackUint2x32(__a); 1337e102996Smaya uvec2 b = unpackUint2x32(__b); 1347e102996Smaya return (a.x == b.x) && 1357e102996Smaya ((a.y == b.y) || ((a.x == 0u) && (((a.y | b.y)<<1) == 0u))); 1367e102996Smaya} 1377e102996Smaya 1387e102996Smaya/* Returns true if the double-precision floating-point value `a' is equal to the 1397e102996Smaya * corresponding value `b', and false otherwise. The comparison is performed 1407e102996Smaya * according to the IEEE Standard for Floating-Point Arithmetic. 1417e102996Smaya */ 1427e102996Smayabool 1437e102996Smaya__feq64(uint64_t a, uint64_t b) 1447e102996Smaya{ 1457e102996Smaya if (__is_nan(a) || __is_nan(b)) 1467e102996Smaya return false; 1477e102996Smaya 1487e102996Smaya return __feq64_nonnan(a, b); 1497e102996Smaya} 1507e102996Smaya 1517e102996Smaya/* Returns true if the double-precision floating-point value `a' is not equal 1527e102996Smaya * to the corresponding value `b', and false otherwise. The comparison is 1537e102996Smaya * performed according to the IEEE Standard for Floating-Point Arithmetic. 1547e102996Smaya */ 1557e102996Smayabool 1567e102996Smaya__fne64(uint64_t a, uint64_t b) 1577e102996Smaya{ 1587e102996Smaya if (__is_nan(a) || __is_nan(b)) 1597e102996Smaya return true; 1607e102996Smaya 1617e102996Smaya return !__feq64_nonnan(a, b); 1627e102996Smaya} 1637e102996Smaya 1647e102996Smaya/* Returns the sign bit of the double-precision floating-point value `a'.*/ 1657e102996Smayauint 1667e102996Smaya__extractFloat64Sign(uint64_t a) 1677e102996Smaya{ 1687e102996Smaya return unpackUint2x32(a).y >> 31; 1697e102996Smaya} 1707e102996Smaya 1717e102996Smaya/* Returns true if the 64-bit value formed by concatenating `a0' and `a1' is less 1727e102996Smaya * than the 64-bit value formed by concatenating `b0' and `b1'. Otherwise, 1737e102996Smaya * returns false. 1747e102996Smaya */ 1757e102996Smayabool 1767e102996Smayalt64(uint a0, uint a1, uint b0, uint b1) 1777e102996Smaya{ 1787e102996Smaya return (a0 < b0) || ((a0 == b0) && (a1 < b1)); 1797e102996Smaya} 1807e102996Smaya 1817e102996Smayabool 1827e102996Smaya__flt64_nonnan(uint64_t __a, uint64_t __b) 1837e102996Smaya{ 1847e102996Smaya uvec2 a = unpackUint2x32(__a); 1857e102996Smaya uvec2 b = unpackUint2x32(__b); 1867e102996Smaya uint aSign = __extractFloat64Sign(__a); 1877e102996Smaya uint bSign = __extractFloat64Sign(__b); 1887e102996Smaya if (aSign != bSign) 1897e102996Smaya return (aSign != 0u) && ((((a.y | b.y)<<1) | a.x | b.x) != 0u); 1907e102996Smaya 1917e102996Smaya return mix(lt64(a.y, a.x, b.y, b.x), lt64(b.y, b.x, a.y, a.x), aSign != 0u); 1927e102996Smaya} 1937e102996Smaya 1947e102996Smaya/* Returns true if the double-precision floating-point value `a' is less than 1957e102996Smaya * the corresponding value `b', and false otherwise. The comparison is performed 1967e102996Smaya * according to the IEEE Standard for Floating-Point Arithmetic. 1977e102996Smaya */ 1987e102996Smayabool 1997e102996Smaya__flt64(uint64_t a, uint64_t b) 2007e102996Smaya{ 2017e102996Smaya if (__is_nan(a) || __is_nan(b)) 2027e102996Smaya return false; 2037e102996Smaya 2047e102996Smaya return __flt64_nonnan(a, b); 2057e102996Smaya} 2067e102996Smaya 2077e102996Smaya/* Returns true if the double-precision floating-point value `a' is greater 2087e102996Smaya * than or equal to * the corresponding value `b', and false otherwise. The 2097e102996Smaya * comparison is performed * according to the IEEE Standard for Floating-Point 2107e102996Smaya * Arithmetic. 2117e102996Smaya */ 2127e102996Smayabool 2137e102996Smaya__fge64(uint64_t a, uint64_t b) 2147e102996Smaya{ 2157e102996Smaya if (__is_nan(a) || __is_nan(b)) 2167e102996Smaya return false; 2177e102996Smaya 2187e102996Smaya return !__flt64_nonnan(a, b); 2197e102996Smaya} 2207e102996Smaya 2217e102996Smaya/* Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit 2227e102996Smaya * value formed by concatenating `b0' and `b1'. Addition is modulo 2^64, so 2237e102996Smaya * any carry out is lost. The result is broken into two 32-bit pieces which 2247e102996Smaya * are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 2257e102996Smaya */ 2267e102996Smayavoid 2277e102996Smaya__add64(uint a0, uint a1, uint b0, uint b1, 2287e102996Smaya out uint z0Ptr, 2297e102996Smaya out uint z1Ptr) 2307e102996Smaya{ 2317e102996Smaya uint z1 = a1 + b1; 2327e102996Smaya z1Ptr = z1; 2337e102996Smaya z0Ptr = a0 + b0 + uint(z1 < a1); 2347e102996Smaya} 2357e102996Smaya 2367e102996Smaya 2377e102996Smaya/* Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the 2387e102996Smaya * 64-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo 2397e102996Smaya * 2^64, so any borrow out (carry out) is lost. The result is broken into two 2407e102996Smaya * 32-bit pieces which are stored at the locations pointed to by `z0Ptr' and 2417e102996Smaya * `z1Ptr'. 2427e102996Smaya */ 2437e102996Smayavoid 2447e102996Smaya__sub64(uint a0, uint a1, uint b0, uint b1, 2457e102996Smaya out uint z0Ptr, 2467e102996Smaya out uint z1Ptr) 2477e102996Smaya{ 2487e102996Smaya z1Ptr = a1 - b1; 2497e102996Smaya z0Ptr = a0 - b0 - uint(a1 < b1); 2507e102996Smaya} 2517e102996Smaya 2527e102996Smaya/* Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the 2537e102996Smaya * number of bits given in `count'. If any nonzero bits are shifted off, they 2547e102996Smaya * are "jammed" into the least significant bit of the result by setting the 2557e102996Smaya * least significant bit to 1. The value of `count' can be arbitrarily large; 2567e102996Smaya * in particular, if `count' is greater than 64, the result will be either 0 2577e102996Smaya * or 1, depending on whether the concatenation of `a0' and `a1' is zero or 2587e102996Smaya * nonzero. The result is broken into two 32-bit pieces which are stored at 2597e102996Smaya * the locations pointed to by `z0Ptr' and `z1Ptr'. 2607e102996Smaya */ 2617e102996Smayavoid 2627e102996Smaya__shift64RightJamming(uint a0, 2637e102996Smaya uint a1, 2647e102996Smaya int count, 2657e102996Smaya out uint z0Ptr, 2667e102996Smaya out uint z1Ptr) 2677e102996Smaya{ 2687e102996Smaya uint z0; 2697e102996Smaya uint z1; 2707e102996Smaya int negCount = (-count) & 31; 2717e102996Smaya 2727e102996Smaya z0 = mix(0u, a0, count == 0); 2737e102996Smaya z0 = mix(z0, (a0 >> count), count < 32); 2747e102996Smaya 2757e102996Smaya z1 = uint((a0 | a1) != 0u); /* count >= 64 */ 2767e102996Smaya uint z1_lt64 = (a0>>(count & 31)) | uint(((a0<<negCount) | a1) != 0u); 2777e102996Smaya z1 = mix(z1, z1_lt64, count < 64); 2787e102996Smaya z1 = mix(z1, (a0 | uint(a1 != 0u)), count == 32); 2797e102996Smaya uint z1_lt32 = (a0<<negCount) | (a1>>count) | uint ((a1<<negCount) != 0u); 2807e102996Smaya z1 = mix(z1, z1_lt32, count < 32); 2817e102996Smaya z1 = mix(z1, a1, count == 0); 2827e102996Smaya z1Ptr = z1; 2837e102996Smaya z0Ptr = z0; 2847e102996Smaya} 2857e102996Smaya 2867e102996Smaya/* Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right 2877e102996Smaya * by 32 _plus_ the number of bits given in `count'. The shifted result is 2887e102996Smaya * at most 64 nonzero bits; these are broken into two 32-bit pieces which are 2897e102996Smaya * stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted 2907e102996Smaya * off form a third 32-bit result as follows: The _last_ bit shifted off is 2917e102996Smaya * the most-significant bit of the extra result, and the other 31 bits of the 2927e102996Smaya * extra result are all zero if and only if _all_but_the_last_ bits shifted off 2937e102996Smaya * were all zero. This extra result is stored in the location pointed to by 2947e102996Smaya * `z2Ptr'. The value of `count' can be arbitrarily large. 2957e102996Smaya * (This routine makes more sense if `a0', `a1', and `a2' are considered 2967e102996Smaya * to form a fixed-point value with binary point between `a1' and `a2'. This 2977e102996Smaya * fixed-point value is shifted right by the number of bits given in `count', 2987e102996Smaya * and the integer part of the result is returned at the locations pointed to 2997e102996Smaya * by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly 3007e102996Smaya * corrupted as described above, and is returned at the location pointed to by 3017e102996Smaya * `z2Ptr'.) 3027e102996Smaya */ 3037e102996Smayavoid 3047e102996Smaya__shift64ExtraRightJamming(uint a0, uint a1, uint a2, 3057e102996Smaya int count, 3067e102996Smaya out uint z0Ptr, 3077e102996Smaya out uint z1Ptr, 3087e102996Smaya out uint z2Ptr) 3097e102996Smaya{ 3107e102996Smaya uint z0 = 0u; 3117e102996Smaya uint z1; 3127e102996Smaya uint z2; 3137e102996Smaya int negCount = (-count) & 31; 3147e102996Smaya 3157e102996Smaya z2 = mix(uint(a0 != 0u), a0, count == 64); 3167e102996Smaya z2 = mix(z2, a0 << negCount, count < 64); 3177e102996Smaya z2 = mix(z2, a1 << negCount, count < 32); 3187e102996Smaya 3197e102996Smaya z1 = mix(0u, (a0 >> (count & 31)), count < 64); 3207e102996Smaya z1 = mix(z1, (a0<<negCount) | (a1>>count), count < 32); 3217e102996Smaya 3227e102996Smaya a2 = mix(a2 | a1, a2, count < 32); 3237e102996Smaya z0 = mix(z0, a0 >> count, count < 32); 3247e102996Smaya z2 |= uint(a2 != 0u); 3257e102996Smaya 3267e102996Smaya z0 = mix(z0, 0u, (count == 32)); 3277e102996Smaya z1 = mix(z1, a0, (count == 32)); 3287e102996Smaya z2 = mix(z2, a1, (count == 32)); 3297e102996Smaya z0 = mix(z0, a0, (count == 0)); 3307e102996Smaya z1 = mix(z1, a1, (count == 0)); 3317e102996Smaya z2 = mix(z2, a2, (count == 0)); 3327e102996Smaya z2Ptr = z2; 3337e102996Smaya z1Ptr = z1; 3347e102996Smaya z0Ptr = z0; 3357e102996Smaya} 3367e102996Smaya 3377e102996Smaya/* Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the 3387e102996Smaya * number of bits given in `count'. Any bits shifted off are lost. The value 3397e102996Smaya * of `count' must be less than 32. The result is broken into two 32-bit 3407e102996Smaya * pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 3417e102996Smaya */ 3427e102996Smayavoid 3437e102996Smaya__shortShift64Left(uint a0, uint a1, 3447e102996Smaya int count, 3457e102996Smaya out uint z0Ptr, 3467e102996Smaya out uint z1Ptr) 3477e102996Smaya{ 3487e102996Smaya z1Ptr = a1<<count; 3497e102996Smaya z0Ptr = mix((a0 << count | (a1 >> ((-count) & 31))), a0, count == 0); 3507e102996Smaya} 3517e102996Smaya 3527e102996Smaya/* Packs the sign `zSign', the exponent `zExp', and the significand formed by 3537e102996Smaya * the concatenation of `zFrac0' and `zFrac1' into a double-precision floating- 3547e102996Smaya * point value, returning the result. After being shifted into the proper 3557e102996Smaya * positions, the three fields `zSign', `zExp', and `zFrac0' are simply added 3567e102996Smaya * together to form the most significant 32 bits of the result. This means 3577e102996Smaya * that any integer portion of `zFrac0' will be added into the exponent. Since 3587e102996Smaya * a properly normalized significand will have an integer portion equal to 1, 3597e102996Smaya * the `zExp' input should be 1 less than the desired result exponent whenever 3607e102996Smaya * `zFrac0' and `zFrac1' concatenated form a complete, normalized significand. 3617e102996Smaya */ 3627e102996Smayauint64_t 3637e102996Smaya__packFloat64(uint zSign, int zExp, uint zFrac0, uint zFrac1) 3647e102996Smaya{ 3657e102996Smaya uvec2 z; 3667e102996Smaya 3677e102996Smaya z.y = (zSign << 31) + (uint(zExp) << 20) + zFrac0; 3687e102996Smaya z.x = zFrac1; 3697e102996Smaya return packUint2x32(z); 3707e102996Smaya} 3717e102996Smaya 3727e102996Smaya/* Takes an abstract floating-point value having sign `zSign', exponent `zExp', 3737e102996Smaya * and extended significand formed by the concatenation of `zFrac0', `zFrac1', 3747e102996Smaya * and `zFrac2', and returns the proper double-precision floating-point value 3757e102996Smaya * corresponding to the abstract input. Ordinarily, the abstract value is 3767e102996Smaya * simply rounded and packed into the double-precision format, with the inexact 3777e102996Smaya * exception raised if the abstract input cannot be represented exactly. 3787e102996Smaya * However, if the abstract value is too large, the overflow and inexact 3797e102996Smaya * exceptions are raised and an infinity or maximal finite value is returned. 3807e102996Smaya * If the abstract value is too small, the input value is rounded to a 3817e102996Smaya * subnormal number, and the underflow and inexact exceptions are raised if the 3827e102996Smaya * abstract input cannot be represented exactly as a subnormal double-precision 3837e102996Smaya * floating-point number. 3847e102996Smaya * The input significand must be normalized or smaller. If the input 3857e102996Smaya * significand is not normalized, `zExp' must be 0; in that case, the result 3867e102996Smaya * returned is a subnormal number, and it must not require rounding. In the 3877e102996Smaya * usual case that the input significand is normalized, `zExp' must be 1 less 3887e102996Smaya * than the "true" floating-point exponent. The handling of underflow and 3897e102996Smaya * overflow follows the IEEE Standard for Floating-Point Arithmetic. 3907e102996Smaya */ 3917e102996Smayauint64_t 3927e102996Smaya__roundAndPackFloat64(uint zSign, 3937e102996Smaya int zExp, 3947e102996Smaya uint zFrac0, 3957e102996Smaya uint zFrac1, 3967e102996Smaya uint zFrac2) 3977e102996Smaya{ 3987e102996Smaya bool roundNearestEven; 3997e102996Smaya bool increment; 4007e102996Smaya 4017e102996Smaya roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN; 4027e102996Smaya increment = int(zFrac2) < 0; 4037e102996Smaya if (!roundNearestEven) { 4047e102996Smaya if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) { 4057e102996Smaya increment = false; 4067e102996Smaya } else { 4077e102996Smaya if (zSign != 0u) { 4087e102996Smaya increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && 4097e102996Smaya (zFrac2 != 0u); 4107e102996Smaya } else { 4117e102996Smaya increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && 4127e102996Smaya (zFrac2 != 0u); 4137e102996Smaya } 4147e102996Smaya } 4157e102996Smaya } 4167e102996Smaya if (0x7FD <= zExp) { 4177e102996Smaya if ((0x7FD < zExp) || 4187e102996Smaya ((zExp == 0x7FD) && 4197e102996Smaya (0x001FFFFFu == zFrac0 && 0xFFFFFFFFu == zFrac1) && 4207e102996Smaya increment)) { 4217e102996Smaya if ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) || 4227e102996Smaya ((zSign != 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP)) || 4237e102996Smaya ((zSign == 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN))) { 4247e102996Smaya return __packFloat64(zSign, 0x7FE, 0x000FFFFFu, 0xFFFFFFFFu); 4257e102996Smaya } 4267e102996Smaya return __packFloat64(zSign, 0x7FF, 0u, 0u); 4277e102996Smaya } 4287e102996Smaya if (zExp < 0) { 4297e102996Smaya __shift64ExtraRightJamming( 4307e102996Smaya zFrac0, zFrac1, zFrac2, -zExp, zFrac0, zFrac1, zFrac2); 4317e102996Smaya zExp = 0; 4327e102996Smaya if (roundNearestEven) { 4337e102996Smaya increment = zFrac2 < 0u; 4347e102996Smaya } else { 4357e102996Smaya if (zSign != 0u) { 4367e102996Smaya increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && 4377e102996Smaya (zFrac2 != 0u); 4387e102996Smaya } else { 4397e102996Smaya increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && 4407e102996Smaya (zFrac2 != 0u); 4417e102996Smaya } 4427e102996Smaya } 4437e102996Smaya } 4447e102996Smaya } 4457e102996Smaya if (increment) { 4467e102996Smaya __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1); 4477e102996Smaya zFrac1 &= ~((zFrac2 + uint(zFrac2 == 0u)) & uint(roundNearestEven)); 4487e102996Smaya } else { 4497e102996Smaya zExp = mix(zExp, 0, (zFrac0 | zFrac1) == 0u); 4507e102996Smaya } 4517e102996Smaya return __packFloat64(zSign, zExp, zFrac0, zFrac1); 4527e102996Smaya} 4537e102996Smaya 4547e102996Smayauint64_t 4557e102996Smaya__roundAndPackUInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2) 4567e102996Smaya{ 4577e102996Smaya bool roundNearestEven; 4587e102996Smaya bool increment; 4597e102996Smaya uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL; 4607e102996Smaya 4617e102996Smaya roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN; 4627e102996Smaya 4637e102996Smaya if (zFrac2 >= 0x80000000u) 4647e102996Smaya increment = false; 4657e102996Smaya 4667e102996Smaya if (!roundNearestEven) { 4677e102996Smaya if (zSign != 0u) { 4687e102996Smaya if ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && (zFrac2 != 0u)) { 4697e102996Smaya increment = false; 4707e102996Smaya } 4717e102996Smaya } else { 4727e102996Smaya increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && 4737e102996Smaya (zFrac2 != 0u); 4747e102996Smaya } 4757e102996Smaya } 4767e102996Smaya 4777e102996Smaya if (increment) { 4787e102996Smaya __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1); 4797e102996Smaya if ((zFrac0 | zFrac1) != 0u) 4807e102996Smaya zFrac1 &= ~(1u) + uint(zFrac2 == 0u) & uint(roundNearestEven); 4817e102996Smaya } 4827e102996Smaya return mix(packUint2x32(uvec2(zFrac1, zFrac0)), default_nan, 4837e102996Smaya (zSign !=0u && (zFrac0 | zFrac1) != 0u)); 4847e102996Smaya} 4857e102996Smaya 4867e102996Smayaint64_t 4877e102996Smaya__roundAndPackInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2) 4887e102996Smaya{ 4897e102996Smaya bool roundNearestEven; 4907e102996Smaya bool increment; 4917e102996Smaya int64_t default_NegNaN = -0x7FFFFFFFFFFFFFFEL; 4927e102996Smaya int64_t default_PosNaN = 0xFFFFFFFFFFFFFFFFL; 4937e102996Smaya 4947e102996Smaya roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN; 4957e102996Smaya 4967e102996Smaya if (zFrac2 >= 0x80000000u) 4977e102996Smaya increment = false; 4987e102996Smaya 4997e102996Smaya if (!roundNearestEven) { 5007e102996Smaya if (zSign != 0u) { 5017e102996Smaya increment = ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && 5027e102996Smaya (zFrac2 != 0u)); 5037e102996Smaya } else { 5047e102996Smaya increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && 5057e102996Smaya (zFrac2 != 0u); 5067e102996Smaya } 5077e102996Smaya } 5087e102996Smaya 5097e102996Smaya if (increment) { 5107e102996Smaya __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1); 5117e102996Smaya if ((zFrac0 | zFrac1) != 0u) 5127e102996Smaya zFrac1 &= ~(1u) + uint(zFrac2 == 0u) & uint(roundNearestEven); 5137e102996Smaya } 5147e102996Smaya 5157e102996Smaya int64_t absZ = mix(int64_t(packUint2x32(uvec2(zFrac1, zFrac0))), 5167e102996Smaya -int64_t(packUint2x32(uvec2(zFrac1, zFrac0))), 5177e102996Smaya (zSign != 0u)); 5187e102996Smaya int64_t nan = mix(default_PosNaN, default_NegNaN, bool(zSign)); 5197e102996Smaya return mix(absZ, nan, bool(zSign ^ uint(absZ < 0)) && bool(absZ)); 5207e102996Smaya} 5217e102996Smaya 5227e102996Smaya/* Returns the number of leading 0 bits before the most-significant 1 bit of 5237e102996Smaya * `a'. If `a' is zero, 32 is returned. 5247e102996Smaya */ 5257e102996Smayaint 5267e102996Smaya__countLeadingZeros32(uint a) 5277e102996Smaya{ 5287e102996Smaya int shiftCount; 5297e102996Smaya shiftCount = mix(31 - findMSB(a), 32, a == 0u); 5307e102996Smaya return shiftCount; 5317e102996Smaya} 5327e102996Smaya 5337e102996Smaya/* Takes an abstract floating-point value having sign `zSign', exponent `zExp', 5347e102996Smaya * and significand formed by the concatenation of `zSig0' and `zSig1', and 5357e102996Smaya * returns the proper double-precision floating-point value corresponding 5367e102996Smaya * to the abstract input. This routine is just like `__roundAndPackFloat64' 5377e102996Smaya * except that the input significand has fewer bits and does not have to be 5387e102996Smaya * normalized. In all cases, `zExp' must be 1 less than the "true" floating- 5397e102996Smaya * point exponent. 5407e102996Smaya */ 5417e102996Smayauint64_t 5427e102996Smaya__normalizeRoundAndPackFloat64(uint zSign, 5437e102996Smaya int zExp, 5447e102996Smaya uint zFrac0, 5457e102996Smaya uint zFrac1) 5467e102996Smaya{ 5477e102996Smaya int shiftCount; 5487e102996Smaya uint zFrac2; 5497e102996Smaya 5507e102996Smaya if (zFrac0 == 0u) { 5517e102996Smaya zExp -= 32; 5527e102996Smaya zFrac0 = zFrac1; 5537e102996Smaya zFrac1 = 0u; 5547e102996Smaya } 5557e102996Smaya 5567e102996Smaya shiftCount = __countLeadingZeros32(zFrac0) - 11; 5577e102996Smaya if (0 <= shiftCount) { 5587e102996Smaya zFrac2 = 0u; 5597e102996Smaya __shortShift64Left(zFrac0, zFrac1, shiftCount, zFrac0, zFrac1); 5607e102996Smaya } else { 5617e102996Smaya __shift64ExtraRightJamming( 5627e102996Smaya zFrac0, zFrac1, 0u, -shiftCount, zFrac0, zFrac1, zFrac2); 5637e102996Smaya } 5647e102996Smaya zExp -= shiftCount; 5657e102996Smaya return __roundAndPackFloat64(zSign, zExp, zFrac0, zFrac1, zFrac2); 5667e102996Smaya} 5677e102996Smaya 5687e102996Smaya/* Takes two double-precision floating-point values `a' and `b', one of which 5697e102996Smaya * is a NaN, and returns the appropriate NaN result. 5707e102996Smaya */ 5717e102996Smayauint64_t 5727e102996Smaya__propagateFloat64NaN(uint64_t __a, uint64_t __b) 5737e102996Smaya{ 5747e102996Smaya bool aIsNaN = __is_nan(__a); 5757e102996Smaya bool bIsNaN = __is_nan(__b); 5767e102996Smaya uvec2 a = unpackUint2x32(__a); 5777e102996Smaya uvec2 b = unpackUint2x32(__b); 5787e102996Smaya a.y |= 0x00080000u; 5797e102996Smaya b.y |= 0x00080000u; 5807e102996Smaya 5817e102996Smaya return packUint2x32(mix(b, mix(a, b, bvec2(bIsNaN, bIsNaN)), bvec2(aIsNaN, aIsNaN))); 5827e102996Smaya} 5837e102996Smaya 5847e102996Smaya/* Returns the result of adding the double-precision floating-point values 5857e102996Smaya * `a' and `b'. The operation is performed according to the IEEE Standard for 5867e102996Smaya * Floating-Point Arithmetic. 5877e102996Smaya */ 5887e102996Smayauint64_t 5897e102996Smaya__fadd64(uint64_t a, uint64_t b) 5907e102996Smaya{ 5917e102996Smaya uint aSign = __extractFloat64Sign(a); 5927e102996Smaya uint bSign = __extractFloat64Sign(b); 5937e102996Smaya uint aFracLo = __extractFloat64FracLo(a); 5947e102996Smaya uint aFracHi = __extractFloat64FracHi(a); 5957e102996Smaya uint bFracLo = __extractFloat64FracLo(b); 5967e102996Smaya uint bFracHi = __extractFloat64FracHi(b); 5977e102996Smaya int aExp = __extractFloat64Exp(a); 5987e102996Smaya int bExp = __extractFloat64Exp(b); 5997e102996Smaya uint zFrac0 = 0u; 6007e102996Smaya uint zFrac1 = 0u; 6017e102996Smaya int expDiff = aExp - bExp; 6027e102996Smaya if (aSign == bSign) { 6037e102996Smaya uint zFrac2 = 0u; 6047e102996Smaya int zExp; 6057e102996Smaya bool orig_exp_diff_is_zero = (expDiff == 0); 6067e102996Smaya 6077e102996Smaya if (orig_exp_diff_is_zero) { 6087e102996Smaya if (aExp == 0x7FF) { 6097e102996Smaya bool propagate = (aFracHi | aFracLo | bFracHi | bFracLo) != 0u; 6107e102996Smaya return mix(a, __propagateFloat64NaN(a, b), propagate); 6117e102996Smaya } 6127e102996Smaya __add64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1); 6137e102996Smaya if (aExp == 0) 6147e102996Smaya return __packFloat64(aSign, 0, zFrac0, zFrac1); 6157e102996Smaya zFrac2 = 0u; 6167e102996Smaya zFrac0 |= 0x00200000u; 6177e102996Smaya zExp = aExp; 6187e102996Smaya __shift64ExtraRightJamming( 6197e102996Smaya zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2); 6207e102996Smaya } else if (0 < expDiff) { 6217e102996Smaya if (aExp == 0x7FF) { 6227e102996Smaya bool propagate = (aFracHi | aFracLo) != 0u; 6237e102996Smaya return mix(a, __propagateFloat64NaN(a, b), propagate); 6247e102996Smaya } 6257e102996Smaya 6267e102996Smaya expDiff = mix(expDiff, expDiff - 1, bExp == 0); 6277e102996Smaya bFracHi = mix(bFracHi | 0x00100000u, bFracHi, bExp == 0); 6287e102996Smaya __shift64ExtraRightJamming( 6297e102996Smaya bFracHi, bFracLo, 0u, expDiff, bFracHi, bFracLo, zFrac2); 6307e102996Smaya zExp = aExp; 6317e102996Smaya } else if (expDiff < 0) { 6327e102996Smaya if (bExp == 0x7FF) { 6337e102996Smaya bool propagate = (bFracHi | bFracLo) != 0u; 6347e102996Smaya return mix(__packFloat64(aSign, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate); 6357e102996Smaya } 6367e102996Smaya expDiff = mix(expDiff, expDiff + 1, aExp == 0); 6377e102996Smaya aFracHi = mix(aFracHi | 0x00100000u, aFracHi, aExp == 0); 6387e102996Smaya __shift64ExtraRightJamming( 6397e102996Smaya aFracHi, aFracLo, 0u, - expDiff, aFracHi, aFracLo, zFrac2); 6407e102996Smaya zExp = bExp; 6417e102996Smaya } 6427e102996Smaya if (!orig_exp_diff_is_zero) { 6437e102996Smaya aFracHi |= 0x00100000u; 6447e102996Smaya __add64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1); 6457e102996Smaya --zExp; 6467e102996Smaya if (!(zFrac0 < 0x00200000u)) { 6477e102996Smaya __shift64ExtraRightJamming(zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2); 6487e102996Smaya ++zExp; 6497e102996Smaya } 6507e102996Smaya } 6517e102996Smaya return __roundAndPackFloat64(aSign, zExp, zFrac0, zFrac1, zFrac2); 6527e102996Smaya 6537e102996Smaya } else { 6547e102996Smaya int zExp; 6557e102996Smaya 6567e102996Smaya __shortShift64Left(aFracHi, aFracLo, 10, aFracHi, aFracLo); 6577e102996Smaya __shortShift64Left(bFracHi, bFracLo, 10, bFracHi, bFracLo); 6587e102996Smaya if (0 < expDiff) { 6597e102996Smaya if (aExp == 0x7FF) { 6607e102996Smaya bool propagate = (aFracHi | aFracLo) != 0u; 6617e102996Smaya return mix(a, __propagateFloat64NaN(a, b), propagate); 6627e102996Smaya } 6637e102996Smaya expDiff = mix(expDiff, expDiff - 1, bExp == 0); 6647e102996Smaya bFracHi = mix(bFracHi | 0x40000000u, bFracHi, bExp == 0); 6657e102996Smaya __shift64RightJamming(bFracHi, bFracLo, expDiff, bFracHi, bFracLo); 6667e102996Smaya aFracHi |= 0x40000000u; 6677e102996Smaya __sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1); 6687e102996Smaya zExp = aExp; 6697e102996Smaya --zExp; 6707e102996Smaya return __normalizeRoundAndPackFloat64(aSign, zExp - 10, zFrac0, zFrac1); 6717e102996Smaya } 6727e102996Smaya if (expDiff < 0) { 6737e102996Smaya if (bExp == 0x7FF) { 6747e102996Smaya bool propagate = (bFracHi | bFracLo) != 0u; 6757e102996Smaya return mix(__packFloat64(aSign ^ 1u, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate); 6767e102996Smaya } 6777e102996Smaya expDiff = mix(expDiff, expDiff + 1, aExp == 0); 6787e102996Smaya aFracHi = mix(aFracHi | 0x40000000u, aFracHi, aExp == 0); 6797e102996Smaya __shift64RightJamming(aFracHi, aFracLo, - expDiff, aFracHi, aFracLo); 6807e102996Smaya bFracHi |= 0x40000000u; 6817e102996Smaya __sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1); 6827e102996Smaya zExp = bExp; 6837e102996Smaya aSign ^= 1u; 6847e102996Smaya --zExp; 6857e102996Smaya return __normalizeRoundAndPackFloat64(aSign, zExp - 10, zFrac0, zFrac1); 6867e102996Smaya } 6877e102996Smaya if (aExp == 0x7FF) { 6887e102996Smaya bool propagate = (aFracHi | aFracLo | bFracHi | bFracLo) != 0u; 6897e102996Smaya return mix(0xFFFFFFFFFFFFFFFFUL, __propagateFloat64NaN(a, b), propagate); 6907e102996Smaya } 6917e102996Smaya bExp = mix(bExp, 1, aExp == 0); 6927e102996Smaya aExp = mix(aExp, 1, aExp == 0); 6937e102996Smaya bool zexp_normal = false; 6947e102996Smaya bool blta = true; 6957e102996Smaya if (bFracHi < aFracHi) { 6967e102996Smaya __sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1); 6977e102996Smaya zexp_normal = true; 6987e102996Smaya } 6997e102996Smaya else if (aFracHi < bFracHi) { 7007e102996Smaya __sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1); 7017e102996Smaya blta = false; 7027e102996Smaya zexp_normal = true; 7037e102996Smaya } 7047e102996Smaya else if (bFracLo < aFracLo) { 7057e102996Smaya __sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1); 7067e102996Smaya zexp_normal = true; 7077e102996Smaya } 7087e102996Smaya else if (aFracLo < bFracLo) { 7097e102996Smaya __sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1); 7107e102996Smaya blta = false; 7117e102996Smaya zexp_normal = true; 7127e102996Smaya } 7137e102996Smaya zExp = mix(bExp, aExp, blta); 7147e102996Smaya aSign = mix(aSign ^ 1u, aSign, blta); 7157e102996Smaya uint64_t retval_0 = __packFloat64(uint(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN), 0, 0u, 0u); 7167e102996Smaya uint64_t retval_1 = __normalizeRoundAndPackFloat64(aSign, zExp - 11, zFrac0, zFrac1); 7177e102996Smaya return mix(retval_0, retval_1, zexp_normal); 7187e102996Smaya } 7197e102996Smaya} 7207e102996Smaya 7217e102996Smaya/* Multiplies `a' by `b' to obtain a 64-bit product. The product is broken 7227e102996Smaya * into two 32-bit pieces which are stored at the locations pointed to by 7237e102996Smaya * `z0Ptr' and `z1Ptr'. 7247e102996Smaya */ 7257e102996Smayavoid 7267e102996Smaya__mul32To64(uint a, uint b, out uint z0Ptr, out uint z1Ptr) 7277e102996Smaya{ 7287e102996Smaya uint aLow = a & 0x0000FFFFu; 7297e102996Smaya uint aHigh = a>>16; 7307e102996Smaya uint bLow = b & 0x0000FFFFu; 7317e102996Smaya uint bHigh = b>>16; 7327e102996Smaya uint z1 = aLow * bLow; 7337e102996Smaya uint zMiddleA = aLow * bHigh; 7347e102996Smaya uint zMiddleB = aHigh * bLow; 7357e102996Smaya uint z0 = aHigh * bHigh; 7367e102996Smaya zMiddleA += zMiddleB; 7377e102996Smaya z0 += ((uint(zMiddleA < zMiddleB)) << 16) + (zMiddleA >> 16); 7387e102996Smaya zMiddleA <<= 16; 7397e102996Smaya z1 += zMiddleA; 7407e102996Smaya z0 += uint(z1 < zMiddleA); 7417e102996Smaya z1Ptr = z1; 7427e102996Smaya z0Ptr = z0; 7437e102996Smaya} 7447e102996Smaya 7457e102996Smaya/* Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the 7467e102996Smaya * 64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit 7477e102996Smaya * product. The product is broken into four 32-bit pieces which are stored at 7487e102996Smaya * the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. 7497e102996Smaya */ 7507e102996Smayavoid 7517e102996Smaya__mul64To128(uint a0, uint a1, uint b0, uint b1, 7527e102996Smaya out uint z0Ptr, 7537e102996Smaya out uint z1Ptr, 7547e102996Smaya out uint z2Ptr, 7557e102996Smaya out uint z3Ptr) 7567e102996Smaya{ 7577e102996Smaya uint z0 = 0u; 7587e102996Smaya uint z1 = 0u; 7597e102996Smaya uint z2 = 0u; 7607e102996Smaya uint z3 = 0u; 7617e102996Smaya uint more1 = 0u; 7627e102996Smaya uint more2 = 0u; 7637e102996Smaya 7647e102996Smaya __mul32To64(a1, b1, z2, z3); 7657e102996Smaya __mul32To64(a1, b0, z1, more2); 7667e102996Smaya __add64(z1, more2, 0u, z2, z1, z2); 7677e102996Smaya __mul32To64(a0, b0, z0, more1); 7687e102996Smaya __add64(z0, more1, 0u, z1, z0, z1); 7697e102996Smaya __mul32To64(a0, b1, more1, more2); 7707e102996Smaya __add64(more1, more2, 0u, z2, more1, z2); 7717e102996Smaya __add64(z0, z1, 0u, more1, z0, z1); 7727e102996Smaya z3Ptr = z3; 7737e102996Smaya z2Ptr = z2; 7747e102996Smaya z1Ptr = z1; 7757e102996Smaya z0Ptr = z0; 7767e102996Smaya} 7777e102996Smaya 7787e102996Smaya/* Normalizes the subnormal double-precision floating-point value represented 7797e102996Smaya * by the denormalized significand formed by the concatenation of `aFrac0' and 7807e102996Smaya * `aFrac1'. The normalized exponent is stored at the location pointed to by 7817e102996Smaya * `zExpPtr'. The most significant 21 bits of the normalized significand are 7827e102996Smaya * stored at the location pointed to by `zFrac0Ptr', and the least significant 7837e102996Smaya * 32 bits of the normalized significand are stored at the location pointed to 7847e102996Smaya * by `zFrac1Ptr'. 7857e102996Smaya */ 7867e102996Smayavoid 7877e102996Smaya__normalizeFloat64Subnormal(uint aFrac0, uint aFrac1, 7887e102996Smaya out int zExpPtr, 7897e102996Smaya out uint zFrac0Ptr, 7907e102996Smaya out uint zFrac1Ptr) 7917e102996Smaya{ 7927e102996Smaya int shiftCount; 7937e102996Smaya uint temp_zfrac0, temp_zfrac1; 7947e102996Smaya shiftCount = __countLeadingZeros32(mix(aFrac0, aFrac1, aFrac0 == 0u)) - 11; 7957e102996Smaya zExpPtr = mix(1 - shiftCount, -shiftCount - 31, aFrac0 == 0u); 7967e102996Smaya 7977e102996Smaya temp_zfrac0 = mix(aFrac1<<shiftCount, aFrac1>>(-shiftCount), shiftCount < 0); 7987e102996Smaya temp_zfrac1 = mix(0u, aFrac1<<(shiftCount & 31), shiftCount < 0); 7997e102996Smaya 8007e102996Smaya __shortShift64Left(aFrac0, aFrac1, shiftCount, zFrac0Ptr, zFrac1Ptr); 8017e102996Smaya 8027e102996Smaya zFrac0Ptr = mix(zFrac0Ptr, temp_zfrac0, aFrac0 == 0); 8037e102996Smaya zFrac1Ptr = mix(zFrac1Ptr, temp_zfrac1, aFrac0 == 0); 8047e102996Smaya} 8057e102996Smaya 8067e102996Smaya/* Returns the result of multiplying the double-precision floating-point values 8077e102996Smaya * `a' and `b'. The operation is performed according to the IEEE Standard for 8087e102996Smaya * Floating-Point Arithmetic. 8097e102996Smaya */ 8107e102996Smayauint64_t 8117e102996Smaya__fmul64(uint64_t a, uint64_t b) 8127e102996Smaya{ 8137e102996Smaya uint zFrac0 = 0u; 8147e102996Smaya uint zFrac1 = 0u; 8157e102996Smaya uint zFrac2 = 0u; 8167e102996Smaya uint zFrac3 = 0u; 8177e102996Smaya int zExp; 8187e102996Smaya 8197e102996Smaya uint aFracLo = __extractFloat64FracLo(a); 8207e102996Smaya uint aFracHi = __extractFloat64FracHi(a); 8217e102996Smaya uint bFracLo = __extractFloat64FracLo(b); 8227e102996Smaya uint bFracHi = __extractFloat64FracHi(b); 8237e102996Smaya int aExp = __extractFloat64Exp(a); 8247e102996Smaya uint aSign = __extractFloat64Sign(a); 8257e102996Smaya int bExp = __extractFloat64Exp(b); 8267e102996Smaya uint bSign = __extractFloat64Sign(b); 8277e102996Smaya uint zSign = aSign ^ bSign; 8287e102996Smaya if (aExp == 0x7FF) { 8297e102996Smaya if (((aFracHi | aFracLo) != 0u) || 8307e102996Smaya ((bExp == 0x7FF) && ((bFracHi | bFracLo) != 0u))) { 8317e102996Smaya return __propagateFloat64NaN(a, b); 8327e102996Smaya } 8337e102996Smaya if ((uint(bExp) | bFracHi | bFracLo) == 0u) 8347e102996Smaya return 0xFFFFFFFFFFFFFFFFUL; 8357e102996Smaya return __packFloat64(zSign, 0x7FF, 0u, 0u); 8367e102996Smaya } 8377e102996Smaya if (bExp == 0x7FF) { 8387e102996Smaya if ((bFracHi | bFracLo) != 0u) 8397e102996Smaya return __propagateFloat64NaN(a, b); 8407e102996Smaya if ((uint(aExp) | aFracHi | aFracLo) == 0u) 8417e102996Smaya return 0xFFFFFFFFFFFFFFFFUL; 8427e102996Smaya return __packFloat64(zSign, 0x7FF, 0u, 0u); 8437e102996Smaya } 8447e102996Smaya if (aExp == 0) { 8457e102996Smaya if ((aFracHi | aFracLo) == 0u) 8467e102996Smaya return __packFloat64(zSign, 0, 0u, 0u); 8477e102996Smaya __normalizeFloat64Subnormal(aFracHi, aFracLo, aExp, aFracHi, aFracLo); 8487e102996Smaya } 8497e102996Smaya if (bExp == 0) { 8507e102996Smaya if ((bFracHi | bFracLo) == 0u) 8517e102996Smaya return __packFloat64(zSign, 0, 0u, 0u); 8527e102996Smaya __normalizeFloat64Subnormal(bFracHi, bFracLo, bExp, bFracHi, bFracLo); 8537e102996Smaya } 8547e102996Smaya zExp = aExp + bExp - 0x400; 8557e102996Smaya aFracHi |= 0x00100000u; 8567e102996Smaya __shortShift64Left(bFracHi, bFracLo, 12, bFracHi, bFracLo); 8577e102996Smaya __mul64To128( 8587e102996Smaya aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1, zFrac2, zFrac3); 8597e102996Smaya __add64(zFrac0, zFrac1, aFracHi, aFracLo, zFrac0, zFrac1); 8607e102996Smaya zFrac2 |= uint(zFrac3 != 0u); 8617e102996Smaya if (0x00200000u <= zFrac0) { 8627e102996Smaya __shift64ExtraRightJamming( 8637e102996Smaya zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2); 8647e102996Smaya ++zExp; 8657e102996Smaya } 8667e102996Smaya return __roundAndPackFloat64(zSign, zExp, zFrac0, zFrac1, zFrac2); 8677e102996Smaya} 8687e102996Smaya 8697e102996Smayauint64_t 8707e102996Smaya__ffma64(uint64_t a, uint64_t b, uint64_t c) 8717e102996Smaya{ 8727e102996Smaya return __fadd64(__fmul64(a, b), c); 8737e102996Smaya} 8747e102996Smaya 8757e102996Smaya/* Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the 8767e102996Smaya * number of bits given in `count'. Any bits shifted off are lost. The value 8777e102996Smaya * of `count' can be arbitrarily large; in particular, if `count' is greater 8787e102996Smaya * than 64, the result will be 0. The result is broken into two 32-bit pieces 8797e102996Smaya * which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 8807e102996Smaya */ 8817e102996Smayavoid 8827e102996Smaya__shift64Right(uint a0, uint a1, 8837e102996Smaya int count, 8847e102996Smaya out uint z0Ptr, 8857e102996Smaya out uint z1Ptr) 8867e102996Smaya{ 8877e102996Smaya uint z0; 8887e102996Smaya uint z1; 8897e102996Smaya int negCount = (-count) & 31; 8907e102996Smaya 8917e102996Smaya z0 = 0u; 8927e102996Smaya z0 = mix(z0, (a0 >> count), count < 32); 8937e102996Smaya z0 = mix(z0, a0, count == 0); 8947e102996Smaya 8957e102996Smaya z1 = mix(0u, (a0 >> (count & 31)), count < 64); 8967e102996Smaya z1 = mix(z1, (a0<<negCount) | (a1>>count), count < 32); 8977e102996Smaya z1 = mix(z1, a0, count == 0); 8987e102996Smaya 8997e102996Smaya z1Ptr = z1; 9007e102996Smaya z0Ptr = z0; 9017e102996Smaya} 9027e102996Smaya 9037e102996Smaya/* Returns the result of converting the double-precision floating-point value 9047e102996Smaya * `a' to the unsigned integer format. The conversion is performed according 9057e102996Smaya * to the IEEE Standard for Floating-Point Arithmetic. 9067e102996Smaya */ 9077e102996Smayauint 9087e102996Smaya__fp64_to_uint(uint64_t a) 9097e102996Smaya{ 9107e102996Smaya uint aFracLo = __extractFloat64FracLo(a); 9117e102996Smaya uint aFracHi = __extractFloat64FracHi(a); 9127e102996Smaya int aExp = __extractFloat64Exp(a); 9137e102996Smaya uint aSign = __extractFloat64Sign(a); 9147e102996Smaya 9157e102996Smaya if ((aExp == 0x7FF) && ((aFracHi | aFracLo) != 0u)) 9167e102996Smaya return 0xFFFFFFFFu; 9177e102996Smaya 9187e102996Smaya aFracHi |= mix(0u, 0x00100000u, aExp != 0); 9197e102996Smaya 9207e102996Smaya int shiftDist = 0x427 - aExp; 9217e102996Smaya if (0 < shiftDist) 9227e102996Smaya __shift64RightJamming(aFracHi, aFracLo, shiftDist, aFracHi, aFracLo); 9237e102996Smaya 9247e102996Smaya if ((aFracHi & 0xFFFFF000u) != 0u) 9257e102996Smaya return mix(~0u, 0u, (aSign != 0u)); 9267e102996Smaya 9277e102996Smaya uint z = 0u; 9287e102996Smaya uint zero = 0u; 9297e102996Smaya __shift64Right(aFracHi, aFracLo, 12, zero, z); 9307e102996Smaya 9317e102996Smaya uint expt = mix(~0u, 0u, (aSign != 0u)); 9327e102996Smaya 9337e102996Smaya return mix(z, expt, (aSign != 0u) && (z != 0u)); 9347e102996Smaya} 9357e102996Smaya 9367e102996Smayauint64_t 9377e102996Smaya__uint_to_fp64(uint a) 9387e102996Smaya{ 9397e102996Smaya if (a == 0u) 9407e102996Smaya return 0ul; 9417e102996Smaya 9427e102996Smaya int shiftDist = __countLeadingZeros32(a) + 21; 9437e102996Smaya 9447e102996Smaya uint aHigh = 0u; 9457e102996Smaya uint aLow = 0u; 9467e102996Smaya int negCount = (- shiftDist) & 31; 9477e102996Smaya 9487e102996Smaya aHigh = mix(0u, a<< shiftDist - 32, shiftDist < 64); 9497e102996Smaya aLow = 0u; 9507e102996Smaya aHigh = mix(aHigh, 0u, shiftDist == 0); 9517e102996Smaya aLow = mix(aLow, a, shiftDist ==0); 9527e102996Smaya aHigh = mix(aHigh, a >> negCount, shiftDist < 32); 9537e102996Smaya aLow = mix(aLow, a << shiftDist, shiftDist < 32); 9547e102996Smaya 9557e102996Smaya return __packFloat64(0u, 0x432 - shiftDist, aHigh, aLow); 9567e102996Smaya} 9577e102996Smaya 9587e102996Smayauint64_t 9597e102996Smaya__uint64_to_fp64(uint64_t a) 9607e102996Smaya{ 9617e102996Smaya if (a == 0u) 9627e102996Smaya return 0ul; 9637e102996Smaya 9647e102996Smaya uvec2 aFrac = unpackUint2x32(a); 9657e102996Smaya uint aFracLo = __extractFloat64FracLo(a); 9667e102996Smaya uint aFracHi = __extractFloat64FracHi(a); 9677e102996Smaya 9687e102996Smaya if ((aFracHi & 0x80000000u) != 0u) { 9697e102996Smaya __shift64RightJamming(aFracHi, aFracLo, 1, aFracHi, aFracLo); 9707e102996Smaya return __roundAndPackFloat64(0, 0x433, aFracHi, aFracLo, 0u); 9717e102996Smaya } else { 9727e102996Smaya return __normalizeRoundAndPackFloat64(0, 0x432, aFrac.y, aFrac.x); 9737e102996Smaya } 9747e102996Smaya} 9757e102996Smaya 9767e102996Smayauint64_t 9777e102996Smaya__fp64_to_uint64(uint64_t a) 9787e102996Smaya{ 9797e102996Smaya uint aFracLo = __extractFloat64FracLo(a); 9807e102996Smaya uint aFracHi = __extractFloat64FracHi(a); 9817e102996Smaya int aExp = __extractFloat64Exp(a); 9827e102996Smaya uint aSign = __extractFloat64Sign(a); 9837e102996Smaya uint zFrac2 = 0u; 9847e102996Smaya uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL; 9857e102996Smaya 9867e102996Smaya aFracHi = mix(aFracHi, aFracHi | 0x00100000u, aExp != 0); 9877e102996Smaya int shiftCount = 0x433 - aExp; 9887e102996Smaya 9897e102996Smaya if ( shiftCount <= 0 ) { 9907e102996Smaya if (shiftCount < -11 && aExp == 0x7FF) { 9917e102996Smaya if ((aFracHi | aFracLo) != 0u) 9927e102996Smaya return __propagateFloat64NaN(a, a); 9937e102996Smaya return mix(default_nan, a, aSign == 0u); 9947e102996Smaya } 9957e102996Smaya __shortShift64Left(aFracHi, aFracLo, -shiftCount, aFracHi, aFracLo); 9967e102996Smaya } else { 9977e102996Smaya __shift64ExtraRightJamming(aFracHi, aFracLo, zFrac2, shiftCount, 9987e102996Smaya aFracHi, aFracLo, zFrac2); 9997e102996Smaya } 10007e102996Smaya return __roundAndPackUInt64(aSign, aFracHi, aFracLo, zFrac2); 10017e102996Smaya} 10027e102996Smaya 10037e102996Smayaint64_t 10047e102996Smaya__fp64_to_int64(uint64_t a) 10057e102996Smaya{ 10067e102996Smaya uint zFrac2 = 0u; 10077e102996Smaya uint aFracLo = __extractFloat64FracLo(a); 10087e102996Smaya uint aFracHi = __extractFloat64FracHi(a); 10097e102996Smaya int aExp = __extractFloat64Exp(a); 10107e102996Smaya uint aSign = __extractFloat64Sign(a); 10117e102996Smaya int64_t default_NegNaN = -0x7FFFFFFFFFFFFFFEL; 10127e102996Smaya int64_t default_PosNaN = 0xFFFFFFFFFFFFFFFFL; 10137e102996Smaya 10147e102996Smaya aFracHi = mix(aFracHi, aFracHi | 0x00100000u, aExp != 0); 10157e102996Smaya int shiftCount = 0x433 - aExp; 10167e102996Smaya 10177e102996Smaya if (shiftCount <= 0) { 10187e102996Smaya if (shiftCount < -11 && aExp == 0x7FF) { 10197e102996Smaya if ((aFracHi | aFracLo) != 0u) 10207e102996Smaya return default_NegNaN; 10217e102996Smaya return mix(default_NegNaN, default_PosNaN, aSign == 0u); 10227e102996Smaya } 10237e102996Smaya __shortShift64Left(aFracHi, aFracLo, -shiftCount, aFracHi, aFracLo); 10247e102996Smaya } else { 10257e102996Smaya __shift64ExtraRightJamming(aFracHi, aFracLo, zFrac2, shiftCount, 10267e102996Smaya aFracHi, aFracLo, zFrac2); 10277e102996Smaya } 10287e102996Smaya 10297e102996Smaya return __roundAndPackInt64(aSign, aFracHi, aFracLo, zFrac2); 10307e102996Smaya} 10317e102996Smaya 10327e102996Smayauint64_t 10337e102996Smaya__fp32_to_uint64(float f) 10347e102996Smaya{ 10357e102996Smaya uint a = floatBitsToUint(f); 10367e102996Smaya uint aFrac = a & 0x007FFFFFu; 10377e102996Smaya int aExp = int((a>>23) & 0xFFu); 10387e102996Smaya uint aSign = a>>31; 10397e102996Smaya uint zFrac0 = 0u; 10407e102996Smaya uint zFrac1 = 0u; 10417e102996Smaya uint zFrac2 = 0u; 10427e102996Smaya uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL; 10437e102996Smaya int shiftCount = 0xBE - aExp; 10447e102996Smaya 10457e102996Smaya if (shiftCount <0) { 10467e102996Smaya if (aExp == 0xFF) 10477e102996Smaya return default_nan; 10487e102996Smaya } 10497e102996Smaya 10507e102996Smaya aFrac = mix(aFrac, aFrac | 0x00800000u, aExp != 0); 10517e102996Smaya __shortShift64Left(aFrac, 0, 40, zFrac0, zFrac1); 10527e102996Smaya 10537e102996Smaya if (shiftCount != 0) { 10547e102996Smaya __shift64ExtraRightJamming(zFrac0, zFrac1, zFrac2, shiftCount, 10557e102996Smaya zFrac0, zFrac1, zFrac2); 10567e102996Smaya } 10577e102996Smaya 10587e102996Smaya return __roundAndPackUInt64(aSign, zFrac0, zFrac1, zFrac2); 10597e102996Smaya} 10607e102996Smaya 10617e102996Smayaint64_t 10627e102996Smaya__fp32_to_int64(float f) 10637e102996Smaya{ 10647e102996Smaya uint a = floatBitsToUint(f); 10657e102996Smaya uint aFrac = a & 0x007FFFFFu; 10667e102996Smaya int aExp = int((a>>23) & 0xFFu); 10677e102996Smaya uint aSign = a>>31; 10687e102996Smaya uint zFrac0 = 0u; 10697e102996Smaya uint zFrac1 = 0u; 10707e102996Smaya uint zFrac2 = 0u; 10717e102996Smaya int64_t default_NegNaN = -0x7FFFFFFFFFFFFFFEL; 10727e102996Smaya int64_t default_PosNaN = 0xFFFFFFFFFFFFFFFFL; 10737e102996Smaya int shiftCount = 0xBE - aExp; 10747e102996Smaya 10757e102996Smaya if (shiftCount <0) { 10767e102996Smaya if (aExp == 0xFF && aFrac != 0u) 10777e102996Smaya return default_NegNaN; 10787e102996Smaya return mix(default_NegNaN, default_PosNaN, aSign == 0u); 10797e102996Smaya } 10807e102996Smaya 10817e102996Smaya aFrac = mix(aFrac, aFrac | 0x00800000u, aExp != 0); 10827e102996Smaya __shortShift64Left(aFrac, 0, 40, zFrac0, zFrac1); 10837e102996Smaya 10847e102996Smaya if (shiftCount != 0) { 10857e102996Smaya __shift64ExtraRightJamming(zFrac0, zFrac1, zFrac2, shiftCount, 10867e102996Smaya zFrac0, zFrac1, zFrac2); 10877e102996Smaya } 10887e102996Smaya 10897e102996Smaya return __roundAndPackInt64(aSign, zFrac0, zFrac1, zFrac2); 10907e102996Smaya} 10917e102996Smaya 10927e102996Smayauint64_t 10937e102996Smaya__int64_to_fp64(int64_t a) 10947e102996Smaya{ 10957e102996Smaya if (a==0) 10967e102996Smaya return 0ul; 10977e102996Smaya 10987e102996Smaya uint64_t absA = mix(uint64_t(a), uint64_t(-a), a < 0); 10997e102996Smaya uint aFracHi = __extractFloat64FracHi(absA); 11007e102996Smaya uvec2 aFrac = unpackUint2x32(absA); 11017e102996Smaya uint zSign = uint(a < 0); 11027e102996Smaya 11037e102996Smaya if ((aFracHi & 0x80000000u) != 0u) { 11047e102996Smaya return mix(0ul, __packFloat64(1, 0x434, 0u, 0u), a < 0); 11057e102996Smaya } 11067e102996Smaya 11077e102996Smaya return __normalizeRoundAndPackFloat64(zSign, 0x432, aFrac.y, aFrac.x); 11087e102996Smaya} 11097e102996Smaya 11107e102996Smaya/* Returns the result of converting the double-precision floating-point value 11117e102996Smaya * `a' to the 32-bit two's complement integer format. The conversion is 11127e102996Smaya * performed according to the IEEE Standard for Floating-Point Arithmetic--- 11137e102996Smaya * which means in particular that the conversion is rounded according to the 11147e102996Smaya * current rounding mode. If `a' is a NaN, the largest positive integer is 11157e102996Smaya * returned. Otherwise, if the conversion overflows, the largest integer with 11167e102996Smaya * the same sign as `a' is returned. 11177e102996Smaya */ 11187e102996Smayaint 11197e102996Smaya__fp64_to_int(uint64_t a) 11207e102996Smaya{ 11217e102996Smaya uint aFracLo = __extractFloat64FracLo(a); 11227e102996Smaya uint aFracHi = __extractFloat64FracHi(a); 11237e102996Smaya int aExp = __extractFloat64Exp(a); 11247e102996Smaya uint aSign = __extractFloat64Sign(a); 11257e102996Smaya 11267e102996Smaya uint absZ = 0u; 11277e102996Smaya uint aFracExtra = 0u; 11287e102996Smaya int shiftCount = aExp - 0x413; 11297e102996Smaya 11307e102996Smaya if (0 <= shiftCount) { 11317e102996Smaya if (0x41E < aExp) { 11327e102996Smaya if ((aExp == 0x7FF) && bool(aFracHi | aFracLo)) 11337e102996Smaya aSign = 0u; 11347e102996Smaya return mix(0x7FFFFFFF, 0x80000000, bool(aSign)); 11357e102996Smaya } 11367e102996Smaya __shortShift64Left(aFracHi | 0x00100000u, aFracLo, shiftCount, absZ, aFracExtra); 11377e102996Smaya } else { 11387e102996Smaya if (aExp < 0x3FF) 11397e102996Smaya return 0; 11407e102996Smaya 11417e102996Smaya aFracHi |= 0x00100000u; 11427e102996Smaya aFracExtra = ( aFracHi << (shiftCount & 31)) | aFracLo; 11437e102996Smaya absZ = aFracHi >> (- shiftCount); 11447e102996Smaya } 11457e102996Smaya 11467e102996Smaya int z = mix(int(absZ), -int(absZ), (aSign != 0u)); 11477e102996Smaya int nan = mix(0x7FFFFFFF, 0x80000000, bool(aSign)); 11487e102996Smaya return mix(z, nan, bool(aSign ^ uint(z < 0)) && bool(z)); 11497e102996Smaya} 11507e102996Smaya 11517e102996Smaya/* Returns the result of converting the 32-bit two's complement integer `a' 11527e102996Smaya * to the double-precision floating-point format. The conversion is performed 11537e102996Smaya * according to the IEEE Standard for Floating-Point Arithmetic. 11547e102996Smaya */ 11557e102996Smayauint64_t 11567e102996Smaya__int_to_fp64(int a) 11577e102996Smaya{ 11587e102996Smaya uint zFrac0 = 0u; 11597e102996Smaya uint zFrac1 = 0u; 11607e102996Smaya if (a==0) 11617e102996Smaya return __packFloat64(0u, 0, 0u, 0u); 11627e102996Smaya uint zSign = uint(a < 0); 11637e102996Smaya uint absA = mix(uint(a), uint(-a), a < 0); 11647e102996Smaya int shiftCount = __countLeadingZeros32(absA) - 11; 11657e102996Smaya if (0 <= shiftCount) { 11667e102996Smaya zFrac0 = absA << shiftCount; 11677e102996Smaya zFrac1 = 0u; 11687e102996Smaya } else { 11697e102996Smaya __shift64Right(absA, 0u, -shiftCount, zFrac0, zFrac1); 11707e102996Smaya } 11717e102996Smaya return __packFloat64(zSign, 0x412 - shiftCount, zFrac0, zFrac1); 11727e102996Smaya} 11737e102996Smaya 11747e102996Smayabool 11757e102996Smaya__fp64_to_bool(uint64_t a) 11767e102996Smaya{ 11777e102996Smaya return !__feq64_nonnan(__fabs64(a), 0ul); 11787e102996Smaya} 11797e102996Smaya 11807e102996Smayauint64_t 11817e102996Smaya__bool_to_fp64(bool a) 11827e102996Smaya{ 11837e102996Smaya return __int_to_fp64(int(a)); 11847e102996Smaya} 11857e102996Smaya 11867e102996Smaya/* Packs the sign `zSign', exponent `zExp', and significand `zFrac' into a 11877e102996Smaya * single-precision floating-point value, returning the result. After being 11887e102996Smaya * shifted into the proper positions, the three fields are simply added 11897e102996Smaya * together to form the result. This means that any integer portion of `zSig' 11907e102996Smaya * will be added into the exponent. Since a properly normalized significand 11917e102996Smaya * will have an integer portion equal to 1, the `zExp' input should be 1 less 11927e102996Smaya * than the desired result exponent whenever `zFrac' is a complete, normalized 11937e102996Smaya * significand. 11947e102996Smaya */ 11957e102996Smayafloat 11967e102996Smaya__packFloat32(uint zSign, int zExp, uint zFrac) 11977e102996Smaya{ 11987e102996Smaya return uintBitsToFloat((zSign<<31) + (uint(zExp)<<23) + zFrac); 11997e102996Smaya} 12007e102996Smaya 12017e102996Smaya/* Takes an abstract floating-point value having sign `zSign', exponent `zExp', 12027e102996Smaya * and significand `zFrac', and returns the proper single-precision floating- 12037e102996Smaya * point value corresponding to the abstract input. Ordinarily, the abstract 12047e102996Smaya * value is simply rounded and packed into the single-precision format, with 12057e102996Smaya * the inexact exception raised if the abstract input cannot be represented 12067e102996Smaya * exactly. However, if the abstract value is too large, the overflow and 12077e102996Smaya * inexact exceptions are raised and an infinity or maximal finite value is 12087e102996Smaya * returned. If the abstract value is too small, the input value is rounded to 12097e102996Smaya * a subnormal number, and the underflow and inexact exceptions are raised if 12107e102996Smaya * the abstract input cannot be represented exactly as a subnormal single- 12117e102996Smaya * precision floating-point number. 12127e102996Smaya * The input significand `zFrac' has its binary point between bits 30 12137e102996Smaya * and 29, which is 7 bits to the left of the usual location. This shifted 12147e102996Smaya * significand must be normalized or smaller. If `zFrac' is not normalized, 12157e102996Smaya * `zExp' must be 0; in that case, the result returned is a subnormal number, 12167e102996Smaya * and it must not require rounding. In the usual case that `zFrac' is 12177e102996Smaya * normalized, `zExp' must be 1 less than the "true" floating-point exponent. 12187e102996Smaya * The handling of underflow and overflow follows the IEEE Standard for 12197e102996Smaya * Floating-Point Arithmetic. 12207e102996Smaya */ 12217e102996Smayafloat 12227e102996Smaya__roundAndPackFloat32(uint zSign, int zExp, uint zFrac) 12237e102996Smaya{ 12247e102996Smaya bool roundNearestEven; 12257e102996Smaya int roundIncrement; 12267e102996Smaya int roundBits; 12277e102996Smaya 12287e102996Smaya roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN; 12297e102996Smaya roundIncrement = 0x40; 12307e102996Smaya if (!roundNearestEven) { 12317e102996Smaya if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) { 12327e102996Smaya roundIncrement = 0; 12337e102996Smaya } else { 12347e102996Smaya roundIncrement = 0x7F; 12357e102996Smaya if (zSign != 0u) { 12367e102996Smaya if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) 12377e102996Smaya roundIncrement = 0; 12387e102996Smaya } else { 12397e102996Smaya if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) 12407e102996Smaya roundIncrement = 0; 12417e102996Smaya } 12427e102996Smaya } 12437e102996Smaya } 12447e102996Smaya roundBits = int(zFrac & 0x7Fu); 12457e102996Smaya if (0xFDu <= uint(zExp)) { 12467e102996Smaya if ((0xFD < zExp) || ((zExp == 0xFD) && (int(zFrac) + roundIncrement) < 0)) 12477e102996Smaya return __packFloat32(zSign, 0xFF, 0u) - float(roundIncrement == 0); 12487e102996Smaya int count = -zExp; 12497e102996Smaya bool zexp_lt0 = zExp < 0; 12507e102996Smaya uint zFrac_lt0 = mix(uint(zFrac != 0u), (zFrac>>count) | uint((zFrac<<((-count) & 31)) != 0u), (-zExp) < 32); 12517e102996Smaya zFrac = mix(zFrac, zFrac_lt0, zexp_lt0); 12527e102996Smaya roundBits = mix(roundBits, int(zFrac) & 0x7f, zexp_lt0); 12537e102996Smaya zExp = mix(zExp, 0, zexp_lt0); 12547e102996Smaya } 12557e102996Smaya zFrac = (zFrac + uint(roundIncrement))>>7; 12567e102996Smaya zFrac &= ~uint(((roundBits ^ 0x40) == 0) && roundNearestEven); 12577e102996Smaya 12587e102996Smaya return __packFloat32(zSign, mix(zExp, 0, zFrac == 0u), zFrac); 12597e102996Smaya} 12607e102996Smaya 12617e102996Smaya/* Returns the result of converting the double-precision floating-point value 12627e102996Smaya * `a' to the single-precision floating-point format. The conversion is 12637e102996Smaya * performed according to the IEEE Standard for Floating-Point Arithmetic. 12647e102996Smaya */ 12657e102996Smayafloat 12667e102996Smaya__fp64_to_fp32(uint64_t __a) 12677e102996Smaya{ 12687e102996Smaya uvec2 a = unpackUint2x32(__a); 12697e102996Smaya uint zFrac = 0u; 12707e102996Smaya uint allZero = 0u; 12717e102996Smaya 12727e102996Smaya uint aFracLo = __extractFloat64FracLo(__a); 12737e102996Smaya uint aFracHi = __extractFloat64FracHi(__a); 12747e102996Smaya int aExp = __extractFloat64Exp(__a); 12757e102996Smaya uint aSign = __extractFloat64Sign(__a); 12767e102996Smaya if (aExp == 0x7FF) { 12777e102996Smaya __shortShift64Left(a.y, a.x, 12, a.y, a.x); 12787e102996Smaya float rval = uintBitsToFloat((aSign<<31) | 0x7FC00000u | (a.y>>9)); 12797e102996Smaya rval = mix(__packFloat32(aSign, 0xFF, 0u), rval, (aFracHi | aFracLo) != 0u); 12807e102996Smaya return rval; 12817e102996Smaya } 12827e102996Smaya __shift64RightJamming(aFracHi, aFracLo, 22, allZero, zFrac); 12837e102996Smaya zFrac = mix(zFrac, zFrac | 0x40000000u, aExp != 0); 12847e102996Smaya return __roundAndPackFloat32(aSign, aExp - 0x381, zFrac); 12857e102996Smaya} 12867e102996Smaya 12877e102996Smayafloat 12887e102996Smaya__uint64_to_fp32(uint64_t __a) 12897e102996Smaya{ 12907e102996Smaya uint zFrac = 0u; 12917e102996Smaya uvec2 aFrac = unpackUint2x32(__a); 12927e102996Smaya int shiftCount = __countLeadingZeros32(mix(aFrac.y, aFrac.x, aFrac.y == 0u)); 12937e102996Smaya shiftCount -= mix(40, 8, aFrac.y == 0u); 12947e102996Smaya 12957e102996Smaya if (0 <= shiftCount) { 12967e102996Smaya __shortShift64Left(aFrac.y, aFrac.x, shiftCount, aFrac.y, aFrac.x); 12977e102996Smaya bool is_zero = (aFrac.y | aFrac.x) == 0u; 12987e102996Smaya return mix(__packFloat32(0u, 0x95 - shiftCount, aFrac.x), 0, is_zero); 12997e102996Smaya } 13007e102996Smaya 13017e102996Smaya shiftCount += 7; 13027e102996Smaya __shift64RightJamming(aFrac.y, aFrac.x, -shiftCount, aFrac.y, aFrac.x); 13037e102996Smaya zFrac = mix(aFrac.x<<shiftCount, aFrac.x, shiftCount < 0); 13047e102996Smaya return __roundAndPackFloat32(0u, 0x9C - shiftCount, zFrac); 13057e102996Smaya} 13067e102996Smaya 13077e102996Smayafloat 13087e102996Smaya__int64_to_fp32(int64_t __a) 13097e102996Smaya{ 13107e102996Smaya uint zFrac = 0u; 13117e102996Smaya uint aSign = uint(__a < 0); 13127e102996Smaya uint64_t absA = mix(uint64_t(__a), uint64_t(-__a), __a < 0); 13137e102996Smaya uvec2 aFrac = unpackUint2x32(absA); 13147e102996Smaya int shiftCount = __countLeadingZeros32(mix(aFrac.y, aFrac.x, aFrac.y == 0u)); 13157e102996Smaya shiftCount -= mix(40, 8, aFrac.y == 0u); 13167e102996Smaya 13177e102996Smaya if (0 <= shiftCount) { 13187e102996Smaya __shortShift64Left(aFrac.y, aFrac.x, shiftCount, aFrac.y, aFrac.x); 13197e102996Smaya bool is_zero = (aFrac.y | aFrac.x) == 0u; 13207e102996Smaya return mix(__packFloat32(aSign, 0x95 - shiftCount, aFrac.x), 0, absA == 0u); 13217e102996Smaya } 13227e102996Smaya 13237e102996Smaya shiftCount += 7; 13247e102996Smaya __shift64RightJamming(aFrac.y, aFrac.x, -shiftCount, aFrac.y, aFrac.x); 13257e102996Smaya zFrac = mix(aFrac.x<<shiftCount, aFrac.x, shiftCount < 0); 13267e102996Smaya return __roundAndPackFloat32(aSign, 0x9C - shiftCount, zFrac); 13277e102996Smaya} 13287e102996Smaya 13297e102996Smaya/* Returns the result of converting the single-precision floating-point value 13307e102996Smaya * `a' to the double-precision floating-point format. 13317e102996Smaya */ 13327e102996Smayauint64_t 13337e102996Smaya__fp32_to_fp64(float f) 13347e102996Smaya{ 13357e102996Smaya uint a = floatBitsToUint(f); 13367e102996Smaya uint aFrac = a & 0x007FFFFFu; 13377e102996Smaya int aExp = int((a>>23) & 0xFFu); 13387e102996Smaya uint aSign = a>>31; 13397e102996Smaya uint zFrac0 = 0u; 13407e102996Smaya uint zFrac1 = 0u; 13417e102996Smaya 13427e102996Smaya if (aExp == 0xFF) { 13437e102996Smaya if (aFrac != 0u) { 13447e102996Smaya uint nanLo = 0u; 13457e102996Smaya uint nanHi = a<<9; 13467e102996Smaya __shift64Right(nanHi, nanLo, 12, nanHi, nanLo); 13477e102996Smaya nanHi |= ((aSign<<31) | 0x7FF80000u); 13487e102996Smaya return packUint2x32(uvec2(nanLo, nanHi)); 13497e102996Smaya } 13507e102996Smaya return __packFloat64(aSign, 0x7FF, 0u, 0u); 13517e102996Smaya } 13527e102996Smaya 13537e102996Smaya if (aExp == 0) { 13547e102996Smaya if (aFrac == 0u) 13557e102996Smaya return __packFloat64(aSign, 0, 0u, 0u); 13567e102996Smaya /* Normalize subnormal */ 13577e102996Smaya int shiftCount = __countLeadingZeros32(aFrac) - 8; 13587e102996Smaya aFrac <<= shiftCount; 13597e102996Smaya aExp = 1 - shiftCount; 13607e102996Smaya --aExp; 13617e102996Smaya } 13627e102996Smaya 13637e102996Smaya __shift64Right(aFrac, 0u, 3, zFrac0, zFrac1); 13647e102996Smaya return __packFloat64(aSign, aExp + 0x380, zFrac0, zFrac1); 13657e102996Smaya} 13667e102996Smaya 13677e102996Smaya/* Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the 13687e102996Smaya * 96-bit value formed by concatenating `b0', `b1', and `b2'. Addition is 13697e102996Smaya * modulo 2^96, so any carry out is lost. The result is broken into three 13707e102996Smaya * 32-bit pieces which are stored at the locations pointed to by `z0Ptr', 13717e102996Smaya * `z1Ptr', and `z2Ptr'. 13727e102996Smaya */ 13737e102996Smayavoid 13747e102996Smaya__add96(uint a0, uint a1, uint a2, 13757e102996Smaya uint b0, uint b1, uint b2, 13767e102996Smaya out uint z0Ptr, 13777e102996Smaya out uint z1Ptr, 13787e102996Smaya out uint z2Ptr) 13797e102996Smaya{ 13807e102996Smaya uint z2 = a2 + b2; 13817e102996Smaya uint carry1 = uint(z2 < a2); 13827e102996Smaya uint z1 = a1 + b1; 13837e102996Smaya uint carry0 = uint(z1 < a1); 13847e102996Smaya uint z0 = a0 + b0; 13857e102996Smaya z1 += carry1; 13867e102996Smaya z0 += uint(z1 < carry1); 13877e102996Smaya z0 += carry0; 13887e102996Smaya z2Ptr = z2; 13897e102996Smaya z1Ptr = z1; 13907e102996Smaya z0Ptr = z0; 13917e102996Smaya} 13927e102996Smaya 13937e102996Smaya/* Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from 13947e102996Smaya * the 96-bit value formed by concatenating `a0', `a1', and `a2'. Subtraction 13957e102996Smaya * is modulo 2^96, so any borrow out (carry out) is lost. The result is broken 13967e102996Smaya * into three 32-bit pieces which are stored at the locations pointed to by 13977e102996Smaya * `z0Ptr', `z1Ptr', and `z2Ptr'. 13987e102996Smaya */ 13997e102996Smayavoid 14007e102996Smaya__sub96(uint a0, uint a1, uint a2, 14017e102996Smaya uint b0, uint b1, uint b2, 14027e102996Smaya out uint z0Ptr, 14037e102996Smaya out uint z1Ptr, 14047e102996Smaya out uint z2Ptr) 14057e102996Smaya{ 14067e102996Smaya uint z2 = a2 - b2; 14077e102996Smaya uint borrow1 = uint(a2 < b2); 14087e102996Smaya uint z1 = a1 - b1; 14097e102996Smaya uint borrow0 = uint(a1 < b1); 14107e102996Smaya uint z0 = a0 - b0; 14117e102996Smaya z0 -= uint(z1 < borrow1); 14127e102996Smaya z1 -= borrow1; 14137e102996Smaya z0 -= borrow0; 14147e102996Smaya z2Ptr = z2; 14157e102996Smaya z1Ptr = z1; 14167e102996Smaya z0Ptr = z0; 14177e102996Smaya} 14187e102996Smaya 14197e102996Smaya/* Returns an approximation to the 32-bit integer quotient obtained by dividing 14207e102996Smaya * `b' into the 64-bit value formed by concatenating `a0' and `a1'. The 14217e102996Smaya * divisor `b' must be at least 2^31. If q is the exact quotient truncated 14227e102996Smaya * toward zero, the approximation returned lies between q and q + 2 inclusive. 14237e102996Smaya * If the exact quotient q is larger than 32 bits, the maximum positive 32-bit 14247e102996Smaya * unsigned integer is returned. 14257e102996Smaya */ 14267e102996Smayauint 14277e102996Smaya__estimateDiv64To32(uint a0, uint a1, uint b) 14287e102996Smaya{ 14297e102996Smaya uint b0; 14307e102996Smaya uint b1; 14317e102996Smaya uint rem0 = 0u; 14327e102996Smaya uint rem1 = 0u; 14337e102996Smaya uint term0 = 0u; 14347e102996Smaya uint term1 = 0u; 14357e102996Smaya uint z; 14367e102996Smaya 14377e102996Smaya if (b <= a0) 14387e102996Smaya return 0xFFFFFFFFu; 14397e102996Smaya b0 = b>>16; 14407e102996Smaya z = (b0<<16 <= a0) ? 0xFFFF0000u : (a0 / b0)<<16; 14417e102996Smaya __mul32To64(b, z, term0, term1); 14427e102996Smaya __sub64(a0, a1, term0, term1, rem0, rem1); 14437e102996Smaya while (int(rem0) < 0) { 14447e102996Smaya z -= 0x10000u; 14457e102996Smaya b1 = b<<16; 14467e102996Smaya __add64(rem0, rem1, b0, b1, rem0, rem1); 14477e102996Smaya } 14487e102996Smaya rem0 = (rem0<<16) | (rem1>>16); 14497e102996Smaya z |= (b0<<16 <= rem0) ? 0xFFFFu : rem0 / b0; 14507e102996Smaya return z; 14517e102996Smaya} 14527e102996Smaya 14537e102996Smayauint 14547e102996Smaya__sqrtOddAdjustments(int index) 14557e102996Smaya{ 14567e102996Smaya uint res = 0u; 14577e102996Smaya if (index == 0) 14587e102996Smaya res = 0x0004u; 14597e102996Smaya if (index == 1) 14607e102996Smaya res = 0x0022u; 14617e102996Smaya if (index == 2) 14627e102996Smaya res = 0x005Du; 14637e102996Smaya if (index == 3) 14647e102996Smaya res = 0x00B1u; 14657e102996Smaya if (index == 4) 14667e102996Smaya res = 0x011Du; 14677e102996Smaya if (index == 5) 14687e102996Smaya res = 0x019Fu; 14697e102996Smaya if (index == 6) 14707e102996Smaya res = 0x0236u; 14717e102996Smaya if (index == 7) 14727e102996Smaya res = 0x02E0u; 14737e102996Smaya if (index == 8) 14747e102996Smaya res = 0x039Cu; 14757e102996Smaya if (index == 9) 14767e102996Smaya res = 0x0468u; 14777e102996Smaya if (index == 10) 14787e102996Smaya res = 0x0545u; 14797e102996Smaya if (index == 11) 14807e102996Smaya res = 0x631u; 14817e102996Smaya if (index == 12) 14827e102996Smaya res = 0x072Bu; 14837e102996Smaya if (index == 13) 14847e102996Smaya res = 0x0832u; 14857e102996Smaya if (index == 14) 14867e102996Smaya res = 0x0946u; 14877e102996Smaya if (index == 15) 14887e102996Smaya res = 0x0A67u; 14897e102996Smaya 14907e102996Smaya return res; 14917e102996Smaya} 14927e102996Smaya 14937e102996Smayauint 14947e102996Smaya__sqrtEvenAdjustments(int index) 14957e102996Smaya{ 14967e102996Smaya uint res = 0u; 14977e102996Smaya if (index == 0) 14987e102996Smaya res = 0x0A2Du; 14997e102996Smaya if (index == 1) 15007e102996Smaya res = 0x08AFu; 15017e102996Smaya if (index == 2) 15027e102996Smaya res = 0x075Au; 15037e102996Smaya if (index == 3) 15047e102996Smaya res = 0x0629u; 15057e102996Smaya if (index == 4) 15067e102996Smaya res = 0x051Au; 15077e102996Smaya if (index == 5) 15087e102996Smaya res = 0x0429u; 15097e102996Smaya if (index == 6) 15107e102996Smaya res = 0x0356u; 15117e102996Smaya if (index == 7) 15127e102996Smaya res = 0x029Eu; 15137e102996Smaya if (index == 8) 15147e102996Smaya res = 0x0200u; 15157e102996Smaya if (index == 9) 15167e102996Smaya res = 0x0179u; 15177e102996Smaya if (index == 10) 15187e102996Smaya res = 0x0109u; 15197e102996Smaya if (index == 11) 15207e102996Smaya res = 0x00AFu; 15217e102996Smaya if (index == 12) 15227e102996Smaya res = 0x0068u; 15237e102996Smaya if (index == 13) 15247e102996Smaya res = 0x0034u; 15257e102996Smaya if (index == 14) 15267e102996Smaya res = 0x0012u; 15277e102996Smaya if (index == 15) 15287e102996Smaya res = 0x0002u; 15297e102996Smaya 15307e102996Smaya return res; 15317e102996Smaya} 15327e102996Smaya 15337e102996Smaya/* Returns an approximation to the square root of the 32-bit significand given 15347e102996Smaya * by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of 15357e102996Smaya * `aExp' (the least significant bit) is 1, the integer returned approximates 15367e102996Smaya * 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' 15377e102996Smaya * is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either 15387e102996Smaya * case, the approximation returned lies strictly within +/-2 of the exact 15397e102996Smaya * value. 15407e102996Smaya */ 15417e102996Smayauint 15427e102996Smaya__estimateSqrt32(int aExp, uint a) 15437e102996Smaya{ 15447e102996Smaya uint z; 15457e102996Smaya 15467e102996Smaya int index = int(a>>27 & 15u); 15477e102996Smaya if ((aExp & 1) != 0) { 15487e102996Smaya z = 0x4000u + (a>>17) - __sqrtOddAdjustments(index); 15497e102996Smaya z = ((a / z)<<14) + (z<<15); 15507e102996Smaya a >>= 1; 15517e102996Smaya } else { 15527e102996Smaya z = 0x8000u + (a>>17) - __sqrtEvenAdjustments(index); 15537e102996Smaya z = a / z + z; 15547e102996Smaya z = (0x20000u <= z) ? 0xFFFF8000u : (z<<15); 15557e102996Smaya if (z <= a) 15567e102996Smaya return uint(int(a)>>1); 15577e102996Smaya } 15587e102996Smaya return ((__estimateDiv64To32(a, 0u, z))>>1) + (z>>1); 15597e102996Smaya} 15607e102996Smaya 15617e102996Smaya/* Returns the square root of the double-precision floating-point value `a'. 15627e102996Smaya * The operation is performed according to the IEEE Standard for Floating-Point 15637e102996Smaya * Arithmetic. 15647e102996Smaya */ 15657e102996Smayauint64_t 15667e102996Smaya__fsqrt64(uint64_t a) 15677e102996Smaya{ 15687e102996Smaya uint zFrac0 = 0u; 15697e102996Smaya uint zFrac1 = 0u; 15707e102996Smaya uint zFrac2 = 0u; 15717e102996Smaya uint doubleZFrac0 = 0u; 15727e102996Smaya uint rem0 = 0u; 15737e102996Smaya uint rem1 = 0u; 15747e102996Smaya uint rem2 = 0u; 15757e102996Smaya uint rem3 = 0u; 15767e102996Smaya uint term0 = 0u; 15777e102996Smaya uint term1 = 0u; 15787e102996Smaya uint term2 = 0u; 15797e102996Smaya uint term3 = 0u; 15807e102996Smaya uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL; 15817e102996Smaya 15827e102996Smaya uint aFracLo = __extractFloat64FracLo(a); 15837e102996Smaya uint aFracHi = __extractFloat64FracHi(a); 15847e102996Smaya int aExp = __extractFloat64Exp(a); 15857e102996Smaya uint aSign = __extractFloat64Sign(a); 15867e102996Smaya if (aExp == 0x7FF) { 15877e102996Smaya if ((aFracHi | aFracLo) != 0u) 15887e102996Smaya return __propagateFloat64NaN(a, a); 15897e102996Smaya if (aSign == 0u) 15907e102996Smaya return a; 15917e102996Smaya return default_nan; 15927e102996Smaya } 15937e102996Smaya if (aSign != 0u) { 15947e102996Smaya if ((uint(aExp) | aFracHi | aFracLo) == 0u) 15957e102996Smaya return a; 15967e102996Smaya return default_nan; 15977e102996Smaya } 15987e102996Smaya if (aExp == 0) { 15997e102996Smaya if ((aFracHi | aFracLo) == 0u) 16007e102996Smaya return __packFloat64(0u, 0, 0u, 0u); 16017e102996Smaya __normalizeFloat64Subnormal(aFracHi, aFracLo, aExp, aFracHi, aFracLo); 16027e102996Smaya } 16037e102996Smaya int zExp = ((aExp - 0x3FF)>>1) + 0x3FE; 16047e102996Smaya aFracHi |= 0x00100000u; 16057e102996Smaya __shortShift64Left(aFracHi, aFracLo, 11, term0, term1); 16067e102996Smaya zFrac0 = (__estimateSqrt32(aExp, term0)>>1) + 1u; 16077e102996Smaya if (zFrac0 == 0u) 16087e102996Smaya zFrac0 = 0x7FFFFFFFu; 16097e102996Smaya doubleZFrac0 = zFrac0 + zFrac0; 16107e102996Smaya __shortShift64Left(aFracHi, aFracLo, 9 - (aExp & 1), aFracHi, aFracLo); 16117e102996Smaya __mul32To64(zFrac0, zFrac0, term0, term1); 16127e102996Smaya __sub64(aFracHi, aFracLo, term0, term1, rem0, rem1); 16137e102996Smaya while (int(rem0) < 0) { 16147e102996Smaya --zFrac0; 16157e102996Smaya doubleZFrac0 -= 2u; 16167e102996Smaya __add64(rem0, rem1, 0u, doubleZFrac0 | 1u, rem0, rem1); 16177e102996Smaya } 16187e102996Smaya zFrac1 = __estimateDiv64To32(rem1, 0u, doubleZFrac0); 16197e102996Smaya if ((zFrac1 & 0x1FFu) <= 5u) { 16207e102996Smaya if (zFrac1 == 0u) 16217e102996Smaya zFrac1 = 1u; 16227e102996Smaya __mul32To64(doubleZFrac0, zFrac1, term1, term2); 16237e102996Smaya __sub64(rem1, 0u, term1, term2, rem1, rem2); 16247e102996Smaya __mul32To64(zFrac1, zFrac1, term2, term3); 16257e102996Smaya __sub96(rem1, rem2, 0u, 0u, term2, term3, rem1, rem2, rem3); 16267e102996Smaya while (int(rem1) < 0) { 16277e102996Smaya --zFrac1; 16287e102996Smaya __shortShift64Left(0u, zFrac1, 1, term2, term3); 16297e102996Smaya term3 |= 1u; 16307e102996Smaya term2 |= doubleZFrac0; 16317e102996Smaya __add96(rem1, rem2, rem3, 0u, term2, term3, rem1, rem2, rem3); 16327e102996Smaya } 16337e102996Smaya zFrac1 |= uint((rem1 | rem2 | rem3) != 0u); 16347e102996Smaya } 16357e102996Smaya __shift64ExtraRightJamming(zFrac0, zFrac1, 0u, 10, zFrac0, zFrac1, zFrac2); 16367e102996Smaya return __roundAndPackFloat64(0u, zExp, zFrac0, zFrac1, zFrac2); 16377e102996Smaya} 16387e102996Smaya 16397e102996Smayauint64_t 16407e102996Smaya__ftrunc64(uint64_t __a) 16417e102996Smaya{ 16427e102996Smaya uvec2 a = unpackUint2x32(__a); 16437e102996Smaya int aExp = __extractFloat64Exp(__a); 16447e102996Smaya uint zLo; 16457e102996Smaya uint zHi; 16467e102996Smaya 16477e102996Smaya int unbiasedExp = aExp - 1023; 16487e102996Smaya int fracBits = 52 - unbiasedExp; 16497e102996Smaya uint maskLo = mix(~0u << fracBits, 0u, fracBits >= 32); 16507e102996Smaya uint maskHi = mix(~0u << (fracBits - 32), ~0u, fracBits < 33); 16517e102996Smaya zLo = maskLo & a.x; 16527e102996Smaya zHi = maskHi & a.y; 16537e102996Smaya 16547e102996Smaya zLo = mix(zLo, 0u, unbiasedExp < 0); 16557e102996Smaya zHi = mix(zHi, 0u, unbiasedExp < 0); 16567e102996Smaya zLo = mix(zLo, a.x, unbiasedExp > 52); 16577e102996Smaya zHi = mix(zHi, a.y, unbiasedExp > 52); 16587e102996Smaya return packUint2x32(uvec2(zLo, zHi)); 16597e102996Smaya} 16607e102996Smaya 16617e102996Smayauint64_t 16627e102996Smaya__ffloor64(uint64_t a) 16637e102996Smaya{ 16647e102996Smaya bool is_positive = __fge64(a, 0ul); 16657e102996Smaya uint64_t tr = __ftrunc64(a); 16667e102996Smaya 16677e102996Smaya if (is_positive || __feq64(tr, a)) { 16687e102996Smaya return tr; 16697e102996Smaya } else { 16707e102996Smaya return __fadd64(tr, 0xbff0000000000000ul /* -1.0 */); 16717e102996Smaya } 16727e102996Smaya} 16737e102996Smaya 16747e102996Smayauint64_t 16757e102996Smaya__fround64(uint64_t __a) 16767e102996Smaya{ 16777e102996Smaya uvec2 a = unpackUint2x32(__a); 16787e102996Smaya int unbiasedExp = __extractFloat64Exp(__a) - 1023; 16797e102996Smaya uint aHi = a.y; 16807e102996Smaya uint aLo = a.x; 16817e102996Smaya 16827e102996Smaya if (unbiasedExp < 20) { 16837e102996Smaya if (unbiasedExp < 0) { 16847e102996Smaya if ((aHi & 0x80000000u) != 0u && aLo == 0u) { 16857e102996Smaya return 0; 16867e102996Smaya } 16877e102996Smaya aHi &= 0x80000000u; 16887e102996Smaya if ((a.y & 0x000FFFFFu) == 0u && a.x == 0u) { 16897e102996Smaya aLo = 0u; 16907e102996Smaya return packUint2x32(uvec2(aLo, aHi)); 16917e102996Smaya } 16927e102996Smaya aHi = mix(aHi, (aHi | 0x3FF00000u), unbiasedExp == -1); 16937e102996Smaya aLo = 0u; 16947e102996Smaya } else { 16957e102996Smaya uint maskExp = 0x000FFFFFu >> unbiasedExp; 16967e102996Smaya uint lastBit = maskExp + 1; 16977e102996Smaya aHi += 0x00080000u >> unbiasedExp; 16987e102996Smaya if ((aHi & maskExp) == 0u) 16997e102996Smaya aHi &= ~lastBit; 17007e102996Smaya aHi &= ~maskExp; 17017e102996Smaya aLo = 0u; 17027e102996Smaya } 17037e102996Smaya } else if (unbiasedExp > 51 || unbiasedExp == 1024) { 17047e102996Smaya return __a; 17057e102996Smaya } else { 17067e102996Smaya uint maskExp = 0xFFFFFFFFu >> (unbiasedExp - 20); 17077e102996Smaya if ((aLo & maskExp) == 0u) 17087e102996Smaya return __a; 17097e102996Smaya uint tmp = aLo + (1u << (51 - unbiasedExp)); 17107e102996Smaya if(tmp < aLo) 17117e102996Smaya aHi += 1u; 17127e102996Smaya aLo = tmp; 17137e102996Smaya aLo &= ~maskExp; 17147e102996Smaya } 17157e102996Smaya 17167e102996Smaya return packUint2x32(uvec2(aLo, aHi)); 17177e102996Smaya} 17187e102996Smaya 17197e102996Smayauint64_t 17207e102996Smaya__fmin64(uint64_t a, uint64_t b) 17217e102996Smaya{ 17227e102996Smaya if (__is_nan(a)) return b; 17237e102996Smaya if (__is_nan(b)) return a; 17247e102996Smaya 17257e102996Smaya if (__flt64_nonnan(a, b)) return a; 17267e102996Smaya return b; 17277e102996Smaya} 17287e102996Smaya 17297e102996Smayauint64_t 17307e102996Smaya__fmax64(uint64_t a, uint64_t b) 17317e102996Smaya{ 17327e102996Smaya if (__is_nan(a)) return b; 17337e102996Smaya if (__is_nan(b)) return a; 17347e102996Smaya 17357e102996Smaya if (__flt64_nonnan(a, b)) return b; 17367e102996Smaya return a; 17377e102996Smaya} 17387e102996Smaya 17397e102996Smayauint64_t 17407e102996Smaya__ffract64(uint64_t a) 17417e102996Smaya{ 17427e102996Smaya return __fadd64(a, __fneg64(__ffloor64(a))); 17437e102996Smaya} 1744