1static const char float64_source[] = 2 "/*\n" 3 " * The implementations contained in this file are heavily based on the\n" 4 " * implementations found in the Berkeley SoftFloat library. As such, they are\n" 5 " * licensed under the same 3-clause BSD license:\n" 6 " *\n" 7 " * License for Berkeley SoftFloat Release 3e\n" 8 " *\n" 9 " * John R. Hauser\n" 10 " * 2018 January 20\n" 11 " *\n" 12 " * The following applies to the whole of SoftFloat Release 3e as well as to\n" 13 " * each source file individually.\n" 14 " *\n" 15 " * Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 The Regents of the\n" 16 " * University of California. All rights reserved.\n" 17 " *\n" 18 " * Redistribution and use in source and binary forms, with or without\n" 19 " * modification, are permitted provided that the following conditions are met:\n" 20 " *\n" 21 " * 1. Redistributions of source code must retain the above copyright notice,\n" 22 " * this list of conditions, and the following disclaimer.\n" 23 " *\n" 24 " * 2. Redistributions in binary form must reproduce the above copyright\n" 25 " * notice, this list of conditions, and the following disclaimer in the\n" 26 " * documentation and/or other materials provided with the distribution.\n" 27 " *\n" 28 " * 3. Neither the name of the University nor the names of its contributors\n" 29 " * may be used to endorse or promote products derived from this software\n" 30 " * without specific prior written permission.\n" 31 " *\n" 32 " * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS \"AS IS\", AND ANY\n" 33 " * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n" 34 " * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE\n" 35 " * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY\n" 36 " * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n" 37 " * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n" 38 " * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND\n" 39 " * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n" 40 " * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF\n" 41 " * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n" 42 "*/\n" 43 "\n" 44 "#version 430\n" 45 "#extension GL_ARB_gpu_shader_int64 : enable\n" 46 "#extension GL_ARB_shader_bit_encoding : enable\n" 47 "#extension GL_EXT_shader_integer_mix : enable\n" 48 "#extension GL_MESA_shader_integer_functions : enable\n" 49 "\n" 50 "#pragma warning(off)\n" 51 "\n" 52 "/* Software IEEE floating-point rounding mode.\n" 53 " * GLSL spec section \"4.7.1 Range and Precision\":\n" 54 " * The rounding mode cannot be set and is undefined.\n" 55 " * But here, we are able to define the rounding mode at the compilation time.\n" 56 " */\n" 57 "#define FLOAT_ROUND_NEAREST_EVEN 0\n" 58 "#define FLOAT_ROUND_TO_ZERO 1\n" 59 "#define FLOAT_ROUND_DOWN 2\n" 60 "#define FLOAT_ROUND_UP 3\n" 61 "#define FLOAT_ROUNDING_MODE FLOAT_ROUND_NEAREST_EVEN\n" 62 "\n" 63 "/* Absolute value of a Float64 :\n" 64 " * Clear the sign bit\n" 65 " */\n" 66 "uint64_t\n" 67 "__fabs64(uint64_t __a)\n" 68 "{\n" 69 " uvec2 a = unpackUint2x32(__a);\n" 70 " a.y &= 0x7FFFFFFFu;\n" 71 " return packUint2x32(a);\n" 72 "}\n" 73 "\n" 74 "/* Returns 1 if the double-precision floating-point value `a' is a NaN;\n" 75 " * otherwise returns 0.\n" 76 " */\n" 77 "bool\n" 78 "__is_nan(uint64_t __a)\n" 79 "{\n" 80 " uvec2 a = unpackUint2x32(__a);\n" 81 " return (0xFFE00000u <= (a.y<<1)) &&\n" 82 " ((a.x != 0u) || ((a.y & 0x000FFFFFu) != 0u));\n" 83 "}\n" 84 "\n" 85 "/* Negate value of a Float64 :\n" 86 " * Toggle the sign bit\n" 87 " */\n" 88 "uint64_t\n" 89 "__fneg64(uint64_t __a)\n" 90 "{\n" 91 " uvec2 a = unpackUint2x32(__a);\n" 92 " uint t = a.y;\n" 93 "\n" 94 " t ^= (1u << 31);\n" 95 " a.y = mix(t, a.y, __is_nan(__a));\n" 96 " return packUint2x32(a);\n" 97 "}\n" 98 "\n" 99 "uint64_t\n" 100 "__fsign64(uint64_t __a)\n" 101 "{\n" 102 " uvec2 a = unpackUint2x32(__a);\n" 103 " uvec2 retval;\n" 104 " retval.x = 0u;\n" 105 " retval.y = mix((a.y & 0x80000000u) | 0x3FF00000u, 0u, (a.y << 1 | a.x) == 0u);\n" 106 " return packUint2x32(retval);\n" 107 "}\n" 108 "\n" 109 "/* Returns the fraction bits of the double-precision floating-point value `a'.*/\n" 110 "uint\n" 111 "__extractFloat64FracLo(uint64_t a)\n" 112 "{\n" 113 " return unpackUint2x32(a).x;\n" 114 "}\n" 115 "\n" 116 "uint\n" 117 "__extractFloat64FracHi(uint64_t a)\n" 118 "{\n" 119 " return unpackUint2x32(a).y & 0x000FFFFFu;\n" 120 "}\n" 121 "\n" 122 "/* Returns the exponent bits of the double-precision floating-point value `a'.*/\n" 123 "int\n" 124 "__extractFloat64Exp(uint64_t __a)\n" 125 "{\n" 126 " uvec2 a = unpackUint2x32(__a);\n" 127 " return int((a.y>>20) & 0x7FFu);\n" 128 "}\n" 129 "\n" 130 "bool\n" 131 "__feq64_nonnan(uint64_t __a, uint64_t __b)\n" 132 "{\n" 133 " uvec2 a = unpackUint2x32(__a);\n" 134 " uvec2 b = unpackUint2x32(__b);\n" 135 " return (a.x == b.x) &&\n" 136 " ((a.y == b.y) || ((a.x == 0u) && (((a.y | b.y)<<1) == 0u)));\n" 137 "}\n" 138 "\n" 139 "/* Returns true if the double-precision floating-point value `a' is equal to the\n" 140 " * corresponding value `b', and false otherwise. The comparison is performed\n" 141 " * according to the IEEE Standard for Floating-Point Arithmetic.\n" 142 " */\n" 143 "bool\n" 144 "__feq64(uint64_t a, uint64_t b)\n" 145 "{\n" 146 " if (__is_nan(a) || __is_nan(b))\n" 147 " return false;\n" 148 "\n" 149 " return __feq64_nonnan(a, b);\n" 150 "}\n" 151 "\n" 152 "/* Returns true if the double-precision floating-point value `a' is not equal\n" 153 " * to the corresponding value `b', and false otherwise. The comparison is\n" 154 " * performed according to the IEEE Standard for Floating-Point Arithmetic.\n" 155 " */\n" 156 "bool\n" 157 "__fne64(uint64_t a, uint64_t b)\n" 158 "{\n" 159 " if (__is_nan(a) || __is_nan(b))\n" 160 " return true;\n" 161 "\n" 162 " return !__feq64_nonnan(a, b);\n" 163 "}\n" 164 "\n" 165 "/* Returns the sign bit of the double-precision floating-point value `a'.*/\n" 166 "uint\n" 167 "__extractFloat64Sign(uint64_t a)\n" 168 "{\n" 169 " return unpackUint2x32(a).y >> 31;\n" 170 "}\n" 171 "\n" 172 "/* Returns true if the 64-bit value formed by concatenating `a0' and `a1' is less\n" 173 " * than the 64-bit value formed by concatenating `b0' and `b1'. Otherwise,\n" 174 " * returns false.\n" 175 " */\n" 176 "bool\n" 177 "lt64(uint a0, uint a1, uint b0, uint b1)\n" 178 "{\n" 179 " return (a0 < b0) || ((a0 == b0) && (a1 < b1));\n" 180 "}\n" 181 "\n" 182 "bool\n" 183 "__flt64_nonnan(uint64_t __a, uint64_t __b)\n" 184 "{\n" 185 " uvec2 a = unpackUint2x32(__a);\n" 186 " uvec2 b = unpackUint2x32(__b);\n" 187 " uint aSign = __extractFloat64Sign(__a);\n" 188 " uint bSign = __extractFloat64Sign(__b);\n" 189 " if (aSign != bSign)\n" 190 " return (aSign != 0u) && ((((a.y | b.y)<<1) | a.x | b.x) != 0u);\n" 191 "\n" 192 " return mix(lt64(a.y, a.x, b.y, b.x), lt64(b.y, b.x, a.y, a.x), aSign != 0u);\n" 193 "}\n" 194 "\n" 195 "/* Returns true if the double-precision floating-point value `a' is less than\n" 196 " * the corresponding value `b', and false otherwise. The comparison is performed\n" 197 " * according to the IEEE Standard for Floating-Point Arithmetic.\n" 198 " */\n" 199 "bool\n" 200 "__flt64(uint64_t a, uint64_t b)\n" 201 "{\n" 202 " if (__is_nan(a) || __is_nan(b))\n" 203 " return false;\n" 204 "\n" 205 " return __flt64_nonnan(a, b);\n" 206 "}\n" 207 "\n" 208 "/* Returns true if the double-precision floating-point value `a' is greater\n" 209 " * than or equal to * the corresponding value `b', and false otherwise. The\n" 210 " * comparison is performed * according to the IEEE Standard for Floating-Point\n" 211 " * Arithmetic.\n" 212 " */\n" 213 "bool\n" 214 "__fge64(uint64_t a, uint64_t b)\n" 215 "{\n" 216 " if (__is_nan(a) || __is_nan(b))\n" 217 " return false;\n" 218 "\n" 219 " return !__flt64_nonnan(a, b);\n" 220 "}\n" 221 "\n" 222 "/* Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit\n" 223 " * value formed by concatenating `b0' and `b1'. Addition is modulo 2^64, so\n" 224 " * any carry out is lost. The result is broken into two 32-bit pieces which\n" 225 " * are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.\n" 226 " */\n" 227 "void\n" 228 "__add64(uint a0, uint a1, uint b0, uint b1,\n" 229 " out uint z0Ptr,\n" 230 " out uint z1Ptr)\n" 231 "{\n" 232 " uint z1 = a1 + b1;\n" 233 " z1Ptr = z1;\n" 234 " z0Ptr = a0 + b0 + uint(z1 < a1);\n" 235 "}\n" 236 "\n" 237 "\n" 238 "/* Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the\n" 239 " * 64-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo\n" 240 " * 2^64, so any borrow out (carry out) is lost. The result is broken into two\n" 241 " * 32-bit pieces which are stored at the locations pointed to by `z0Ptr' and\n" 242 " * `z1Ptr'.\n" 243 " */\n" 244 "void\n" 245 "__sub64(uint a0, uint a1, uint b0, uint b1,\n" 246 " out uint z0Ptr,\n" 247 " out uint z1Ptr)\n" 248 "{\n" 249 " z1Ptr = a1 - b1;\n" 250 " z0Ptr = a0 - b0 - uint(a1 < b1);\n" 251 "}\n" 252 "\n" 253 "/* Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the\n" 254 " * number of bits given in `count'. If any nonzero bits are shifted off, they\n" 255 " * are \"jammed\" into the least significant bit of the result by setting the\n" 256 " * least significant bit to 1. The value of `count' can be arbitrarily large;\n" 257 " * in particular, if `count' is greater than 64, the result will be either 0\n" 258 " * or 1, depending on whether the concatenation of `a0' and `a1' is zero or\n" 259 " * nonzero. The result is broken into two 32-bit pieces which are stored at\n" 260 " * the locations pointed to by `z0Ptr' and `z1Ptr'.\n" 261 " */\n" 262 "void\n" 263 "__shift64RightJamming(uint a0,\n" 264 " uint a1,\n" 265 " int count,\n" 266 " out uint z0Ptr,\n" 267 " out uint z1Ptr)\n" 268 "{\n" 269 " uint z0;\n" 270 " uint z1;\n" 271 " int negCount = (-count) & 31;\n" 272 "\n" 273 " z0 = mix(0u, a0, count == 0);\n" 274 " z0 = mix(z0, (a0 >> count), count < 32);\n" 275 "\n" 276 " z1 = uint((a0 | a1) != 0u); /* count >= 64 */\n" 277 " uint z1_lt64 = (a0>>(count & 31)) | uint(((a0<<negCount) | a1) != 0u);\n" 278 " z1 = mix(z1, z1_lt64, count < 64);\n" 279 " z1 = mix(z1, (a0 | uint(a1 != 0u)), count == 32);\n" 280 " uint z1_lt32 = (a0<<negCount) | (a1>>count) | uint ((a1<<negCount) != 0u);\n" 281 " z1 = mix(z1, z1_lt32, count < 32);\n" 282 " z1 = mix(z1, a1, count == 0);\n" 283 " z1Ptr = z1;\n" 284 " z0Ptr = z0;\n" 285 "}\n" 286 "\n" 287 "/* Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right\n" 288 " * by 32 _plus_ the number of bits given in `count'. The shifted result is\n" 289 " * at most 64 nonzero bits; these are broken into two 32-bit pieces which are\n" 290 " * stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted\n" 291 " * off form a third 32-bit result as follows: The _last_ bit shifted off is\n" 292 " * the most-significant bit of the extra result, and the other 31 bits of the\n" 293 " * extra result are all zero if and only if _all_but_the_last_ bits shifted off\n" 294 " * were all zero. This extra result is stored in the location pointed to by\n" 295 " * `z2Ptr'. The value of `count' can be arbitrarily large.\n" 296 " * (This routine makes more sense if `a0', `a1', and `a2' are considered\n" 297 " * to form a fixed-point value with binary point between `a1' and `a2'. This\n" 298 " * fixed-point value is shifted right by the number of bits given in `count',\n" 299 " * and the integer part of the result is returned at the locations pointed to\n" 300 " * by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly\n" 301 " * corrupted as described above, and is returned at the location pointed to by\n" 302 " * `z2Ptr'.)\n" 303 " */\n" 304 "void\n" 305 "__shift64ExtraRightJamming(uint a0, uint a1, uint a2,\n" 306 " int count,\n" 307 " out uint z0Ptr,\n" 308 " out uint z1Ptr,\n" 309 " out uint z2Ptr)\n" 310 "{\n" 311 " uint z0 = 0u;\n" 312 " uint z1;\n" 313 " uint z2;\n" 314 " int negCount = (-count) & 31;\n" 315 "\n" 316 " z2 = mix(uint(a0 != 0u), a0, count == 64);\n" 317 " z2 = mix(z2, a0 << negCount, count < 64);\n" 318 " z2 = mix(z2, a1 << negCount, count < 32);\n" 319 "\n" 320 " z1 = mix(0u, (a0 >> (count & 31)), count < 64);\n" 321 " z1 = mix(z1, (a0<<negCount) | (a1>>count), count < 32);\n" 322 "\n" 323 " a2 = mix(a2 | a1, a2, count < 32);\n" 324 " z0 = mix(z0, a0 >> count, count < 32);\n" 325 " z2 |= uint(a2 != 0u);\n" 326 "\n" 327 " z0 = mix(z0, 0u, (count == 32));\n" 328 " z1 = mix(z1, a0, (count == 32));\n" 329 " z2 = mix(z2, a1, (count == 32));\n" 330 " z0 = mix(z0, a0, (count == 0));\n" 331 " z1 = mix(z1, a1, (count == 0));\n" 332 " z2 = mix(z2, a2, (count == 0));\n" 333 " z2Ptr = z2;\n" 334 " z1Ptr = z1;\n" 335 " z0Ptr = z0;\n" 336 "}\n" 337 "\n" 338 "/* Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the\n" 339 " * number of bits given in `count'. Any bits shifted off are lost. The value\n" 340 " * of `count' must be less than 32. The result is broken into two 32-bit\n" 341 " * pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.\n" 342 " */\n" 343 "void\n" 344 "__shortShift64Left(uint a0, uint a1,\n" 345 " int count,\n" 346 " out uint z0Ptr,\n" 347 " out uint z1Ptr)\n" 348 "{\n" 349 " z1Ptr = a1<<count;\n" 350 " z0Ptr = mix((a0 << count | (a1 >> ((-count) & 31))), a0, count == 0);\n" 351 "}\n" 352 "\n" 353 "/* Packs the sign `zSign', the exponent `zExp', and the significand formed by\n" 354 " * the concatenation of `zFrac0' and `zFrac1' into a double-precision floating-\n" 355 " * point value, returning the result. After being shifted into the proper\n" 356 " * positions, the three fields `zSign', `zExp', and `zFrac0' are simply added\n" 357 " * together to form the most significant 32 bits of the result. This means\n" 358 " * that any integer portion of `zFrac0' will be added into the exponent. Since\n" 359 " * a properly normalized significand will have an integer portion equal to 1,\n" 360 " * the `zExp' input should be 1 less than the desired result exponent whenever\n" 361 " * `zFrac0' and `zFrac1' concatenated form a complete, normalized significand.\n" 362 " */\n" 363 "uint64_t\n" 364 "__packFloat64(uint zSign, int zExp, uint zFrac0, uint zFrac1)\n" 365 "{\n" 366 " uvec2 z;\n" 367 "\n" 368 " z.y = (zSign << 31) + (uint(zExp) << 20) + zFrac0;\n" 369 " z.x = zFrac1;\n" 370 " return packUint2x32(z);\n" 371 "}\n" 372 "\n" 373 "/* Takes an abstract floating-point value having sign `zSign', exponent `zExp',\n" 374 " * and extended significand formed by the concatenation of `zFrac0', `zFrac1',\n" 375 " * and `zFrac2', and returns the proper double-precision floating-point value\n" 376 " * corresponding to the abstract input. Ordinarily, the abstract value is\n" 377 " * simply rounded and packed into the double-precision format, with the inexact\n" 378 " * exception raised if the abstract input cannot be represented exactly.\n" 379 " * However, if the abstract value is too large, the overflow and inexact\n" 380 " * exceptions are raised and an infinity or maximal finite value is returned.\n" 381 " * If the abstract value is too small, the input value is rounded to a\n" 382 " * subnormal number, and the underflow and inexact exceptions are raised if the\n" 383 " * abstract input cannot be represented exactly as a subnormal double-precision\n" 384 " * floating-point number.\n" 385 " * The input significand must be normalized or smaller. If the input\n" 386 " * significand is not normalized, `zExp' must be 0; in that case, the result\n" 387 " * returned is a subnormal number, and it must not require rounding. In the\n" 388 " * usual case that the input significand is normalized, `zExp' must be 1 less\n" 389 " * than the \"true\" floating-point exponent. The handling of underflow and\n" 390 " * overflow follows the IEEE Standard for Floating-Point Arithmetic.\n" 391 " */\n" 392 "uint64_t\n" 393 "__roundAndPackFloat64(uint zSign,\n" 394 " int zExp,\n" 395 " uint zFrac0,\n" 396 " uint zFrac1,\n" 397 " uint zFrac2)\n" 398 "{\n" 399 " bool roundNearestEven;\n" 400 " bool increment;\n" 401 "\n" 402 " roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;\n" 403 " increment = int(zFrac2) < 0;\n" 404 " if (!roundNearestEven) {\n" 405 " if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) {\n" 406 " increment = false;\n" 407 " } else {\n" 408 " if (zSign != 0u) {\n" 409 " increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&\n" 410 " (zFrac2 != 0u);\n" 411 " } else {\n" 412 " increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&\n" 413 " (zFrac2 != 0u);\n" 414 " }\n" 415 " }\n" 416 " }\n" 417 " if (0x7FD <= zExp) {\n" 418 " if ((0x7FD < zExp) ||\n" 419 " ((zExp == 0x7FD) &&\n" 420 " (0x001FFFFFu == zFrac0 && 0xFFFFFFFFu == zFrac1) &&\n" 421 " increment)) {\n" 422 " if ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) ||\n" 423 " ((zSign != 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP)) ||\n" 424 " ((zSign == 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN))) {\n" 425 " return __packFloat64(zSign, 0x7FE, 0x000FFFFFu, 0xFFFFFFFFu);\n" 426 " }\n" 427 " return __packFloat64(zSign, 0x7FF, 0u, 0u);\n" 428 " }\n" 429 " if (zExp < 0) {\n" 430 " __shift64ExtraRightJamming(\n" 431 " zFrac0, zFrac1, zFrac2, -zExp, zFrac0, zFrac1, zFrac2);\n" 432 " zExp = 0;\n" 433 " if (roundNearestEven) {\n" 434 " increment = zFrac2 < 0u;\n" 435 " } else {\n" 436 " if (zSign != 0u) {\n" 437 " increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&\n" 438 " (zFrac2 != 0u);\n" 439 " } else {\n" 440 " increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&\n" 441 " (zFrac2 != 0u);\n" 442 " }\n" 443 " }\n" 444 " }\n" 445 " }\n" 446 " if (increment) {\n" 447 " __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);\n" 448 " zFrac1 &= ~((zFrac2 + uint(zFrac2 == 0u)) & uint(roundNearestEven));\n" 449 " } else {\n" 450 " zExp = mix(zExp, 0, (zFrac0 | zFrac1) == 0u);\n" 451 " }\n" 452 " return __packFloat64(zSign, zExp, zFrac0, zFrac1);\n" 453 "}\n" 454 "\n" 455 "uint64_t\n" 456 "__roundAndPackUInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2)\n" 457 "{\n" 458 " bool roundNearestEven;\n" 459 " bool increment;\n" 460 " uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;\n" 461 "\n" 462 " roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;\n" 463 "\n" 464 " if (zFrac2 >= 0x80000000u)\n" 465 " increment = false;\n" 466 "\n" 467 " if (!roundNearestEven) {\n" 468 " if (zSign != 0u) {\n" 469 " if ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && (zFrac2 != 0u)) {\n" 470 " increment = false;\n" 471 " }\n" 472 " } else {\n" 473 " increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&\n" 474 " (zFrac2 != 0u);\n" 475 " }\n" 476 " }\n" 477 "\n" 478 " if (increment) {\n" 479 " __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);\n" 480 " if ((zFrac0 | zFrac1) != 0u)\n" 481 " zFrac1 &= ~(1u) + uint(zFrac2 == 0u) & uint(roundNearestEven);\n" 482 " }\n" 483 " return mix(packUint2x32(uvec2(zFrac1, zFrac0)), default_nan,\n" 484 " (zSign !=0u && (zFrac0 | zFrac1) != 0u));\n" 485 "}\n" 486 "\n" 487 "int64_t\n" 488 "__roundAndPackInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2)\n" 489 "{\n" 490 " bool roundNearestEven;\n" 491 " bool increment;\n" 492 " int64_t default_NegNaN = -0x7FFFFFFFFFFFFFFEL;\n" 493 " int64_t default_PosNaN = 0xFFFFFFFFFFFFFFFFL;\n" 494 "\n" 495 " roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;\n" 496 "\n" 497 " if (zFrac2 >= 0x80000000u)\n" 498 " increment = false;\n" 499 "\n" 500 " if (!roundNearestEven) {\n" 501 " if (zSign != 0u) {\n" 502 " increment = ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&\n" 503 " (zFrac2 != 0u));\n" 504 " } else {\n" 505 " increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&\n" 506 " (zFrac2 != 0u);\n" 507 " }\n" 508 " }\n" 509 "\n" 510 " if (increment) {\n" 511 " __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);\n" 512 " if ((zFrac0 | zFrac1) != 0u)\n" 513 " zFrac1 &= ~(1u) + uint(zFrac2 == 0u) & uint(roundNearestEven);\n" 514 " }\n" 515 "\n" 516 " int64_t absZ = mix(int64_t(packUint2x32(uvec2(zFrac1, zFrac0))),\n" 517 " -int64_t(packUint2x32(uvec2(zFrac1, zFrac0))),\n" 518 " (zSign != 0u));\n" 519 " int64_t nan = mix(default_PosNaN, default_NegNaN, bool(zSign));\n" 520 " return mix(absZ, nan, bool(zSign ^ uint(absZ < 0)) && bool(absZ));\n" 521 "}\n" 522 "\n" 523 "/* Returns the number of leading 0 bits before the most-significant 1 bit of\n" 524 " * `a'. If `a' is zero, 32 is returned.\n" 525 " */\n" 526 "int\n" 527 "__countLeadingZeros32(uint a)\n" 528 "{\n" 529 " int shiftCount;\n" 530 " shiftCount = mix(31 - findMSB(a), 32, a == 0u);\n" 531 " return shiftCount;\n" 532 "}\n" 533 "\n" 534 "/* Takes an abstract floating-point value having sign `zSign', exponent `zExp',\n" 535 " * and significand formed by the concatenation of `zSig0' and `zSig1', and\n" 536 " * returns the proper double-precision floating-point value corresponding\n" 537 " * to the abstract input. This routine is just like `__roundAndPackFloat64'\n" 538 " * except that the input significand has fewer bits and does not have to be\n" 539 " * normalized. In all cases, `zExp' must be 1 less than the \"true\" floating-\n" 540 " * point exponent.\n" 541 " */\n" 542 "uint64_t\n" 543 "__normalizeRoundAndPackFloat64(uint zSign,\n" 544 " int zExp,\n" 545 " uint zFrac0,\n" 546 " uint zFrac1)\n" 547 "{\n" 548 " int shiftCount;\n" 549 " uint zFrac2;\n" 550 "\n" 551 " if (zFrac0 == 0u) {\n" 552 " zExp -= 32;\n" 553 " zFrac0 = zFrac1;\n" 554 " zFrac1 = 0u;\n" 555 " }\n" 556 "\n" 557 " shiftCount = __countLeadingZeros32(zFrac0) - 11;\n" 558 " if (0 <= shiftCount) {\n" 559 " zFrac2 = 0u;\n" 560 " __shortShift64Left(zFrac0, zFrac1, shiftCount, zFrac0, zFrac1);\n" 561 " } else {\n" 562 " __shift64ExtraRightJamming(\n" 563 " zFrac0, zFrac1, 0u, -shiftCount, zFrac0, zFrac1, zFrac2);\n" 564 " }\n" 565 " zExp -= shiftCount;\n" 566 " return __roundAndPackFloat64(zSign, zExp, zFrac0, zFrac1, zFrac2);\n" 567 "}\n" 568 "\n" 569 "/* Takes two double-precision floating-point values `a' and `b', one of which\n" 570 " * is a NaN, and returns the appropriate NaN result.\n" 571 " */\n" 572 "uint64_t\n" 573 "__propagateFloat64NaN(uint64_t __a, uint64_t __b)\n" 574 "{\n" 575 " bool aIsNaN = __is_nan(__a);\n" 576 " bool bIsNaN = __is_nan(__b);\n" 577 " uvec2 a = unpackUint2x32(__a);\n" 578 " uvec2 b = unpackUint2x32(__b);\n" 579 " a.y |= 0x00080000u;\n" 580 " b.y |= 0x00080000u;\n" 581 "\n" 582 " return packUint2x32(mix(b, mix(a, b, bvec2(bIsNaN, bIsNaN)), bvec2(aIsNaN, aIsNaN)));\n" 583 "}\n" 584 "\n" 585 "/* Returns the result of adding the double-precision floating-point values\n" 586 " * `a' and `b'. The operation is performed according to the IEEE Standard for\n" 587 " * Floating-Point Arithmetic.\n" 588 " */\n" 589 "uint64_t\n" 590 "__fadd64(uint64_t a, uint64_t b)\n" 591 "{\n" 592 " uint aSign = __extractFloat64Sign(a);\n" 593 " uint bSign = __extractFloat64Sign(b);\n" 594 " uint aFracLo = __extractFloat64FracLo(a);\n" 595 " uint aFracHi = __extractFloat64FracHi(a);\n" 596 " uint bFracLo = __extractFloat64FracLo(b);\n" 597 " uint bFracHi = __extractFloat64FracHi(b);\n" 598 " int aExp = __extractFloat64Exp(a);\n" 599 " int bExp = __extractFloat64Exp(b);\n" 600 " uint zFrac0 = 0u;\n" 601 " uint zFrac1 = 0u;\n" 602 " int expDiff = aExp - bExp;\n" 603 " if (aSign == bSign) {\n" 604 " uint zFrac2 = 0u;\n" 605 " int zExp;\n" 606 " bool orig_exp_diff_is_zero = (expDiff == 0);\n" 607 "\n" 608 " if (orig_exp_diff_is_zero) {\n" 609 " if (aExp == 0x7FF) {\n" 610 " bool propagate = (aFracHi | aFracLo | bFracHi | bFracLo) != 0u;\n" 611 " return mix(a, __propagateFloat64NaN(a, b), propagate);\n" 612 " }\n" 613 " __add64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);\n" 614 " if (aExp == 0)\n" 615 " return __packFloat64(aSign, 0, zFrac0, zFrac1);\n" 616 " zFrac2 = 0u;\n" 617 " zFrac0 |= 0x00200000u;\n" 618 " zExp = aExp;\n" 619 " __shift64ExtraRightJamming(\n" 620 " zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2);\n" 621 " } else if (0 < expDiff) {\n" 622 " if (aExp == 0x7FF) {\n" 623 " bool propagate = (aFracHi | aFracLo) != 0u;\n" 624 " return mix(a, __propagateFloat64NaN(a, b), propagate);\n" 625 " }\n" 626 "\n" 627 " expDiff = mix(expDiff, expDiff - 1, bExp == 0);\n" 628 " bFracHi = mix(bFracHi | 0x00100000u, bFracHi, bExp == 0);\n" 629 " __shift64ExtraRightJamming(\n" 630 " bFracHi, bFracLo, 0u, expDiff, bFracHi, bFracLo, zFrac2);\n" 631 " zExp = aExp;\n" 632 " } else if (expDiff < 0) {\n" 633 " if (bExp == 0x7FF) {\n" 634 " bool propagate = (bFracHi | bFracLo) != 0u;\n" 635 " return mix(__packFloat64(aSign, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate);\n" 636 " }\n" 637 " expDiff = mix(expDiff, expDiff + 1, aExp == 0);\n" 638 " aFracHi = mix(aFracHi | 0x00100000u, aFracHi, aExp == 0);\n" 639 " __shift64ExtraRightJamming(\n" 640 " aFracHi, aFracLo, 0u, - expDiff, aFracHi, aFracLo, zFrac2);\n" 641 " zExp = bExp;\n" 642 " }\n" 643 " if (!orig_exp_diff_is_zero) {\n" 644 " aFracHi |= 0x00100000u;\n" 645 " __add64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);\n" 646 " --zExp;\n" 647 " if (!(zFrac0 < 0x00200000u)) {\n" 648 " __shift64ExtraRightJamming(zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2);\n" 649 " ++zExp;\n" 650 " }\n" 651 " }\n" 652 " return __roundAndPackFloat64(aSign, zExp, zFrac0, zFrac1, zFrac2);\n" 653 "\n" 654 " } else {\n" 655 " int zExp;\n" 656 "\n" 657 " __shortShift64Left(aFracHi, aFracLo, 10, aFracHi, aFracLo);\n" 658 " __shortShift64Left(bFracHi, bFracLo, 10, bFracHi, bFracLo);\n" 659 " if (0 < expDiff) {\n" 660 " if (aExp == 0x7FF) {\n" 661 " bool propagate = (aFracHi | aFracLo) != 0u;\n" 662 " return mix(a, __propagateFloat64NaN(a, b), propagate);\n" 663 " }\n" 664 " expDiff = mix(expDiff, expDiff - 1, bExp == 0);\n" 665 " bFracHi = mix(bFracHi | 0x40000000u, bFracHi, bExp == 0);\n" 666 " __shift64RightJamming(bFracHi, bFracLo, expDiff, bFracHi, bFracLo);\n" 667 " aFracHi |= 0x40000000u;\n" 668 " __sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);\n" 669 " zExp = aExp;\n" 670 " --zExp;\n" 671 " return __normalizeRoundAndPackFloat64(aSign, zExp - 10, zFrac0, zFrac1);\n" 672 " }\n" 673 " if (expDiff < 0) {\n" 674 " if (bExp == 0x7FF) {\n" 675 " bool propagate = (bFracHi | bFracLo) != 0u;\n" 676 " return mix(__packFloat64(aSign ^ 1u, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate);\n" 677 " }\n" 678 " expDiff = mix(expDiff, expDiff + 1, aExp == 0);\n" 679 " aFracHi = mix(aFracHi | 0x40000000u, aFracHi, aExp == 0);\n" 680 " __shift64RightJamming(aFracHi, aFracLo, - expDiff, aFracHi, aFracLo);\n" 681 " bFracHi |= 0x40000000u;\n" 682 " __sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);\n" 683 " zExp = bExp;\n" 684 " aSign ^= 1u;\n" 685 " --zExp;\n" 686 " return __normalizeRoundAndPackFloat64(aSign, zExp - 10, zFrac0, zFrac1);\n" 687 " }\n" 688 " if (aExp == 0x7FF) {\n" 689 " bool propagate = (aFracHi | aFracLo | bFracHi | bFracLo) != 0u;\n" 690 " return mix(0xFFFFFFFFFFFFFFFFUL, __propagateFloat64NaN(a, b), propagate);\n" 691 " }\n" 692 " bExp = mix(bExp, 1, aExp == 0);\n" 693 " aExp = mix(aExp, 1, aExp == 0);\n" 694 " bool zexp_normal = false;\n" 695 " bool blta = true;\n" 696 " if (bFracHi < aFracHi) {\n" 697 " __sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);\n" 698 " zexp_normal = true;\n" 699 " }\n" 700 " else if (aFracHi < bFracHi) {\n" 701 " __sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);\n" 702 " blta = false;\n" 703 " zexp_normal = true;\n" 704 " }\n" 705 " else if (bFracLo < aFracLo) {\n" 706 " __sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);\n" 707 " zexp_normal = true;\n" 708 " }\n" 709 " else if (aFracLo < bFracLo) {\n" 710 " __sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);\n" 711 " blta = false;\n" 712 " zexp_normal = true;\n" 713 " }\n" 714 " zExp = mix(bExp, aExp, blta);\n" 715 " aSign = mix(aSign ^ 1u, aSign, blta);\n" 716 " uint64_t retval_0 = __packFloat64(uint(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN), 0, 0u, 0u);\n" 717 " uint64_t retval_1 = __normalizeRoundAndPackFloat64(aSign, zExp - 11, zFrac0, zFrac1);\n" 718 " return mix(retval_0, retval_1, zexp_normal);\n" 719 " }\n" 720 "}\n" 721 "\n" 722 "/* Multiplies `a' by `b' to obtain a 64-bit product. The product is broken\n" 723 " * into two 32-bit pieces which are stored at the locations pointed to by\n" 724 " * `z0Ptr' and `z1Ptr'.\n" 725 " */\n" 726 "void\n" 727 "__mul32To64(uint a, uint b, out uint z0Ptr, out uint z1Ptr)\n" 728 "{\n" 729 " uint aLow = a & 0x0000FFFFu;\n" 730 " uint aHigh = a>>16;\n" 731 " uint bLow = b & 0x0000FFFFu;\n" 732 " uint bHigh = b>>16;\n" 733 " uint z1 = aLow * bLow;\n" 734 " uint zMiddleA = aLow * bHigh;\n" 735 " uint zMiddleB = aHigh * bLow;\n" 736 " uint z0 = aHigh * bHigh;\n" 737 " zMiddleA += zMiddleB;\n" 738 " z0 += ((uint(zMiddleA < zMiddleB)) << 16) + (zMiddleA >> 16);\n" 739 " zMiddleA <<= 16;\n" 740 " z1 += zMiddleA;\n" 741 " z0 += uint(z1 < zMiddleA);\n" 742 " z1Ptr = z1;\n" 743 " z0Ptr = z0;\n" 744 "}\n" 745 "\n" 746 "/* Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the\n" 747 " * 64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit\n" 748 " * product. The product is broken into four 32-bit pieces which are stored at\n" 749 " * the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.\n" 750 " */\n" 751 "void\n" 752 "__mul64To128(uint a0, uint a1, uint b0, uint b1,\n" 753 " out uint z0Ptr,\n" 754 " out uint z1Ptr,\n" 755 " out uint z2Ptr,\n" 756 " out uint z3Ptr)\n" 757 "{\n" 758 " uint z0 = 0u;\n" 759 " uint z1 = 0u;\n" 760 " uint z2 = 0u;\n" 761 " uint z3 = 0u;\n" 762 " uint more1 = 0u;\n" 763 " uint more2 = 0u;\n" 764 "\n" 765 " __mul32To64(a1, b1, z2, z3);\n" 766 " __mul32To64(a1, b0, z1, more2);\n" 767 " __add64(z1, more2, 0u, z2, z1, z2);\n" 768 " __mul32To64(a0, b0, z0, more1);\n" 769 " __add64(z0, more1, 0u, z1, z0, z1);\n" 770 " __mul32To64(a0, b1, more1, more2);\n" 771 " __add64(more1, more2, 0u, z2, more1, z2);\n" 772 " __add64(z0, z1, 0u, more1, z0, z1);\n" 773 " z3Ptr = z3;\n" 774 " z2Ptr = z2;\n" 775 " z1Ptr = z1;\n" 776 " z0Ptr = z0;\n" 777 "}\n" 778 "\n" 779 "/* Normalizes the subnormal double-precision floating-point value represented\n" 780 " * by the denormalized significand formed by the concatenation of `aFrac0' and\n" 781 " * `aFrac1'. The normalized exponent is stored at the location pointed to by\n" 782 " * `zExpPtr'. The most significant 21 bits of the normalized significand are\n" 783 " * stored at the location pointed to by `zFrac0Ptr', and the least significant\n" 784 " * 32 bits of the normalized significand are stored at the location pointed to\n" 785 " * by `zFrac1Ptr'.\n" 786 " */\n" 787 "void\n" 788 "__normalizeFloat64Subnormal(uint aFrac0, uint aFrac1,\n" 789 " out int zExpPtr,\n" 790 " out uint zFrac0Ptr,\n" 791 " out uint zFrac1Ptr)\n" 792 "{\n" 793 " int shiftCount;\n" 794 " uint temp_zfrac0, temp_zfrac1;\n" 795 " shiftCount = __countLeadingZeros32(mix(aFrac0, aFrac1, aFrac0 == 0u)) - 11;\n" 796 " zExpPtr = mix(1 - shiftCount, -shiftCount - 31, aFrac0 == 0u);\n" 797 "\n" 798 " temp_zfrac0 = mix(aFrac1<<shiftCount, aFrac1>>(-shiftCount), shiftCount < 0);\n" 799 " temp_zfrac1 = mix(0u, aFrac1<<(shiftCount & 31), shiftCount < 0);\n" 800 "\n" 801 " __shortShift64Left(aFrac0, aFrac1, shiftCount, zFrac0Ptr, zFrac1Ptr);\n" 802 "\n" 803 " zFrac0Ptr = mix(zFrac0Ptr, temp_zfrac0, aFrac0 == 0);\n" 804 " zFrac1Ptr = mix(zFrac1Ptr, temp_zfrac1, aFrac0 == 0);\n" 805 "}\n" 806 "\n" 807 "/* Returns the result of multiplying the double-precision floating-point values\n" 808 " * `a' and `b'. The operation is performed according to the IEEE Standard for\n" 809 " * Floating-Point Arithmetic.\n" 810 " */\n" 811 "uint64_t\n" 812 "__fmul64(uint64_t a, uint64_t b)\n" 813 "{\n" 814 " uint zFrac0 = 0u;\n" 815 " uint zFrac1 = 0u;\n" 816 " uint zFrac2 = 0u;\n" 817 " uint zFrac3 = 0u;\n" 818 " int zExp;\n" 819 "\n" 820 " uint aFracLo = __extractFloat64FracLo(a);\n" 821 " uint aFracHi = __extractFloat64FracHi(a);\n" 822 " uint bFracLo = __extractFloat64FracLo(b);\n" 823 " uint bFracHi = __extractFloat64FracHi(b);\n" 824 " int aExp = __extractFloat64Exp(a);\n" 825 " uint aSign = __extractFloat64Sign(a);\n" 826 " int bExp = __extractFloat64Exp(b);\n" 827 " uint bSign = __extractFloat64Sign(b);\n" 828 " uint zSign = aSign ^ bSign;\n" 829 " if (aExp == 0x7FF) {\n" 830 " if (((aFracHi | aFracLo) != 0u) ||\n" 831 " ((bExp == 0x7FF) && ((bFracHi | bFracLo) != 0u))) {\n" 832 " return __propagateFloat64NaN(a, b);\n" 833 " }\n" 834 " if ((uint(bExp) | bFracHi | bFracLo) == 0u)\n" 835 " return 0xFFFFFFFFFFFFFFFFUL;\n" 836 " return __packFloat64(zSign, 0x7FF, 0u, 0u);\n" 837 " }\n" 838 " if (bExp == 0x7FF) {\n" 839 " if ((bFracHi | bFracLo) != 0u)\n" 840 " return __propagateFloat64NaN(a, b);\n" 841 " if ((uint(aExp) | aFracHi | aFracLo) == 0u)\n" 842 " return 0xFFFFFFFFFFFFFFFFUL;\n" 843 " return __packFloat64(zSign, 0x7FF, 0u, 0u);\n" 844 " }\n" 845 " if (aExp == 0) {\n" 846 " if ((aFracHi | aFracLo) == 0u)\n" 847 " return __packFloat64(zSign, 0, 0u, 0u);\n" 848 " __normalizeFloat64Subnormal(aFracHi, aFracLo, aExp, aFracHi, aFracLo);\n" 849 " }\n" 850 " if (bExp == 0) {\n" 851 " if ((bFracHi | bFracLo) == 0u)\n" 852 " return __packFloat64(zSign, 0, 0u, 0u);\n" 853 " __normalizeFloat64Subnormal(bFracHi, bFracLo, bExp, bFracHi, bFracLo);\n" 854 " }\n" 855 " zExp = aExp + bExp - 0x400;\n" 856 " aFracHi |= 0x00100000u;\n" 857 " __shortShift64Left(bFracHi, bFracLo, 12, bFracHi, bFracLo);\n" 858 " __mul64To128(\n" 859 " aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1, zFrac2, zFrac3);\n" 860 " __add64(zFrac0, zFrac1, aFracHi, aFracLo, zFrac0, zFrac1);\n" 861 " zFrac2 |= uint(zFrac3 != 0u);\n" 862 " if (0x00200000u <= zFrac0) {\n" 863 " __shift64ExtraRightJamming(\n" 864 " zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2);\n" 865 " ++zExp;\n" 866 " }\n" 867 " return __roundAndPackFloat64(zSign, zExp, zFrac0, zFrac1, zFrac2);\n" 868 "}\n" 869 "\n" 870 "uint64_t\n" 871 "__ffma64(uint64_t a, uint64_t b, uint64_t c)\n" 872 "{\n" 873 " return __fadd64(__fmul64(a, b), c);\n" 874 "}\n" 875 "\n" 876 "/* Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the\n" 877 " * number of bits given in `count'. Any bits shifted off are lost. The value\n" 878 " * of `count' can be arbitrarily large; in particular, if `count' is greater\n" 879 " * than 64, the result will be 0. The result is broken into two 32-bit pieces\n" 880 " * which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.\n" 881 " */\n" 882 "void\n" 883 "__shift64Right(uint a0, uint a1,\n" 884 " int count,\n" 885 " out uint z0Ptr,\n" 886 " out uint z1Ptr)\n" 887 "{\n" 888 " uint z0;\n" 889 " uint z1;\n" 890 " int negCount = (-count) & 31;\n" 891 "\n" 892 " z0 = 0u;\n" 893 " z0 = mix(z0, (a0 >> count), count < 32);\n" 894 " z0 = mix(z0, a0, count == 0);\n" 895 "\n" 896 " z1 = mix(0u, (a0 >> (count & 31)), count < 64);\n" 897 " z1 = mix(z1, (a0<<negCount) | (a1>>count), count < 32);\n" 898 " z1 = mix(z1, a0, count == 0);\n" 899 "\n" 900 " z1Ptr = z1;\n" 901 " z0Ptr = z0;\n" 902 "}\n" 903 "\n" 904 "/* Returns the result of converting the double-precision floating-point value\n" 905 " * `a' to the unsigned integer format. The conversion is performed according\n" 906 " * to the IEEE Standard for Floating-Point Arithmetic.\n" 907 " */\n" 908 "uint\n" 909 "__fp64_to_uint(uint64_t a)\n" 910 "{\n" 911 " uint aFracLo = __extractFloat64FracLo(a);\n" 912 " uint aFracHi = __extractFloat64FracHi(a);\n" 913 " int aExp = __extractFloat64Exp(a);\n" 914 " uint aSign = __extractFloat64Sign(a);\n" 915 "\n" 916 " if ((aExp == 0x7FF) && ((aFracHi | aFracLo) != 0u))\n" 917 " return 0xFFFFFFFFu;\n" 918 "\n" 919 " aFracHi |= mix(0u, 0x00100000u, aExp != 0);\n" 920 "\n" 921 " int shiftDist = 0x427 - aExp;\n" 922 " if (0 < shiftDist)\n" 923 " __shift64RightJamming(aFracHi, aFracLo, shiftDist, aFracHi, aFracLo);\n" 924 "\n" 925 " if ((aFracHi & 0xFFFFF000u) != 0u)\n" 926 " return mix(~0u, 0u, (aSign != 0u));\n" 927 "\n" 928 " uint z = 0u;\n" 929 " uint zero = 0u;\n" 930 " __shift64Right(aFracHi, aFracLo, 12, zero, z);\n" 931 "\n" 932 " uint expt = mix(~0u, 0u, (aSign != 0u));\n" 933 "\n" 934 " return mix(z, expt, (aSign != 0u) && (z != 0u));\n" 935 "}\n" 936 "\n" 937 "uint64_t\n" 938 "__uint_to_fp64(uint a)\n" 939 "{\n" 940 " if (a == 0u)\n" 941 " return 0ul;\n" 942 "\n" 943 " int shiftDist = __countLeadingZeros32(a) + 21;\n" 944 "\n" 945 " uint aHigh = 0u;\n" 946 " uint aLow = 0u;\n" 947 " int negCount = (- shiftDist) & 31;\n" 948 "\n" 949 " aHigh = mix(0u, a<< shiftDist - 32, shiftDist < 64);\n" 950 " aLow = 0u;\n" 951 " aHigh = mix(aHigh, 0u, shiftDist == 0);\n" 952 " aLow = mix(aLow, a, shiftDist ==0);\n" 953 " aHigh = mix(aHigh, a >> negCount, shiftDist < 32);\n" 954 " aLow = mix(aLow, a << shiftDist, shiftDist < 32);\n" 955 "\n" 956 " return __packFloat64(0u, 0x432 - shiftDist, aHigh, aLow);\n" 957 "}\n" 958 "\n" 959 "uint64_t\n" 960 "__uint64_to_fp64(uint64_t a)\n" 961 "{\n" 962 " if (a == 0u)\n" 963 " return 0ul;\n" 964 "\n" 965 " uvec2 aFrac = unpackUint2x32(a);\n" 966 " uint aFracLo = __extractFloat64FracLo(a);\n" 967 " uint aFracHi = __extractFloat64FracHi(a);\n" 968 "\n" 969 " if ((aFracHi & 0x80000000u) != 0u) {\n" 970 " __shift64RightJamming(aFracHi, aFracLo, 1, aFracHi, aFracLo);\n" 971 " return __roundAndPackFloat64(0, 0x433, aFracHi, aFracLo, 0u);\n" 972 " } else {\n" 973 " return __normalizeRoundAndPackFloat64(0, 0x432, aFrac.y, aFrac.x);\n" 974 " }\n" 975 "}\n" 976 "\n" 977 "uint64_t\n" 978 "__fp64_to_uint64(uint64_t a)\n" 979 "{\n" 980 " uint aFracLo = __extractFloat64FracLo(a);\n" 981 " uint aFracHi = __extractFloat64FracHi(a);\n" 982 " int aExp = __extractFloat64Exp(a);\n" 983 " uint aSign = __extractFloat64Sign(a);\n" 984 " uint zFrac2 = 0u;\n" 985 " uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;\n" 986 "\n" 987 " aFracHi = mix(aFracHi, aFracHi | 0x00100000u, aExp != 0);\n" 988 " int shiftCount = 0x433 - aExp;\n" 989 "\n" 990 " if ( shiftCount <= 0 ) {\n" 991 " if (shiftCount < -11 && aExp == 0x7FF) {\n" 992 " if ((aFracHi | aFracLo) != 0u)\n" 993 " return __propagateFloat64NaN(a, a);\n" 994 " return mix(default_nan, a, aSign == 0u);\n" 995 " }\n" 996 " __shortShift64Left(aFracHi, aFracLo, -shiftCount, aFracHi, aFracLo);\n" 997 " } else {\n" 998 " __shift64ExtraRightJamming(aFracHi, aFracLo, zFrac2, shiftCount,\n" 999 " aFracHi, aFracLo, zFrac2);\n" 1000 " }\n" 1001 " return __roundAndPackUInt64(aSign, aFracHi, aFracLo, zFrac2);\n" 1002 "}\n" 1003 "\n" 1004 "int64_t\n" 1005 "__fp64_to_int64(uint64_t a)\n" 1006 "{\n" 1007 " uint zFrac2 = 0u;\n" 1008 " uint aFracLo = __extractFloat64FracLo(a);\n" 1009 " uint aFracHi = __extractFloat64FracHi(a);\n" 1010 " int aExp = __extractFloat64Exp(a);\n" 1011 " uint aSign = __extractFloat64Sign(a);\n" 1012 " int64_t default_NegNaN = -0x7FFFFFFFFFFFFFFEL;\n" 1013 " int64_t default_PosNaN = 0xFFFFFFFFFFFFFFFFL;\n" 1014 "\n" 1015 " aFracHi = mix(aFracHi, aFracHi | 0x00100000u, aExp != 0);\n" 1016 " int shiftCount = 0x433 - aExp;\n" 1017 "\n" 1018 " if (shiftCount <= 0) {\n" 1019 " if (shiftCount < -11 && aExp == 0x7FF) {\n" 1020 " if ((aFracHi | aFracLo) != 0u)\n" 1021 " return default_NegNaN;\n" 1022 " return mix(default_NegNaN, default_PosNaN, aSign == 0u);\n" 1023 " }\n" 1024 " __shortShift64Left(aFracHi, aFracLo, -shiftCount, aFracHi, aFracLo);\n" 1025 " } else {\n" 1026 " __shift64ExtraRightJamming(aFracHi, aFracLo, zFrac2, shiftCount,\n" 1027 " aFracHi, aFracLo, zFrac2);\n" 1028 " }\n" 1029 "\n" 1030 " return __roundAndPackInt64(aSign, aFracHi, aFracLo, zFrac2);\n" 1031 "}\n" 1032 "\n" 1033 "uint64_t\n" 1034 "__fp32_to_uint64(float f)\n" 1035 "{\n" 1036 " uint a = floatBitsToUint(f);\n" 1037 " uint aFrac = a & 0x007FFFFFu;\n" 1038 " int aExp = int((a>>23) & 0xFFu);\n" 1039 " uint aSign = a>>31;\n" 1040 " uint zFrac0 = 0u;\n" 1041 " uint zFrac1 = 0u;\n" 1042 " uint zFrac2 = 0u;\n" 1043 " uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;\n" 1044 " int shiftCount = 0xBE - aExp;\n" 1045 "\n" 1046 " if (shiftCount <0) {\n" 1047 " if (aExp == 0xFF)\n" 1048 " return default_nan;\n" 1049 " }\n" 1050 "\n" 1051 " aFrac = mix(aFrac, aFrac | 0x00800000u, aExp != 0);\n" 1052 " __shortShift64Left(aFrac, 0, 40, zFrac0, zFrac1);\n" 1053 "\n" 1054 " if (shiftCount != 0) {\n" 1055 " __shift64ExtraRightJamming(zFrac0, zFrac1, zFrac2, shiftCount,\n" 1056 " zFrac0, zFrac1, zFrac2);\n" 1057 " }\n" 1058 "\n" 1059 " return __roundAndPackUInt64(aSign, zFrac0, zFrac1, zFrac2);\n" 1060 "}\n" 1061 "\n" 1062 "int64_t\n" 1063 "__fp32_to_int64(float f)\n" 1064 "{\n" 1065 " uint a = floatBitsToUint(f);\n" 1066 " uint aFrac = a & 0x007FFFFFu;\n" 1067 " int aExp = int((a>>23) & 0xFFu);\n" 1068 " uint aSign = a>>31;\n" 1069 " uint zFrac0 = 0u;\n" 1070 " uint zFrac1 = 0u;\n" 1071 " uint zFrac2 = 0u;\n" 1072 " int64_t default_NegNaN = -0x7FFFFFFFFFFFFFFEL;\n" 1073 " int64_t default_PosNaN = 0xFFFFFFFFFFFFFFFFL;\n" 1074 " int shiftCount = 0xBE - aExp;\n" 1075 "\n" 1076 " if (shiftCount <0) {\n" 1077 " if (aExp == 0xFF && aFrac != 0u)\n" 1078 " return default_NegNaN;\n" 1079 " return mix(default_NegNaN, default_PosNaN, aSign == 0u);\n" 1080 " }\n" 1081 "\n" 1082 " aFrac = mix(aFrac, aFrac | 0x00800000u, aExp != 0);\n" 1083 " __shortShift64Left(aFrac, 0, 40, zFrac0, zFrac1);\n" 1084 "\n" 1085 " if (shiftCount != 0) {\n" 1086 " __shift64ExtraRightJamming(zFrac0, zFrac1, zFrac2, shiftCount,\n" 1087 " zFrac0, zFrac1, zFrac2);\n" 1088 " }\n" 1089 "\n" 1090 " return __roundAndPackInt64(aSign, zFrac0, zFrac1, zFrac2);\n" 1091 "}\n" 1092 "\n" 1093 "uint64_t\n" 1094 "__int64_to_fp64(int64_t a)\n" 1095 "{\n" 1096 " if (a==0)\n" 1097 " return 0ul;\n" 1098 "\n" 1099 " uint64_t absA = mix(uint64_t(a), uint64_t(-a), a < 0);\n" 1100 " uint aFracHi = __extractFloat64FracHi(absA);\n" 1101 " uvec2 aFrac = unpackUint2x32(absA);\n" 1102 " uint zSign = uint(a < 0);\n" 1103 "\n" 1104 " if ((aFracHi & 0x80000000u) != 0u) {\n" 1105 " return mix(0ul, __packFloat64(1, 0x434, 0u, 0u), a < 0);\n" 1106 " }\n" 1107 "\n" 1108 " return __normalizeRoundAndPackFloat64(zSign, 0x432, aFrac.y, aFrac.x);\n" 1109 "}\n" 1110 "\n" 1111 "/* Returns the result of converting the double-precision floating-point value\n" 1112 " * `a' to the 32-bit two's complement integer format. The conversion is\n" 1113 " * performed according to the IEEE Standard for Floating-Point Arithmetic---\n" 1114 " * which means in particular that the conversion is rounded according to the\n" 1115 " * current rounding mode. If `a' is a NaN, the largest positive integer is\n" 1116 " * returned. Otherwise, if the conversion overflows, the largest integer with\n" 1117 " * the same sign as `a' is returned.\n" 1118 " */\n" 1119 "int\n" 1120 "__fp64_to_int(uint64_t a)\n" 1121 "{\n" 1122 " uint aFracLo = __extractFloat64FracLo(a);\n" 1123 " uint aFracHi = __extractFloat64FracHi(a);\n" 1124 " int aExp = __extractFloat64Exp(a);\n" 1125 " uint aSign = __extractFloat64Sign(a);\n" 1126 "\n" 1127 " uint absZ = 0u;\n" 1128 " uint aFracExtra = 0u;\n" 1129 " int shiftCount = aExp - 0x413;\n" 1130 "\n" 1131 " if (0 <= shiftCount) {\n" 1132 " if (0x41E < aExp) {\n" 1133 " if ((aExp == 0x7FF) && bool(aFracHi | aFracLo))\n" 1134 " aSign = 0u;\n" 1135 " return mix(0x7FFFFFFF, 0x80000000, bool(aSign));\n" 1136 " }\n" 1137 " __shortShift64Left(aFracHi | 0x00100000u, aFracLo, shiftCount, absZ, aFracExtra);\n" 1138 " } else {\n" 1139 " if (aExp < 0x3FF)\n" 1140 " return 0;\n" 1141 "\n" 1142 " aFracHi |= 0x00100000u;\n" 1143 " aFracExtra = ( aFracHi << (shiftCount & 31)) | aFracLo;\n" 1144 " absZ = aFracHi >> (- shiftCount);\n" 1145 " }\n" 1146 "\n" 1147 " int z = mix(int(absZ), -int(absZ), (aSign != 0u));\n" 1148 " int nan = mix(0x7FFFFFFF, 0x80000000, bool(aSign));\n" 1149 " return mix(z, nan, bool(aSign ^ uint(z < 0)) && bool(z));\n" 1150 "}\n" 1151 "\n" 1152 "/* Returns the result of converting the 32-bit two's complement integer `a'\n" 1153 " * to the double-precision floating-point format. The conversion is performed\n" 1154 " * according to the IEEE Standard for Floating-Point Arithmetic.\n" 1155 " */\n" 1156 "uint64_t\n" 1157 "__int_to_fp64(int a)\n" 1158 "{\n" 1159 " uint zFrac0 = 0u;\n" 1160 " uint zFrac1 = 0u;\n" 1161 " if (a==0)\n" 1162 " return __packFloat64(0u, 0, 0u, 0u);\n" 1163 " uint zSign = uint(a < 0);\n" 1164 " uint absA = mix(uint(a), uint(-a), a < 0);\n" 1165 " int shiftCount = __countLeadingZeros32(absA) - 11;\n" 1166 " if (0 <= shiftCount) {\n" 1167 " zFrac0 = absA << shiftCount;\n" 1168 " zFrac1 = 0u;\n" 1169 " } else {\n" 1170 " __shift64Right(absA, 0u, -shiftCount, zFrac0, zFrac1);\n" 1171 " }\n" 1172 " return __packFloat64(zSign, 0x412 - shiftCount, zFrac0, zFrac1);\n" 1173 "}\n" 1174 "\n" 1175 "bool\n" 1176 "__fp64_to_bool(uint64_t a)\n" 1177 "{\n" 1178 " return !__feq64_nonnan(__fabs64(a), 0ul);\n" 1179 "}\n" 1180 "\n" 1181 "uint64_t\n" 1182 "__bool_to_fp64(bool a)\n" 1183 "{\n" 1184 " return __int_to_fp64(int(a));\n" 1185 "}\n" 1186 "\n" 1187 "/* Packs the sign `zSign', exponent `zExp', and significand `zFrac' into a\n" 1188 " * single-precision floating-point value, returning the result. After being\n" 1189 " * shifted into the proper positions, the three fields are simply added\n" 1190 " * together to form the result. This means that any integer portion of `zSig'\n" 1191 " * will be added into the exponent. Since a properly normalized significand\n" 1192 " * will have an integer portion equal to 1, the `zExp' input should be 1 less\n" 1193 " * than the desired result exponent whenever `zFrac' is a complete, normalized\n" 1194 " * significand.\n" 1195 " */\n" 1196 "float\n" 1197 "__packFloat32(uint zSign, int zExp, uint zFrac)\n" 1198 "{\n" 1199 " return uintBitsToFloat((zSign<<31) + (uint(zExp)<<23) + zFrac);\n" 1200 "}\n" 1201 "\n" 1202 "/* Takes an abstract floating-point value having sign `zSign', exponent `zExp',\n" 1203 " * and significand `zFrac', and returns the proper single-precision floating-\n" 1204 " * point value corresponding to the abstract input. Ordinarily, the abstract\n" 1205 " * value is simply rounded and packed into the single-precision format, with\n" 1206 " * the inexact exception raised if the abstract input cannot be represented\n" 1207 " * exactly. However, if the abstract value is too large, the overflow and\n" 1208 " * inexact exceptions are raised and an infinity or maximal finite value is\n" 1209 " * returned. If the abstract value is too small, the input value is rounded to\n" 1210 " * a subnormal number, and the underflow and inexact exceptions are raised if\n" 1211 " * the abstract input cannot be represented exactly as a subnormal single-\n" 1212 " * precision floating-point number.\n" 1213 " * The input significand `zFrac' has its binary point between bits 30\n" 1214 " * and 29, which is 7 bits to the left of the usual location. This shifted\n" 1215 " * significand must be normalized or smaller. If `zFrac' is not normalized,\n" 1216 " * `zExp' must be 0; in that case, the result returned is a subnormal number,\n" 1217 " * and it must not require rounding. In the usual case that `zFrac' is\n" 1218 " * normalized, `zExp' must be 1 less than the \"true\" floating-point exponent.\n" 1219 " * The handling of underflow and overflow follows the IEEE Standard for\n" 1220 " * Floating-Point Arithmetic.\n" 1221 " */\n" 1222 "float\n" 1223 "__roundAndPackFloat32(uint zSign, int zExp, uint zFrac)\n" 1224 "{\n" 1225 " bool roundNearestEven;\n" 1226 " int roundIncrement;\n" 1227 " int roundBits;\n" 1228 "\n" 1229 " roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;\n" 1230 " roundIncrement = 0x40;\n" 1231 " if (!roundNearestEven) {\n" 1232 " if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) {\n" 1233 " roundIncrement = 0;\n" 1234 " } else {\n" 1235 " roundIncrement = 0x7F;\n" 1236 " if (zSign != 0u) {\n" 1237 " if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP)\n" 1238 " roundIncrement = 0;\n" 1239 " } else {\n" 1240 " if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN)\n" 1241 " roundIncrement = 0;\n" 1242 " }\n" 1243 " }\n" 1244 " }\n" 1245 " roundBits = int(zFrac & 0x7Fu);\n" 1246 " if (0xFDu <= uint(zExp)) {\n" 1247 " if ((0xFD < zExp) || ((zExp == 0xFD) && (int(zFrac) + roundIncrement) < 0))\n" 1248 " return __packFloat32(zSign, 0xFF, 0u) - float(roundIncrement == 0);\n" 1249 " int count = -zExp;\n" 1250 " bool zexp_lt0 = zExp < 0;\n" 1251 " uint zFrac_lt0 = mix(uint(zFrac != 0u), (zFrac>>count) | uint((zFrac<<((-count) & 31)) != 0u), (-zExp) < 32);\n" 1252 " zFrac = mix(zFrac, zFrac_lt0, zexp_lt0);\n" 1253 " roundBits = mix(roundBits, int(zFrac) & 0x7f, zexp_lt0);\n" 1254 " zExp = mix(zExp, 0, zexp_lt0);\n" 1255 " }\n" 1256 " zFrac = (zFrac + uint(roundIncrement))>>7;\n" 1257 " zFrac &= ~uint(((roundBits ^ 0x40) == 0) && roundNearestEven);\n" 1258 "\n" 1259 " return __packFloat32(zSign, mix(zExp, 0, zFrac == 0u), zFrac);\n" 1260 "}\n" 1261 "\n" 1262 "/* Returns the result of converting the double-precision floating-point value\n" 1263 " * `a' to the single-precision floating-point format. The conversion is\n" 1264 " * performed according to the IEEE Standard for Floating-Point Arithmetic.\n" 1265 " */\n" 1266 "float\n" 1267 "__fp64_to_fp32(uint64_t __a)\n" 1268 "{\n" 1269 " uvec2 a = unpackUint2x32(__a);\n" 1270 " uint zFrac = 0u;\n" 1271 " uint allZero = 0u;\n" 1272 "\n" 1273 " uint aFracLo = __extractFloat64FracLo(__a);\n" 1274 " uint aFracHi = __extractFloat64FracHi(__a);\n" 1275 " int aExp = __extractFloat64Exp(__a);\n" 1276 " uint aSign = __extractFloat64Sign(__a);\n" 1277 " if (aExp == 0x7FF) {\n" 1278 " __shortShift64Left(a.y, a.x, 12, a.y, a.x);\n" 1279 " float rval = uintBitsToFloat((aSign<<31) | 0x7FC00000u | (a.y>>9));\n" 1280 " rval = mix(__packFloat32(aSign, 0xFF, 0u), rval, (aFracHi | aFracLo) != 0u);\n" 1281 " return rval;\n" 1282 " }\n" 1283 " __shift64RightJamming(aFracHi, aFracLo, 22, allZero, zFrac);\n" 1284 " zFrac = mix(zFrac, zFrac | 0x40000000u, aExp != 0);\n" 1285 " return __roundAndPackFloat32(aSign, aExp - 0x381, zFrac);\n" 1286 "}\n" 1287 "\n" 1288 "float\n" 1289 "__uint64_to_fp32(uint64_t __a)\n" 1290 "{\n" 1291 " uint zFrac = 0u;\n" 1292 " uvec2 aFrac = unpackUint2x32(__a);\n" 1293 " int shiftCount = __countLeadingZeros32(mix(aFrac.y, aFrac.x, aFrac.y == 0u));\n" 1294 " shiftCount -= mix(40, 8, aFrac.y == 0u);\n" 1295 "\n" 1296 " if (0 <= shiftCount) {\n" 1297 " __shortShift64Left(aFrac.y, aFrac.x, shiftCount, aFrac.y, aFrac.x);\n" 1298 " bool is_zero = (aFrac.y | aFrac.x) == 0u;\n" 1299 " return mix(__packFloat32(0u, 0x95 - shiftCount, aFrac.x), 0, is_zero);\n" 1300 " }\n" 1301 "\n" 1302 " shiftCount += 7;\n" 1303 " __shift64RightJamming(aFrac.y, aFrac.x, -shiftCount, aFrac.y, aFrac.x);\n" 1304 " zFrac = mix(aFrac.x<<shiftCount, aFrac.x, shiftCount < 0);\n" 1305 " return __roundAndPackFloat32(0u, 0x9C - shiftCount, zFrac);\n" 1306 "}\n" 1307 "\n" 1308 "float\n" 1309 "__int64_to_fp32(int64_t __a)\n" 1310 "{\n" 1311 " uint zFrac = 0u;\n" 1312 " uint aSign = uint(__a < 0);\n" 1313 " uint64_t absA = mix(uint64_t(__a), uint64_t(-__a), __a < 0);\n" 1314 " uvec2 aFrac = unpackUint2x32(absA);\n" 1315 " int shiftCount = __countLeadingZeros32(mix(aFrac.y, aFrac.x, aFrac.y == 0u));\n" 1316 " shiftCount -= mix(40, 8, aFrac.y == 0u);\n" 1317 "\n" 1318 " if (0 <= shiftCount) {\n" 1319 " __shortShift64Left(aFrac.y, aFrac.x, shiftCount, aFrac.y, aFrac.x);\n" 1320 " bool is_zero = (aFrac.y | aFrac.x) == 0u;\n" 1321 " return mix(__packFloat32(aSign, 0x95 - shiftCount, aFrac.x), 0, absA == 0u);\n" 1322 " }\n" 1323 "\n" 1324 " shiftCount += 7;\n" 1325 " __shift64RightJamming(aFrac.y, aFrac.x, -shiftCount, aFrac.y, aFrac.x);\n" 1326 " zFrac = mix(aFrac.x<<shiftCount, aFrac.x, shiftCount < 0);\n" 1327 " return __roundAndPackFloat32(aSign, 0x9C - shiftCount, zFrac);\n" 1328 "}\n" 1329 "\n" 1330 "/* Returns the result of converting the single-precision floating-point value\n" 1331 " * `a' to the double-precision floating-point format.\n" 1332 " */\n" 1333 "uint64_t\n" 1334 "__fp32_to_fp64(float f)\n" 1335 "{\n" 1336 " uint a = floatBitsToUint(f);\n" 1337 " uint aFrac = a & 0x007FFFFFu;\n" 1338 " int aExp = int((a>>23) & 0xFFu);\n" 1339 " uint aSign = a>>31;\n" 1340 " uint zFrac0 = 0u;\n" 1341 " uint zFrac1 = 0u;\n" 1342 "\n" 1343 " if (aExp == 0xFF) {\n" 1344 " if (aFrac != 0u) {\n" 1345 " uint nanLo = 0u;\n" 1346 " uint nanHi = a<<9;\n" 1347 " __shift64Right(nanHi, nanLo, 12, nanHi, nanLo);\n" 1348 " nanHi |= ((aSign<<31) | 0x7FF80000u);\n" 1349 " return packUint2x32(uvec2(nanLo, nanHi));\n" 1350 " }\n" 1351 " return __packFloat64(aSign, 0x7FF, 0u, 0u);\n" 1352 " }\n" 1353 "\n" 1354 " if (aExp == 0) {\n" 1355 " if (aFrac == 0u)\n" 1356 " return __packFloat64(aSign, 0, 0u, 0u);\n" 1357 " /* Normalize subnormal */\n" 1358 " int shiftCount = __countLeadingZeros32(aFrac) - 8;\n" 1359 " aFrac <<= shiftCount;\n" 1360 " aExp = 1 - shiftCount;\n" 1361 " --aExp;\n" 1362 " }\n" 1363 "\n" 1364 " __shift64Right(aFrac, 0u, 3, zFrac0, zFrac1);\n" 1365 " return __packFloat64(aSign, aExp + 0x380, zFrac0, zFrac1);\n" 1366 "}\n" 1367 "\n" 1368 "/* Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the\n" 1369 " * 96-bit value formed by concatenating `b0', `b1', and `b2'. Addition is\n" 1370 " * modulo 2^96, so any carry out is lost. The result is broken into three\n" 1371 " * 32-bit pieces which are stored at the locations pointed to by `z0Ptr',\n" 1372 " * `z1Ptr', and `z2Ptr'.\n" 1373 " */\n" 1374 "void\n" 1375 "__add96(uint a0, uint a1, uint a2,\n" 1376 " uint b0, uint b1, uint b2,\n" 1377 " out uint z0Ptr,\n" 1378 " out uint z1Ptr,\n" 1379 " out uint z2Ptr)\n" 1380 "{\n" 1381 " uint z2 = a2 + b2;\n" 1382 " uint carry1 = uint(z2 < a2);\n" 1383 " uint z1 = a1 + b1;\n" 1384 " uint carry0 = uint(z1 < a1);\n" 1385 " uint z0 = a0 + b0;\n" 1386 " z1 += carry1;\n" 1387 " z0 += uint(z1 < carry1);\n" 1388 " z0 += carry0;\n" 1389 " z2Ptr = z2;\n" 1390 " z1Ptr = z1;\n" 1391 " z0Ptr = z0;\n" 1392 "}\n" 1393 "\n" 1394 "/* Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from\n" 1395 " * the 96-bit value formed by concatenating `a0', `a1', and `a2'. Subtraction\n" 1396 " * is modulo 2^96, so any borrow out (carry out) is lost. The result is broken\n" 1397 " * into three 32-bit pieces which are stored at the locations pointed to by\n" 1398 " * `z0Ptr', `z1Ptr', and `z2Ptr'.\n" 1399 " */\n" 1400 "void\n" 1401 "__sub96(uint a0, uint a1, uint a2,\n" 1402 " uint b0, uint b1, uint b2,\n" 1403 " out uint z0Ptr,\n" 1404 " out uint z1Ptr,\n" 1405 " out uint z2Ptr)\n" 1406 "{\n" 1407 " uint z2 = a2 - b2;\n" 1408 " uint borrow1 = uint(a2 < b2);\n" 1409 " uint z1 = a1 - b1;\n" 1410 " uint borrow0 = uint(a1 < b1);\n" 1411 " uint z0 = a0 - b0;\n" 1412 " z0 -= uint(z1 < borrow1);\n" 1413 " z1 -= borrow1;\n" 1414 " z0 -= borrow0;\n" 1415 " z2Ptr = z2;\n" 1416 " z1Ptr = z1;\n" 1417 " z0Ptr = z0;\n" 1418 "}\n" 1419 "\n" 1420 "/* Returns an approximation to the 32-bit integer quotient obtained by dividing\n" 1421 " * `b' into the 64-bit value formed by concatenating `a0' and `a1'. The\n" 1422 " * divisor `b' must be at least 2^31. If q is the exact quotient truncated\n" 1423 " * toward zero, the approximation returned lies between q and q + 2 inclusive.\n" 1424 " * If the exact quotient q is larger than 32 bits, the maximum positive 32-bit\n" 1425 " * unsigned integer is returned.\n" 1426 " */\n" 1427 "uint\n" 1428 "__estimateDiv64To32(uint a0, uint a1, uint b)\n" 1429 "{\n" 1430 " uint b0;\n" 1431 " uint b1;\n" 1432 " uint rem0 = 0u;\n" 1433 " uint rem1 = 0u;\n" 1434 " uint term0 = 0u;\n" 1435 " uint term1 = 0u;\n" 1436 " uint z;\n" 1437 "\n" 1438 " if (b <= a0)\n" 1439 " return 0xFFFFFFFFu;\n" 1440 " b0 = b>>16;\n" 1441 " z = (b0<<16 <= a0) ? 0xFFFF0000u : (a0 / b0)<<16;\n" 1442 " __mul32To64(b, z, term0, term1);\n" 1443 " __sub64(a0, a1, term0, term1, rem0, rem1);\n" 1444 " while (int(rem0) < 0) {\n" 1445 " z -= 0x10000u;\n" 1446 " b1 = b<<16;\n" 1447 " __add64(rem0, rem1, b0, b1, rem0, rem1);\n" 1448 " }\n" 1449 " rem0 = (rem0<<16) | (rem1>>16);\n" 1450 " z |= (b0<<16 <= rem0) ? 0xFFFFu : rem0 / b0;\n" 1451 " return z;\n" 1452 "}\n" 1453 "\n" 1454 "uint\n" 1455 "__sqrtOddAdjustments(int index)\n" 1456 "{\n" 1457 " uint res = 0u;\n" 1458 " if (index == 0)\n" 1459 " res = 0x0004u;\n" 1460 " if (index == 1)\n" 1461 " res = 0x0022u;\n" 1462 " if (index == 2)\n" 1463 " res = 0x005Du;\n" 1464 " if (index == 3)\n" 1465 " res = 0x00B1u;\n" 1466 " if (index == 4)\n" 1467 " res = 0x011Du;\n" 1468 " if (index == 5)\n" 1469 " res = 0x019Fu;\n" 1470 " if (index == 6)\n" 1471 " res = 0x0236u;\n" 1472 " if (index == 7)\n" 1473 " res = 0x02E0u;\n" 1474 " if (index == 8)\n" 1475 " res = 0x039Cu;\n" 1476 " if (index == 9)\n" 1477 " res = 0x0468u;\n" 1478 " if (index == 10)\n" 1479 " res = 0x0545u;\n" 1480 " if (index == 11)\n" 1481 " res = 0x631u;\n" 1482 " if (index == 12)\n" 1483 " res = 0x072Bu;\n" 1484 " if (index == 13)\n" 1485 " res = 0x0832u;\n" 1486 " if (index == 14)\n" 1487 " res = 0x0946u;\n" 1488 " if (index == 15)\n" 1489 " res = 0x0A67u;\n" 1490 "\n" 1491 " return res;\n" 1492 "}\n" 1493 "\n" 1494 "uint\n" 1495 "__sqrtEvenAdjustments(int index)\n" 1496 "{\n" 1497 " uint res = 0u;\n" 1498 " if (index == 0)\n" 1499 " res = 0x0A2Du;\n" 1500 " if (index == 1)\n" 1501 " res = 0x08AFu;\n" 1502 " if (index == 2)\n" 1503 " res = 0x075Au;\n" 1504 " if (index == 3)\n" 1505 " res = 0x0629u;\n" 1506 " if (index == 4)\n" 1507 " res = 0x051Au;\n" 1508 " if (index == 5)\n" 1509 " res = 0x0429u;\n" 1510 " if (index == 6)\n" 1511 " res = 0x0356u;\n" 1512 " if (index == 7)\n" 1513 " res = 0x029Eu;\n" 1514 " if (index == 8)\n" 1515 " res = 0x0200u;\n" 1516 " if (index == 9)\n" 1517 " res = 0x0179u;\n" 1518 " if (index == 10)\n" 1519 " res = 0x0109u;\n" 1520 " if (index == 11)\n" 1521 " res = 0x00AFu;\n" 1522 " if (index == 12)\n" 1523 " res = 0x0068u;\n" 1524 " if (index == 13)\n" 1525 " res = 0x0034u;\n" 1526 " if (index == 14)\n" 1527 " res = 0x0012u;\n" 1528 " if (index == 15)\n" 1529 " res = 0x0002u;\n" 1530 "\n" 1531 " return res;\n" 1532 "}\n" 1533 "\n" 1534 "/* Returns an approximation to the square root of the 32-bit significand given\n" 1535 " * by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of\n" 1536 " * `aExp' (the least significant bit) is 1, the integer returned approximates\n" 1537 " * 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'\n" 1538 " * is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either\n" 1539 " * case, the approximation returned lies strictly within +/-2 of the exact\n" 1540 " * value.\n" 1541 " */\n" 1542 "uint\n" 1543 "__estimateSqrt32(int aExp, uint a)\n" 1544 "{\n" 1545 " uint z;\n" 1546 "\n" 1547 " int index = int(a>>27 & 15u);\n" 1548 " if ((aExp & 1) != 0) {\n" 1549 " z = 0x4000u + (a>>17) - __sqrtOddAdjustments(index);\n" 1550 " z = ((a / z)<<14) + (z<<15);\n" 1551 " a >>= 1;\n" 1552 " } else {\n" 1553 " z = 0x8000u + (a>>17) - __sqrtEvenAdjustments(index);\n" 1554 " z = a / z + z;\n" 1555 " z = (0x20000u <= z) ? 0xFFFF8000u : (z<<15);\n" 1556 " if (z <= a)\n" 1557 " return uint(int(a)>>1);\n" 1558 " }\n" 1559 " return ((__estimateDiv64To32(a, 0u, z))>>1) + (z>>1);\n" 1560 "}\n" 1561 "\n" 1562 "/* Returns the square root of the double-precision floating-point value `a'.\n" 1563 " * The operation is performed according to the IEEE Standard for Floating-Point\n" 1564 " * Arithmetic.\n" 1565 " */\n" 1566 "uint64_t\n" 1567 "__fsqrt64(uint64_t a)\n" 1568 "{\n" 1569 " uint zFrac0 = 0u;\n" 1570 " uint zFrac1 = 0u;\n" 1571 " uint zFrac2 = 0u;\n" 1572 " uint doubleZFrac0 = 0u;\n" 1573 " uint rem0 = 0u;\n" 1574 " uint rem1 = 0u;\n" 1575 " uint rem2 = 0u;\n" 1576 " uint rem3 = 0u;\n" 1577 " uint term0 = 0u;\n" 1578 " uint term1 = 0u;\n" 1579 " uint term2 = 0u;\n" 1580 " uint term3 = 0u;\n" 1581 " uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;\n" 1582 "\n" 1583 " uint aFracLo = __extractFloat64FracLo(a);\n" 1584 " uint aFracHi = __extractFloat64FracHi(a);\n" 1585 " int aExp = __extractFloat64Exp(a);\n" 1586 " uint aSign = __extractFloat64Sign(a);\n" 1587 " if (aExp == 0x7FF) {\n" 1588 " if ((aFracHi | aFracLo) != 0u)\n" 1589 " return __propagateFloat64NaN(a, a);\n" 1590 " if (aSign == 0u)\n" 1591 " return a;\n" 1592 " return default_nan;\n" 1593 " }\n" 1594 " if (aSign != 0u) {\n" 1595 " if ((uint(aExp) | aFracHi | aFracLo) == 0u)\n" 1596 " return a;\n" 1597 " return default_nan;\n" 1598 " }\n" 1599 " if (aExp == 0) {\n" 1600 " if ((aFracHi | aFracLo) == 0u)\n" 1601 " return __packFloat64(0u, 0, 0u, 0u);\n" 1602 " __normalizeFloat64Subnormal(aFracHi, aFracLo, aExp, aFracHi, aFracLo);\n" 1603 " }\n" 1604 " int zExp = ((aExp - 0x3FF)>>1) + 0x3FE;\n" 1605 " aFracHi |= 0x00100000u;\n" 1606 " __shortShift64Left(aFracHi, aFracLo, 11, term0, term1);\n" 1607 " zFrac0 = (__estimateSqrt32(aExp, term0)>>1) + 1u;\n" 1608 " if (zFrac0 == 0u)\n" 1609 " zFrac0 = 0x7FFFFFFFu;\n" 1610 " doubleZFrac0 = zFrac0 + zFrac0;\n" 1611 " __shortShift64Left(aFracHi, aFracLo, 9 - (aExp & 1), aFracHi, aFracLo);\n" 1612 " __mul32To64(zFrac0, zFrac0, term0, term1);\n" 1613 " __sub64(aFracHi, aFracLo, term0, term1, rem0, rem1);\n" 1614 " while (int(rem0) < 0) {\n" 1615 " --zFrac0;\n" 1616 " doubleZFrac0 -= 2u;\n" 1617 " __add64(rem0, rem1, 0u, doubleZFrac0 | 1u, rem0, rem1);\n" 1618 " }\n" 1619 " zFrac1 = __estimateDiv64To32(rem1, 0u, doubleZFrac0);\n" 1620 " if ((zFrac1 & 0x1FFu) <= 5u) {\n" 1621 " if (zFrac1 == 0u)\n" 1622 " zFrac1 = 1u;\n" 1623 " __mul32To64(doubleZFrac0, zFrac1, term1, term2);\n" 1624 " __sub64(rem1, 0u, term1, term2, rem1, rem2);\n" 1625 " __mul32To64(zFrac1, zFrac1, term2, term3);\n" 1626 " __sub96(rem1, rem2, 0u, 0u, term2, term3, rem1, rem2, rem3);\n" 1627 " while (int(rem1) < 0) {\n" 1628 " --zFrac1;\n" 1629 " __shortShift64Left(0u, zFrac1, 1, term2, term3);\n" 1630 " term3 |= 1u;\n" 1631 " term2 |= doubleZFrac0;\n" 1632 " __add96(rem1, rem2, rem3, 0u, term2, term3, rem1, rem2, rem3);\n" 1633 " }\n" 1634 " zFrac1 |= uint((rem1 | rem2 | rem3) != 0u);\n" 1635 " }\n" 1636 " __shift64ExtraRightJamming(zFrac0, zFrac1, 0u, 10, zFrac0, zFrac1, zFrac2);\n" 1637 " return __roundAndPackFloat64(0u, zExp, zFrac0, zFrac1, zFrac2);\n" 1638 "}\n" 1639 "\n" 1640 "uint64_t\n" 1641 "__ftrunc64(uint64_t __a)\n" 1642 "{\n" 1643 " uvec2 a = unpackUint2x32(__a);\n" 1644 " int aExp = __extractFloat64Exp(__a);\n" 1645 " uint zLo;\n" 1646 " uint zHi;\n" 1647 "\n" 1648 " int unbiasedExp = aExp - 1023;\n" 1649 " int fracBits = 52 - unbiasedExp;\n" 1650 " uint maskLo = mix(~0u << fracBits, 0u, fracBits >= 32);\n" 1651 " uint maskHi = mix(~0u << (fracBits - 32), ~0u, fracBits < 33);\n" 1652 " zLo = maskLo & a.x;\n" 1653 " zHi = maskHi & a.y;\n" 1654 "\n" 1655 " zLo = mix(zLo, 0u, unbiasedExp < 0);\n" 1656 " zHi = mix(zHi, 0u, unbiasedExp < 0);\n" 1657 " zLo = mix(zLo, a.x, unbiasedExp > 52);\n" 1658 " zHi = mix(zHi, a.y, unbiasedExp > 52);\n" 1659 " return packUint2x32(uvec2(zLo, zHi));\n" 1660 "}\n" 1661 "\n" 1662 "uint64_t\n" 1663 "__ffloor64(uint64_t a)\n" 1664 "{\n" 1665 " bool is_positive = __fge64(a, 0ul);\n" 1666 " uint64_t tr = __ftrunc64(a);\n" 1667 "\n" 1668 " if (is_positive || __feq64(tr, a)) {\n" 1669 " return tr;\n" 1670 " } else {\n" 1671 " return __fadd64(tr, 0xbff0000000000000ul /* -1.0 */);\n" 1672 " }\n" 1673 "}\n" 1674 "\n" 1675 "uint64_t\n" 1676 "__fround64(uint64_t __a)\n" 1677 "{\n" 1678 " uvec2 a = unpackUint2x32(__a);\n" 1679 " int unbiasedExp = __extractFloat64Exp(__a) - 1023;\n" 1680 " uint aHi = a.y;\n" 1681 " uint aLo = a.x;\n" 1682 "\n" 1683 " if (unbiasedExp < 20) {\n" 1684 " if (unbiasedExp < 0) {\n" 1685 " if ((aHi & 0x80000000u) != 0u && aLo == 0u) {\n" 1686 " return 0;\n" 1687 " }\n" 1688 " aHi &= 0x80000000u;\n" 1689 " if ((a.y & 0x000FFFFFu) == 0u && a.x == 0u) {\n" 1690 " aLo = 0u;\n" 1691 " return packUint2x32(uvec2(aLo, aHi));\n" 1692 " }\n" 1693 " aHi = mix(aHi, (aHi | 0x3FF00000u), unbiasedExp == -1);\n" 1694 " aLo = 0u;\n" 1695 " } else {\n" 1696 " uint maskExp = 0x000FFFFFu >> unbiasedExp;\n" 1697 " uint lastBit = maskExp + 1;\n" 1698 " aHi += 0x00080000u >> unbiasedExp;\n" 1699 " if ((aHi & maskExp) == 0u)\n" 1700 " aHi &= ~lastBit;\n" 1701 " aHi &= ~maskExp;\n" 1702 " aLo = 0u;\n" 1703 " }\n" 1704 " } else if (unbiasedExp > 51 || unbiasedExp == 1024) {\n" 1705 " return __a;\n" 1706 " } else {\n" 1707 " uint maskExp = 0xFFFFFFFFu >> (unbiasedExp - 20);\n" 1708 " if ((aLo & maskExp) == 0u)\n" 1709 " return __a;\n" 1710 " uint tmp = aLo + (1u << (51 - unbiasedExp));\n" 1711 " if(tmp < aLo)\n" 1712 " aHi += 1u;\n" 1713 " aLo = tmp;\n" 1714 " aLo &= ~maskExp;\n" 1715 " }\n" 1716 "\n" 1717 " return packUint2x32(uvec2(aLo, aHi));\n" 1718 "}\n" 1719 "\n" 1720 "uint64_t\n" 1721 "__fmin64(uint64_t a, uint64_t b)\n" 1722 "{\n" 1723 " if (__is_nan(a)) return b;\n" 1724 " if (__is_nan(b)) return a;\n" 1725 "\n" 1726 " if (__flt64_nonnan(a, b)) return a;\n" 1727 " return b;\n" 1728 "}\n" 1729 "\n" 1730 "uint64_t\n" 1731 "__fmax64(uint64_t a, uint64_t b)\n" 1732 "{\n" 1733 " if (__is_nan(a)) return b;\n" 1734 " if (__is_nan(b)) return a;\n" 1735 "\n" 1736 " if (__flt64_nonnan(a, b)) return b;\n" 1737 " return a;\n" 1738 "}\n" 1739 "\n" 1740 "uint64_t\n" 1741 "__ffract64(uint64_t a)\n" 1742 "{\n" 1743 " return __fadd64(a, __fneg64(__ffloor64(a)));\n" 1744 "}\n" 1745 "" 1746 ; 1747