1b8e80941Smrgstatic const char float64_source[] =
2b8e80941Smrg "/*\n"
3b8e80941Smrg    " * The implementations contained in this file are heavily based on the\n"
4b8e80941Smrg    " * implementations found in the Berkeley SoftFloat library. As such, they are\n"
5b8e80941Smrg    " * licensed under the same 3-clause BSD license:\n"
6b8e80941Smrg    " *\n"
7b8e80941Smrg    " * License for Berkeley SoftFloat Release 3e\n"
8b8e80941Smrg    " *\n"
9b8e80941Smrg    " * John R. Hauser\n"
10b8e80941Smrg    " * 2018 January 20\n"
11b8e80941Smrg    " *\n"
12b8e80941Smrg    " * The following applies to the whole of SoftFloat Release 3e as well as to\n"
13b8e80941Smrg    " * each source file individually.\n"
14b8e80941Smrg    " *\n"
15b8e80941Smrg    " * Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 The Regents of the\n"
16b8e80941Smrg    " * University of California.  All rights reserved.\n"
17b8e80941Smrg    " *\n"
18b8e80941Smrg    " * Redistribution and use in source and binary forms, with or without\n"
19b8e80941Smrg    " * modification, are permitted provided that the following conditions are met:\n"
20b8e80941Smrg    " *\n"
21b8e80941Smrg    " *  1. Redistributions of source code must retain the above copyright notice,\n"
22b8e80941Smrg    " *     this list of conditions, and the following disclaimer.\n"
23b8e80941Smrg    " *\n"
24b8e80941Smrg    " *  2. Redistributions in binary form must reproduce the above copyright\n"
25b8e80941Smrg    " *     notice, this list of conditions, and the following disclaimer in the\n"
26b8e80941Smrg    " *     documentation and/or other materials provided with the distribution.\n"
27b8e80941Smrg    " *\n"
28b8e80941Smrg    " *  3. Neither the name of the University nor the names of its contributors\n"
29b8e80941Smrg    " *     may be used to endorse or promote products derived from this software\n"
30b8e80941Smrg    " *     without specific prior written permission.\n"
31b8e80941Smrg    " *\n"
32b8e80941Smrg    " * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS \"AS IS\", AND ANY\n"
33b8e80941Smrg    " * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n"
34b8e80941Smrg    " * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE\n"
35b8e80941Smrg    " * DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY\n"
36b8e80941Smrg    " * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n"
37b8e80941Smrg    " * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n"
38b8e80941Smrg    " * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND\n"
39b8e80941Smrg    " * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n"
40b8e80941Smrg    " * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF\n"
41b8e80941Smrg    " * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
42b8e80941Smrg    "*/\n"
43b8e80941Smrg    "\n"
44b8e80941Smrg    "#version 430\n"
45b8e80941Smrg    "#extension GL_ARB_gpu_shader_int64 : enable\n"
46b8e80941Smrg    "#extension GL_ARB_shader_bit_encoding : enable\n"
47b8e80941Smrg    "#extension GL_EXT_shader_integer_mix : enable\n"
48b8e80941Smrg    "#extension GL_MESA_shader_integer_functions : enable\n"
49b8e80941Smrg    "\n"
50b8e80941Smrg    "#pragma warning(off)\n"
51b8e80941Smrg    "\n"
52b8e80941Smrg    "/* Software IEEE floating-point rounding mode.\n"
53b8e80941Smrg    " * GLSL spec section \"4.7.1 Range and Precision\":\n"
54b8e80941Smrg    " * The rounding mode cannot be set and is undefined.\n"
55b8e80941Smrg    " * But here, we are able to define the rounding mode at the compilation time.\n"
56b8e80941Smrg    " */\n"
57b8e80941Smrg    "#define FLOAT_ROUND_NEAREST_EVEN    0\n"
58b8e80941Smrg    "#define FLOAT_ROUND_TO_ZERO         1\n"
59b8e80941Smrg    "#define FLOAT_ROUND_DOWN            2\n"
60b8e80941Smrg    "#define FLOAT_ROUND_UP              3\n"
61b8e80941Smrg    "#define FLOAT_ROUNDING_MODE         FLOAT_ROUND_NEAREST_EVEN\n"
62b8e80941Smrg    "\n"
63b8e80941Smrg    "/* Absolute value of a Float64 :\n"
64b8e80941Smrg    " * Clear the sign bit\n"
65b8e80941Smrg    " */\n"
66b8e80941Smrg    "uint64_t\n"
67b8e80941Smrg    "__fabs64(uint64_t __a)\n"
68b8e80941Smrg    "{\n"
69b8e80941Smrg    "   uvec2 a = unpackUint2x32(__a);\n"
70b8e80941Smrg    "   a.y &= 0x7FFFFFFFu;\n"
71b8e80941Smrg    "   return packUint2x32(a);\n"
72b8e80941Smrg    "}\n"
73b8e80941Smrg    "\n"
74b8e80941Smrg    "/* Returns 1 if the double-precision floating-point value `a' is a NaN;\n"
75b8e80941Smrg    " * otherwise returns 0.\n"
76b8e80941Smrg    " */\n"
77b8e80941Smrg    "bool\n"
78b8e80941Smrg    "__is_nan(uint64_t __a)\n"
79b8e80941Smrg    "{\n"
80b8e80941Smrg    "   uvec2 a = unpackUint2x32(__a);\n"
81b8e80941Smrg    "   return (0xFFE00000u <= (a.y<<1)) &&\n"
82b8e80941Smrg    "      ((a.x != 0u) || ((a.y & 0x000FFFFFu) != 0u));\n"
83b8e80941Smrg    "}\n"
84b8e80941Smrg    "\n"
85b8e80941Smrg    "/* Negate value of a Float64 :\n"
86b8e80941Smrg    " * Toggle the sign bit\n"
87b8e80941Smrg    " */\n"
88b8e80941Smrg    "uint64_t\n"
89b8e80941Smrg    "__fneg64(uint64_t __a)\n"
90b8e80941Smrg    "{\n"
91b8e80941Smrg    "   uvec2 a = unpackUint2x32(__a);\n"
92b8e80941Smrg    "   uint t = a.y;\n"
93b8e80941Smrg    "\n"
94b8e80941Smrg    "   t ^= (1u << 31);\n"
95b8e80941Smrg    "   a.y = mix(t, a.y, __is_nan(__a));\n"
96b8e80941Smrg    "   return packUint2x32(a);\n"
97b8e80941Smrg    "}\n"
98b8e80941Smrg    "\n"
99b8e80941Smrg    "uint64_t\n"
100b8e80941Smrg    "__fsign64(uint64_t __a)\n"
101b8e80941Smrg    "{\n"
102b8e80941Smrg    "   uvec2 a = unpackUint2x32(__a);\n"
103b8e80941Smrg    "   uvec2 retval;\n"
104b8e80941Smrg    "   retval.x = 0u;\n"
105b8e80941Smrg    "   retval.y = mix((a.y & 0x80000000u) | 0x3FF00000u, 0u, (a.y << 1 | a.x) == 0u);\n"
106b8e80941Smrg    "   return packUint2x32(retval);\n"
107b8e80941Smrg    "}\n"
108b8e80941Smrg    "\n"
109b8e80941Smrg    "/* Returns the fraction bits of the double-precision floating-point value `a'.*/\n"
110b8e80941Smrg    "uint\n"
111b8e80941Smrg    "__extractFloat64FracLo(uint64_t a)\n"
112b8e80941Smrg    "{\n"
113b8e80941Smrg    "   return unpackUint2x32(a).x;\n"
114b8e80941Smrg    "}\n"
115b8e80941Smrg    "\n"
116b8e80941Smrg    "uint\n"
117b8e80941Smrg    "__extractFloat64FracHi(uint64_t a)\n"
118b8e80941Smrg    "{\n"
119b8e80941Smrg    "   return unpackUint2x32(a).y & 0x000FFFFFu;\n"
120b8e80941Smrg    "}\n"
121b8e80941Smrg    "\n"
122b8e80941Smrg    "/* Returns the exponent bits of the double-precision floating-point value `a'.*/\n"
123b8e80941Smrg    "int\n"
124b8e80941Smrg    "__extractFloat64Exp(uint64_t __a)\n"
125b8e80941Smrg    "{\n"
126b8e80941Smrg    "   uvec2 a = unpackUint2x32(__a);\n"
127b8e80941Smrg    "   return int((a.y>>20) & 0x7FFu);\n"
128b8e80941Smrg    "}\n"
129b8e80941Smrg    "\n"
130b8e80941Smrg    "bool\n"
131b8e80941Smrg    "__feq64_nonnan(uint64_t __a, uint64_t __b)\n"
132b8e80941Smrg    "{\n"
133b8e80941Smrg    "   uvec2 a = unpackUint2x32(__a);\n"
134b8e80941Smrg    "   uvec2 b = unpackUint2x32(__b);\n"
135b8e80941Smrg    "   return (a.x == b.x) &&\n"
136b8e80941Smrg    "          ((a.y == b.y) || ((a.x == 0u) && (((a.y | b.y)<<1) == 0u)));\n"
137b8e80941Smrg    "}\n"
138b8e80941Smrg    "\n"
139b8e80941Smrg    "/* Returns true if the double-precision floating-point value `a' is equal to the\n"
140b8e80941Smrg    " * corresponding value `b', and false otherwise.  The comparison is performed\n"
141b8e80941Smrg    " * according to the IEEE Standard for Floating-Point Arithmetic.\n"
142b8e80941Smrg    " */\n"
143b8e80941Smrg    "bool\n"
144b8e80941Smrg    "__feq64(uint64_t a, uint64_t b)\n"
145b8e80941Smrg    "{\n"
146b8e80941Smrg    "   if (__is_nan(a) || __is_nan(b))\n"
147b8e80941Smrg    "      return false;\n"
148b8e80941Smrg    "\n"
149b8e80941Smrg    "   return __feq64_nonnan(a, b);\n"
150b8e80941Smrg    "}\n"
151b8e80941Smrg    "\n"
152b8e80941Smrg    "/* Returns true if the double-precision floating-point value `a' is not equal\n"
153b8e80941Smrg    " * to the corresponding value `b', and false otherwise.  The comparison is\n"
154b8e80941Smrg    " * performed according to the IEEE Standard for Floating-Point Arithmetic.\n"
155b8e80941Smrg    " */\n"
156b8e80941Smrg    "bool\n"
157b8e80941Smrg    "__fne64(uint64_t a, uint64_t b)\n"
158b8e80941Smrg    "{\n"
159b8e80941Smrg    "   if (__is_nan(a) || __is_nan(b))\n"
160b8e80941Smrg    "      return true;\n"
161b8e80941Smrg    "\n"
162b8e80941Smrg    "   return !__feq64_nonnan(a, b);\n"
163b8e80941Smrg    "}\n"
164b8e80941Smrg    "\n"
165b8e80941Smrg    "/* Returns the sign bit of the double-precision floating-point value `a'.*/\n"
166b8e80941Smrg    "uint\n"
167b8e80941Smrg    "__extractFloat64Sign(uint64_t a)\n"
168b8e80941Smrg    "{\n"
169b8e80941Smrg    "   return unpackUint2x32(a).y >> 31;\n"
170b8e80941Smrg    "}\n"
171b8e80941Smrg    "\n"
172b8e80941Smrg    "/* Returns true if the 64-bit value formed by concatenating `a0' and `a1' is less\n"
173b8e80941Smrg    " * than the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,\n"
174b8e80941Smrg    " * returns false.\n"
175b8e80941Smrg    " */\n"
176b8e80941Smrg    "bool\n"
177b8e80941Smrg    "lt64(uint a0, uint a1, uint b0, uint b1)\n"
178b8e80941Smrg    "{\n"
179b8e80941Smrg    "   return (a0 < b0) || ((a0 == b0) && (a1 < b1));\n"
180b8e80941Smrg    "}\n"
181b8e80941Smrg    "\n"
182b8e80941Smrg    "bool\n"
183b8e80941Smrg    "__flt64_nonnan(uint64_t __a, uint64_t __b)\n"
184b8e80941Smrg    "{\n"
185b8e80941Smrg    "   uvec2 a = unpackUint2x32(__a);\n"
186b8e80941Smrg    "   uvec2 b = unpackUint2x32(__b);\n"
187b8e80941Smrg    "   uint aSign = __extractFloat64Sign(__a);\n"
188b8e80941Smrg    "   uint bSign = __extractFloat64Sign(__b);\n"
189b8e80941Smrg    "   if (aSign != bSign)\n"
190b8e80941Smrg    "      return (aSign != 0u) && ((((a.y | b.y)<<1) | a.x | b.x) != 0u);\n"
191b8e80941Smrg    "\n"
192b8e80941Smrg    "   return mix(lt64(a.y, a.x, b.y, b.x), lt64(b.y, b.x, a.y, a.x), aSign != 0u);\n"
193b8e80941Smrg    "}\n"
194b8e80941Smrg    "\n"
195b8e80941Smrg    "/* Returns true if the double-precision floating-point value `a' is less than\n"
196b8e80941Smrg    " * the corresponding value `b', and false otherwise.  The comparison is performed\n"
197b8e80941Smrg    " * according to the IEEE Standard for Floating-Point Arithmetic.\n"
198b8e80941Smrg    " */\n"
199b8e80941Smrg    "bool\n"
200b8e80941Smrg    "__flt64(uint64_t a, uint64_t b)\n"
201b8e80941Smrg    "{\n"
202b8e80941Smrg    "   if (__is_nan(a) || __is_nan(b))\n"
203b8e80941Smrg    "      return false;\n"
204b8e80941Smrg    "\n"
205b8e80941Smrg    "   return __flt64_nonnan(a, b);\n"
206b8e80941Smrg    "}\n"
207b8e80941Smrg    "\n"
208b8e80941Smrg    "/* Returns true if the double-precision floating-point value `a' is greater\n"
209b8e80941Smrg    " * than or equal to * the corresponding value `b', and false otherwise.  The\n"
210b8e80941Smrg    " * comparison is performed * according to the IEEE Standard for Floating-Point\n"
211b8e80941Smrg    " * Arithmetic.\n"
212b8e80941Smrg    " */\n"
213b8e80941Smrg    "bool\n"
214b8e80941Smrg    "__fge64(uint64_t a, uint64_t b)\n"
215b8e80941Smrg    "{\n"
216b8e80941Smrg    "   if (__is_nan(a) || __is_nan(b))\n"
217b8e80941Smrg    "      return false;\n"
218b8e80941Smrg    "\n"
219b8e80941Smrg    "   return !__flt64_nonnan(a, b);\n"
220b8e80941Smrg    "}\n"
221b8e80941Smrg    "\n"
222b8e80941Smrg    "/* Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit\n"
223b8e80941Smrg    " * value formed by concatenating `b0' and `b1'.  Addition is modulo 2^64, so\n"
224b8e80941Smrg    " * any carry out is lost.  The result is broken into two 32-bit pieces which\n"
225b8e80941Smrg    " * are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.\n"
226b8e80941Smrg    " */\n"
227b8e80941Smrg    "void\n"
228b8e80941Smrg    "__add64(uint a0, uint a1, uint b0, uint b1,\n"
229b8e80941Smrg    "        out uint z0Ptr,\n"
230b8e80941Smrg    "        out uint z1Ptr)\n"
231b8e80941Smrg    "{\n"
232b8e80941Smrg    "   uint z1 = a1 + b1;\n"
233b8e80941Smrg    "   z1Ptr = z1;\n"
234b8e80941Smrg    "   z0Ptr = a0 + b0 + uint(z1 < a1);\n"
235b8e80941Smrg    "}\n"
236b8e80941Smrg    "\n"
237b8e80941Smrg    "\n"
238b8e80941Smrg    "/* Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the\n"
239b8e80941Smrg    " * 64-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo\n"
240b8e80941Smrg    " * 2^64, so any borrow out (carry out) is lost.  The result is broken into two\n"
241b8e80941Smrg    " * 32-bit pieces which are stored at the locations pointed to by `z0Ptr' and\n"
242b8e80941Smrg    " * `z1Ptr'.\n"
243b8e80941Smrg    " */\n"
244b8e80941Smrg    "void\n"
245b8e80941Smrg    "__sub64(uint a0, uint a1, uint b0, uint b1,\n"
246b8e80941Smrg    "        out uint z0Ptr,\n"
247b8e80941Smrg    "        out uint z1Ptr)\n"
248b8e80941Smrg    "{\n"
249b8e80941Smrg    "   z1Ptr = a1 - b1;\n"
250b8e80941Smrg    "   z0Ptr = a0 - b0 - uint(a1 < b1);\n"
251b8e80941Smrg    "}\n"
252b8e80941Smrg    "\n"
253b8e80941Smrg    "/* Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the\n"
254b8e80941Smrg    " * number of bits given in `count'.  If any nonzero bits are shifted off, they\n"
255b8e80941Smrg    " * are \"jammed\" into the least significant bit of the result by setting the\n"
256b8e80941Smrg    " * least significant bit to 1.  The value of `count' can be arbitrarily large;\n"
257b8e80941Smrg    " * in particular, if `count' is greater than 64, the result will be either 0\n"
258b8e80941Smrg    " * or 1, depending on whether the concatenation of `a0' and `a1' is zero or\n"
259b8e80941Smrg    " * nonzero.  The result is broken into two 32-bit pieces which are stored at\n"
260b8e80941Smrg    " * the locations pointed to by `z0Ptr' and `z1Ptr'.\n"
261b8e80941Smrg    " */\n"
262b8e80941Smrg    "void\n"
263b8e80941Smrg    "__shift64RightJamming(uint a0,\n"
264b8e80941Smrg    "                      uint a1,\n"
265b8e80941Smrg    "                      int count,\n"
266b8e80941Smrg    "                      out uint z0Ptr,\n"
267b8e80941Smrg    "                      out uint z1Ptr)\n"
268b8e80941Smrg    "{\n"
269b8e80941Smrg    "   uint z0;\n"
270b8e80941Smrg    "   uint z1;\n"
271b8e80941Smrg    "   int negCount = (-count) & 31;\n"
272b8e80941Smrg    "\n"
273b8e80941Smrg    "   z0 = mix(0u, a0, count == 0);\n"
274b8e80941Smrg    "   z0 = mix(z0, (a0 >> count), count < 32);\n"
275b8e80941Smrg    "\n"
276b8e80941Smrg    "   z1 = uint((a0 | a1) != 0u); /* count >= 64 */\n"
277b8e80941Smrg    "   uint z1_lt64 = (a0>>(count & 31)) | uint(((a0<<negCount) | a1) != 0u);\n"
278b8e80941Smrg    "   z1 = mix(z1, z1_lt64, count < 64);\n"
279b8e80941Smrg    "   z1 = mix(z1, (a0 | uint(a1 != 0u)), count == 32);\n"
280b8e80941Smrg    "   uint z1_lt32 = (a0<<negCount) | (a1>>count) | uint ((a1<<negCount) != 0u);\n"
281b8e80941Smrg    "   z1 = mix(z1, z1_lt32, count < 32);\n"
282b8e80941Smrg    "   z1 = mix(z1, a1, count == 0);\n"
283b8e80941Smrg    "   z1Ptr = z1;\n"
284b8e80941Smrg    "   z0Ptr = z0;\n"
285b8e80941Smrg    "}\n"
286b8e80941Smrg    "\n"
287b8e80941Smrg    "/* Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right\n"
288b8e80941Smrg    " * by 32 _plus_ the number of bits given in `count'.  The shifted result is\n"
289b8e80941Smrg    " * at most 64 nonzero bits; these are broken into two 32-bit pieces which are\n"
290b8e80941Smrg    " * stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted\n"
291b8e80941Smrg    " * off form a third 32-bit result as follows:  The _last_ bit shifted off is\n"
292b8e80941Smrg    " * the most-significant bit of the extra result, and the other 31 bits of the\n"
293b8e80941Smrg    " * extra result are all zero if and only if _all_but_the_last_ bits shifted off\n"
294b8e80941Smrg    " * were all zero.  This extra result is stored in the location pointed to by\n"
295b8e80941Smrg    " * `z2Ptr'.  The value of `count' can be arbitrarily large.\n"
296b8e80941Smrg    " *     (This routine makes more sense if `a0', `a1', and `a2' are considered\n"
297b8e80941Smrg    " * to form a fixed-point value with binary point between `a1' and `a2'.  This\n"
298b8e80941Smrg    " * fixed-point value is shifted right by the number of bits given in `count',\n"
299b8e80941Smrg    " * and the integer part of the result is returned at the locations pointed to\n"
300b8e80941Smrg    " * by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly\n"
301b8e80941Smrg    " * corrupted as described above, and is returned at the location pointed to by\n"
302b8e80941Smrg    " * `z2Ptr'.)\n"
303b8e80941Smrg    " */\n"
304b8e80941Smrg    "void\n"
305b8e80941Smrg    "__shift64ExtraRightJamming(uint a0, uint a1, uint a2,\n"
306b8e80941Smrg    "                           int count,\n"
307b8e80941Smrg    "                           out uint z0Ptr,\n"
308b8e80941Smrg    "                           out uint z1Ptr,\n"
309b8e80941Smrg    "                           out uint z2Ptr)\n"
310b8e80941Smrg    "{\n"
311b8e80941Smrg    "   uint z0 = 0u;\n"
312b8e80941Smrg    "   uint z1;\n"
313b8e80941Smrg    "   uint z2;\n"
314b8e80941Smrg    "   int negCount = (-count) & 31;\n"
315b8e80941Smrg    "\n"
316b8e80941Smrg    "   z2 = mix(uint(a0 != 0u), a0, count == 64);\n"
317b8e80941Smrg    "   z2 = mix(z2, a0 << negCount, count < 64);\n"
318b8e80941Smrg    "   z2 = mix(z2, a1 << negCount, count < 32);\n"
319b8e80941Smrg    "\n"
320b8e80941Smrg    "   z1 = mix(0u, (a0 >> (count & 31)), count < 64);\n"
321b8e80941Smrg    "   z1 = mix(z1, (a0<<negCount) | (a1>>count), count < 32);\n"
322b8e80941Smrg    "\n"
323b8e80941Smrg    "   a2 = mix(a2 | a1, a2, count < 32);\n"
324b8e80941Smrg    "   z0 = mix(z0, a0 >> count, count < 32);\n"
325b8e80941Smrg    "   z2 |= uint(a2 != 0u);\n"
326b8e80941Smrg    "\n"
327b8e80941Smrg    "   z0 = mix(z0, 0u, (count == 32));\n"
328b8e80941Smrg    "   z1 = mix(z1, a0, (count == 32));\n"
329b8e80941Smrg    "   z2 = mix(z2, a1, (count == 32));\n"
330b8e80941Smrg    "   z0 = mix(z0, a0, (count == 0));\n"
331b8e80941Smrg    "   z1 = mix(z1, a1, (count == 0));\n"
332b8e80941Smrg    "   z2 = mix(z2, a2, (count == 0));\n"
333b8e80941Smrg    "   z2Ptr = z2;\n"
334b8e80941Smrg    "   z1Ptr = z1;\n"
335b8e80941Smrg    "   z0Ptr = z0;\n"
336b8e80941Smrg    "}\n"
337b8e80941Smrg    "\n"
338b8e80941Smrg    "/* Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the\n"
339b8e80941Smrg    " * number of bits given in `count'.  Any bits shifted off are lost.  The value\n"
340b8e80941Smrg    " * of `count' must be less than 32.  The result is broken into two 32-bit\n"
341b8e80941Smrg    " * pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.\n"
342b8e80941Smrg    " */\n"
343b8e80941Smrg    "void\n"
344b8e80941Smrg    "__shortShift64Left(uint a0, uint a1,\n"
345b8e80941Smrg    "                   int count,\n"
346b8e80941Smrg    "                   out uint z0Ptr,\n"
347b8e80941Smrg    "                   out uint z1Ptr)\n"
348b8e80941Smrg    "{\n"
349b8e80941Smrg    "   z1Ptr = a1<<count;\n"
350b8e80941Smrg    "   z0Ptr = mix((a0 << count | (a1 >> ((-count) & 31))), a0, count == 0);\n"
351b8e80941Smrg    "}\n"
352b8e80941Smrg    "\n"
353b8e80941Smrg    "/* Packs the sign `zSign', the exponent `zExp', and the significand formed by\n"
354b8e80941Smrg    " * the concatenation of `zFrac0' and `zFrac1' into a double-precision floating-\n"
355b8e80941Smrg    " * point value, returning the result.  After being shifted into the proper\n"
356b8e80941Smrg    " * positions, the three fields `zSign', `zExp', and `zFrac0' are simply added\n"
357b8e80941Smrg    " * together to form the most significant 32 bits of the result.  This means\n"
358b8e80941Smrg    " * that any integer portion of `zFrac0' will be added into the exponent.  Since\n"
359b8e80941Smrg    " * a properly normalized significand will have an integer portion equal to 1,\n"
360b8e80941Smrg    " * the `zExp' input should be 1 less than the desired result exponent whenever\n"
361b8e80941Smrg    " * `zFrac0' and `zFrac1' concatenated form a complete, normalized significand.\n"
362b8e80941Smrg    " */\n"
363b8e80941Smrg    "uint64_t\n"
364b8e80941Smrg    "__packFloat64(uint zSign, int zExp, uint zFrac0, uint zFrac1)\n"
365b8e80941Smrg    "{\n"
366b8e80941Smrg    "   uvec2 z;\n"
367b8e80941Smrg    "\n"
368b8e80941Smrg    "   z.y = (zSign << 31) + (uint(zExp) << 20) + zFrac0;\n"
369b8e80941Smrg    "   z.x = zFrac1;\n"
370b8e80941Smrg    "   return packUint2x32(z);\n"
371b8e80941Smrg    "}\n"
372b8e80941Smrg    "\n"
373b8e80941Smrg    "/* Takes an abstract floating-point value having sign `zSign', exponent `zExp',\n"
374b8e80941Smrg    " * and extended significand formed by the concatenation of `zFrac0', `zFrac1',\n"
375b8e80941Smrg    " * and `zFrac2', and returns the proper double-precision floating-point value\n"
376b8e80941Smrg    " * corresponding to the abstract input.  Ordinarily, the abstract value is\n"
377b8e80941Smrg    " * simply rounded and packed into the double-precision format, with the inexact\n"
378b8e80941Smrg    " * exception raised if the abstract input cannot be represented exactly.\n"
379b8e80941Smrg    " * However, if the abstract value is too large, the overflow and inexact\n"
380b8e80941Smrg    " * exceptions are raised and an infinity or maximal finite value is returned.\n"
381b8e80941Smrg    " * If the abstract value is too small, the input value is rounded to a\n"
382b8e80941Smrg    " * subnormal number, and the underflow and inexact exceptions are raised if the\n"
383b8e80941Smrg    " * abstract input cannot be represented exactly as a subnormal double-precision\n"
384b8e80941Smrg    " * floating-point number.\n"
385b8e80941Smrg    " *     The input significand must be normalized or smaller.  If the input\n"
386b8e80941Smrg    " * significand is not normalized, `zExp' must be 0; in that case, the result\n"
387b8e80941Smrg    " * returned is a subnormal number, and it must not require rounding.  In the\n"
388b8e80941Smrg    " * usual case that the input significand is normalized, `zExp' must be 1 less\n"
389b8e80941Smrg    " * than the \"true\" floating-point exponent.  The handling of underflow and\n"
390b8e80941Smrg    " * overflow follows the IEEE Standard for Floating-Point Arithmetic.\n"
391b8e80941Smrg    " */\n"
392b8e80941Smrg    "uint64_t\n"
393b8e80941Smrg    "__roundAndPackFloat64(uint zSign,\n"
394b8e80941Smrg    "                      int zExp,\n"
395b8e80941Smrg    "                      uint zFrac0,\n"
396b8e80941Smrg    "                      uint zFrac1,\n"
397b8e80941Smrg    "                      uint zFrac2)\n"
398b8e80941Smrg    "{\n"
399b8e80941Smrg    "   bool roundNearestEven;\n"
400b8e80941Smrg    "   bool increment;\n"
401b8e80941Smrg    "\n"
402b8e80941Smrg    "   roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;\n"
403b8e80941Smrg    "   increment = int(zFrac2) < 0;\n"
404b8e80941Smrg    "   if (!roundNearestEven) {\n"
405b8e80941Smrg    "      if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) {\n"
406b8e80941Smrg    "         increment = false;\n"
407b8e80941Smrg    "      } else {\n"
408b8e80941Smrg    "         if (zSign != 0u) {\n"
409b8e80941Smrg    "            increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&\n"
410b8e80941Smrg    "               (zFrac2 != 0u);\n"
411b8e80941Smrg    "         } else {\n"
412b8e80941Smrg    "            increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&\n"
413b8e80941Smrg    "               (zFrac2 != 0u);\n"
414b8e80941Smrg    "         }\n"
415b8e80941Smrg    "      }\n"
416b8e80941Smrg    "   }\n"
417b8e80941Smrg    "   if (0x7FD <= zExp) {\n"
418b8e80941Smrg    "      if ((0x7FD < zExp) ||\n"
419b8e80941Smrg    "         ((zExp == 0x7FD) &&\n"
420b8e80941Smrg    "            (0x001FFFFFu == zFrac0 && 0xFFFFFFFFu == zFrac1) &&\n"
421b8e80941Smrg    "               increment)) {\n"
422b8e80941Smrg    "         if ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) ||\n"
423b8e80941Smrg    "            ((zSign != 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP)) ||\n"
424b8e80941Smrg    "               ((zSign == 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN))) {\n"
425b8e80941Smrg    "            return __packFloat64(zSign, 0x7FE, 0x000FFFFFu, 0xFFFFFFFFu);\n"
426b8e80941Smrg    "         }\n"
427b8e80941Smrg    "         return __packFloat64(zSign, 0x7FF, 0u, 0u);\n"
428b8e80941Smrg    "      }\n"
429b8e80941Smrg    "      if (zExp < 0) {\n"
430b8e80941Smrg    "         __shift64ExtraRightJamming(\n"
431b8e80941Smrg    "            zFrac0, zFrac1, zFrac2, -zExp, zFrac0, zFrac1, zFrac2);\n"
432b8e80941Smrg    "         zExp = 0;\n"
433b8e80941Smrg    "         if (roundNearestEven) {\n"
434b8e80941Smrg    "            increment = zFrac2 < 0u;\n"
435b8e80941Smrg    "         } else {\n"
436b8e80941Smrg    "            if (zSign != 0u) {\n"
437b8e80941Smrg    "               increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&\n"
438b8e80941Smrg    "                  (zFrac2 != 0u);\n"
439b8e80941Smrg    "            } else {\n"
440b8e80941Smrg    "               increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&\n"
441b8e80941Smrg    "                  (zFrac2 != 0u);\n"
442b8e80941Smrg    "            }\n"
443b8e80941Smrg    "         }\n"
444b8e80941Smrg    "      }\n"
445b8e80941Smrg    "   }\n"
446b8e80941Smrg    "   if (increment) {\n"
447b8e80941Smrg    "      __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);\n"
448b8e80941Smrg    "      zFrac1 &= ~((zFrac2 + uint(zFrac2 == 0u)) & uint(roundNearestEven));\n"
449b8e80941Smrg    "   } else {\n"
450b8e80941Smrg    "      zExp = mix(zExp, 0, (zFrac0 | zFrac1) == 0u);\n"
451b8e80941Smrg    "   }\n"
452b8e80941Smrg    "   return __packFloat64(zSign, zExp, zFrac0, zFrac1);\n"
453b8e80941Smrg    "}\n"
454b8e80941Smrg    "\n"
455b8e80941Smrg    "uint64_t\n"
456b8e80941Smrg    "__roundAndPackUInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2)\n"
457b8e80941Smrg    "{\n"
458b8e80941Smrg    "   bool roundNearestEven;\n"
459b8e80941Smrg    "   bool increment;\n"
460b8e80941Smrg    "   uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;\n"
461b8e80941Smrg    "\n"
462b8e80941Smrg    "   roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;\n"
463b8e80941Smrg    "\n"
464b8e80941Smrg    "   if (zFrac2 >= 0x80000000u)\n"
465b8e80941Smrg    "      increment = false;\n"
466b8e80941Smrg    "\n"
467b8e80941Smrg    "   if (!roundNearestEven) {\n"
468b8e80941Smrg    "      if (zSign != 0u) {\n"
469b8e80941Smrg    "         if ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && (zFrac2 != 0u)) {\n"
470b8e80941Smrg    "            increment = false;\n"
471b8e80941Smrg    "         }\n"
472b8e80941Smrg    "      } else {\n"
473b8e80941Smrg    "         increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&\n"
474b8e80941Smrg    "            (zFrac2 != 0u);\n"
475b8e80941Smrg    "      }\n"
476b8e80941Smrg    "   }\n"
477b8e80941Smrg    "\n"
478b8e80941Smrg    "   if (increment) {\n"
479b8e80941Smrg    "      __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);\n"
480b8e80941Smrg    "      if ((zFrac0 | zFrac1) != 0u)\n"
481b8e80941Smrg    "         zFrac1 &= ~(1u) + uint(zFrac2 == 0u) & uint(roundNearestEven);\n"
482b8e80941Smrg    "   }\n"
483b8e80941Smrg    "   return mix(packUint2x32(uvec2(zFrac1, zFrac0)), default_nan,\n"
484b8e80941Smrg    "              (zSign !=0u && (zFrac0 | zFrac1) != 0u));\n"
485b8e80941Smrg    "}\n"
486b8e80941Smrg    "\n"
487b8e80941Smrg    "int64_t\n"
488b8e80941Smrg    "__roundAndPackInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2)\n"
489b8e80941Smrg    "{\n"
490b8e80941Smrg    "   bool roundNearestEven;\n"
491b8e80941Smrg    "   bool increment;\n"
492b8e80941Smrg    "   int64_t default_NegNaN = -0x7FFFFFFFFFFFFFFEL;\n"
493b8e80941Smrg    "   int64_t default_PosNaN = 0xFFFFFFFFFFFFFFFFL;\n"
494b8e80941Smrg    "\n"
495b8e80941Smrg    "   roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;\n"
496b8e80941Smrg    "\n"
497b8e80941Smrg    "   if (zFrac2 >= 0x80000000u)\n"
498b8e80941Smrg    "      increment = false;\n"
499b8e80941Smrg    "\n"
500b8e80941Smrg    "   if (!roundNearestEven) {\n"
501b8e80941Smrg    "      if (zSign != 0u) {\n"
502b8e80941Smrg    "         increment = ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&\n"
503b8e80941Smrg    "            (zFrac2 != 0u));\n"
504b8e80941Smrg    "      } else {\n"
505b8e80941Smrg    "         increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&\n"
506b8e80941Smrg    "            (zFrac2 != 0u);\n"
507b8e80941Smrg    "      }\n"
508b8e80941Smrg    "   }\n"
509b8e80941Smrg    "\n"
510b8e80941Smrg    "   if (increment) {\n"
511b8e80941Smrg    "      __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);\n"
512b8e80941Smrg    "      if ((zFrac0 | zFrac1) != 0u)\n"
513b8e80941Smrg    "         zFrac1 &= ~(1u) + uint(zFrac2 == 0u) & uint(roundNearestEven);\n"
514b8e80941Smrg    "   }\n"
515b8e80941Smrg    "\n"
516b8e80941Smrg    "   int64_t absZ = mix(int64_t(packUint2x32(uvec2(zFrac1, zFrac0))),\n"
517b8e80941Smrg    "                      -int64_t(packUint2x32(uvec2(zFrac1, zFrac0))),\n"
518b8e80941Smrg    "                      (zSign != 0u));\n"
519b8e80941Smrg    "   int64_t nan = mix(default_PosNaN, default_NegNaN, bool(zSign));\n"
520b8e80941Smrg    "   return mix(absZ, nan, bool(zSign ^ uint(absZ < 0)) && bool(absZ));\n"
521b8e80941Smrg    "}\n"
522b8e80941Smrg    "\n"
523b8e80941Smrg    "/* Returns the number of leading 0 bits before the most-significant 1 bit of\n"
524b8e80941Smrg    " * `a'.  If `a' is zero, 32 is returned.\n"
525b8e80941Smrg    " */\n"
526b8e80941Smrg    "int\n"
527b8e80941Smrg    "__countLeadingZeros32(uint a)\n"
528b8e80941Smrg    "{\n"
529b8e80941Smrg    "   int shiftCount;\n"
530b8e80941Smrg    "   shiftCount = mix(31 - findMSB(a), 32, a == 0u);\n"
531b8e80941Smrg    "   return shiftCount;\n"
532b8e80941Smrg    "}\n"
533b8e80941Smrg    "\n"
534b8e80941Smrg    "/* Takes an abstract floating-point value having sign `zSign', exponent `zExp',\n"
535b8e80941Smrg    " * and significand formed by the concatenation of `zSig0' and `zSig1', and\n"
536b8e80941Smrg    " * returns the proper double-precision floating-point value corresponding\n"
537b8e80941Smrg    " * to the abstract input.  This routine is just like `__roundAndPackFloat64'\n"
538b8e80941Smrg    " * except that the input significand has fewer bits and does not have to be\n"
539b8e80941Smrg    " * normalized.  In all cases, `zExp' must be 1 less than the \"true\" floating-\n"
540b8e80941Smrg    " * point exponent.\n"
541b8e80941Smrg    " */\n"
542b8e80941Smrg    "uint64_t\n"
543b8e80941Smrg    "__normalizeRoundAndPackFloat64(uint zSign,\n"
544b8e80941Smrg    "                               int zExp,\n"
545b8e80941Smrg    "                               uint zFrac0,\n"
546b8e80941Smrg    "                               uint zFrac1)\n"
547b8e80941Smrg    "{\n"
548b8e80941Smrg    "   int shiftCount;\n"
549b8e80941Smrg    "   uint zFrac2;\n"
550b8e80941Smrg    "\n"
551b8e80941Smrg    "   if (zFrac0 == 0u) {\n"
552b8e80941Smrg    "      zExp -= 32;\n"
553b8e80941Smrg    "      zFrac0 = zFrac1;\n"
554b8e80941Smrg    "      zFrac1 = 0u;\n"
555b8e80941Smrg    "   }\n"
556b8e80941Smrg    "\n"
557b8e80941Smrg    "   shiftCount = __countLeadingZeros32(zFrac0) - 11;\n"
558b8e80941Smrg    "   if (0 <= shiftCount) {\n"
559b8e80941Smrg    "      zFrac2 = 0u;\n"
560b8e80941Smrg    "      __shortShift64Left(zFrac0, zFrac1, shiftCount, zFrac0, zFrac1);\n"
561b8e80941Smrg    "   } else {\n"
562b8e80941Smrg    "      __shift64ExtraRightJamming(\n"
563b8e80941Smrg    "         zFrac0, zFrac1, 0u, -shiftCount, zFrac0, zFrac1, zFrac2);\n"
564b8e80941Smrg    "   }\n"
565b8e80941Smrg    "   zExp -= shiftCount;\n"
566b8e80941Smrg    "   return __roundAndPackFloat64(zSign, zExp, zFrac0, zFrac1, zFrac2);\n"
567b8e80941Smrg    "}\n"
568b8e80941Smrg    "\n"
569b8e80941Smrg    "/* Takes two double-precision floating-point values `a' and `b', one of which\n"
570b8e80941Smrg    " * is a NaN, and returns the appropriate NaN result.\n"
571b8e80941Smrg    " */\n"
572b8e80941Smrg    "uint64_t\n"
573b8e80941Smrg    "__propagateFloat64NaN(uint64_t __a, uint64_t __b)\n"
574b8e80941Smrg    "{\n"
575b8e80941Smrg    "   bool aIsNaN = __is_nan(__a);\n"
576b8e80941Smrg    "   bool bIsNaN = __is_nan(__b);\n"
577b8e80941Smrg    "   uvec2 a = unpackUint2x32(__a);\n"
578b8e80941Smrg    "   uvec2 b = unpackUint2x32(__b);\n"
579b8e80941Smrg    "   a.y |= 0x00080000u;\n"
580b8e80941Smrg    "   b.y |= 0x00080000u;\n"
581b8e80941Smrg    "\n"
582b8e80941Smrg    "   return packUint2x32(mix(b, mix(a, b, bvec2(bIsNaN, bIsNaN)), bvec2(aIsNaN, aIsNaN)));\n"
583b8e80941Smrg    "}\n"
584b8e80941Smrg    "\n"
585b8e80941Smrg    "/* Returns the result of adding the double-precision floating-point values\n"
586b8e80941Smrg    " * `a' and `b'.  The operation is performed according to the IEEE Standard for\n"
587b8e80941Smrg    " * Floating-Point Arithmetic.\n"
588b8e80941Smrg    " */\n"
589b8e80941Smrg    "uint64_t\n"
590b8e80941Smrg    "__fadd64(uint64_t a, uint64_t b)\n"
591b8e80941Smrg    "{\n"
592b8e80941Smrg    "   uint aSign = __extractFloat64Sign(a);\n"
593b8e80941Smrg    "   uint bSign = __extractFloat64Sign(b);\n"
594b8e80941Smrg    "   uint aFracLo = __extractFloat64FracLo(a);\n"
595b8e80941Smrg    "   uint aFracHi = __extractFloat64FracHi(a);\n"
596b8e80941Smrg    "   uint bFracLo = __extractFloat64FracLo(b);\n"
597b8e80941Smrg    "   uint bFracHi = __extractFloat64FracHi(b);\n"
598b8e80941Smrg    "   int aExp = __extractFloat64Exp(a);\n"
599b8e80941Smrg    "   int bExp = __extractFloat64Exp(b);\n"
600b8e80941Smrg    "   uint zFrac0 = 0u;\n"
601b8e80941Smrg    "   uint zFrac1 = 0u;\n"
602b8e80941Smrg    "   int expDiff = aExp - bExp;\n"
603b8e80941Smrg    "   if (aSign == bSign) {\n"
604b8e80941Smrg    "      uint zFrac2 = 0u;\n"
605b8e80941Smrg    "      int zExp;\n"
606b8e80941Smrg    "      bool orig_exp_diff_is_zero = (expDiff == 0);\n"
607b8e80941Smrg    "\n"
608b8e80941Smrg    "      if (orig_exp_diff_is_zero) {\n"
609b8e80941Smrg    "         if (aExp == 0x7FF) {\n"
610b8e80941Smrg    "            bool propagate = (aFracHi | aFracLo | bFracHi | bFracLo) != 0u;\n"
611b8e80941Smrg    "            return mix(a, __propagateFloat64NaN(a, b), propagate);\n"
612b8e80941Smrg    "         }\n"
613b8e80941Smrg    "         __add64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);\n"
614b8e80941Smrg    "         if (aExp == 0)\n"
615b8e80941Smrg    "            return __packFloat64(aSign, 0, zFrac0, zFrac1);\n"
616b8e80941Smrg    "         zFrac2 = 0u;\n"
617b8e80941Smrg    "         zFrac0 |= 0x00200000u;\n"
618b8e80941Smrg    "         zExp = aExp;\n"
619b8e80941Smrg    "         __shift64ExtraRightJamming(\n"
620b8e80941Smrg    "            zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2);\n"
621b8e80941Smrg    "      } else if (0 < expDiff) {\n"
622b8e80941Smrg    "         if (aExp == 0x7FF) {\n"
623b8e80941Smrg    "            bool propagate = (aFracHi | aFracLo) != 0u;\n"
624b8e80941Smrg    "            return mix(a, __propagateFloat64NaN(a, b), propagate);\n"
625b8e80941Smrg    "         }\n"
626b8e80941Smrg    "\n"
627b8e80941Smrg    "         expDiff = mix(expDiff, expDiff - 1, bExp == 0);\n"
628b8e80941Smrg    "         bFracHi = mix(bFracHi | 0x00100000u, bFracHi, bExp == 0);\n"
629b8e80941Smrg    "         __shift64ExtraRightJamming(\n"
630b8e80941Smrg    "            bFracHi, bFracLo, 0u, expDiff, bFracHi, bFracLo, zFrac2);\n"
631b8e80941Smrg    "         zExp = aExp;\n"
632b8e80941Smrg    "      } else if (expDiff < 0) {\n"
633b8e80941Smrg    "         if (bExp == 0x7FF) {\n"
634b8e80941Smrg    "            bool propagate = (bFracHi | bFracLo) != 0u;\n"
635b8e80941Smrg    "            return mix(__packFloat64(aSign, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate);\n"
636b8e80941Smrg    "         }\n"
637b8e80941Smrg    "         expDiff = mix(expDiff, expDiff + 1, aExp == 0);\n"
638b8e80941Smrg    "         aFracHi = mix(aFracHi | 0x00100000u, aFracHi, aExp == 0);\n"
639b8e80941Smrg    "         __shift64ExtraRightJamming(\n"
640b8e80941Smrg    "            aFracHi, aFracLo, 0u, - expDiff, aFracHi, aFracLo, zFrac2);\n"
641b8e80941Smrg    "         zExp = bExp;\n"
642b8e80941Smrg    "      }\n"
643b8e80941Smrg    "      if (!orig_exp_diff_is_zero) {\n"
644b8e80941Smrg    "         aFracHi |= 0x00100000u;\n"
645b8e80941Smrg    "         __add64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);\n"
646b8e80941Smrg    "         --zExp;\n"
647b8e80941Smrg    "         if (!(zFrac0 < 0x00200000u)) {\n"
648b8e80941Smrg    "            __shift64ExtraRightJamming(zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2);\n"
649b8e80941Smrg    "            ++zExp;\n"
650b8e80941Smrg    "         }\n"
651b8e80941Smrg    "      }\n"
652b8e80941Smrg    "      return __roundAndPackFloat64(aSign, zExp, zFrac0, zFrac1, zFrac2);\n"
653b8e80941Smrg    "\n"
654b8e80941Smrg    "   } else {\n"
655b8e80941Smrg    "      int zExp;\n"
656b8e80941Smrg    "\n"
657b8e80941Smrg    "      __shortShift64Left(aFracHi, aFracLo, 10, aFracHi, aFracLo);\n"
658b8e80941Smrg    "      __shortShift64Left(bFracHi, bFracLo, 10, bFracHi, bFracLo);\n"
659b8e80941Smrg    "      if (0 < expDiff) {\n"
660b8e80941Smrg    "         if (aExp == 0x7FF) {\n"
661b8e80941Smrg    "            bool propagate = (aFracHi | aFracLo) != 0u;\n"
662b8e80941Smrg    "            return mix(a, __propagateFloat64NaN(a, b), propagate);\n"
663b8e80941Smrg    "         }\n"
664b8e80941Smrg    "         expDiff = mix(expDiff, expDiff - 1, bExp == 0);\n"
665b8e80941Smrg    "         bFracHi = mix(bFracHi | 0x40000000u, bFracHi, bExp == 0);\n"
666b8e80941Smrg    "         __shift64RightJamming(bFracHi, bFracLo, expDiff, bFracHi, bFracLo);\n"
667b8e80941Smrg    "         aFracHi |= 0x40000000u;\n"
668b8e80941Smrg    "         __sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);\n"
669b8e80941Smrg    "         zExp = aExp;\n"
670b8e80941Smrg    "         --zExp;\n"
671b8e80941Smrg    "         return __normalizeRoundAndPackFloat64(aSign, zExp - 10, zFrac0, zFrac1);\n"
672b8e80941Smrg    "      }\n"
673b8e80941Smrg    "      if (expDiff < 0) {\n"
674b8e80941Smrg    "         if (bExp == 0x7FF) {\n"
675b8e80941Smrg    "            bool propagate = (bFracHi | bFracLo) != 0u;\n"
676b8e80941Smrg    "            return mix(__packFloat64(aSign ^ 1u, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate);\n"
677b8e80941Smrg    "         }\n"
678b8e80941Smrg    "         expDiff = mix(expDiff, expDiff + 1, aExp == 0);\n"
679b8e80941Smrg    "         aFracHi = mix(aFracHi | 0x40000000u, aFracHi, aExp == 0);\n"
680b8e80941Smrg    "         __shift64RightJamming(aFracHi, aFracLo, - expDiff, aFracHi, aFracLo);\n"
681b8e80941Smrg    "         bFracHi |= 0x40000000u;\n"
682b8e80941Smrg    "         __sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);\n"
683b8e80941Smrg    "         zExp = bExp;\n"
684b8e80941Smrg    "         aSign ^= 1u;\n"
685b8e80941Smrg    "         --zExp;\n"
686b8e80941Smrg    "         return __normalizeRoundAndPackFloat64(aSign, zExp - 10, zFrac0, zFrac1);\n"
687b8e80941Smrg    "      }\n"
688b8e80941Smrg    "      if (aExp == 0x7FF) {\n"
689b8e80941Smrg    "          bool propagate = (aFracHi | aFracLo | bFracHi | bFracLo) != 0u;\n"
690b8e80941Smrg    "         return mix(0xFFFFFFFFFFFFFFFFUL, __propagateFloat64NaN(a, b), propagate);\n"
691b8e80941Smrg    "      }\n"
692b8e80941Smrg    "      bExp = mix(bExp, 1, aExp == 0);\n"
693b8e80941Smrg    "      aExp = mix(aExp, 1, aExp == 0);\n"
694b8e80941Smrg    "      bool zexp_normal = false;\n"
695b8e80941Smrg    "      bool blta = true;\n"
696b8e80941Smrg    "      if (bFracHi < aFracHi) {\n"
697b8e80941Smrg    "         __sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);\n"
698b8e80941Smrg    "         zexp_normal = true;\n"
699b8e80941Smrg    "      }\n"
700b8e80941Smrg    "      else if (aFracHi < bFracHi) {\n"
701b8e80941Smrg    "         __sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);\n"
702b8e80941Smrg    "         blta = false;\n"
703b8e80941Smrg    "         zexp_normal = true;\n"
704b8e80941Smrg    "      }\n"
705b8e80941Smrg    "      else if (bFracLo < aFracLo) {\n"
706b8e80941Smrg    "         __sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);\n"
707b8e80941Smrg    "         zexp_normal = true;\n"
708b8e80941Smrg    "      }\n"
709b8e80941Smrg    "      else if (aFracLo < bFracLo) {\n"
710b8e80941Smrg    "         __sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);\n"
711b8e80941Smrg    "          blta = false;\n"
712b8e80941Smrg    "          zexp_normal = true;\n"
713b8e80941Smrg    "      }\n"
714b8e80941Smrg    "      zExp = mix(bExp, aExp, blta);\n"
715b8e80941Smrg    "      aSign = mix(aSign ^ 1u, aSign, blta);\n"
716b8e80941Smrg    "      uint64_t retval_0 = __packFloat64(uint(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN), 0, 0u, 0u);\n"
717b8e80941Smrg    "      uint64_t retval_1 = __normalizeRoundAndPackFloat64(aSign, zExp - 11, zFrac0, zFrac1);\n"
718b8e80941Smrg    "      return mix(retval_0, retval_1, zexp_normal);\n"
719b8e80941Smrg    "   }\n"
720b8e80941Smrg    "}\n"
721b8e80941Smrg    "\n"
722b8e80941Smrg    "/* Multiplies `a' by `b' to obtain a 64-bit product.  The product is broken\n"
723b8e80941Smrg    " * into two 32-bit pieces which are stored at the locations pointed to by\n"
724b8e80941Smrg    " * `z0Ptr' and `z1Ptr'.\n"
725b8e80941Smrg    " */\n"
726b8e80941Smrg    "void\n"
727b8e80941Smrg    "__mul32To64(uint a, uint b, out uint z0Ptr, out uint z1Ptr)\n"
728b8e80941Smrg    "{\n"
729b8e80941Smrg    "   uint aLow = a & 0x0000FFFFu;\n"
730b8e80941Smrg    "   uint aHigh = a>>16;\n"
731b8e80941Smrg    "   uint bLow = b & 0x0000FFFFu;\n"
732b8e80941Smrg    "   uint bHigh = b>>16;\n"
733b8e80941Smrg    "   uint z1 = aLow * bLow;\n"
734b8e80941Smrg    "   uint zMiddleA = aLow * bHigh;\n"
735b8e80941Smrg    "   uint zMiddleB = aHigh * bLow;\n"
736b8e80941Smrg    "   uint z0 = aHigh * bHigh;\n"
737b8e80941Smrg    "   zMiddleA += zMiddleB;\n"
738b8e80941Smrg    "   z0 += ((uint(zMiddleA < zMiddleB)) << 16) + (zMiddleA >> 16);\n"
739b8e80941Smrg    "   zMiddleA <<= 16;\n"
740b8e80941Smrg    "   z1 += zMiddleA;\n"
741b8e80941Smrg    "   z0 += uint(z1 < zMiddleA);\n"
742b8e80941Smrg    "   z1Ptr = z1;\n"
743b8e80941Smrg    "   z0Ptr = z0;\n"
744b8e80941Smrg    "}\n"
745b8e80941Smrg    "\n"
746b8e80941Smrg    "/* Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the\n"
747b8e80941Smrg    " * 64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit\n"
748b8e80941Smrg    " * product.  The product is broken into four 32-bit pieces which are stored at\n"
749b8e80941Smrg    " * the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.\n"
750b8e80941Smrg    " */\n"
751b8e80941Smrg    "void\n"
752b8e80941Smrg    "__mul64To128(uint a0, uint a1, uint b0, uint b1,\n"
753b8e80941Smrg    "             out uint z0Ptr,\n"
754b8e80941Smrg    "             out uint z1Ptr,\n"
755b8e80941Smrg    "             out uint z2Ptr,\n"
756b8e80941Smrg    "             out uint z3Ptr)\n"
757b8e80941Smrg    "{\n"
758b8e80941Smrg    "   uint z0 = 0u;\n"
759b8e80941Smrg    "   uint z1 = 0u;\n"
760b8e80941Smrg    "   uint z2 = 0u;\n"
761b8e80941Smrg    "   uint z3 = 0u;\n"
762b8e80941Smrg    "   uint more1 = 0u;\n"
763b8e80941Smrg    "   uint more2 = 0u;\n"
764b8e80941Smrg    "\n"
765b8e80941Smrg    "   __mul32To64(a1, b1, z2, z3);\n"
766b8e80941Smrg    "   __mul32To64(a1, b0, z1, more2);\n"
767b8e80941Smrg    "   __add64(z1, more2, 0u, z2, z1, z2);\n"
768b8e80941Smrg    "   __mul32To64(a0, b0, z0, more1);\n"
769b8e80941Smrg    "   __add64(z0, more1, 0u, z1, z0, z1);\n"
770b8e80941Smrg    "   __mul32To64(a0, b1, more1, more2);\n"
771b8e80941Smrg    "   __add64(more1, more2, 0u, z2, more1, z2);\n"
772b8e80941Smrg    "   __add64(z0, z1, 0u, more1, z0, z1);\n"
773b8e80941Smrg    "   z3Ptr = z3;\n"
774b8e80941Smrg    "   z2Ptr = z2;\n"
775b8e80941Smrg    "   z1Ptr = z1;\n"
776b8e80941Smrg    "   z0Ptr = z0;\n"
777b8e80941Smrg    "}\n"
778b8e80941Smrg    "\n"
779b8e80941Smrg    "/* Normalizes the subnormal double-precision floating-point value represented\n"
780b8e80941Smrg    " * by the denormalized significand formed by the concatenation of `aFrac0' and\n"
781b8e80941Smrg    " * `aFrac1'.  The normalized exponent is stored at the location pointed to by\n"
782b8e80941Smrg    " * `zExpPtr'.  The most significant 21 bits of the normalized significand are\n"
783b8e80941Smrg    " * stored at the location pointed to by `zFrac0Ptr', and the least significant\n"
784b8e80941Smrg    " * 32 bits of the normalized significand are stored at the location pointed to\n"
785b8e80941Smrg    " * by `zFrac1Ptr'.\n"
786b8e80941Smrg    " */\n"
787b8e80941Smrg    "void\n"
788b8e80941Smrg    "__normalizeFloat64Subnormal(uint aFrac0, uint aFrac1,\n"
789b8e80941Smrg    "                            out int zExpPtr,\n"
790b8e80941Smrg    "                            out uint zFrac0Ptr,\n"
791b8e80941Smrg    "                            out uint zFrac1Ptr)\n"
792b8e80941Smrg    "{\n"
793b8e80941Smrg    "   int shiftCount;\n"
794b8e80941Smrg    "   uint temp_zfrac0, temp_zfrac1;\n"
795b8e80941Smrg    "   shiftCount = __countLeadingZeros32(mix(aFrac0, aFrac1, aFrac0 == 0u)) - 11;\n"
796b8e80941Smrg    "   zExpPtr = mix(1 - shiftCount, -shiftCount - 31, aFrac0 == 0u);\n"
797b8e80941Smrg    "\n"
798b8e80941Smrg    "   temp_zfrac0 = mix(aFrac1<<shiftCount, aFrac1>>(-shiftCount), shiftCount < 0);\n"
799b8e80941Smrg    "   temp_zfrac1 = mix(0u, aFrac1<<(shiftCount & 31), shiftCount < 0);\n"
800b8e80941Smrg    "\n"
801b8e80941Smrg    "   __shortShift64Left(aFrac0, aFrac1, shiftCount, zFrac0Ptr, zFrac1Ptr);\n"
802b8e80941Smrg    "\n"
803b8e80941Smrg    "   zFrac0Ptr = mix(zFrac0Ptr, temp_zfrac0, aFrac0 == 0);\n"
804b8e80941Smrg    "   zFrac1Ptr = mix(zFrac1Ptr, temp_zfrac1, aFrac0 == 0);\n"
805b8e80941Smrg    "}\n"
806b8e80941Smrg    "\n"
807b8e80941Smrg    "/* Returns the result of multiplying the double-precision floating-point values\n"
808b8e80941Smrg    " * `a' and `b'.  The operation is performed according to the IEEE Standard for\n"
809b8e80941Smrg    " * Floating-Point Arithmetic.\n"
810b8e80941Smrg    " */\n"
811b8e80941Smrg    "uint64_t\n"
812b8e80941Smrg    "__fmul64(uint64_t a, uint64_t b)\n"
813b8e80941Smrg    "{\n"
814b8e80941Smrg    "   uint zFrac0 = 0u;\n"
815b8e80941Smrg    "   uint zFrac1 = 0u;\n"
816b8e80941Smrg    "   uint zFrac2 = 0u;\n"
817b8e80941Smrg    "   uint zFrac3 = 0u;\n"
818b8e80941Smrg    "   int zExp;\n"
819b8e80941Smrg    "\n"
820b8e80941Smrg    "   uint aFracLo = __extractFloat64FracLo(a);\n"
821b8e80941Smrg    "   uint aFracHi = __extractFloat64FracHi(a);\n"
822b8e80941Smrg    "   uint bFracLo = __extractFloat64FracLo(b);\n"
823b8e80941Smrg    "   uint bFracHi = __extractFloat64FracHi(b);\n"
824b8e80941Smrg    "   int aExp = __extractFloat64Exp(a);\n"
825b8e80941Smrg    "   uint aSign = __extractFloat64Sign(a);\n"
826b8e80941Smrg    "   int bExp = __extractFloat64Exp(b);\n"
827b8e80941Smrg    "   uint bSign = __extractFloat64Sign(b);\n"
828b8e80941Smrg    "   uint zSign = aSign ^ bSign;\n"
829b8e80941Smrg    "   if (aExp == 0x7FF) {\n"
830b8e80941Smrg    "      if (((aFracHi | aFracLo) != 0u) ||\n"
831b8e80941Smrg    "         ((bExp == 0x7FF) && ((bFracHi | bFracLo) != 0u))) {\n"
832b8e80941Smrg    "         return __propagateFloat64NaN(a, b);\n"
833b8e80941Smrg    "      }\n"
834b8e80941Smrg    "      if ((uint(bExp) | bFracHi | bFracLo) == 0u)\n"
835b8e80941Smrg    "            return 0xFFFFFFFFFFFFFFFFUL;\n"
836b8e80941Smrg    "      return __packFloat64(zSign, 0x7FF, 0u, 0u);\n"
837b8e80941Smrg    "   }\n"
838b8e80941Smrg    "   if (bExp == 0x7FF) {\n"
839b8e80941Smrg    "      if ((bFracHi | bFracLo) != 0u)\n"
840b8e80941Smrg    "         return __propagateFloat64NaN(a, b);\n"
841b8e80941Smrg    "      if ((uint(aExp) | aFracHi | aFracLo) == 0u)\n"
842b8e80941Smrg    "         return 0xFFFFFFFFFFFFFFFFUL;\n"
843b8e80941Smrg    "      return __packFloat64(zSign, 0x7FF, 0u, 0u);\n"
844b8e80941Smrg    "   }\n"
845b8e80941Smrg    "   if (aExp == 0) {\n"
846b8e80941Smrg    "      if ((aFracHi | aFracLo) == 0u)\n"
847b8e80941Smrg    "         return __packFloat64(zSign, 0, 0u, 0u);\n"
848b8e80941Smrg    "      __normalizeFloat64Subnormal(aFracHi, aFracLo, aExp, aFracHi, aFracLo);\n"
849b8e80941Smrg    "   }\n"
850b8e80941Smrg    "   if (bExp == 0) {\n"
851b8e80941Smrg    "      if ((bFracHi | bFracLo) == 0u)\n"
852b8e80941Smrg    "         return __packFloat64(zSign, 0, 0u, 0u);\n"
853b8e80941Smrg    "      __normalizeFloat64Subnormal(bFracHi, bFracLo, bExp, bFracHi, bFracLo);\n"
854b8e80941Smrg    "   }\n"
855b8e80941Smrg    "   zExp = aExp + bExp - 0x400;\n"
856b8e80941Smrg    "   aFracHi |= 0x00100000u;\n"
857b8e80941Smrg    "   __shortShift64Left(bFracHi, bFracLo, 12, bFracHi, bFracLo);\n"
858b8e80941Smrg    "   __mul64To128(\n"
859b8e80941Smrg    "      aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1, zFrac2, zFrac3);\n"
860b8e80941Smrg    "   __add64(zFrac0, zFrac1, aFracHi, aFracLo, zFrac0, zFrac1);\n"
861b8e80941Smrg    "   zFrac2 |= uint(zFrac3 != 0u);\n"
862b8e80941Smrg    "   if (0x00200000u <= zFrac0) {\n"
863b8e80941Smrg    "      __shift64ExtraRightJamming(\n"
864b8e80941Smrg    "         zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2);\n"
865b8e80941Smrg    "      ++zExp;\n"
866b8e80941Smrg    "   }\n"
867b8e80941Smrg    "   return __roundAndPackFloat64(zSign, zExp, zFrac0, zFrac1, zFrac2);\n"
868b8e80941Smrg    "}\n"
869b8e80941Smrg    "\n"
870b8e80941Smrg    "uint64_t\n"
871b8e80941Smrg    "__ffma64(uint64_t a, uint64_t b, uint64_t c)\n"
872b8e80941Smrg    "{\n"
873b8e80941Smrg    "   return __fadd64(__fmul64(a, b), c);\n"
874b8e80941Smrg    "}\n"
875b8e80941Smrg    "\n"
876b8e80941Smrg    "/* Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the\n"
877b8e80941Smrg    " * number of bits given in `count'.  Any bits shifted off are lost.  The value\n"
878b8e80941Smrg    " * of `count' can be arbitrarily large; in particular, if `count' is greater\n"
879b8e80941Smrg    " * than 64, the result will be 0.  The result is broken into two 32-bit pieces\n"
880b8e80941Smrg    " * which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.\n"
881b8e80941Smrg    " */\n"
882b8e80941Smrg    "void\n"
883b8e80941Smrg    "__shift64Right(uint a0, uint a1,\n"
884b8e80941Smrg    "               int count,\n"
885b8e80941Smrg    "               out uint z0Ptr,\n"
886b8e80941Smrg    "               out uint z1Ptr)\n"
887b8e80941Smrg    "{\n"
888b8e80941Smrg    "   uint z0;\n"
889b8e80941Smrg    "   uint z1;\n"
890b8e80941Smrg    "   int negCount = (-count) & 31;\n"
891b8e80941Smrg    "\n"
892b8e80941Smrg    "   z0 = 0u;\n"
893b8e80941Smrg    "   z0 = mix(z0, (a0 >> count), count < 32);\n"
894b8e80941Smrg    "   z0 = mix(z0, a0, count == 0);\n"
895b8e80941Smrg    "\n"
896b8e80941Smrg    "   z1 = mix(0u, (a0 >> (count & 31)), count < 64);\n"
897b8e80941Smrg    "   z1 = mix(z1, (a0<<negCount) | (a1>>count), count < 32);\n"
898b8e80941Smrg    "   z1 = mix(z1, a0, count == 0);\n"
899b8e80941Smrg    "\n"
900b8e80941Smrg    "   z1Ptr = z1;\n"
901b8e80941Smrg    "   z0Ptr = z0;\n"
902b8e80941Smrg    "}\n"
903b8e80941Smrg    "\n"
904b8e80941Smrg    "/* Returns the result of converting the double-precision floating-point value\n"
905b8e80941Smrg    " * `a' to the unsigned integer format.  The conversion is performed according\n"
906b8e80941Smrg    " * to the IEEE Standard for Floating-Point Arithmetic.\n"
907b8e80941Smrg    " */\n"
908b8e80941Smrg    "uint\n"
909b8e80941Smrg    "__fp64_to_uint(uint64_t a)\n"
910b8e80941Smrg    "{\n"
911b8e80941Smrg    "   uint aFracLo = __extractFloat64FracLo(a);\n"
912b8e80941Smrg    "   uint aFracHi = __extractFloat64FracHi(a);\n"
913b8e80941Smrg    "   int aExp = __extractFloat64Exp(a);\n"
914b8e80941Smrg    "   uint aSign = __extractFloat64Sign(a);\n"
915b8e80941Smrg    "\n"
916b8e80941Smrg    "   if ((aExp == 0x7FF) && ((aFracHi | aFracLo) != 0u))\n"
917b8e80941Smrg    "      return 0xFFFFFFFFu;\n"
918b8e80941Smrg    "\n"
919b8e80941Smrg    "   aFracHi |= mix(0u, 0x00100000u, aExp != 0);\n"
920b8e80941Smrg    "\n"
921b8e80941Smrg    "   int shiftDist = 0x427 - aExp;\n"
922b8e80941Smrg    "   if (0 < shiftDist)\n"
923b8e80941Smrg    "      __shift64RightJamming(aFracHi, aFracLo, shiftDist, aFracHi, aFracLo);\n"
924b8e80941Smrg    "\n"
925b8e80941Smrg    "   if ((aFracHi & 0xFFFFF000u) != 0u)\n"
926b8e80941Smrg    "      return mix(~0u, 0u, (aSign != 0u));\n"
927b8e80941Smrg    "\n"
928b8e80941Smrg    "   uint z = 0u;\n"
929b8e80941Smrg    "   uint zero = 0u;\n"
930b8e80941Smrg    "   __shift64Right(aFracHi, aFracLo, 12, zero, z);\n"
931b8e80941Smrg    "\n"
932b8e80941Smrg    "   uint expt = mix(~0u, 0u, (aSign != 0u));\n"
933b8e80941Smrg    "\n"
934b8e80941Smrg    "   return mix(z, expt, (aSign != 0u) && (z != 0u));\n"
935b8e80941Smrg    "}\n"
936b8e80941Smrg    "\n"
937b8e80941Smrg    "uint64_t\n"
938b8e80941Smrg    "__uint_to_fp64(uint a)\n"
939b8e80941Smrg    "{\n"
940b8e80941Smrg    "   if (a == 0u)\n"
941b8e80941Smrg    "      return 0ul;\n"
942b8e80941Smrg    "\n"
943b8e80941Smrg    "   int shiftDist = __countLeadingZeros32(a) + 21;\n"
944b8e80941Smrg    "\n"
945b8e80941Smrg    "   uint aHigh = 0u;\n"
946b8e80941Smrg    "   uint aLow = 0u;\n"
947b8e80941Smrg    "   int negCount = (- shiftDist) & 31;\n"
948b8e80941Smrg    "\n"
949b8e80941Smrg    "   aHigh = mix(0u, a<< shiftDist - 32, shiftDist < 64);\n"
950b8e80941Smrg    "   aLow = 0u;\n"
951b8e80941Smrg    "   aHigh = mix(aHigh, 0u, shiftDist == 0);\n"
952b8e80941Smrg    "   aLow = mix(aLow, a, shiftDist ==0);\n"
953b8e80941Smrg    "   aHigh = mix(aHigh, a >> negCount, shiftDist < 32);\n"
954b8e80941Smrg    "   aLow = mix(aLow, a << shiftDist, shiftDist < 32);\n"
955b8e80941Smrg    "\n"
956b8e80941Smrg    "   return __packFloat64(0u, 0x432 - shiftDist, aHigh, aLow);\n"
957b8e80941Smrg    "}\n"
958b8e80941Smrg    "\n"
959b8e80941Smrg    "uint64_t\n"
960b8e80941Smrg    "__uint64_to_fp64(uint64_t a)\n"
961b8e80941Smrg    "{\n"
962b8e80941Smrg    "   if (a == 0u)\n"
963b8e80941Smrg    "      return 0ul;\n"
964b8e80941Smrg    "\n"
965b8e80941Smrg    "   uvec2 aFrac = unpackUint2x32(a);\n"
966b8e80941Smrg    "   uint aFracLo = __extractFloat64FracLo(a);\n"
967b8e80941Smrg    "   uint aFracHi = __extractFloat64FracHi(a);\n"
968b8e80941Smrg    "\n"
969b8e80941Smrg    "   if ((aFracHi & 0x80000000u) != 0u) {\n"
970b8e80941Smrg    "      __shift64RightJamming(aFracHi, aFracLo, 1, aFracHi, aFracLo);\n"
971b8e80941Smrg    "      return __roundAndPackFloat64(0, 0x433, aFracHi, aFracLo, 0u);\n"
972b8e80941Smrg    "   } else {\n"
973b8e80941Smrg    "      return __normalizeRoundAndPackFloat64(0, 0x432, aFrac.y, aFrac.x);\n"
974b8e80941Smrg    "   }\n"
975b8e80941Smrg    "}\n"
976b8e80941Smrg    "\n"
977b8e80941Smrg    "uint64_t\n"
978b8e80941Smrg    "__fp64_to_uint64(uint64_t a)\n"
979b8e80941Smrg    "{\n"
980b8e80941Smrg    "   uint aFracLo = __extractFloat64FracLo(a);\n"
981b8e80941Smrg    "   uint aFracHi = __extractFloat64FracHi(a);\n"
982b8e80941Smrg    "   int aExp = __extractFloat64Exp(a);\n"
983b8e80941Smrg    "   uint aSign = __extractFloat64Sign(a);\n"
984b8e80941Smrg    "   uint zFrac2 = 0u;\n"
985b8e80941Smrg    "   uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;\n"
986b8e80941Smrg    "\n"
987b8e80941Smrg    "   aFracHi = mix(aFracHi, aFracHi | 0x00100000u, aExp != 0);\n"
988b8e80941Smrg    "   int shiftCount = 0x433 - aExp;\n"
989b8e80941Smrg    "\n"
990b8e80941Smrg    "   if ( shiftCount <= 0 ) {\n"
991b8e80941Smrg    "      if (shiftCount < -11 && aExp == 0x7FF) {\n"
992b8e80941Smrg    "         if ((aFracHi | aFracLo) != 0u)\n"
993b8e80941Smrg    "            return __propagateFloat64NaN(a, a);\n"
994b8e80941Smrg    "         return mix(default_nan, a, aSign == 0u);\n"
995b8e80941Smrg    "      }\n"
996b8e80941Smrg    "      __shortShift64Left(aFracHi, aFracLo, -shiftCount, aFracHi, aFracLo);\n"
997b8e80941Smrg    "   } else {\n"
998b8e80941Smrg    "      __shift64ExtraRightJamming(aFracHi, aFracLo, zFrac2, shiftCount,\n"
999b8e80941Smrg    "                                 aFracHi, aFracLo, zFrac2);\n"
1000b8e80941Smrg    "   }\n"
1001b8e80941Smrg    "   return __roundAndPackUInt64(aSign, aFracHi, aFracLo, zFrac2);\n"
1002b8e80941Smrg    "}\n"
1003b8e80941Smrg    "\n"
1004b8e80941Smrg    "int64_t\n"
1005b8e80941Smrg    "__fp64_to_int64(uint64_t a)\n"
1006b8e80941Smrg    "{\n"
1007b8e80941Smrg    "   uint zFrac2 = 0u;\n"
1008b8e80941Smrg    "   uint aFracLo = __extractFloat64FracLo(a);\n"
1009b8e80941Smrg    "   uint aFracHi = __extractFloat64FracHi(a);\n"
1010b8e80941Smrg    "   int aExp = __extractFloat64Exp(a);\n"
1011b8e80941Smrg    "   uint aSign = __extractFloat64Sign(a);\n"
1012b8e80941Smrg    "   int64_t default_NegNaN = -0x7FFFFFFFFFFFFFFEL;\n"
1013b8e80941Smrg    "   int64_t default_PosNaN = 0xFFFFFFFFFFFFFFFFL;\n"
1014b8e80941Smrg    "\n"
1015b8e80941Smrg    "   aFracHi = mix(aFracHi, aFracHi | 0x00100000u, aExp != 0);\n"
1016b8e80941Smrg    "   int shiftCount = 0x433 - aExp;\n"
1017b8e80941Smrg    "\n"
1018b8e80941Smrg    "   if (shiftCount <= 0) {\n"
1019b8e80941Smrg    "      if (shiftCount < -11 && aExp == 0x7FF) {\n"
1020b8e80941Smrg    "         if ((aFracHi | aFracLo) != 0u)\n"
1021b8e80941Smrg    "            return default_NegNaN;\n"
1022b8e80941Smrg    "         return mix(default_NegNaN, default_PosNaN, aSign == 0u);\n"
1023b8e80941Smrg    "      }\n"
1024b8e80941Smrg    "      __shortShift64Left(aFracHi, aFracLo, -shiftCount, aFracHi, aFracLo);\n"
1025b8e80941Smrg    "   } else {\n"
1026b8e80941Smrg    "      __shift64ExtraRightJamming(aFracHi, aFracLo, zFrac2, shiftCount,\n"
1027b8e80941Smrg    "                                 aFracHi, aFracLo, zFrac2);\n"
1028b8e80941Smrg    "   }\n"
1029b8e80941Smrg    "\n"
1030b8e80941Smrg    "   return __roundAndPackInt64(aSign, aFracHi, aFracLo, zFrac2);\n"
1031b8e80941Smrg    "}\n"
1032b8e80941Smrg    "\n"
1033b8e80941Smrg    "uint64_t\n"
1034b8e80941Smrg    "__fp32_to_uint64(float f)\n"
1035b8e80941Smrg    "{\n"
1036b8e80941Smrg    "   uint a = floatBitsToUint(f);\n"
1037b8e80941Smrg    "   uint aFrac = a & 0x007FFFFFu;\n"
1038b8e80941Smrg    "   int aExp = int((a>>23) & 0xFFu);\n"
1039b8e80941Smrg    "   uint aSign = a>>31;\n"
1040b8e80941Smrg    "   uint zFrac0 = 0u;\n"
1041b8e80941Smrg    "   uint zFrac1 = 0u;\n"
1042b8e80941Smrg    "   uint zFrac2 = 0u;\n"
1043b8e80941Smrg    "   uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;\n"
1044b8e80941Smrg    "   int shiftCount = 0xBE - aExp;\n"
1045b8e80941Smrg    "\n"
1046b8e80941Smrg    "   if (shiftCount <0) {\n"
1047b8e80941Smrg    "      if (aExp == 0xFF)\n"
1048b8e80941Smrg    "         return default_nan;\n"
1049b8e80941Smrg    "   }\n"
1050b8e80941Smrg    "\n"
1051b8e80941Smrg    "   aFrac = mix(aFrac, aFrac | 0x00800000u, aExp != 0);\n"
1052b8e80941Smrg    "   __shortShift64Left(aFrac, 0, 40, zFrac0, zFrac1);\n"
1053b8e80941Smrg    "\n"
1054b8e80941Smrg    "   if (shiftCount != 0) {\n"
1055b8e80941Smrg    "      __shift64ExtraRightJamming(zFrac0, zFrac1, zFrac2, shiftCount,\n"
1056b8e80941Smrg    "                                 zFrac0, zFrac1, zFrac2);\n"
1057b8e80941Smrg    "   }\n"
1058b8e80941Smrg    "\n"
1059b8e80941Smrg    "   return __roundAndPackUInt64(aSign, zFrac0, zFrac1, zFrac2);\n"
1060b8e80941Smrg    "}\n"
1061b8e80941Smrg    "\n"
1062b8e80941Smrg    "int64_t\n"
1063b8e80941Smrg    "__fp32_to_int64(float f)\n"
1064b8e80941Smrg    "{\n"
1065b8e80941Smrg    "   uint a = floatBitsToUint(f);\n"
1066b8e80941Smrg    "   uint aFrac = a & 0x007FFFFFu;\n"
1067b8e80941Smrg    "   int aExp = int((a>>23) & 0xFFu);\n"
1068b8e80941Smrg    "   uint aSign = a>>31;\n"
1069b8e80941Smrg    "   uint zFrac0 = 0u;\n"
1070b8e80941Smrg    "   uint zFrac1 = 0u;\n"
1071b8e80941Smrg    "   uint zFrac2 = 0u;\n"
1072b8e80941Smrg    "   int64_t default_NegNaN = -0x7FFFFFFFFFFFFFFEL;\n"
1073b8e80941Smrg    "   int64_t default_PosNaN = 0xFFFFFFFFFFFFFFFFL;\n"
1074b8e80941Smrg    "   int shiftCount = 0xBE - aExp;\n"
1075b8e80941Smrg    "\n"
1076b8e80941Smrg    "   if (shiftCount <0) {\n"
1077b8e80941Smrg    "      if (aExp == 0xFF && aFrac != 0u)\n"
1078b8e80941Smrg    "         return default_NegNaN;\n"
1079b8e80941Smrg    "      return mix(default_NegNaN, default_PosNaN, aSign == 0u);\n"
1080b8e80941Smrg    "   }\n"
1081b8e80941Smrg    "\n"
1082b8e80941Smrg    "   aFrac = mix(aFrac, aFrac | 0x00800000u, aExp != 0);\n"
1083b8e80941Smrg    "   __shortShift64Left(aFrac, 0, 40, zFrac0, zFrac1);\n"
1084b8e80941Smrg    "\n"
1085b8e80941Smrg    "   if (shiftCount != 0) {\n"
1086b8e80941Smrg    "      __shift64ExtraRightJamming(zFrac0, zFrac1, zFrac2, shiftCount,\n"
1087b8e80941Smrg    "                                 zFrac0, zFrac1, zFrac2);\n"
1088b8e80941Smrg    "   }\n"
1089b8e80941Smrg    "\n"
1090b8e80941Smrg    "   return __roundAndPackInt64(aSign, zFrac0, zFrac1, zFrac2);\n"
1091b8e80941Smrg    "}\n"
1092b8e80941Smrg    "\n"
1093b8e80941Smrg    "uint64_t\n"
1094b8e80941Smrg    "__int64_to_fp64(int64_t a)\n"
1095b8e80941Smrg    "{\n"
1096b8e80941Smrg    "   if (a==0)\n"
1097b8e80941Smrg    "      return 0ul;\n"
1098b8e80941Smrg    "\n"
1099b8e80941Smrg    "   uint64_t absA = mix(uint64_t(a), uint64_t(-a), a < 0);\n"
1100b8e80941Smrg    "   uint aFracHi = __extractFloat64FracHi(absA);\n"
1101b8e80941Smrg    "   uvec2 aFrac = unpackUint2x32(absA);\n"
1102b8e80941Smrg    "   uint zSign = uint(a < 0);\n"
1103b8e80941Smrg    "\n"
1104b8e80941Smrg    "   if ((aFracHi & 0x80000000u) != 0u) {\n"
1105b8e80941Smrg    "      return mix(0ul, __packFloat64(1, 0x434, 0u, 0u), a < 0);\n"
1106b8e80941Smrg    "   }\n"
1107b8e80941Smrg    "\n"
1108b8e80941Smrg    "   return __normalizeRoundAndPackFloat64(zSign, 0x432, aFrac.y, aFrac.x);\n"
1109b8e80941Smrg    "}\n"
1110b8e80941Smrg    "\n"
1111b8e80941Smrg    "/* Returns the result of converting the double-precision floating-point value\n"
1112b8e80941Smrg    " * `a' to the 32-bit two's complement integer format.  The conversion is\n"
1113b8e80941Smrg    " * performed according to the IEEE Standard for Floating-Point Arithmetic---\n"
1114b8e80941Smrg    " * which means in particular that the conversion is rounded according to the\n"
1115b8e80941Smrg    " * current rounding mode.  If `a' is a NaN, the largest positive integer is\n"
1116b8e80941Smrg    " * returned.  Otherwise, if the conversion overflows, the largest integer with\n"
1117b8e80941Smrg    " * the same sign as `a' is returned.\n"
1118b8e80941Smrg    " */\n"
1119b8e80941Smrg    "int\n"
1120b8e80941Smrg    "__fp64_to_int(uint64_t a)\n"
1121b8e80941Smrg    "{\n"
1122b8e80941Smrg    "   uint aFracLo = __extractFloat64FracLo(a);\n"
1123b8e80941Smrg    "   uint aFracHi = __extractFloat64FracHi(a);\n"
1124b8e80941Smrg    "   int aExp = __extractFloat64Exp(a);\n"
1125b8e80941Smrg    "   uint aSign = __extractFloat64Sign(a);\n"
1126b8e80941Smrg    "\n"
1127b8e80941Smrg    "   uint absZ = 0u;\n"
1128b8e80941Smrg    "   uint aFracExtra = 0u;\n"
1129b8e80941Smrg    "   int shiftCount = aExp - 0x413;\n"
1130b8e80941Smrg    "\n"
1131b8e80941Smrg    "   if (0 <= shiftCount) {\n"
1132b8e80941Smrg    "      if (0x41E < aExp) {\n"
1133b8e80941Smrg    "         if ((aExp == 0x7FF) && bool(aFracHi | aFracLo))\n"
1134b8e80941Smrg    "            aSign = 0u;\n"
1135b8e80941Smrg    "         return mix(0x7FFFFFFF, 0x80000000, bool(aSign));\n"
1136b8e80941Smrg    "      }\n"
1137b8e80941Smrg    "      __shortShift64Left(aFracHi | 0x00100000u, aFracLo, shiftCount, absZ, aFracExtra);\n"
1138b8e80941Smrg    "   } else {\n"
1139b8e80941Smrg    "      if (aExp < 0x3FF)\n"
1140b8e80941Smrg    "         return 0;\n"
1141b8e80941Smrg    "\n"
1142b8e80941Smrg    "      aFracHi |= 0x00100000u;\n"
1143b8e80941Smrg    "      aFracExtra = ( aFracHi << (shiftCount & 31)) | aFracLo;\n"
1144b8e80941Smrg    "      absZ = aFracHi >> (- shiftCount);\n"
1145b8e80941Smrg    "   }\n"
1146b8e80941Smrg    "\n"
1147b8e80941Smrg    "   int z = mix(int(absZ), -int(absZ), (aSign != 0u));\n"
1148b8e80941Smrg    "   int nan = mix(0x7FFFFFFF, 0x80000000, bool(aSign));\n"
1149b8e80941Smrg    "   return mix(z, nan, bool(aSign ^ uint(z < 0)) && bool(z));\n"
1150b8e80941Smrg    "}\n"
1151b8e80941Smrg    "\n"
1152b8e80941Smrg    "/* Returns the result of converting the 32-bit two's complement integer `a'\n"
1153b8e80941Smrg    " * to the double-precision floating-point format.  The conversion is performed\n"
1154b8e80941Smrg    " * according to the IEEE Standard for Floating-Point Arithmetic.\n"
1155b8e80941Smrg    " */\n"
1156b8e80941Smrg    "uint64_t\n"
1157b8e80941Smrg    "__int_to_fp64(int a)\n"
1158b8e80941Smrg    "{\n"
1159b8e80941Smrg    "   uint zFrac0 = 0u;\n"
1160b8e80941Smrg    "   uint zFrac1 = 0u;\n"
1161b8e80941Smrg    "   if (a==0)\n"
1162b8e80941Smrg    "      return __packFloat64(0u, 0, 0u, 0u);\n"
1163b8e80941Smrg    "   uint zSign = uint(a < 0);\n"
1164b8e80941Smrg    "   uint absA = mix(uint(a), uint(-a), a < 0);\n"
1165b8e80941Smrg    "   int shiftCount = __countLeadingZeros32(absA) - 11;\n"
1166b8e80941Smrg    "   if (0 <= shiftCount) {\n"
1167b8e80941Smrg    "      zFrac0 = absA << shiftCount;\n"
1168b8e80941Smrg    "      zFrac1 = 0u;\n"
1169b8e80941Smrg    "   } else {\n"
1170b8e80941Smrg    "      __shift64Right(absA, 0u, -shiftCount, zFrac0, zFrac1);\n"
1171b8e80941Smrg    "   }\n"
1172b8e80941Smrg    "   return __packFloat64(zSign, 0x412 - shiftCount, zFrac0, zFrac1);\n"
1173b8e80941Smrg    "}\n"
1174b8e80941Smrg    "\n"
1175b8e80941Smrg    "bool\n"
1176b8e80941Smrg    "__fp64_to_bool(uint64_t a)\n"
1177b8e80941Smrg    "{\n"
1178b8e80941Smrg    "   return !__feq64_nonnan(__fabs64(a), 0ul);\n"
1179b8e80941Smrg    "}\n"
1180b8e80941Smrg    "\n"
1181b8e80941Smrg    "uint64_t\n"
1182b8e80941Smrg    "__bool_to_fp64(bool a)\n"
1183b8e80941Smrg    "{\n"
1184b8e80941Smrg    "   return __int_to_fp64(int(a));\n"
1185b8e80941Smrg    "}\n"
1186b8e80941Smrg    "\n"
1187b8e80941Smrg    "/* Packs the sign `zSign', exponent `zExp', and significand `zFrac' into a\n"
1188b8e80941Smrg    " * single-precision floating-point value, returning the result.  After being\n"
1189b8e80941Smrg    " * shifted into the proper positions, the three fields are simply added\n"
1190b8e80941Smrg    " * together to form the result.  This means that any integer portion of `zSig'\n"
1191b8e80941Smrg    " * will be added into the exponent.  Since a properly normalized significand\n"
1192b8e80941Smrg    " * will have an integer portion equal to 1, the `zExp' input should be 1 less\n"
1193b8e80941Smrg    " * than the desired result exponent whenever `zFrac' is a complete, normalized\n"
1194b8e80941Smrg    " * significand.\n"
1195b8e80941Smrg    " */\n"
1196b8e80941Smrg    "float\n"
1197b8e80941Smrg    "__packFloat32(uint zSign, int zExp, uint zFrac)\n"
1198b8e80941Smrg    "{\n"
1199b8e80941Smrg    "   return uintBitsToFloat((zSign<<31) + (uint(zExp)<<23) + zFrac);\n"
1200b8e80941Smrg    "}\n"
1201b8e80941Smrg    "\n"
1202b8e80941Smrg    "/* Takes an abstract floating-point value having sign `zSign', exponent `zExp',\n"
1203b8e80941Smrg    " * and significand `zFrac', and returns the proper single-precision floating-\n"
1204b8e80941Smrg    " * point value corresponding to the abstract input.  Ordinarily, the abstract\n"
1205b8e80941Smrg    " * value is simply rounded and packed into the single-precision format, with\n"
1206b8e80941Smrg    " * the inexact exception raised if the abstract input cannot be represented\n"
1207b8e80941Smrg    " * exactly.  However, if the abstract value is too large, the overflow and\n"
1208b8e80941Smrg    " * inexact exceptions are raised and an infinity or maximal finite value is\n"
1209b8e80941Smrg    " * returned.  If the abstract value is too small, the input value is rounded to\n"
1210b8e80941Smrg    " * a subnormal number, and the underflow and inexact exceptions are raised if\n"
1211b8e80941Smrg    " * the abstract input cannot be represented exactly as a subnormal single-\n"
1212b8e80941Smrg    " * precision floating-point number.\n"
1213b8e80941Smrg    " *     The input significand `zFrac' has its binary point between bits 30\n"
1214b8e80941Smrg    " * and 29, which is 7 bits to the left of the usual location.  This shifted\n"
1215b8e80941Smrg    " * significand must be normalized or smaller.  If `zFrac' is not normalized,\n"
1216b8e80941Smrg    " * `zExp' must be 0; in that case, the result returned is a subnormal number,\n"
1217b8e80941Smrg    " * and it must not require rounding.  In the usual case that `zFrac' is\n"
1218b8e80941Smrg    " * normalized, `zExp' must be 1 less than the \"true\" floating-point exponent.\n"
1219b8e80941Smrg    " * The handling of underflow and overflow follows the IEEE Standard for\n"
1220b8e80941Smrg    " * Floating-Point Arithmetic.\n"
1221b8e80941Smrg    " */\n"
1222b8e80941Smrg    "float\n"
1223b8e80941Smrg    "__roundAndPackFloat32(uint zSign, int zExp, uint zFrac)\n"
1224b8e80941Smrg    "{\n"
1225b8e80941Smrg    "   bool roundNearestEven;\n"
1226b8e80941Smrg    "   int roundIncrement;\n"
1227b8e80941Smrg    "   int roundBits;\n"
1228b8e80941Smrg    "\n"
1229b8e80941Smrg    "   roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;\n"
1230b8e80941Smrg    "   roundIncrement = 0x40;\n"
1231b8e80941Smrg    "   if (!roundNearestEven) {\n"
1232b8e80941Smrg    "      if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) {\n"
1233b8e80941Smrg    "         roundIncrement = 0;\n"
1234b8e80941Smrg    "      } else {\n"
1235b8e80941Smrg    "         roundIncrement = 0x7F;\n"
1236b8e80941Smrg    "         if (zSign != 0u) {\n"
1237b8e80941Smrg    "            if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP)\n"
1238b8e80941Smrg    "               roundIncrement = 0;\n"
1239b8e80941Smrg    "         } else {\n"
1240b8e80941Smrg    "            if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN)\n"
1241b8e80941Smrg    "               roundIncrement = 0;\n"
1242b8e80941Smrg    "         }\n"
1243b8e80941Smrg    "      }\n"
1244b8e80941Smrg    "   }\n"
1245b8e80941Smrg    "   roundBits = int(zFrac & 0x7Fu);\n"
1246b8e80941Smrg    "   if (0xFDu <= uint(zExp)) {\n"
1247b8e80941Smrg    "      if ((0xFD < zExp) || ((zExp == 0xFD) && (int(zFrac) + roundIncrement) < 0))\n"
1248b8e80941Smrg    "         return __packFloat32(zSign, 0xFF, 0u) - float(roundIncrement == 0);\n"
1249b8e80941Smrg    "      int count = -zExp;\n"
1250b8e80941Smrg    "      bool zexp_lt0 = zExp < 0;\n"
1251b8e80941Smrg    "      uint zFrac_lt0 = mix(uint(zFrac != 0u), (zFrac>>count) | uint((zFrac<<((-count) & 31)) != 0u), (-zExp) < 32);\n"
1252b8e80941Smrg    "      zFrac = mix(zFrac, zFrac_lt0, zexp_lt0);\n"
1253b8e80941Smrg    "      roundBits = mix(roundBits, int(zFrac) & 0x7f, zexp_lt0);\n"
1254b8e80941Smrg    "      zExp = mix(zExp, 0, zexp_lt0);\n"
1255b8e80941Smrg    "   }\n"
1256b8e80941Smrg    "   zFrac = (zFrac + uint(roundIncrement))>>7;\n"
1257b8e80941Smrg    "   zFrac &= ~uint(((roundBits ^ 0x40) == 0) && roundNearestEven);\n"
1258b8e80941Smrg    "\n"
1259b8e80941Smrg    "   return __packFloat32(zSign, mix(zExp, 0, zFrac == 0u), zFrac);\n"
1260b8e80941Smrg    "}\n"
1261b8e80941Smrg    "\n"
1262b8e80941Smrg    "/* Returns the result of converting the double-precision floating-point value\n"
1263b8e80941Smrg    " * `a' to the single-precision floating-point format.  The conversion is\n"
1264b8e80941Smrg    " * performed according to the IEEE Standard for Floating-Point Arithmetic.\n"
1265b8e80941Smrg    " */\n"
1266b8e80941Smrg    "float\n"
1267b8e80941Smrg    "__fp64_to_fp32(uint64_t __a)\n"
1268b8e80941Smrg    "{\n"
1269b8e80941Smrg    "   uvec2 a = unpackUint2x32(__a);\n"
1270b8e80941Smrg    "   uint zFrac = 0u;\n"
1271b8e80941Smrg    "   uint allZero = 0u;\n"
1272b8e80941Smrg    "\n"
1273b8e80941Smrg    "   uint aFracLo = __extractFloat64FracLo(__a);\n"
1274b8e80941Smrg    "   uint aFracHi = __extractFloat64FracHi(__a);\n"
1275b8e80941Smrg    "   int aExp = __extractFloat64Exp(__a);\n"
1276b8e80941Smrg    "   uint aSign = __extractFloat64Sign(__a);\n"
1277b8e80941Smrg    "   if (aExp == 0x7FF) {\n"
1278b8e80941Smrg    "      __shortShift64Left(a.y, a.x, 12, a.y, a.x);\n"
1279b8e80941Smrg    "      float rval = uintBitsToFloat((aSign<<31) | 0x7FC00000u | (a.y>>9));\n"
1280b8e80941Smrg    "      rval = mix(__packFloat32(aSign, 0xFF, 0u), rval, (aFracHi | aFracLo) != 0u);\n"
1281b8e80941Smrg    "      return rval;\n"
1282b8e80941Smrg    "   }\n"
1283b8e80941Smrg    "   __shift64RightJamming(aFracHi, aFracLo, 22, allZero, zFrac);\n"
1284b8e80941Smrg    "   zFrac = mix(zFrac, zFrac | 0x40000000u, aExp != 0);\n"
1285b8e80941Smrg    "   return __roundAndPackFloat32(aSign, aExp - 0x381, zFrac);\n"
1286b8e80941Smrg    "}\n"
1287b8e80941Smrg    "\n"
1288b8e80941Smrg    "float\n"
1289b8e80941Smrg    "__uint64_to_fp32(uint64_t __a)\n"
1290b8e80941Smrg    "{\n"
1291b8e80941Smrg    "   uint zFrac = 0u;\n"
1292b8e80941Smrg    "   uvec2 aFrac = unpackUint2x32(__a);\n"
1293b8e80941Smrg    "   int shiftCount = __countLeadingZeros32(mix(aFrac.y, aFrac.x, aFrac.y == 0u));\n"
1294b8e80941Smrg    "   shiftCount -= mix(40, 8, aFrac.y == 0u);\n"
1295b8e80941Smrg    "\n"
1296b8e80941Smrg    "   if (0 <= shiftCount) {\n"
1297b8e80941Smrg    "      __shortShift64Left(aFrac.y, aFrac.x, shiftCount, aFrac.y, aFrac.x);\n"
1298b8e80941Smrg    "      bool is_zero = (aFrac.y | aFrac.x) == 0u;\n"
1299b8e80941Smrg    "      return mix(__packFloat32(0u, 0x95 - shiftCount, aFrac.x), 0, is_zero);\n"
1300b8e80941Smrg    "   }\n"
1301b8e80941Smrg    "\n"
1302b8e80941Smrg    "   shiftCount += 7;\n"
1303b8e80941Smrg    "   __shift64RightJamming(aFrac.y, aFrac.x, -shiftCount, aFrac.y, aFrac.x);\n"
1304b8e80941Smrg    "   zFrac = mix(aFrac.x<<shiftCount, aFrac.x, shiftCount < 0);\n"
1305b8e80941Smrg    "   return __roundAndPackFloat32(0u, 0x9C - shiftCount, zFrac);\n"
1306b8e80941Smrg    "}\n"
1307b8e80941Smrg    "\n"
1308b8e80941Smrg    "float\n"
1309b8e80941Smrg    "__int64_to_fp32(int64_t __a)\n"
1310b8e80941Smrg    "{\n"
1311b8e80941Smrg    "   uint zFrac = 0u;\n"
1312b8e80941Smrg    "   uint aSign = uint(__a < 0);\n"
1313b8e80941Smrg    "   uint64_t absA = mix(uint64_t(__a), uint64_t(-__a), __a < 0);\n"
1314b8e80941Smrg    "   uvec2 aFrac = unpackUint2x32(absA);\n"
1315b8e80941Smrg    "   int shiftCount = __countLeadingZeros32(mix(aFrac.y, aFrac.x, aFrac.y == 0u));\n"
1316b8e80941Smrg    "   shiftCount -= mix(40, 8, aFrac.y == 0u);\n"
1317b8e80941Smrg    "\n"
1318b8e80941Smrg    "   if (0 <= shiftCount) {\n"
1319b8e80941Smrg    "      __shortShift64Left(aFrac.y, aFrac.x, shiftCount, aFrac.y, aFrac.x);\n"
1320b8e80941Smrg    "      bool is_zero = (aFrac.y | aFrac.x) == 0u;\n"
1321b8e80941Smrg    "      return mix(__packFloat32(aSign, 0x95 - shiftCount, aFrac.x), 0, absA == 0u);\n"
1322b8e80941Smrg    "   }\n"
1323b8e80941Smrg    "\n"
1324b8e80941Smrg    "   shiftCount += 7;\n"
1325b8e80941Smrg    "   __shift64RightJamming(aFrac.y, aFrac.x, -shiftCount, aFrac.y, aFrac.x);\n"
1326b8e80941Smrg    "   zFrac = mix(aFrac.x<<shiftCount, aFrac.x, shiftCount < 0);\n"
1327b8e80941Smrg    "   return __roundAndPackFloat32(aSign, 0x9C - shiftCount, zFrac);\n"
1328b8e80941Smrg    "}\n"
1329b8e80941Smrg    "\n"
1330b8e80941Smrg    "/* Returns the result of converting the single-precision floating-point value\n"
1331b8e80941Smrg    " * `a' to the double-precision floating-point format.\n"
1332b8e80941Smrg    " */\n"
1333b8e80941Smrg    "uint64_t\n"
1334b8e80941Smrg    "__fp32_to_fp64(float f)\n"
1335b8e80941Smrg    "{\n"
1336b8e80941Smrg    "   uint a = floatBitsToUint(f);\n"
1337b8e80941Smrg    "   uint aFrac = a & 0x007FFFFFu;\n"
1338b8e80941Smrg    "   int aExp = int((a>>23) & 0xFFu);\n"
1339b8e80941Smrg    "   uint aSign = a>>31;\n"
1340b8e80941Smrg    "   uint zFrac0 = 0u;\n"
1341b8e80941Smrg    "   uint zFrac1 = 0u;\n"
1342b8e80941Smrg    "\n"
1343b8e80941Smrg    "   if (aExp == 0xFF) {\n"
1344b8e80941Smrg    "      if (aFrac != 0u) {\n"
1345b8e80941Smrg    "         uint nanLo = 0u;\n"
1346b8e80941Smrg    "         uint nanHi = a<<9;\n"
1347b8e80941Smrg    "         __shift64Right(nanHi, nanLo, 12, nanHi, nanLo);\n"
1348b8e80941Smrg    "         nanHi |= ((aSign<<31) | 0x7FF80000u);\n"
1349b8e80941Smrg    "         return packUint2x32(uvec2(nanLo, nanHi));\n"
1350b8e80941Smrg    "      }\n"
1351b8e80941Smrg    "      return __packFloat64(aSign, 0x7FF, 0u, 0u);\n"
1352b8e80941Smrg    "    }\n"
1353b8e80941Smrg    "\n"
1354b8e80941Smrg    "   if (aExp == 0) {\n"
1355b8e80941Smrg    "      if (aFrac == 0u)\n"
1356b8e80941Smrg    "         return __packFloat64(aSign, 0, 0u, 0u);\n"
1357b8e80941Smrg    "      /* Normalize subnormal */\n"
1358b8e80941Smrg    "      int shiftCount = __countLeadingZeros32(aFrac) - 8;\n"
1359b8e80941Smrg    "      aFrac <<= shiftCount;\n"
1360b8e80941Smrg    "      aExp = 1 - shiftCount;\n"
1361b8e80941Smrg    "      --aExp;\n"
1362b8e80941Smrg    "   }\n"
1363b8e80941Smrg    "\n"
1364b8e80941Smrg    "   __shift64Right(aFrac, 0u, 3, zFrac0, zFrac1);\n"
1365b8e80941Smrg    "   return __packFloat64(aSign, aExp + 0x380, zFrac0, zFrac1);\n"
1366b8e80941Smrg    "}\n"
1367b8e80941Smrg    "\n"
1368b8e80941Smrg    "/* Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the\n"
1369b8e80941Smrg    " * 96-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is\n"
1370b8e80941Smrg    " * modulo 2^96, so any carry out is lost.  The result is broken into three\n"
1371b8e80941Smrg    " * 32-bit pieces which are stored at the locations pointed to by `z0Ptr',\n"
1372b8e80941Smrg    " * `z1Ptr', and `z2Ptr'.\n"
1373b8e80941Smrg    " */\n"
1374b8e80941Smrg    "void\n"
1375b8e80941Smrg    "__add96(uint a0, uint a1, uint a2,\n"
1376b8e80941Smrg    "        uint b0, uint b1, uint b2,\n"
1377b8e80941Smrg    "        out uint z0Ptr,\n"
1378b8e80941Smrg    "        out uint z1Ptr,\n"
1379b8e80941Smrg    "        out uint z2Ptr)\n"
1380b8e80941Smrg    "{\n"
1381b8e80941Smrg    "   uint z2 = a2 + b2;\n"
1382b8e80941Smrg    "   uint carry1 = uint(z2 < a2);\n"
1383b8e80941Smrg    "   uint z1 = a1 + b1;\n"
1384b8e80941Smrg    "   uint carry0 = uint(z1 < a1);\n"
1385b8e80941Smrg    "   uint z0 = a0 + b0;\n"
1386b8e80941Smrg    "   z1 += carry1;\n"
1387b8e80941Smrg    "   z0 += uint(z1 < carry1);\n"
1388b8e80941Smrg    "   z0 += carry0;\n"
1389b8e80941Smrg    "   z2Ptr = z2;\n"
1390b8e80941Smrg    "   z1Ptr = z1;\n"
1391b8e80941Smrg    "   z0Ptr = z0;\n"
1392b8e80941Smrg    "}\n"
1393b8e80941Smrg    "\n"
1394b8e80941Smrg    "/* Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from\n"
1395b8e80941Smrg    " * the 96-bit value formed by concatenating `a0', `a1', and `a2'.  Subtraction\n"
1396b8e80941Smrg    " * is modulo 2^96, so any borrow out (carry out) is lost.  The result is broken\n"
1397b8e80941Smrg    " * into three 32-bit pieces which are stored at the locations pointed to by\n"
1398b8e80941Smrg    " * `z0Ptr', `z1Ptr', and `z2Ptr'.\n"
1399b8e80941Smrg    " */\n"
1400b8e80941Smrg    "void\n"
1401b8e80941Smrg    "__sub96(uint a0, uint a1, uint a2,\n"
1402b8e80941Smrg    "        uint b0, uint b1, uint b2,\n"
1403b8e80941Smrg    "        out uint z0Ptr,\n"
1404b8e80941Smrg    "        out uint z1Ptr,\n"
1405b8e80941Smrg    "        out uint z2Ptr)\n"
1406b8e80941Smrg    "{\n"
1407b8e80941Smrg    "   uint z2 = a2 - b2;\n"
1408b8e80941Smrg    "   uint borrow1 = uint(a2 < b2);\n"
1409b8e80941Smrg    "   uint z1 = a1 - b1;\n"
1410b8e80941Smrg    "   uint borrow0 = uint(a1 < b1);\n"
1411b8e80941Smrg    "   uint z0 = a0 - b0;\n"
1412b8e80941Smrg    "   z0 -= uint(z1 < borrow1);\n"
1413b8e80941Smrg    "   z1 -= borrow1;\n"
1414b8e80941Smrg    "   z0 -= borrow0;\n"
1415b8e80941Smrg    "   z2Ptr = z2;\n"
1416b8e80941Smrg    "   z1Ptr = z1;\n"
1417b8e80941Smrg    "   z0Ptr = z0;\n"
1418b8e80941Smrg    "}\n"
1419b8e80941Smrg    "\n"
1420b8e80941Smrg    "/* Returns an approximation to the 32-bit integer quotient obtained by dividing\n"
1421b8e80941Smrg    " * `b' into the 64-bit value formed by concatenating `a0' and `a1'.  The\n"
1422b8e80941Smrg    " * divisor `b' must be at least 2^31.  If q is the exact quotient truncated\n"
1423b8e80941Smrg    " * toward zero, the approximation returned lies between q and q + 2 inclusive.\n"
1424b8e80941Smrg    " * If the exact quotient q is larger than 32 bits, the maximum positive 32-bit\n"
1425b8e80941Smrg    " * unsigned integer is returned.\n"
1426b8e80941Smrg    " */\n"
1427b8e80941Smrg    "uint\n"
1428b8e80941Smrg    "__estimateDiv64To32(uint a0, uint a1, uint b)\n"
1429b8e80941Smrg    "{\n"
1430b8e80941Smrg    "   uint b0;\n"
1431b8e80941Smrg    "   uint b1;\n"
1432b8e80941Smrg    "   uint rem0 = 0u;\n"
1433b8e80941Smrg    "   uint rem1 = 0u;\n"
1434b8e80941Smrg    "   uint term0 = 0u;\n"
1435b8e80941Smrg    "   uint term1 = 0u;\n"
1436b8e80941Smrg    "   uint z;\n"
1437b8e80941Smrg    "\n"
1438b8e80941Smrg    "   if (b <= a0)\n"
1439b8e80941Smrg    "      return 0xFFFFFFFFu;\n"
1440b8e80941Smrg    "   b0 = b>>16;\n"
1441b8e80941Smrg    "   z = (b0<<16 <= a0) ? 0xFFFF0000u : (a0 / b0)<<16;\n"
1442b8e80941Smrg    "   __mul32To64(b, z, term0, term1);\n"
1443b8e80941Smrg    "   __sub64(a0, a1, term0, term1, rem0, rem1);\n"
1444b8e80941Smrg    "   while (int(rem0) < 0) {\n"
1445b8e80941Smrg    "      z -= 0x10000u;\n"
1446b8e80941Smrg    "      b1 = b<<16;\n"
1447b8e80941Smrg    "      __add64(rem0, rem1, b0, b1, rem0, rem1);\n"
1448b8e80941Smrg    "   }\n"
1449b8e80941Smrg    "   rem0 = (rem0<<16) | (rem1>>16);\n"
1450b8e80941Smrg    "   z |= (b0<<16 <= rem0) ? 0xFFFFu : rem0 / b0;\n"
1451b8e80941Smrg    "   return z;\n"
1452b8e80941Smrg    "}\n"
1453b8e80941Smrg    "\n"
1454b8e80941Smrg    "uint\n"
1455b8e80941Smrg    "__sqrtOddAdjustments(int index)\n"
1456b8e80941Smrg    "{\n"
1457b8e80941Smrg    "   uint res = 0u;\n"
1458b8e80941Smrg    "   if (index == 0)\n"
1459b8e80941Smrg    "      res = 0x0004u;\n"
1460b8e80941Smrg    "   if (index == 1)\n"
1461b8e80941Smrg    "      res = 0x0022u;\n"
1462b8e80941Smrg    "   if (index == 2)\n"
1463b8e80941Smrg    "      res = 0x005Du;\n"
1464b8e80941Smrg    "   if (index == 3)\n"
1465b8e80941Smrg    "      res = 0x00B1u;\n"
1466b8e80941Smrg    "   if (index == 4)\n"
1467b8e80941Smrg    "      res = 0x011Du;\n"
1468b8e80941Smrg    "   if (index == 5)\n"
1469b8e80941Smrg    "      res = 0x019Fu;\n"
1470b8e80941Smrg    "   if (index == 6)\n"
1471b8e80941Smrg    "      res = 0x0236u;\n"
1472b8e80941Smrg    "   if (index == 7)\n"
1473b8e80941Smrg    "      res = 0x02E0u;\n"
1474b8e80941Smrg    "   if (index == 8)\n"
1475b8e80941Smrg    "      res = 0x039Cu;\n"
1476b8e80941Smrg    "   if (index == 9)\n"
1477b8e80941Smrg    "      res = 0x0468u;\n"
1478b8e80941Smrg    "   if (index == 10)\n"
1479b8e80941Smrg    "      res = 0x0545u;\n"
1480b8e80941Smrg    "   if (index == 11)\n"
1481b8e80941Smrg    "      res = 0x631u;\n"
1482b8e80941Smrg    "   if (index == 12)\n"
1483b8e80941Smrg    "      res = 0x072Bu;\n"
1484b8e80941Smrg    "   if (index == 13)\n"
1485b8e80941Smrg    "      res = 0x0832u;\n"
1486b8e80941Smrg    "   if (index == 14)\n"
1487b8e80941Smrg    "      res = 0x0946u;\n"
1488b8e80941Smrg    "   if (index == 15)\n"
1489b8e80941Smrg    "      res = 0x0A67u;\n"
1490b8e80941Smrg    "\n"
1491b8e80941Smrg    "   return res;\n"
1492b8e80941Smrg    "}\n"
1493b8e80941Smrg    "\n"
1494b8e80941Smrg    "uint\n"
1495b8e80941Smrg    "__sqrtEvenAdjustments(int index)\n"
1496b8e80941Smrg    "{\n"
1497b8e80941Smrg    "   uint res = 0u;\n"
1498b8e80941Smrg    "   if (index == 0)\n"
1499b8e80941Smrg    "      res = 0x0A2Du;\n"
1500b8e80941Smrg    "   if (index == 1)\n"
1501b8e80941Smrg    "      res = 0x08AFu;\n"
1502b8e80941Smrg    "   if (index == 2)\n"
1503b8e80941Smrg    "      res = 0x075Au;\n"
1504b8e80941Smrg    "   if (index == 3)\n"
1505b8e80941Smrg    "      res = 0x0629u;\n"
1506b8e80941Smrg    "   if (index == 4)\n"
1507b8e80941Smrg    "      res = 0x051Au;\n"
1508b8e80941Smrg    "   if (index == 5)\n"
1509b8e80941Smrg    "      res = 0x0429u;\n"
1510b8e80941Smrg    "   if (index == 6)\n"
1511b8e80941Smrg    "      res = 0x0356u;\n"
1512b8e80941Smrg    "   if (index == 7)\n"
1513b8e80941Smrg    "      res = 0x029Eu;\n"
1514b8e80941Smrg    "   if (index == 8)\n"
1515b8e80941Smrg    "      res = 0x0200u;\n"
1516b8e80941Smrg    "   if (index == 9)\n"
1517b8e80941Smrg    "      res = 0x0179u;\n"
1518b8e80941Smrg    "   if (index == 10)\n"
1519b8e80941Smrg    "      res = 0x0109u;\n"
1520b8e80941Smrg    "   if (index == 11)\n"
1521b8e80941Smrg    "      res = 0x00AFu;\n"
1522b8e80941Smrg    "   if (index == 12)\n"
1523b8e80941Smrg    "      res = 0x0068u;\n"
1524b8e80941Smrg    "   if (index == 13)\n"
1525b8e80941Smrg    "      res = 0x0034u;\n"
1526b8e80941Smrg    "   if (index == 14)\n"
1527b8e80941Smrg    "      res = 0x0012u;\n"
1528b8e80941Smrg    "   if (index == 15)\n"
1529b8e80941Smrg    "      res = 0x0002u;\n"
1530b8e80941Smrg    "\n"
1531b8e80941Smrg    "   return res;\n"
1532b8e80941Smrg    "}\n"
1533b8e80941Smrg    "\n"
1534b8e80941Smrg    "/* Returns an approximation to the square root of the 32-bit significand given\n"
1535b8e80941Smrg    " * by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of\n"
1536b8e80941Smrg    " * `aExp' (the least significant bit) is 1, the integer returned approximates\n"
1537b8e80941Smrg    " * 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'\n"
1538b8e80941Smrg    " * is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either\n"
1539b8e80941Smrg    " * case, the approximation returned lies strictly within +/-2 of the exact\n"
1540b8e80941Smrg    " * value.\n"
1541b8e80941Smrg    " */\n"
1542b8e80941Smrg    "uint\n"
1543b8e80941Smrg    "__estimateSqrt32(int aExp, uint a)\n"
1544b8e80941Smrg    "{\n"
1545b8e80941Smrg    "   uint z;\n"
1546b8e80941Smrg    "\n"
1547b8e80941Smrg    "   int index = int(a>>27 & 15u);\n"
1548b8e80941Smrg    "   if ((aExp & 1) != 0) {\n"
1549b8e80941Smrg    "      z = 0x4000u + (a>>17) - __sqrtOddAdjustments(index);\n"
1550b8e80941Smrg    "      z = ((a / z)<<14) + (z<<15);\n"
1551b8e80941Smrg    "      a >>= 1;\n"
1552b8e80941Smrg    "   } else {\n"
1553b8e80941Smrg    "      z = 0x8000u + (a>>17) - __sqrtEvenAdjustments(index);\n"
1554b8e80941Smrg    "      z = a / z + z;\n"
1555b8e80941Smrg    "      z = (0x20000u <= z) ? 0xFFFF8000u : (z<<15);\n"
1556b8e80941Smrg    "      if (z <= a)\n"
1557b8e80941Smrg    "         return uint(int(a)>>1);\n"
1558b8e80941Smrg    "   }\n"
1559b8e80941Smrg    "   return ((__estimateDiv64To32(a, 0u, z))>>1) + (z>>1);\n"
1560b8e80941Smrg    "}\n"
1561b8e80941Smrg    "\n"
1562b8e80941Smrg    "/* Returns the square root of the double-precision floating-point value `a'.\n"
1563b8e80941Smrg    " * The operation is performed according to the IEEE Standard for Floating-Point\n"
1564b8e80941Smrg    " * Arithmetic.\n"
1565b8e80941Smrg    " */\n"
1566b8e80941Smrg    "uint64_t\n"
1567b8e80941Smrg    "__fsqrt64(uint64_t a)\n"
1568b8e80941Smrg    "{\n"
1569b8e80941Smrg    "   uint zFrac0 = 0u;\n"
1570b8e80941Smrg    "   uint zFrac1 = 0u;\n"
1571b8e80941Smrg    "   uint zFrac2 = 0u;\n"
1572b8e80941Smrg    "   uint doubleZFrac0 = 0u;\n"
1573b8e80941Smrg    "   uint rem0 = 0u;\n"
1574b8e80941Smrg    "   uint rem1 = 0u;\n"
1575b8e80941Smrg    "   uint rem2 = 0u;\n"
1576b8e80941Smrg    "   uint rem3 = 0u;\n"
1577b8e80941Smrg    "   uint term0 = 0u;\n"
1578b8e80941Smrg    "   uint term1 = 0u;\n"
1579b8e80941Smrg    "   uint term2 = 0u;\n"
1580b8e80941Smrg    "   uint term3 = 0u;\n"
1581b8e80941Smrg    "   uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;\n"
1582b8e80941Smrg    "\n"
1583b8e80941Smrg    "   uint aFracLo = __extractFloat64FracLo(a);\n"
1584b8e80941Smrg    "   uint aFracHi = __extractFloat64FracHi(a);\n"
1585b8e80941Smrg    "   int aExp = __extractFloat64Exp(a);\n"
1586b8e80941Smrg    "   uint aSign = __extractFloat64Sign(a);\n"
1587b8e80941Smrg    "   if (aExp == 0x7FF) {\n"
1588b8e80941Smrg    "      if ((aFracHi | aFracLo) != 0u)\n"
1589b8e80941Smrg    "         return __propagateFloat64NaN(a, a);\n"
1590b8e80941Smrg    "      if (aSign == 0u)\n"
1591b8e80941Smrg    "         return a;\n"
1592b8e80941Smrg    "      return default_nan;\n"
1593b8e80941Smrg    "   }\n"
1594b8e80941Smrg    "   if (aSign != 0u) {\n"
1595b8e80941Smrg    "      if ((uint(aExp) | aFracHi | aFracLo) == 0u)\n"
1596b8e80941Smrg    "         return a;\n"
1597b8e80941Smrg    "      return default_nan;\n"
1598b8e80941Smrg    "   }\n"
1599b8e80941Smrg    "   if (aExp == 0) {\n"
1600b8e80941Smrg    "      if ((aFracHi | aFracLo) == 0u)\n"
1601b8e80941Smrg    "         return __packFloat64(0u, 0, 0u, 0u);\n"
1602b8e80941Smrg    "      __normalizeFloat64Subnormal(aFracHi, aFracLo, aExp, aFracHi, aFracLo);\n"
1603b8e80941Smrg    "   }\n"
1604b8e80941Smrg    "   int zExp = ((aExp - 0x3FF)>>1) + 0x3FE;\n"
1605b8e80941Smrg    "   aFracHi |= 0x00100000u;\n"
1606b8e80941Smrg    "   __shortShift64Left(aFracHi, aFracLo, 11, term0, term1);\n"
1607b8e80941Smrg    "   zFrac0 = (__estimateSqrt32(aExp, term0)>>1) + 1u;\n"
1608b8e80941Smrg    "   if (zFrac0 == 0u)\n"
1609b8e80941Smrg    "      zFrac0 = 0x7FFFFFFFu;\n"
1610b8e80941Smrg    "   doubleZFrac0 = zFrac0 + zFrac0;\n"
1611b8e80941Smrg    "   __shortShift64Left(aFracHi, aFracLo, 9 - (aExp & 1), aFracHi, aFracLo);\n"
1612b8e80941Smrg    "   __mul32To64(zFrac0, zFrac0, term0, term1);\n"
1613b8e80941Smrg    "   __sub64(aFracHi, aFracLo, term0, term1, rem0, rem1);\n"
1614b8e80941Smrg    "   while (int(rem0) < 0) {\n"
1615b8e80941Smrg    "      --zFrac0;\n"
1616b8e80941Smrg    "      doubleZFrac0 -= 2u;\n"
1617b8e80941Smrg    "      __add64(rem0, rem1, 0u, doubleZFrac0 | 1u, rem0, rem1);\n"
1618b8e80941Smrg    "   }\n"
1619b8e80941Smrg    "   zFrac1 = __estimateDiv64To32(rem1, 0u, doubleZFrac0);\n"
1620b8e80941Smrg    "   if ((zFrac1 & 0x1FFu) <= 5u) {\n"
1621b8e80941Smrg    "      if (zFrac1 == 0u)\n"
1622b8e80941Smrg    "         zFrac1 = 1u;\n"
1623b8e80941Smrg    "      __mul32To64(doubleZFrac0, zFrac1, term1, term2);\n"
1624b8e80941Smrg    "      __sub64(rem1, 0u, term1, term2, rem1, rem2);\n"
1625b8e80941Smrg    "      __mul32To64(zFrac1, zFrac1, term2, term3);\n"
1626b8e80941Smrg    "      __sub96(rem1, rem2, 0u, 0u, term2, term3, rem1, rem2, rem3);\n"
1627b8e80941Smrg    "      while (int(rem1) < 0) {\n"
1628b8e80941Smrg    "         --zFrac1;\n"
1629b8e80941Smrg    "         __shortShift64Left(0u, zFrac1, 1, term2, term3);\n"
1630b8e80941Smrg    "         term3 |= 1u;\n"
1631b8e80941Smrg    "         term2 |= doubleZFrac0;\n"
1632b8e80941Smrg    "         __add96(rem1, rem2, rem3, 0u, term2, term3, rem1, rem2, rem3);\n"
1633b8e80941Smrg    "      }\n"
1634b8e80941Smrg    "      zFrac1 |= uint((rem1 | rem2 | rem3) != 0u);\n"
1635b8e80941Smrg    "   }\n"
1636b8e80941Smrg    "   __shift64ExtraRightJamming(zFrac0, zFrac1, 0u, 10, zFrac0, zFrac1, zFrac2);\n"
1637b8e80941Smrg    "   return __roundAndPackFloat64(0u, zExp, zFrac0, zFrac1, zFrac2);\n"
1638b8e80941Smrg    "}\n"
1639b8e80941Smrg    "\n"
1640b8e80941Smrg    "uint64_t\n"
1641b8e80941Smrg    "__ftrunc64(uint64_t __a)\n"
1642b8e80941Smrg    "{\n"
1643b8e80941Smrg    "   uvec2 a = unpackUint2x32(__a);\n"
1644b8e80941Smrg    "   int aExp = __extractFloat64Exp(__a);\n"
1645b8e80941Smrg    "   uint zLo;\n"
1646b8e80941Smrg    "   uint zHi;\n"
1647b8e80941Smrg    "\n"
1648b8e80941Smrg    "   int unbiasedExp = aExp - 1023;\n"
1649b8e80941Smrg    "   int fracBits = 52 - unbiasedExp;\n"
1650b8e80941Smrg    "   uint maskLo = mix(~0u << fracBits, 0u, fracBits >= 32);\n"
1651b8e80941Smrg    "   uint maskHi = mix(~0u << (fracBits - 32), ~0u, fracBits < 33);\n"
1652b8e80941Smrg    "   zLo = maskLo & a.x;\n"
1653b8e80941Smrg    "   zHi = maskHi & a.y;\n"
1654b8e80941Smrg    "\n"
1655b8e80941Smrg    "   zLo = mix(zLo, 0u, unbiasedExp < 0);\n"
1656b8e80941Smrg    "   zHi = mix(zHi, 0u, unbiasedExp < 0);\n"
1657b8e80941Smrg    "   zLo = mix(zLo, a.x, unbiasedExp > 52);\n"
1658b8e80941Smrg    "   zHi = mix(zHi, a.y, unbiasedExp > 52);\n"
1659b8e80941Smrg    "   return packUint2x32(uvec2(zLo, zHi));\n"
1660b8e80941Smrg    "}\n"
1661b8e80941Smrg    "\n"
1662b8e80941Smrg    "uint64_t\n"
1663b8e80941Smrg    "__ffloor64(uint64_t a)\n"
1664b8e80941Smrg    "{\n"
1665b8e80941Smrg    "   bool is_positive = __fge64(a, 0ul);\n"
1666b8e80941Smrg    "   uint64_t tr = __ftrunc64(a);\n"
1667b8e80941Smrg    "\n"
1668b8e80941Smrg    "   if (is_positive || __feq64(tr, a)) {\n"
1669b8e80941Smrg    "      return tr;\n"
1670b8e80941Smrg    "   } else {\n"
1671b8e80941Smrg    "      return __fadd64(tr, 0xbff0000000000000ul /* -1.0 */);\n"
1672b8e80941Smrg    "   }\n"
1673b8e80941Smrg    "}\n"
1674b8e80941Smrg    "\n"
1675b8e80941Smrg    "uint64_t\n"
1676b8e80941Smrg    "__fround64(uint64_t __a)\n"
1677b8e80941Smrg    "{\n"
1678b8e80941Smrg    "   uvec2 a = unpackUint2x32(__a);\n"
1679b8e80941Smrg    "   int unbiasedExp = __extractFloat64Exp(__a) - 1023;\n"
1680b8e80941Smrg    "   uint aHi = a.y;\n"
1681b8e80941Smrg    "   uint aLo = a.x;\n"
1682b8e80941Smrg    "\n"
1683b8e80941Smrg    "   if (unbiasedExp < 20) {\n"
1684b8e80941Smrg    "      if (unbiasedExp < 0) {\n"
1685b8e80941Smrg    "         if ((aHi & 0x80000000u) != 0u && aLo == 0u) {\n"
1686b8e80941Smrg    "            return 0;\n"
1687b8e80941Smrg    "         }\n"
1688b8e80941Smrg    "         aHi &= 0x80000000u;\n"
1689b8e80941Smrg    "         if ((a.y & 0x000FFFFFu) == 0u && a.x == 0u) {\n"
1690b8e80941Smrg    "            aLo = 0u;\n"
1691b8e80941Smrg    "            return packUint2x32(uvec2(aLo, aHi));\n"
1692b8e80941Smrg    "         }\n"
1693b8e80941Smrg    "         aHi = mix(aHi, (aHi | 0x3FF00000u), unbiasedExp == -1);\n"
1694b8e80941Smrg    "         aLo = 0u;\n"
1695b8e80941Smrg    "      } else {\n"
1696b8e80941Smrg    "         uint maskExp = 0x000FFFFFu >> unbiasedExp;\n"
1697b8e80941Smrg    "         uint lastBit = maskExp + 1;\n"
1698b8e80941Smrg    "         aHi += 0x00080000u >> unbiasedExp;\n"
1699b8e80941Smrg    "         if ((aHi & maskExp) == 0u)\n"
1700b8e80941Smrg    "            aHi &= ~lastBit;\n"
1701b8e80941Smrg    "         aHi &= ~maskExp;\n"
1702b8e80941Smrg    "         aLo = 0u;\n"
1703b8e80941Smrg    "      }\n"
1704b8e80941Smrg    "   } else if (unbiasedExp > 51 || unbiasedExp == 1024) {\n"
1705b8e80941Smrg    "      return __a;\n"
1706b8e80941Smrg    "   } else {\n"
1707b8e80941Smrg    "      uint maskExp = 0xFFFFFFFFu >> (unbiasedExp - 20);\n"
1708b8e80941Smrg    "      if ((aLo & maskExp) == 0u)\n"
1709b8e80941Smrg    "         return __a;\n"
1710b8e80941Smrg    "      uint tmp = aLo + (1u << (51 - unbiasedExp));\n"
1711b8e80941Smrg    "      if(tmp < aLo)\n"
1712b8e80941Smrg    "         aHi += 1u;\n"
1713b8e80941Smrg    "      aLo = tmp;\n"
1714b8e80941Smrg    "      aLo &= ~maskExp;\n"
1715b8e80941Smrg    "   }\n"
1716b8e80941Smrg    "\n"
1717b8e80941Smrg    "   return packUint2x32(uvec2(aLo, aHi));\n"
1718b8e80941Smrg    "}\n"
1719b8e80941Smrg    "\n"
1720b8e80941Smrg    "uint64_t\n"
1721b8e80941Smrg    "__fmin64(uint64_t a, uint64_t b)\n"
1722b8e80941Smrg    "{\n"
1723b8e80941Smrg    "   if (__is_nan(a)) return b;\n"
1724b8e80941Smrg    "   if (__is_nan(b)) return a;\n"
1725b8e80941Smrg    "\n"
1726b8e80941Smrg    "   if (__flt64_nonnan(a, b)) return a;\n"
1727b8e80941Smrg    "   return b;\n"
1728b8e80941Smrg    "}\n"
1729b8e80941Smrg    "\n"
1730b8e80941Smrg    "uint64_t\n"
1731b8e80941Smrg    "__fmax64(uint64_t a, uint64_t b)\n"
1732b8e80941Smrg    "{\n"
1733b8e80941Smrg    "   if (__is_nan(a)) return b;\n"
1734b8e80941Smrg    "   if (__is_nan(b)) return a;\n"
1735b8e80941Smrg    "\n"
1736b8e80941Smrg    "   if (__flt64_nonnan(a, b)) return b;\n"
1737b8e80941Smrg    "   return a;\n"
1738b8e80941Smrg    "}\n"
1739b8e80941Smrg    "\n"
1740b8e80941Smrg    "uint64_t\n"
1741b8e80941Smrg    "__ffract64(uint64_t a)\n"
1742b8e80941Smrg    "{\n"
1743b8e80941Smrg    "   return __fadd64(a, __fneg64(__ffloor64(a)));\n"
1744b8e80941Smrg    "}\n"
1745b8e80941Smrg    ""
1746b8e80941Smrg    ;
1747