1static const char float64_source[] =
2 "/*\n"
3    " * The implementations contained in this file are heavily based on the\n"
4    " * implementations found in the Berkeley SoftFloat library. As such, they are\n"
5    " * licensed under the same 3-clause BSD license:\n"
6    " *\n"
7    " * License for Berkeley SoftFloat Release 3e\n"
8    " *\n"
9    " * John R. Hauser\n"
10    " * 2018 January 20\n"
11    " *\n"
12    " * The following applies to the whole of SoftFloat Release 3e as well as to\n"
13    " * each source file individually.\n"
14    " *\n"
15    " * Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 The Regents of the\n"
16    " * University of California.  All rights reserved.\n"
17    " *\n"
18    " * Redistribution and use in source and binary forms, with or without\n"
19    " * modification, are permitted provided that the following conditions are met:\n"
20    " *\n"
21    " *  1. Redistributions of source code must retain the above copyright notice,\n"
22    " *     this list of conditions, and the following disclaimer.\n"
23    " *\n"
24    " *  2. Redistributions in binary form must reproduce the above copyright\n"
25    " *     notice, this list of conditions, and the following disclaimer in the\n"
26    " *     documentation and/or other materials provided with the distribution.\n"
27    " *\n"
28    " *  3. Neither the name of the University nor the names of its contributors\n"
29    " *     may be used to endorse or promote products derived from this software\n"
30    " *     without specific prior written permission.\n"
31    " *\n"
32    " * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS \"AS IS\", AND ANY\n"
33    " * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n"
34    " * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE\n"
35    " * DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY\n"
36    " * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n"
37    " * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n"
38    " * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND\n"
39    " * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n"
40    " * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF\n"
41    " * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
42    "*/\n"
43    "\n"
44    "#version 430\n"
45    "#extension GL_ARB_gpu_shader_int64 : enable\n"
46    "#extension GL_ARB_shader_bit_encoding : enable\n"
47    "#extension GL_EXT_shader_integer_mix : enable\n"
48    "#extension GL_MESA_shader_integer_functions : enable\n"
49    "\n"
50    "#pragma warning(off)\n"
51    "\n"
52    "/* Software IEEE floating-point rounding mode.\n"
53    " * GLSL spec section \"4.7.1 Range and Precision\":\n"
54    " * The rounding mode cannot be set and is undefined.\n"
55    " * But here, we are able to define the rounding mode at the compilation time.\n"
56    " */\n"
57    "#define FLOAT_ROUND_NEAREST_EVEN    0\n"
58    "#define FLOAT_ROUND_TO_ZERO         1\n"
59    "#define FLOAT_ROUND_DOWN            2\n"
60    "#define FLOAT_ROUND_UP              3\n"
61    "#define FLOAT_ROUNDING_MODE         FLOAT_ROUND_NEAREST_EVEN\n"
62    "\n"
63    "/* Absolute value of a Float64 :\n"
64    " * Clear the sign bit\n"
65    " */\n"
66    "uint64_t\n"
67    "__fabs64(uint64_t __a)\n"
68    "{\n"
69    "   uvec2 a = unpackUint2x32(__a);\n"
70    "   a.y &= 0x7FFFFFFFu;\n"
71    "   return packUint2x32(a);\n"
72    "}\n"
73    "\n"
74    "/* Returns 1 if the double-precision floating-point value `a' is a NaN;\n"
75    " * otherwise returns 0.\n"
76    " */\n"
77    "bool\n"
78    "__is_nan(uint64_t __a)\n"
79    "{\n"
80    "   uvec2 a = unpackUint2x32(__a);\n"
81    "   return (0xFFE00000u <= (a.y<<1)) &&\n"
82    "      ((a.x != 0u) || ((a.y & 0x000FFFFFu) != 0u));\n"
83    "}\n"
84    "\n"
85    "/* Negate value of a Float64 :\n"
86    " * Toggle the sign bit\n"
87    " */\n"
88    "uint64_t\n"
89    "__fneg64(uint64_t __a)\n"
90    "{\n"
91    "   uvec2 a = unpackUint2x32(__a);\n"
92    "   uint t = a.y;\n"
93    "\n"
94    "   t ^= (1u << 31);\n"
95    "   a.y = mix(t, a.y, __is_nan(__a));\n"
96    "   return packUint2x32(a);\n"
97    "}\n"
98    "\n"
99    "uint64_t\n"
100    "__fsign64(uint64_t __a)\n"
101    "{\n"
102    "   uvec2 a = unpackUint2x32(__a);\n"
103    "   uvec2 retval;\n"
104    "   retval.x = 0u;\n"
105    "   retval.y = mix((a.y & 0x80000000u) | 0x3FF00000u, 0u, (a.y << 1 | a.x) == 0u);\n"
106    "   return packUint2x32(retval);\n"
107    "}\n"
108    "\n"
109    "/* Returns the fraction bits of the double-precision floating-point value `a'.*/\n"
110    "uint\n"
111    "__extractFloat64FracLo(uint64_t a)\n"
112    "{\n"
113    "   return unpackUint2x32(a).x;\n"
114    "}\n"
115    "\n"
116    "uint\n"
117    "__extractFloat64FracHi(uint64_t a)\n"
118    "{\n"
119    "   return unpackUint2x32(a).y & 0x000FFFFFu;\n"
120    "}\n"
121    "\n"
122    "/* Returns the exponent bits of the double-precision floating-point value `a'.*/\n"
123    "int\n"
124    "__extractFloat64Exp(uint64_t __a)\n"
125    "{\n"
126    "   uvec2 a = unpackUint2x32(__a);\n"
127    "   return int((a.y>>20) & 0x7FFu);\n"
128    "}\n"
129    "\n"
130    "bool\n"
131    "__feq64_nonnan(uint64_t __a, uint64_t __b)\n"
132    "{\n"
133    "   uvec2 a = unpackUint2x32(__a);\n"
134    "   uvec2 b = unpackUint2x32(__b);\n"
135    "   return (a.x == b.x) &&\n"
136    "          ((a.y == b.y) || ((a.x == 0u) && (((a.y | b.y)<<1) == 0u)));\n"
137    "}\n"
138    "\n"
139    "/* Returns true if the double-precision floating-point value `a' is equal to the\n"
140    " * corresponding value `b', and false otherwise.  The comparison is performed\n"
141    " * according to the IEEE Standard for Floating-Point Arithmetic.\n"
142    " */\n"
143    "bool\n"
144    "__feq64(uint64_t a, uint64_t b)\n"
145    "{\n"
146    "   if (__is_nan(a) || __is_nan(b))\n"
147    "      return false;\n"
148    "\n"
149    "   return __feq64_nonnan(a, b);\n"
150    "}\n"
151    "\n"
152    "/* Returns true if the double-precision floating-point value `a' is not equal\n"
153    " * to the corresponding value `b', and false otherwise.  The comparison is\n"
154    " * performed according to the IEEE Standard for Floating-Point Arithmetic.\n"
155    " */\n"
156    "bool\n"
157    "__fne64(uint64_t a, uint64_t b)\n"
158    "{\n"
159    "   if (__is_nan(a) || __is_nan(b))\n"
160    "      return true;\n"
161    "\n"
162    "   return !__feq64_nonnan(a, b);\n"
163    "}\n"
164    "\n"
165    "/* Returns the sign bit of the double-precision floating-point value `a'.*/\n"
166    "uint\n"
167    "__extractFloat64Sign(uint64_t a)\n"
168    "{\n"
169    "   return unpackUint2x32(a).y >> 31;\n"
170    "}\n"
171    "\n"
172    "/* Returns true if the 64-bit value formed by concatenating `a0' and `a1' is less\n"
173    " * than the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,\n"
174    " * returns false.\n"
175    " */\n"
176    "bool\n"
177    "lt64(uint a0, uint a1, uint b0, uint b1)\n"
178    "{\n"
179    "   return (a0 < b0) || ((a0 == b0) && (a1 < b1));\n"
180    "}\n"
181    "\n"
182    "bool\n"
183    "__flt64_nonnan(uint64_t __a, uint64_t __b)\n"
184    "{\n"
185    "   uvec2 a = unpackUint2x32(__a);\n"
186    "   uvec2 b = unpackUint2x32(__b);\n"
187    "   uint aSign = __extractFloat64Sign(__a);\n"
188    "   uint bSign = __extractFloat64Sign(__b);\n"
189    "   if (aSign != bSign)\n"
190    "      return (aSign != 0u) && ((((a.y | b.y)<<1) | a.x | b.x) != 0u);\n"
191    "\n"
192    "   return mix(lt64(a.y, a.x, b.y, b.x), lt64(b.y, b.x, a.y, a.x), aSign != 0u);\n"
193    "}\n"
194    "\n"
195    "/* Returns true if the double-precision floating-point value `a' is less than\n"
196    " * the corresponding value `b', and false otherwise.  The comparison is performed\n"
197    " * according to the IEEE Standard for Floating-Point Arithmetic.\n"
198    " */\n"
199    "bool\n"
200    "__flt64(uint64_t a, uint64_t b)\n"
201    "{\n"
202    "   if (__is_nan(a) || __is_nan(b))\n"
203    "      return false;\n"
204    "\n"
205    "   return __flt64_nonnan(a, b);\n"
206    "}\n"
207    "\n"
208    "/* Returns true if the double-precision floating-point value `a' is greater\n"
209    " * than or equal to * the corresponding value `b', and false otherwise.  The\n"
210    " * comparison is performed * according to the IEEE Standard for Floating-Point\n"
211    " * Arithmetic.\n"
212    " */\n"
213    "bool\n"
214    "__fge64(uint64_t a, uint64_t b)\n"
215    "{\n"
216    "   if (__is_nan(a) || __is_nan(b))\n"
217    "      return false;\n"
218    "\n"
219    "   return !__flt64_nonnan(a, b);\n"
220    "}\n"
221    "\n"
222    "/* Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit\n"
223    " * value formed by concatenating `b0' and `b1'.  Addition is modulo 2^64, so\n"
224    " * any carry out is lost.  The result is broken into two 32-bit pieces which\n"
225    " * are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.\n"
226    " */\n"
227    "void\n"
228    "__add64(uint a0, uint a1, uint b0, uint b1,\n"
229    "        out uint z0Ptr,\n"
230    "        out uint z1Ptr)\n"
231    "{\n"
232    "   uint z1 = a1 + b1;\n"
233    "   z1Ptr = z1;\n"
234    "   z0Ptr = a0 + b0 + uint(z1 < a1);\n"
235    "}\n"
236    "\n"
237    "\n"
238    "/* Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the\n"
239    " * 64-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo\n"
240    " * 2^64, so any borrow out (carry out) is lost.  The result is broken into two\n"
241    " * 32-bit pieces which are stored at the locations pointed to by `z0Ptr' and\n"
242    " * `z1Ptr'.\n"
243    " */\n"
244    "void\n"
245    "__sub64(uint a0, uint a1, uint b0, uint b1,\n"
246    "        out uint z0Ptr,\n"
247    "        out uint z1Ptr)\n"
248    "{\n"
249    "   z1Ptr = a1 - b1;\n"
250    "   z0Ptr = a0 - b0 - uint(a1 < b1);\n"
251    "}\n"
252    "\n"
253    "/* Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the\n"
254    " * number of bits given in `count'.  If any nonzero bits are shifted off, they\n"
255    " * are \"jammed\" into the least significant bit of the result by setting the\n"
256    " * least significant bit to 1.  The value of `count' can be arbitrarily large;\n"
257    " * in particular, if `count' is greater than 64, the result will be either 0\n"
258    " * or 1, depending on whether the concatenation of `a0' and `a1' is zero or\n"
259    " * nonzero.  The result is broken into two 32-bit pieces which are stored at\n"
260    " * the locations pointed to by `z0Ptr' and `z1Ptr'.\n"
261    " */\n"
262    "void\n"
263    "__shift64RightJamming(uint a0,\n"
264    "                      uint a1,\n"
265    "                      int count,\n"
266    "                      out uint z0Ptr,\n"
267    "                      out uint z1Ptr)\n"
268    "{\n"
269    "   uint z0;\n"
270    "   uint z1;\n"
271    "   int negCount = (-count) & 31;\n"
272    "\n"
273    "   z0 = mix(0u, a0, count == 0);\n"
274    "   z0 = mix(z0, (a0 >> count), count < 32);\n"
275    "\n"
276    "   z1 = uint((a0 | a1) != 0u); /* count >= 64 */\n"
277    "   uint z1_lt64 = (a0>>(count & 31)) | uint(((a0<<negCount) | a1) != 0u);\n"
278    "   z1 = mix(z1, z1_lt64, count < 64);\n"
279    "   z1 = mix(z1, (a0 | uint(a1 != 0u)), count == 32);\n"
280    "   uint z1_lt32 = (a0<<negCount) | (a1>>count) | uint ((a1<<negCount) != 0u);\n"
281    "   z1 = mix(z1, z1_lt32, count < 32);\n"
282    "   z1 = mix(z1, a1, count == 0);\n"
283    "   z1Ptr = z1;\n"
284    "   z0Ptr = z0;\n"
285    "}\n"
286    "\n"
287    "/* Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right\n"
288    " * by 32 _plus_ the number of bits given in `count'.  The shifted result is\n"
289    " * at most 64 nonzero bits; these are broken into two 32-bit pieces which are\n"
290    " * stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted\n"
291    " * off form a third 32-bit result as follows:  The _last_ bit shifted off is\n"
292    " * the most-significant bit of the extra result, and the other 31 bits of the\n"
293    " * extra result are all zero if and only if _all_but_the_last_ bits shifted off\n"
294    " * were all zero.  This extra result is stored in the location pointed to by\n"
295    " * `z2Ptr'.  The value of `count' can be arbitrarily large.\n"
296    " *     (This routine makes more sense if `a0', `a1', and `a2' are considered\n"
297    " * to form a fixed-point value with binary point between `a1' and `a2'.  This\n"
298    " * fixed-point value is shifted right by the number of bits given in `count',\n"
299    " * and the integer part of the result is returned at the locations pointed to\n"
300    " * by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly\n"
301    " * corrupted as described above, and is returned at the location pointed to by\n"
302    " * `z2Ptr'.)\n"
303    " */\n"
304    "void\n"
305    "__shift64ExtraRightJamming(uint a0, uint a1, uint a2,\n"
306    "                           int count,\n"
307    "                           out uint z0Ptr,\n"
308    "                           out uint z1Ptr,\n"
309    "                           out uint z2Ptr)\n"
310    "{\n"
311    "   uint z0 = 0u;\n"
312    "   uint z1;\n"
313    "   uint z2;\n"
314    "   int negCount = (-count) & 31;\n"
315    "\n"
316    "   z2 = mix(uint(a0 != 0u), a0, count == 64);\n"
317    "   z2 = mix(z2, a0 << negCount, count < 64);\n"
318    "   z2 = mix(z2, a1 << negCount, count < 32);\n"
319    "\n"
320    "   z1 = mix(0u, (a0 >> (count & 31)), count < 64);\n"
321    "   z1 = mix(z1, (a0<<negCount) | (a1>>count), count < 32);\n"
322    "\n"
323    "   a2 = mix(a2 | a1, a2, count < 32);\n"
324    "   z0 = mix(z0, a0 >> count, count < 32);\n"
325    "   z2 |= uint(a2 != 0u);\n"
326    "\n"
327    "   z0 = mix(z0, 0u, (count == 32));\n"
328    "   z1 = mix(z1, a0, (count == 32));\n"
329    "   z2 = mix(z2, a1, (count == 32));\n"
330    "   z0 = mix(z0, a0, (count == 0));\n"
331    "   z1 = mix(z1, a1, (count == 0));\n"
332    "   z2 = mix(z2, a2, (count == 0));\n"
333    "   z2Ptr = z2;\n"
334    "   z1Ptr = z1;\n"
335    "   z0Ptr = z0;\n"
336    "}\n"
337    "\n"
338    "/* Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the\n"
339    " * number of bits given in `count'.  Any bits shifted off are lost.  The value\n"
340    " * of `count' must be less than 32.  The result is broken into two 32-bit\n"
341    " * pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.\n"
342    " */\n"
343    "void\n"
344    "__shortShift64Left(uint a0, uint a1,\n"
345    "                   int count,\n"
346    "                   out uint z0Ptr,\n"
347    "                   out uint z1Ptr)\n"
348    "{\n"
349    "   z1Ptr = a1<<count;\n"
350    "   z0Ptr = mix((a0 << count | (a1 >> ((-count) & 31))), a0, count == 0);\n"
351    "}\n"
352    "\n"
353    "/* Packs the sign `zSign', the exponent `zExp', and the significand formed by\n"
354    " * the concatenation of `zFrac0' and `zFrac1' into a double-precision floating-\n"
355    " * point value, returning the result.  After being shifted into the proper\n"
356    " * positions, the three fields `zSign', `zExp', and `zFrac0' are simply added\n"
357    " * together to form the most significant 32 bits of the result.  This means\n"
358    " * that any integer portion of `zFrac0' will be added into the exponent.  Since\n"
359    " * a properly normalized significand will have an integer portion equal to 1,\n"
360    " * the `zExp' input should be 1 less than the desired result exponent whenever\n"
361    " * `zFrac0' and `zFrac1' concatenated form a complete, normalized significand.\n"
362    " */\n"
363    "uint64_t\n"
364    "__packFloat64(uint zSign, int zExp, uint zFrac0, uint zFrac1)\n"
365    "{\n"
366    "   uvec2 z;\n"
367    "\n"
368    "   z.y = (zSign << 31) + (uint(zExp) << 20) + zFrac0;\n"
369    "   z.x = zFrac1;\n"
370    "   return packUint2x32(z);\n"
371    "}\n"
372    "\n"
373    "/* Takes an abstract floating-point value having sign `zSign', exponent `zExp',\n"
374    " * and extended significand formed by the concatenation of `zFrac0', `zFrac1',\n"
375    " * and `zFrac2', and returns the proper double-precision floating-point value\n"
376    " * corresponding to the abstract input.  Ordinarily, the abstract value is\n"
377    " * simply rounded and packed into the double-precision format, with the inexact\n"
378    " * exception raised if the abstract input cannot be represented exactly.\n"
379    " * However, if the abstract value is too large, the overflow and inexact\n"
380    " * exceptions are raised and an infinity or maximal finite value is returned.\n"
381    " * If the abstract value is too small, the input value is rounded to a\n"
382    " * subnormal number, and the underflow and inexact exceptions are raised if the\n"
383    " * abstract input cannot be represented exactly as a subnormal double-precision\n"
384    " * floating-point number.\n"
385    " *     The input significand must be normalized or smaller.  If the input\n"
386    " * significand is not normalized, `zExp' must be 0; in that case, the result\n"
387    " * returned is a subnormal number, and it must not require rounding.  In the\n"
388    " * usual case that the input significand is normalized, `zExp' must be 1 less\n"
389    " * than the \"true\" floating-point exponent.  The handling of underflow and\n"
390    " * overflow follows the IEEE Standard for Floating-Point Arithmetic.\n"
391    " */\n"
392    "uint64_t\n"
393    "__roundAndPackFloat64(uint zSign,\n"
394    "                      int zExp,\n"
395    "                      uint zFrac0,\n"
396    "                      uint zFrac1,\n"
397    "                      uint zFrac2)\n"
398    "{\n"
399    "   bool roundNearestEven;\n"
400    "   bool increment;\n"
401    "\n"
402    "   roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;\n"
403    "   increment = int(zFrac2) < 0;\n"
404    "   if (!roundNearestEven) {\n"
405    "      if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) {\n"
406    "         increment = false;\n"
407    "      } else {\n"
408    "         if (zSign != 0u) {\n"
409    "            increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&\n"
410    "               (zFrac2 != 0u);\n"
411    "         } else {\n"
412    "            increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&\n"
413    "               (zFrac2 != 0u);\n"
414    "         }\n"
415    "      }\n"
416    "   }\n"
417    "   if (0x7FD <= zExp) {\n"
418    "      if ((0x7FD < zExp) ||\n"
419    "         ((zExp == 0x7FD) &&\n"
420    "            (0x001FFFFFu == zFrac0 && 0xFFFFFFFFu == zFrac1) &&\n"
421    "               increment)) {\n"
422    "         if ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) ||\n"
423    "            ((zSign != 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP)) ||\n"
424    "               ((zSign == 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN))) {\n"
425    "            return __packFloat64(zSign, 0x7FE, 0x000FFFFFu, 0xFFFFFFFFu);\n"
426    "         }\n"
427    "         return __packFloat64(zSign, 0x7FF, 0u, 0u);\n"
428    "      }\n"
429    "      if (zExp < 0) {\n"
430    "         __shift64ExtraRightJamming(\n"
431    "            zFrac0, zFrac1, zFrac2, -zExp, zFrac0, zFrac1, zFrac2);\n"
432    "         zExp = 0;\n"
433    "         if (roundNearestEven) {\n"
434    "            increment = zFrac2 < 0u;\n"
435    "         } else {\n"
436    "            if (zSign != 0u) {\n"
437    "               increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&\n"
438    "                  (zFrac2 != 0u);\n"
439    "            } else {\n"
440    "               increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&\n"
441    "                  (zFrac2 != 0u);\n"
442    "            }\n"
443    "         }\n"
444    "      }\n"
445    "   }\n"
446    "   if (increment) {\n"
447    "      __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);\n"
448    "      zFrac1 &= ~((zFrac2 + uint(zFrac2 == 0u)) & uint(roundNearestEven));\n"
449    "   } else {\n"
450    "      zExp = mix(zExp, 0, (zFrac0 | zFrac1) == 0u);\n"
451    "   }\n"
452    "   return __packFloat64(zSign, zExp, zFrac0, zFrac1);\n"
453    "}\n"
454    "\n"
455    "uint64_t\n"
456    "__roundAndPackUInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2)\n"
457    "{\n"
458    "   bool roundNearestEven;\n"
459    "   bool increment;\n"
460    "   uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;\n"
461    "\n"
462    "   roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;\n"
463    "\n"
464    "   if (zFrac2 >= 0x80000000u)\n"
465    "      increment = false;\n"
466    "\n"
467    "   if (!roundNearestEven) {\n"
468    "      if (zSign != 0u) {\n"
469    "         if ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && (zFrac2 != 0u)) {\n"
470    "            increment = false;\n"
471    "         }\n"
472    "      } else {\n"
473    "         increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&\n"
474    "            (zFrac2 != 0u);\n"
475    "      }\n"
476    "   }\n"
477    "\n"
478    "   if (increment) {\n"
479    "      __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);\n"
480    "      if ((zFrac0 | zFrac1) != 0u)\n"
481    "         zFrac1 &= ~(1u) + uint(zFrac2 == 0u) & uint(roundNearestEven);\n"
482    "   }\n"
483    "   return mix(packUint2x32(uvec2(zFrac1, zFrac0)), default_nan,\n"
484    "              (zSign !=0u && (zFrac0 | zFrac1) != 0u));\n"
485    "}\n"
486    "\n"
487    "int64_t\n"
488    "__roundAndPackInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2)\n"
489    "{\n"
490    "   bool roundNearestEven;\n"
491    "   bool increment;\n"
492    "   int64_t default_NegNaN = -0x7FFFFFFFFFFFFFFEL;\n"
493    "   int64_t default_PosNaN = 0xFFFFFFFFFFFFFFFFL;\n"
494    "\n"
495    "   roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;\n"
496    "\n"
497    "   if (zFrac2 >= 0x80000000u)\n"
498    "      increment = false;\n"
499    "\n"
500    "   if (!roundNearestEven) {\n"
501    "      if (zSign != 0u) {\n"
502    "         increment = ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&\n"
503    "            (zFrac2 != 0u));\n"
504    "      } else {\n"
505    "         increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&\n"
506    "            (zFrac2 != 0u);\n"
507    "      }\n"
508    "   }\n"
509    "\n"
510    "   if (increment) {\n"
511    "      __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);\n"
512    "      if ((zFrac0 | zFrac1) != 0u)\n"
513    "         zFrac1 &= ~(1u) + uint(zFrac2 == 0u) & uint(roundNearestEven);\n"
514    "   }\n"
515    "\n"
516    "   int64_t absZ = mix(int64_t(packUint2x32(uvec2(zFrac1, zFrac0))),\n"
517    "                      -int64_t(packUint2x32(uvec2(zFrac1, zFrac0))),\n"
518    "                      (zSign != 0u));\n"
519    "   int64_t nan = mix(default_PosNaN, default_NegNaN, bool(zSign));\n"
520    "   return mix(absZ, nan, bool(zSign ^ uint(absZ < 0)) && bool(absZ));\n"
521    "}\n"
522    "\n"
523    "/* Returns the number of leading 0 bits before the most-significant 1 bit of\n"
524    " * `a'.  If `a' is zero, 32 is returned.\n"
525    " */\n"
526    "int\n"
527    "__countLeadingZeros32(uint a)\n"
528    "{\n"
529    "   int shiftCount;\n"
530    "   shiftCount = mix(31 - findMSB(a), 32, a == 0u);\n"
531    "   return shiftCount;\n"
532    "}\n"
533    "\n"
534    "/* Takes an abstract floating-point value having sign `zSign', exponent `zExp',\n"
535    " * and significand formed by the concatenation of `zSig0' and `zSig1', and\n"
536    " * returns the proper double-precision floating-point value corresponding\n"
537    " * to the abstract input.  This routine is just like `__roundAndPackFloat64'\n"
538    " * except that the input significand has fewer bits and does not have to be\n"
539    " * normalized.  In all cases, `zExp' must be 1 less than the \"true\" floating-\n"
540    " * point exponent.\n"
541    " */\n"
542    "uint64_t\n"
543    "__normalizeRoundAndPackFloat64(uint zSign,\n"
544    "                               int zExp,\n"
545    "                               uint zFrac0,\n"
546    "                               uint zFrac1)\n"
547    "{\n"
548    "   int shiftCount;\n"
549    "   uint zFrac2;\n"
550    "\n"
551    "   if (zFrac0 == 0u) {\n"
552    "      zExp -= 32;\n"
553    "      zFrac0 = zFrac1;\n"
554    "      zFrac1 = 0u;\n"
555    "   }\n"
556    "\n"
557    "   shiftCount = __countLeadingZeros32(zFrac0) - 11;\n"
558    "   if (0 <= shiftCount) {\n"
559    "      zFrac2 = 0u;\n"
560    "      __shortShift64Left(zFrac0, zFrac1, shiftCount, zFrac0, zFrac1);\n"
561    "   } else {\n"
562    "      __shift64ExtraRightJamming(\n"
563    "         zFrac0, zFrac1, 0u, -shiftCount, zFrac0, zFrac1, zFrac2);\n"
564    "   }\n"
565    "   zExp -= shiftCount;\n"
566    "   return __roundAndPackFloat64(zSign, zExp, zFrac0, zFrac1, zFrac2);\n"
567    "}\n"
568    "\n"
569    "/* Takes two double-precision floating-point values `a' and `b', one of which\n"
570    " * is a NaN, and returns the appropriate NaN result.\n"
571    " */\n"
572    "uint64_t\n"
573    "__propagateFloat64NaN(uint64_t __a, uint64_t __b)\n"
574    "{\n"
575    "   bool aIsNaN = __is_nan(__a);\n"
576    "   bool bIsNaN = __is_nan(__b);\n"
577    "   uvec2 a = unpackUint2x32(__a);\n"
578    "   uvec2 b = unpackUint2x32(__b);\n"
579    "   a.y |= 0x00080000u;\n"
580    "   b.y |= 0x00080000u;\n"
581    "\n"
582    "   return packUint2x32(mix(b, mix(a, b, bvec2(bIsNaN, bIsNaN)), bvec2(aIsNaN, aIsNaN)));\n"
583    "}\n"
584    "\n"
585    "/* Returns the result of adding the double-precision floating-point values\n"
586    " * `a' and `b'.  The operation is performed according to the IEEE Standard for\n"
587    " * Floating-Point Arithmetic.\n"
588    " */\n"
589    "uint64_t\n"
590    "__fadd64(uint64_t a, uint64_t b)\n"
591    "{\n"
592    "   uint aSign = __extractFloat64Sign(a);\n"
593    "   uint bSign = __extractFloat64Sign(b);\n"
594    "   uint aFracLo = __extractFloat64FracLo(a);\n"
595    "   uint aFracHi = __extractFloat64FracHi(a);\n"
596    "   uint bFracLo = __extractFloat64FracLo(b);\n"
597    "   uint bFracHi = __extractFloat64FracHi(b);\n"
598    "   int aExp = __extractFloat64Exp(a);\n"
599    "   int bExp = __extractFloat64Exp(b);\n"
600    "   uint zFrac0 = 0u;\n"
601    "   uint zFrac1 = 0u;\n"
602    "   int expDiff = aExp - bExp;\n"
603    "   if (aSign == bSign) {\n"
604    "      uint zFrac2 = 0u;\n"
605    "      int zExp;\n"
606    "      bool orig_exp_diff_is_zero = (expDiff == 0);\n"
607    "\n"
608    "      if (orig_exp_diff_is_zero) {\n"
609    "         if (aExp == 0x7FF) {\n"
610    "            bool propagate = (aFracHi | aFracLo | bFracHi | bFracLo) != 0u;\n"
611    "            return mix(a, __propagateFloat64NaN(a, b), propagate);\n"
612    "         }\n"
613    "         __add64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);\n"
614    "         if (aExp == 0)\n"
615    "            return __packFloat64(aSign, 0, zFrac0, zFrac1);\n"
616    "         zFrac2 = 0u;\n"
617    "         zFrac0 |= 0x00200000u;\n"
618    "         zExp = aExp;\n"
619    "         __shift64ExtraRightJamming(\n"
620    "            zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2);\n"
621    "      } else if (0 < expDiff) {\n"
622    "         if (aExp == 0x7FF) {\n"
623    "            bool propagate = (aFracHi | aFracLo) != 0u;\n"
624    "            return mix(a, __propagateFloat64NaN(a, b), propagate);\n"
625    "         }\n"
626    "\n"
627    "         expDiff = mix(expDiff, expDiff - 1, bExp == 0);\n"
628    "         bFracHi = mix(bFracHi | 0x00100000u, bFracHi, bExp == 0);\n"
629    "         __shift64ExtraRightJamming(\n"
630    "            bFracHi, bFracLo, 0u, expDiff, bFracHi, bFracLo, zFrac2);\n"
631    "         zExp = aExp;\n"
632    "      } else if (expDiff < 0) {\n"
633    "         if (bExp == 0x7FF) {\n"
634    "            bool propagate = (bFracHi | bFracLo) != 0u;\n"
635    "            return mix(__packFloat64(aSign, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate);\n"
636    "         }\n"
637    "         expDiff = mix(expDiff, expDiff + 1, aExp == 0);\n"
638    "         aFracHi = mix(aFracHi | 0x00100000u, aFracHi, aExp == 0);\n"
639    "         __shift64ExtraRightJamming(\n"
640    "            aFracHi, aFracLo, 0u, - expDiff, aFracHi, aFracLo, zFrac2);\n"
641    "         zExp = bExp;\n"
642    "      }\n"
643    "      if (!orig_exp_diff_is_zero) {\n"
644    "         aFracHi |= 0x00100000u;\n"
645    "         __add64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);\n"
646    "         --zExp;\n"
647    "         if (!(zFrac0 < 0x00200000u)) {\n"
648    "            __shift64ExtraRightJamming(zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2);\n"
649    "            ++zExp;\n"
650    "         }\n"
651    "      }\n"
652    "      return __roundAndPackFloat64(aSign, zExp, zFrac0, zFrac1, zFrac2);\n"
653    "\n"
654    "   } else {\n"
655    "      int zExp;\n"
656    "\n"
657    "      __shortShift64Left(aFracHi, aFracLo, 10, aFracHi, aFracLo);\n"
658    "      __shortShift64Left(bFracHi, bFracLo, 10, bFracHi, bFracLo);\n"
659    "      if (0 < expDiff) {\n"
660    "         if (aExp == 0x7FF) {\n"
661    "            bool propagate = (aFracHi | aFracLo) != 0u;\n"
662    "            return mix(a, __propagateFloat64NaN(a, b), propagate);\n"
663    "         }\n"
664    "         expDiff = mix(expDiff, expDiff - 1, bExp == 0);\n"
665    "         bFracHi = mix(bFracHi | 0x40000000u, bFracHi, bExp == 0);\n"
666    "         __shift64RightJamming(bFracHi, bFracLo, expDiff, bFracHi, bFracLo);\n"
667    "         aFracHi |= 0x40000000u;\n"
668    "         __sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);\n"
669    "         zExp = aExp;\n"
670    "         --zExp;\n"
671    "         return __normalizeRoundAndPackFloat64(aSign, zExp - 10, zFrac0, zFrac1);\n"
672    "      }\n"
673    "      if (expDiff < 0) {\n"
674    "         if (bExp == 0x7FF) {\n"
675    "            bool propagate = (bFracHi | bFracLo) != 0u;\n"
676    "            return mix(__packFloat64(aSign ^ 1u, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate);\n"
677    "         }\n"
678    "         expDiff = mix(expDiff, expDiff + 1, aExp == 0);\n"
679    "         aFracHi = mix(aFracHi | 0x40000000u, aFracHi, aExp == 0);\n"
680    "         __shift64RightJamming(aFracHi, aFracLo, - expDiff, aFracHi, aFracLo);\n"
681    "         bFracHi |= 0x40000000u;\n"
682    "         __sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);\n"
683    "         zExp = bExp;\n"
684    "         aSign ^= 1u;\n"
685    "         --zExp;\n"
686    "         return __normalizeRoundAndPackFloat64(aSign, zExp - 10, zFrac0, zFrac1);\n"
687    "      }\n"
688    "      if (aExp == 0x7FF) {\n"
689    "          bool propagate = (aFracHi | aFracLo | bFracHi | bFracLo) != 0u;\n"
690    "         return mix(0xFFFFFFFFFFFFFFFFUL, __propagateFloat64NaN(a, b), propagate);\n"
691    "      }\n"
692    "      bExp = mix(bExp, 1, aExp == 0);\n"
693    "      aExp = mix(aExp, 1, aExp == 0);\n"
694    "      bool zexp_normal = false;\n"
695    "      bool blta = true;\n"
696    "      if (bFracHi < aFracHi) {\n"
697    "         __sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);\n"
698    "         zexp_normal = true;\n"
699    "      }\n"
700    "      else if (aFracHi < bFracHi) {\n"
701    "         __sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);\n"
702    "         blta = false;\n"
703    "         zexp_normal = true;\n"
704    "      }\n"
705    "      else if (bFracLo < aFracLo) {\n"
706    "         __sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);\n"
707    "         zexp_normal = true;\n"
708    "      }\n"
709    "      else if (aFracLo < bFracLo) {\n"
710    "         __sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);\n"
711    "          blta = false;\n"
712    "          zexp_normal = true;\n"
713    "      }\n"
714    "      zExp = mix(bExp, aExp, blta);\n"
715    "      aSign = mix(aSign ^ 1u, aSign, blta);\n"
716    "      uint64_t retval_0 = __packFloat64(uint(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN), 0, 0u, 0u);\n"
717    "      uint64_t retval_1 = __normalizeRoundAndPackFloat64(aSign, zExp - 11, zFrac0, zFrac1);\n"
718    "      return mix(retval_0, retval_1, zexp_normal);\n"
719    "   }\n"
720    "}\n"
721    "\n"
722    "/* Multiplies `a' by `b' to obtain a 64-bit product.  The product is broken\n"
723    " * into two 32-bit pieces which are stored at the locations pointed to by\n"
724    " * `z0Ptr' and `z1Ptr'.\n"
725    " */\n"
726    "void\n"
727    "__mul32To64(uint a, uint b, out uint z0Ptr, out uint z1Ptr)\n"
728    "{\n"
729    "   uint aLow = a & 0x0000FFFFu;\n"
730    "   uint aHigh = a>>16;\n"
731    "   uint bLow = b & 0x0000FFFFu;\n"
732    "   uint bHigh = b>>16;\n"
733    "   uint z1 = aLow * bLow;\n"
734    "   uint zMiddleA = aLow * bHigh;\n"
735    "   uint zMiddleB = aHigh * bLow;\n"
736    "   uint z0 = aHigh * bHigh;\n"
737    "   zMiddleA += zMiddleB;\n"
738    "   z0 += ((uint(zMiddleA < zMiddleB)) << 16) + (zMiddleA >> 16);\n"
739    "   zMiddleA <<= 16;\n"
740    "   z1 += zMiddleA;\n"
741    "   z0 += uint(z1 < zMiddleA);\n"
742    "   z1Ptr = z1;\n"
743    "   z0Ptr = z0;\n"
744    "}\n"
745    "\n"
746    "/* Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the\n"
747    " * 64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit\n"
748    " * product.  The product is broken into four 32-bit pieces which are stored at\n"
749    " * the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.\n"
750    " */\n"
751    "void\n"
752    "__mul64To128(uint a0, uint a1, uint b0, uint b1,\n"
753    "             out uint z0Ptr,\n"
754    "             out uint z1Ptr,\n"
755    "             out uint z2Ptr,\n"
756    "             out uint z3Ptr)\n"
757    "{\n"
758    "   uint z0 = 0u;\n"
759    "   uint z1 = 0u;\n"
760    "   uint z2 = 0u;\n"
761    "   uint z3 = 0u;\n"
762    "   uint more1 = 0u;\n"
763    "   uint more2 = 0u;\n"
764    "\n"
765    "   __mul32To64(a1, b1, z2, z3);\n"
766    "   __mul32To64(a1, b0, z1, more2);\n"
767    "   __add64(z1, more2, 0u, z2, z1, z2);\n"
768    "   __mul32To64(a0, b0, z0, more1);\n"
769    "   __add64(z0, more1, 0u, z1, z0, z1);\n"
770    "   __mul32To64(a0, b1, more1, more2);\n"
771    "   __add64(more1, more2, 0u, z2, more1, z2);\n"
772    "   __add64(z0, z1, 0u, more1, z0, z1);\n"
773    "   z3Ptr = z3;\n"
774    "   z2Ptr = z2;\n"
775    "   z1Ptr = z1;\n"
776    "   z0Ptr = z0;\n"
777    "}\n"
778    "\n"
779    "/* Normalizes the subnormal double-precision floating-point value represented\n"
780    " * by the denormalized significand formed by the concatenation of `aFrac0' and\n"
781    " * `aFrac1'.  The normalized exponent is stored at the location pointed to by\n"
782    " * `zExpPtr'.  The most significant 21 bits of the normalized significand are\n"
783    " * stored at the location pointed to by `zFrac0Ptr', and the least significant\n"
784    " * 32 bits of the normalized significand are stored at the location pointed to\n"
785    " * by `zFrac1Ptr'.\n"
786    " */\n"
787    "void\n"
788    "__normalizeFloat64Subnormal(uint aFrac0, uint aFrac1,\n"
789    "                            out int zExpPtr,\n"
790    "                            out uint zFrac0Ptr,\n"
791    "                            out uint zFrac1Ptr)\n"
792    "{\n"
793    "   int shiftCount;\n"
794    "   uint temp_zfrac0, temp_zfrac1;\n"
795    "   shiftCount = __countLeadingZeros32(mix(aFrac0, aFrac1, aFrac0 == 0u)) - 11;\n"
796    "   zExpPtr = mix(1 - shiftCount, -shiftCount - 31, aFrac0 == 0u);\n"
797    "\n"
798    "   temp_zfrac0 = mix(aFrac1<<shiftCount, aFrac1>>(-shiftCount), shiftCount < 0);\n"
799    "   temp_zfrac1 = mix(0u, aFrac1<<(shiftCount & 31), shiftCount < 0);\n"
800    "\n"
801    "   __shortShift64Left(aFrac0, aFrac1, shiftCount, zFrac0Ptr, zFrac1Ptr);\n"
802    "\n"
803    "   zFrac0Ptr = mix(zFrac0Ptr, temp_zfrac0, aFrac0 == 0);\n"
804    "   zFrac1Ptr = mix(zFrac1Ptr, temp_zfrac1, aFrac0 == 0);\n"
805    "}\n"
806    "\n"
807    "/* Returns the result of multiplying the double-precision floating-point values\n"
808    " * `a' and `b'.  The operation is performed according to the IEEE Standard for\n"
809    " * Floating-Point Arithmetic.\n"
810    " */\n"
811    "uint64_t\n"
812    "__fmul64(uint64_t a, uint64_t b)\n"
813    "{\n"
814    "   uint zFrac0 = 0u;\n"
815    "   uint zFrac1 = 0u;\n"
816    "   uint zFrac2 = 0u;\n"
817    "   uint zFrac3 = 0u;\n"
818    "   int zExp;\n"
819    "\n"
820    "   uint aFracLo = __extractFloat64FracLo(a);\n"
821    "   uint aFracHi = __extractFloat64FracHi(a);\n"
822    "   uint bFracLo = __extractFloat64FracLo(b);\n"
823    "   uint bFracHi = __extractFloat64FracHi(b);\n"
824    "   int aExp = __extractFloat64Exp(a);\n"
825    "   uint aSign = __extractFloat64Sign(a);\n"
826    "   int bExp = __extractFloat64Exp(b);\n"
827    "   uint bSign = __extractFloat64Sign(b);\n"
828    "   uint zSign = aSign ^ bSign;\n"
829    "   if (aExp == 0x7FF) {\n"
830    "      if (((aFracHi | aFracLo) != 0u) ||\n"
831    "         ((bExp == 0x7FF) && ((bFracHi | bFracLo) != 0u))) {\n"
832    "         return __propagateFloat64NaN(a, b);\n"
833    "      }\n"
834    "      if ((uint(bExp) | bFracHi | bFracLo) == 0u)\n"
835    "            return 0xFFFFFFFFFFFFFFFFUL;\n"
836    "      return __packFloat64(zSign, 0x7FF, 0u, 0u);\n"
837    "   }\n"
838    "   if (bExp == 0x7FF) {\n"
839    "      if ((bFracHi | bFracLo) != 0u)\n"
840    "         return __propagateFloat64NaN(a, b);\n"
841    "      if ((uint(aExp) | aFracHi | aFracLo) == 0u)\n"
842    "         return 0xFFFFFFFFFFFFFFFFUL;\n"
843    "      return __packFloat64(zSign, 0x7FF, 0u, 0u);\n"
844    "   }\n"
845    "   if (aExp == 0) {\n"
846    "      if ((aFracHi | aFracLo) == 0u)\n"
847    "         return __packFloat64(zSign, 0, 0u, 0u);\n"
848    "      __normalizeFloat64Subnormal(aFracHi, aFracLo, aExp, aFracHi, aFracLo);\n"
849    "   }\n"
850    "   if (bExp == 0) {\n"
851    "      if ((bFracHi | bFracLo) == 0u)\n"
852    "         return __packFloat64(zSign, 0, 0u, 0u);\n"
853    "      __normalizeFloat64Subnormal(bFracHi, bFracLo, bExp, bFracHi, bFracLo);\n"
854    "   }\n"
855    "   zExp = aExp + bExp - 0x400;\n"
856    "   aFracHi |= 0x00100000u;\n"
857    "   __shortShift64Left(bFracHi, bFracLo, 12, bFracHi, bFracLo);\n"
858    "   __mul64To128(\n"
859    "      aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1, zFrac2, zFrac3);\n"
860    "   __add64(zFrac0, zFrac1, aFracHi, aFracLo, zFrac0, zFrac1);\n"
861    "   zFrac2 |= uint(zFrac3 != 0u);\n"
862    "   if (0x00200000u <= zFrac0) {\n"
863    "      __shift64ExtraRightJamming(\n"
864    "         zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2);\n"
865    "      ++zExp;\n"
866    "   }\n"
867    "   return __roundAndPackFloat64(zSign, zExp, zFrac0, zFrac1, zFrac2);\n"
868    "}\n"
869    "\n"
870    "uint64_t\n"
871    "__ffma64(uint64_t a, uint64_t b, uint64_t c)\n"
872    "{\n"
873    "   return __fadd64(__fmul64(a, b), c);\n"
874    "}\n"
875    "\n"
876    "/* Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the\n"
877    " * number of bits given in `count'.  Any bits shifted off are lost.  The value\n"
878    " * of `count' can be arbitrarily large; in particular, if `count' is greater\n"
879    " * than 64, the result will be 0.  The result is broken into two 32-bit pieces\n"
880    " * which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.\n"
881    " */\n"
882    "void\n"
883    "__shift64Right(uint a0, uint a1,\n"
884    "               int count,\n"
885    "               out uint z0Ptr,\n"
886    "               out uint z1Ptr)\n"
887    "{\n"
888    "   uint z0;\n"
889    "   uint z1;\n"
890    "   int negCount = (-count) & 31;\n"
891    "\n"
892    "   z0 = 0u;\n"
893    "   z0 = mix(z0, (a0 >> count), count < 32);\n"
894    "   z0 = mix(z0, a0, count == 0);\n"
895    "\n"
896    "   z1 = mix(0u, (a0 >> (count & 31)), count < 64);\n"
897    "   z1 = mix(z1, (a0<<negCount) | (a1>>count), count < 32);\n"
898    "   z1 = mix(z1, a0, count == 0);\n"
899    "\n"
900    "   z1Ptr = z1;\n"
901    "   z0Ptr = z0;\n"
902    "}\n"
903    "\n"
904    "/* Returns the result of converting the double-precision floating-point value\n"
905    " * `a' to the unsigned integer format.  The conversion is performed according\n"
906    " * to the IEEE Standard for Floating-Point Arithmetic.\n"
907    " */\n"
908    "uint\n"
909    "__fp64_to_uint(uint64_t a)\n"
910    "{\n"
911    "   uint aFracLo = __extractFloat64FracLo(a);\n"
912    "   uint aFracHi = __extractFloat64FracHi(a);\n"
913    "   int aExp = __extractFloat64Exp(a);\n"
914    "   uint aSign = __extractFloat64Sign(a);\n"
915    "\n"
916    "   if ((aExp == 0x7FF) && ((aFracHi | aFracLo) != 0u))\n"
917    "      return 0xFFFFFFFFu;\n"
918    "\n"
919    "   aFracHi |= mix(0u, 0x00100000u, aExp != 0);\n"
920    "\n"
921    "   int shiftDist = 0x427 - aExp;\n"
922    "   if (0 < shiftDist)\n"
923    "      __shift64RightJamming(aFracHi, aFracLo, shiftDist, aFracHi, aFracLo);\n"
924    "\n"
925    "   if ((aFracHi & 0xFFFFF000u) != 0u)\n"
926    "      return mix(~0u, 0u, (aSign != 0u));\n"
927    "\n"
928    "   uint z = 0u;\n"
929    "   uint zero = 0u;\n"
930    "   __shift64Right(aFracHi, aFracLo, 12, zero, z);\n"
931    "\n"
932    "   uint expt = mix(~0u, 0u, (aSign != 0u));\n"
933    "\n"
934    "   return mix(z, expt, (aSign != 0u) && (z != 0u));\n"
935    "}\n"
936    "\n"
937    "uint64_t\n"
938    "__uint_to_fp64(uint a)\n"
939    "{\n"
940    "   if (a == 0u)\n"
941    "      return 0ul;\n"
942    "\n"
943    "   int shiftDist = __countLeadingZeros32(a) + 21;\n"
944    "\n"
945    "   uint aHigh = 0u;\n"
946    "   uint aLow = 0u;\n"
947    "   int negCount = (- shiftDist) & 31;\n"
948    "\n"
949    "   aHigh = mix(0u, a<< shiftDist - 32, shiftDist < 64);\n"
950    "   aLow = 0u;\n"
951    "   aHigh = mix(aHigh, 0u, shiftDist == 0);\n"
952    "   aLow = mix(aLow, a, shiftDist ==0);\n"
953    "   aHigh = mix(aHigh, a >> negCount, shiftDist < 32);\n"
954    "   aLow = mix(aLow, a << shiftDist, shiftDist < 32);\n"
955    "\n"
956    "   return __packFloat64(0u, 0x432 - shiftDist, aHigh, aLow);\n"
957    "}\n"
958    "\n"
959    "uint64_t\n"
960    "__uint64_to_fp64(uint64_t a)\n"
961    "{\n"
962    "   if (a == 0u)\n"
963    "      return 0ul;\n"
964    "\n"
965    "   uvec2 aFrac = unpackUint2x32(a);\n"
966    "   uint aFracLo = __extractFloat64FracLo(a);\n"
967    "   uint aFracHi = __extractFloat64FracHi(a);\n"
968    "\n"
969    "   if ((aFracHi & 0x80000000u) != 0u) {\n"
970    "      __shift64RightJamming(aFracHi, aFracLo, 1, aFracHi, aFracLo);\n"
971    "      return __roundAndPackFloat64(0, 0x433, aFracHi, aFracLo, 0u);\n"
972    "   } else {\n"
973    "      return __normalizeRoundAndPackFloat64(0, 0x432, aFrac.y, aFrac.x);\n"
974    "   }\n"
975    "}\n"
976    "\n"
977    "uint64_t\n"
978    "__fp64_to_uint64(uint64_t a)\n"
979    "{\n"
980    "   uint aFracLo = __extractFloat64FracLo(a);\n"
981    "   uint aFracHi = __extractFloat64FracHi(a);\n"
982    "   int aExp = __extractFloat64Exp(a);\n"
983    "   uint aSign = __extractFloat64Sign(a);\n"
984    "   uint zFrac2 = 0u;\n"
985    "   uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;\n"
986    "\n"
987    "   aFracHi = mix(aFracHi, aFracHi | 0x00100000u, aExp != 0);\n"
988    "   int shiftCount = 0x433 - aExp;\n"
989    "\n"
990    "   if ( shiftCount <= 0 ) {\n"
991    "      if (shiftCount < -11 && aExp == 0x7FF) {\n"
992    "         if ((aFracHi | aFracLo) != 0u)\n"
993    "            return __propagateFloat64NaN(a, a);\n"
994    "         return mix(default_nan, a, aSign == 0u);\n"
995    "      }\n"
996    "      __shortShift64Left(aFracHi, aFracLo, -shiftCount, aFracHi, aFracLo);\n"
997    "   } else {\n"
998    "      __shift64ExtraRightJamming(aFracHi, aFracLo, zFrac2, shiftCount,\n"
999    "                                 aFracHi, aFracLo, zFrac2);\n"
1000    "   }\n"
1001    "   return __roundAndPackUInt64(aSign, aFracHi, aFracLo, zFrac2);\n"
1002    "}\n"
1003    "\n"
1004    "int64_t\n"
1005    "__fp64_to_int64(uint64_t a)\n"
1006    "{\n"
1007    "   uint zFrac2 = 0u;\n"
1008    "   uint aFracLo = __extractFloat64FracLo(a);\n"
1009    "   uint aFracHi = __extractFloat64FracHi(a);\n"
1010    "   int aExp = __extractFloat64Exp(a);\n"
1011    "   uint aSign = __extractFloat64Sign(a);\n"
1012    "   int64_t default_NegNaN = -0x7FFFFFFFFFFFFFFEL;\n"
1013    "   int64_t default_PosNaN = 0xFFFFFFFFFFFFFFFFL;\n"
1014    "\n"
1015    "   aFracHi = mix(aFracHi, aFracHi | 0x00100000u, aExp != 0);\n"
1016    "   int shiftCount = 0x433 - aExp;\n"
1017    "\n"
1018    "   if (shiftCount <= 0) {\n"
1019    "      if (shiftCount < -11 && aExp == 0x7FF) {\n"
1020    "         if ((aFracHi | aFracLo) != 0u)\n"
1021    "            return default_NegNaN;\n"
1022    "         return mix(default_NegNaN, default_PosNaN, aSign == 0u);\n"
1023    "      }\n"
1024    "      __shortShift64Left(aFracHi, aFracLo, -shiftCount, aFracHi, aFracLo);\n"
1025    "   } else {\n"
1026    "      __shift64ExtraRightJamming(aFracHi, aFracLo, zFrac2, shiftCount,\n"
1027    "                                 aFracHi, aFracLo, zFrac2);\n"
1028    "   }\n"
1029    "\n"
1030    "   return __roundAndPackInt64(aSign, aFracHi, aFracLo, zFrac2);\n"
1031    "}\n"
1032    "\n"
1033    "uint64_t\n"
1034    "__fp32_to_uint64(float f)\n"
1035    "{\n"
1036    "   uint a = floatBitsToUint(f);\n"
1037    "   uint aFrac = a & 0x007FFFFFu;\n"
1038    "   int aExp = int((a>>23) & 0xFFu);\n"
1039    "   uint aSign = a>>31;\n"
1040    "   uint zFrac0 = 0u;\n"
1041    "   uint zFrac1 = 0u;\n"
1042    "   uint zFrac2 = 0u;\n"
1043    "   uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;\n"
1044    "   int shiftCount = 0xBE - aExp;\n"
1045    "\n"
1046    "   if (shiftCount <0) {\n"
1047    "      if (aExp == 0xFF)\n"
1048    "         return default_nan;\n"
1049    "   }\n"
1050    "\n"
1051    "   aFrac = mix(aFrac, aFrac | 0x00800000u, aExp != 0);\n"
1052    "   __shortShift64Left(aFrac, 0, 40, zFrac0, zFrac1);\n"
1053    "\n"
1054    "   if (shiftCount != 0) {\n"
1055    "      __shift64ExtraRightJamming(zFrac0, zFrac1, zFrac2, shiftCount,\n"
1056    "                                 zFrac0, zFrac1, zFrac2);\n"
1057    "   }\n"
1058    "\n"
1059    "   return __roundAndPackUInt64(aSign, zFrac0, zFrac1, zFrac2);\n"
1060    "}\n"
1061    "\n"
1062    "int64_t\n"
1063    "__fp32_to_int64(float f)\n"
1064    "{\n"
1065    "   uint a = floatBitsToUint(f);\n"
1066    "   uint aFrac = a & 0x007FFFFFu;\n"
1067    "   int aExp = int((a>>23) & 0xFFu);\n"
1068    "   uint aSign = a>>31;\n"
1069    "   uint zFrac0 = 0u;\n"
1070    "   uint zFrac1 = 0u;\n"
1071    "   uint zFrac2 = 0u;\n"
1072    "   int64_t default_NegNaN = -0x7FFFFFFFFFFFFFFEL;\n"
1073    "   int64_t default_PosNaN = 0xFFFFFFFFFFFFFFFFL;\n"
1074    "   int shiftCount = 0xBE - aExp;\n"
1075    "\n"
1076    "   if (shiftCount <0) {\n"
1077    "      if (aExp == 0xFF && aFrac != 0u)\n"
1078    "         return default_NegNaN;\n"
1079    "      return mix(default_NegNaN, default_PosNaN, aSign == 0u);\n"
1080    "   }\n"
1081    "\n"
1082    "   aFrac = mix(aFrac, aFrac | 0x00800000u, aExp != 0);\n"
1083    "   __shortShift64Left(aFrac, 0, 40, zFrac0, zFrac1);\n"
1084    "\n"
1085    "   if (shiftCount != 0) {\n"
1086    "      __shift64ExtraRightJamming(zFrac0, zFrac1, zFrac2, shiftCount,\n"
1087    "                                 zFrac0, zFrac1, zFrac2);\n"
1088    "   }\n"
1089    "\n"
1090    "   return __roundAndPackInt64(aSign, zFrac0, zFrac1, zFrac2);\n"
1091    "}\n"
1092    "\n"
1093    "uint64_t\n"
1094    "__int64_to_fp64(int64_t a)\n"
1095    "{\n"
1096    "   if (a==0)\n"
1097    "      return 0ul;\n"
1098    "\n"
1099    "   uint64_t absA = mix(uint64_t(a), uint64_t(-a), a < 0);\n"
1100    "   uint aFracHi = __extractFloat64FracHi(absA);\n"
1101    "   uvec2 aFrac = unpackUint2x32(absA);\n"
1102    "   uint zSign = uint(a < 0);\n"
1103    "\n"
1104    "   if ((aFracHi & 0x80000000u) != 0u) {\n"
1105    "      return mix(0ul, __packFloat64(1, 0x434, 0u, 0u), a < 0);\n"
1106    "   }\n"
1107    "\n"
1108    "   return __normalizeRoundAndPackFloat64(zSign, 0x432, aFrac.y, aFrac.x);\n"
1109    "}\n"
1110    "\n"
1111    "/* Returns the result of converting the double-precision floating-point value\n"
1112    " * `a' to the 32-bit two's complement integer format.  The conversion is\n"
1113    " * performed according to the IEEE Standard for Floating-Point Arithmetic---\n"
1114    " * which means in particular that the conversion is rounded according to the\n"
1115    " * current rounding mode.  If `a' is a NaN, the largest positive integer is\n"
1116    " * returned.  Otherwise, if the conversion overflows, the largest integer with\n"
1117    " * the same sign as `a' is returned.\n"
1118    " */\n"
1119    "int\n"
1120    "__fp64_to_int(uint64_t a)\n"
1121    "{\n"
1122    "   uint aFracLo = __extractFloat64FracLo(a);\n"
1123    "   uint aFracHi = __extractFloat64FracHi(a);\n"
1124    "   int aExp = __extractFloat64Exp(a);\n"
1125    "   uint aSign = __extractFloat64Sign(a);\n"
1126    "\n"
1127    "   uint absZ = 0u;\n"
1128    "   uint aFracExtra = 0u;\n"
1129    "   int shiftCount = aExp - 0x413;\n"
1130    "\n"
1131    "   if (0 <= shiftCount) {\n"
1132    "      if (0x41E < aExp) {\n"
1133    "         if ((aExp == 0x7FF) && bool(aFracHi | aFracLo))\n"
1134    "            aSign = 0u;\n"
1135    "         return mix(0x7FFFFFFF, 0x80000000, bool(aSign));\n"
1136    "      }\n"
1137    "      __shortShift64Left(aFracHi | 0x00100000u, aFracLo, shiftCount, absZ, aFracExtra);\n"
1138    "   } else {\n"
1139    "      if (aExp < 0x3FF)\n"
1140    "         return 0;\n"
1141    "\n"
1142    "      aFracHi |= 0x00100000u;\n"
1143    "      aFracExtra = ( aFracHi << (shiftCount & 31)) | aFracLo;\n"
1144    "      absZ = aFracHi >> (- shiftCount);\n"
1145    "   }\n"
1146    "\n"
1147    "   int z = mix(int(absZ), -int(absZ), (aSign != 0u));\n"
1148    "   int nan = mix(0x7FFFFFFF, 0x80000000, bool(aSign));\n"
1149    "   return mix(z, nan, bool(aSign ^ uint(z < 0)) && bool(z));\n"
1150    "}\n"
1151    "\n"
1152    "/* Returns the result of converting the 32-bit two's complement integer `a'\n"
1153    " * to the double-precision floating-point format.  The conversion is performed\n"
1154    " * according to the IEEE Standard for Floating-Point Arithmetic.\n"
1155    " */\n"
1156    "uint64_t\n"
1157    "__int_to_fp64(int a)\n"
1158    "{\n"
1159    "   uint zFrac0 = 0u;\n"
1160    "   uint zFrac1 = 0u;\n"
1161    "   if (a==0)\n"
1162    "      return __packFloat64(0u, 0, 0u, 0u);\n"
1163    "   uint zSign = uint(a < 0);\n"
1164    "   uint absA = mix(uint(a), uint(-a), a < 0);\n"
1165    "   int shiftCount = __countLeadingZeros32(absA) - 11;\n"
1166    "   if (0 <= shiftCount) {\n"
1167    "      zFrac0 = absA << shiftCount;\n"
1168    "      zFrac1 = 0u;\n"
1169    "   } else {\n"
1170    "      __shift64Right(absA, 0u, -shiftCount, zFrac0, zFrac1);\n"
1171    "   }\n"
1172    "   return __packFloat64(zSign, 0x412 - shiftCount, zFrac0, zFrac1);\n"
1173    "}\n"
1174    "\n"
1175    "bool\n"
1176    "__fp64_to_bool(uint64_t a)\n"
1177    "{\n"
1178    "   return !__feq64_nonnan(__fabs64(a), 0ul);\n"
1179    "}\n"
1180    "\n"
1181    "uint64_t\n"
1182    "__bool_to_fp64(bool a)\n"
1183    "{\n"
1184    "   return __int_to_fp64(int(a));\n"
1185    "}\n"
1186    "\n"
1187    "/* Packs the sign `zSign', exponent `zExp', and significand `zFrac' into a\n"
1188    " * single-precision floating-point value, returning the result.  After being\n"
1189    " * shifted into the proper positions, the three fields are simply added\n"
1190    " * together to form the result.  This means that any integer portion of `zSig'\n"
1191    " * will be added into the exponent.  Since a properly normalized significand\n"
1192    " * will have an integer portion equal to 1, the `zExp' input should be 1 less\n"
1193    " * than the desired result exponent whenever `zFrac' is a complete, normalized\n"
1194    " * significand.\n"
1195    " */\n"
1196    "float\n"
1197    "__packFloat32(uint zSign, int zExp, uint zFrac)\n"
1198    "{\n"
1199    "   return uintBitsToFloat((zSign<<31) + (uint(zExp)<<23) + zFrac);\n"
1200    "}\n"
1201    "\n"
1202    "/* Takes an abstract floating-point value having sign `zSign', exponent `zExp',\n"
1203    " * and significand `zFrac', and returns the proper single-precision floating-\n"
1204    " * point value corresponding to the abstract input.  Ordinarily, the abstract\n"
1205    " * value is simply rounded and packed into the single-precision format, with\n"
1206    " * the inexact exception raised if the abstract input cannot be represented\n"
1207    " * exactly.  However, if the abstract value is too large, the overflow and\n"
1208    " * inexact exceptions are raised and an infinity or maximal finite value is\n"
1209    " * returned.  If the abstract value is too small, the input value is rounded to\n"
1210    " * a subnormal number, and the underflow and inexact exceptions are raised if\n"
1211    " * the abstract input cannot be represented exactly as a subnormal single-\n"
1212    " * precision floating-point number.\n"
1213    " *     The input significand `zFrac' has its binary point between bits 30\n"
1214    " * and 29, which is 7 bits to the left of the usual location.  This shifted\n"
1215    " * significand must be normalized or smaller.  If `zFrac' is not normalized,\n"
1216    " * `zExp' must be 0; in that case, the result returned is a subnormal number,\n"
1217    " * and it must not require rounding.  In the usual case that `zFrac' is\n"
1218    " * normalized, `zExp' must be 1 less than the \"true\" floating-point exponent.\n"
1219    " * The handling of underflow and overflow follows the IEEE Standard for\n"
1220    " * Floating-Point Arithmetic.\n"
1221    " */\n"
1222    "float\n"
1223    "__roundAndPackFloat32(uint zSign, int zExp, uint zFrac)\n"
1224    "{\n"
1225    "   bool roundNearestEven;\n"
1226    "   int roundIncrement;\n"
1227    "   int roundBits;\n"
1228    "\n"
1229    "   roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;\n"
1230    "   roundIncrement = 0x40;\n"
1231    "   if (!roundNearestEven) {\n"
1232    "      if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) {\n"
1233    "         roundIncrement = 0;\n"
1234    "      } else {\n"
1235    "         roundIncrement = 0x7F;\n"
1236    "         if (zSign != 0u) {\n"
1237    "            if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP)\n"
1238    "               roundIncrement = 0;\n"
1239    "         } else {\n"
1240    "            if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN)\n"
1241    "               roundIncrement = 0;\n"
1242    "         }\n"
1243    "      }\n"
1244    "   }\n"
1245    "   roundBits = int(zFrac & 0x7Fu);\n"
1246    "   if (0xFDu <= uint(zExp)) {\n"
1247    "      if ((0xFD < zExp) || ((zExp == 0xFD) && (int(zFrac) + roundIncrement) < 0))\n"
1248    "         return __packFloat32(zSign, 0xFF, 0u) - float(roundIncrement == 0);\n"
1249    "      int count = -zExp;\n"
1250    "      bool zexp_lt0 = zExp < 0;\n"
1251    "      uint zFrac_lt0 = mix(uint(zFrac != 0u), (zFrac>>count) | uint((zFrac<<((-count) & 31)) != 0u), (-zExp) < 32);\n"
1252    "      zFrac = mix(zFrac, zFrac_lt0, zexp_lt0);\n"
1253    "      roundBits = mix(roundBits, int(zFrac) & 0x7f, zexp_lt0);\n"
1254    "      zExp = mix(zExp, 0, zexp_lt0);\n"
1255    "   }\n"
1256    "   zFrac = (zFrac + uint(roundIncrement))>>7;\n"
1257    "   zFrac &= ~uint(((roundBits ^ 0x40) == 0) && roundNearestEven);\n"
1258    "\n"
1259    "   return __packFloat32(zSign, mix(zExp, 0, zFrac == 0u), zFrac);\n"
1260    "}\n"
1261    "\n"
1262    "/* Returns the result of converting the double-precision floating-point value\n"
1263    " * `a' to the single-precision floating-point format.  The conversion is\n"
1264    " * performed according to the IEEE Standard for Floating-Point Arithmetic.\n"
1265    " */\n"
1266    "float\n"
1267    "__fp64_to_fp32(uint64_t __a)\n"
1268    "{\n"
1269    "   uvec2 a = unpackUint2x32(__a);\n"
1270    "   uint zFrac = 0u;\n"
1271    "   uint allZero = 0u;\n"
1272    "\n"
1273    "   uint aFracLo = __extractFloat64FracLo(__a);\n"
1274    "   uint aFracHi = __extractFloat64FracHi(__a);\n"
1275    "   int aExp = __extractFloat64Exp(__a);\n"
1276    "   uint aSign = __extractFloat64Sign(__a);\n"
1277    "   if (aExp == 0x7FF) {\n"
1278    "      __shortShift64Left(a.y, a.x, 12, a.y, a.x);\n"
1279    "      float rval = uintBitsToFloat((aSign<<31) | 0x7FC00000u | (a.y>>9));\n"
1280    "      rval = mix(__packFloat32(aSign, 0xFF, 0u), rval, (aFracHi | aFracLo) != 0u);\n"
1281    "      return rval;\n"
1282    "   }\n"
1283    "   __shift64RightJamming(aFracHi, aFracLo, 22, allZero, zFrac);\n"
1284    "   zFrac = mix(zFrac, zFrac | 0x40000000u, aExp != 0);\n"
1285    "   return __roundAndPackFloat32(aSign, aExp - 0x381, zFrac);\n"
1286    "}\n"
1287    "\n"
1288    "float\n"
1289    "__uint64_to_fp32(uint64_t __a)\n"
1290    "{\n"
1291    "   uint zFrac = 0u;\n"
1292    "   uvec2 aFrac = unpackUint2x32(__a);\n"
1293    "   int shiftCount = __countLeadingZeros32(mix(aFrac.y, aFrac.x, aFrac.y == 0u));\n"
1294    "   shiftCount -= mix(40, 8, aFrac.y == 0u);\n"
1295    "\n"
1296    "   if (0 <= shiftCount) {\n"
1297    "      __shortShift64Left(aFrac.y, aFrac.x, shiftCount, aFrac.y, aFrac.x);\n"
1298    "      bool is_zero = (aFrac.y | aFrac.x) == 0u;\n"
1299    "      return mix(__packFloat32(0u, 0x95 - shiftCount, aFrac.x), 0, is_zero);\n"
1300    "   }\n"
1301    "\n"
1302    "   shiftCount += 7;\n"
1303    "   __shift64RightJamming(aFrac.y, aFrac.x, -shiftCount, aFrac.y, aFrac.x);\n"
1304    "   zFrac = mix(aFrac.x<<shiftCount, aFrac.x, shiftCount < 0);\n"
1305    "   return __roundAndPackFloat32(0u, 0x9C - shiftCount, zFrac);\n"
1306    "}\n"
1307    "\n"
1308    "float\n"
1309    "__int64_to_fp32(int64_t __a)\n"
1310    "{\n"
1311    "   uint zFrac = 0u;\n"
1312    "   uint aSign = uint(__a < 0);\n"
1313    "   uint64_t absA = mix(uint64_t(__a), uint64_t(-__a), __a < 0);\n"
1314    "   uvec2 aFrac = unpackUint2x32(absA);\n"
1315    "   int shiftCount = __countLeadingZeros32(mix(aFrac.y, aFrac.x, aFrac.y == 0u));\n"
1316    "   shiftCount -= mix(40, 8, aFrac.y == 0u);\n"
1317    "\n"
1318    "   if (0 <= shiftCount) {\n"
1319    "      __shortShift64Left(aFrac.y, aFrac.x, shiftCount, aFrac.y, aFrac.x);\n"
1320    "      bool is_zero = (aFrac.y | aFrac.x) == 0u;\n"
1321    "      return mix(__packFloat32(aSign, 0x95 - shiftCount, aFrac.x), 0, absA == 0u);\n"
1322    "   }\n"
1323    "\n"
1324    "   shiftCount += 7;\n"
1325    "   __shift64RightJamming(aFrac.y, aFrac.x, -shiftCount, aFrac.y, aFrac.x);\n"
1326    "   zFrac = mix(aFrac.x<<shiftCount, aFrac.x, shiftCount < 0);\n"
1327    "   return __roundAndPackFloat32(aSign, 0x9C - shiftCount, zFrac);\n"
1328    "}\n"
1329    "\n"
1330    "/* Returns the result of converting the single-precision floating-point value\n"
1331    " * `a' to the double-precision floating-point format.\n"
1332    " */\n"
1333    "uint64_t\n"
1334    "__fp32_to_fp64(float f)\n"
1335    "{\n"
1336    "   uint a = floatBitsToUint(f);\n"
1337    "   uint aFrac = a & 0x007FFFFFu;\n"
1338    "   int aExp = int((a>>23) & 0xFFu);\n"
1339    "   uint aSign = a>>31;\n"
1340    "   uint zFrac0 = 0u;\n"
1341    "   uint zFrac1 = 0u;\n"
1342    "\n"
1343    "   if (aExp == 0xFF) {\n"
1344    "      if (aFrac != 0u) {\n"
1345    "         uint nanLo = 0u;\n"
1346    "         uint nanHi = a<<9;\n"
1347    "         __shift64Right(nanHi, nanLo, 12, nanHi, nanLo);\n"
1348    "         nanHi |= ((aSign<<31) | 0x7FF80000u);\n"
1349    "         return packUint2x32(uvec2(nanLo, nanHi));\n"
1350    "      }\n"
1351    "      return __packFloat64(aSign, 0x7FF, 0u, 0u);\n"
1352    "    }\n"
1353    "\n"
1354    "   if (aExp == 0) {\n"
1355    "      if (aFrac == 0u)\n"
1356    "         return __packFloat64(aSign, 0, 0u, 0u);\n"
1357    "      /* Normalize subnormal */\n"
1358    "      int shiftCount = __countLeadingZeros32(aFrac) - 8;\n"
1359    "      aFrac <<= shiftCount;\n"
1360    "      aExp = 1 - shiftCount;\n"
1361    "      --aExp;\n"
1362    "   }\n"
1363    "\n"
1364    "   __shift64Right(aFrac, 0u, 3, zFrac0, zFrac1);\n"
1365    "   return __packFloat64(aSign, aExp + 0x380, zFrac0, zFrac1);\n"
1366    "}\n"
1367    "\n"
1368    "/* Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the\n"
1369    " * 96-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is\n"
1370    " * modulo 2^96, so any carry out is lost.  The result is broken into three\n"
1371    " * 32-bit pieces which are stored at the locations pointed to by `z0Ptr',\n"
1372    " * `z1Ptr', and `z2Ptr'.\n"
1373    " */\n"
1374    "void\n"
1375    "__add96(uint a0, uint a1, uint a2,\n"
1376    "        uint b0, uint b1, uint b2,\n"
1377    "        out uint z0Ptr,\n"
1378    "        out uint z1Ptr,\n"
1379    "        out uint z2Ptr)\n"
1380    "{\n"
1381    "   uint z2 = a2 + b2;\n"
1382    "   uint carry1 = uint(z2 < a2);\n"
1383    "   uint z1 = a1 + b1;\n"
1384    "   uint carry0 = uint(z1 < a1);\n"
1385    "   uint z0 = a0 + b0;\n"
1386    "   z1 += carry1;\n"
1387    "   z0 += uint(z1 < carry1);\n"
1388    "   z0 += carry0;\n"
1389    "   z2Ptr = z2;\n"
1390    "   z1Ptr = z1;\n"
1391    "   z0Ptr = z0;\n"
1392    "}\n"
1393    "\n"
1394    "/* Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from\n"
1395    " * the 96-bit value formed by concatenating `a0', `a1', and `a2'.  Subtraction\n"
1396    " * is modulo 2^96, so any borrow out (carry out) is lost.  The result is broken\n"
1397    " * into three 32-bit pieces which are stored at the locations pointed to by\n"
1398    " * `z0Ptr', `z1Ptr', and `z2Ptr'.\n"
1399    " */\n"
1400    "void\n"
1401    "__sub96(uint a0, uint a1, uint a2,\n"
1402    "        uint b0, uint b1, uint b2,\n"
1403    "        out uint z0Ptr,\n"
1404    "        out uint z1Ptr,\n"
1405    "        out uint z2Ptr)\n"
1406    "{\n"
1407    "   uint z2 = a2 - b2;\n"
1408    "   uint borrow1 = uint(a2 < b2);\n"
1409    "   uint z1 = a1 - b1;\n"
1410    "   uint borrow0 = uint(a1 < b1);\n"
1411    "   uint z0 = a0 - b0;\n"
1412    "   z0 -= uint(z1 < borrow1);\n"
1413    "   z1 -= borrow1;\n"
1414    "   z0 -= borrow0;\n"
1415    "   z2Ptr = z2;\n"
1416    "   z1Ptr = z1;\n"
1417    "   z0Ptr = z0;\n"
1418    "}\n"
1419    "\n"
1420    "/* Returns an approximation to the 32-bit integer quotient obtained by dividing\n"
1421    " * `b' into the 64-bit value formed by concatenating `a0' and `a1'.  The\n"
1422    " * divisor `b' must be at least 2^31.  If q is the exact quotient truncated\n"
1423    " * toward zero, the approximation returned lies between q and q + 2 inclusive.\n"
1424    " * If the exact quotient q is larger than 32 bits, the maximum positive 32-bit\n"
1425    " * unsigned integer is returned.\n"
1426    " */\n"
1427    "uint\n"
1428    "__estimateDiv64To32(uint a0, uint a1, uint b)\n"
1429    "{\n"
1430    "   uint b0;\n"
1431    "   uint b1;\n"
1432    "   uint rem0 = 0u;\n"
1433    "   uint rem1 = 0u;\n"
1434    "   uint term0 = 0u;\n"
1435    "   uint term1 = 0u;\n"
1436    "   uint z;\n"
1437    "\n"
1438    "   if (b <= a0)\n"
1439    "      return 0xFFFFFFFFu;\n"
1440    "   b0 = b>>16;\n"
1441    "   z = (b0<<16 <= a0) ? 0xFFFF0000u : (a0 / b0)<<16;\n"
1442    "   __mul32To64(b, z, term0, term1);\n"
1443    "   __sub64(a0, a1, term0, term1, rem0, rem1);\n"
1444    "   while (int(rem0) < 0) {\n"
1445    "      z -= 0x10000u;\n"
1446    "      b1 = b<<16;\n"
1447    "      __add64(rem0, rem1, b0, b1, rem0, rem1);\n"
1448    "   }\n"
1449    "   rem0 = (rem0<<16) | (rem1>>16);\n"
1450    "   z |= (b0<<16 <= rem0) ? 0xFFFFu : rem0 / b0;\n"
1451    "   return z;\n"
1452    "}\n"
1453    "\n"
1454    "uint\n"
1455    "__sqrtOddAdjustments(int index)\n"
1456    "{\n"
1457    "   uint res = 0u;\n"
1458    "   if (index == 0)\n"
1459    "      res = 0x0004u;\n"
1460    "   if (index == 1)\n"
1461    "      res = 0x0022u;\n"
1462    "   if (index == 2)\n"
1463    "      res = 0x005Du;\n"
1464    "   if (index == 3)\n"
1465    "      res = 0x00B1u;\n"
1466    "   if (index == 4)\n"
1467    "      res = 0x011Du;\n"
1468    "   if (index == 5)\n"
1469    "      res = 0x019Fu;\n"
1470    "   if (index == 6)\n"
1471    "      res = 0x0236u;\n"
1472    "   if (index == 7)\n"
1473    "      res = 0x02E0u;\n"
1474    "   if (index == 8)\n"
1475    "      res = 0x039Cu;\n"
1476    "   if (index == 9)\n"
1477    "      res = 0x0468u;\n"
1478    "   if (index == 10)\n"
1479    "      res = 0x0545u;\n"
1480    "   if (index == 11)\n"
1481    "      res = 0x631u;\n"
1482    "   if (index == 12)\n"
1483    "      res = 0x072Bu;\n"
1484    "   if (index == 13)\n"
1485    "      res = 0x0832u;\n"
1486    "   if (index == 14)\n"
1487    "      res = 0x0946u;\n"
1488    "   if (index == 15)\n"
1489    "      res = 0x0A67u;\n"
1490    "\n"
1491    "   return res;\n"
1492    "}\n"
1493    "\n"
1494    "uint\n"
1495    "__sqrtEvenAdjustments(int index)\n"
1496    "{\n"
1497    "   uint res = 0u;\n"
1498    "   if (index == 0)\n"
1499    "      res = 0x0A2Du;\n"
1500    "   if (index == 1)\n"
1501    "      res = 0x08AFu;\n"
1502    "   if (index == 2)\n"
1503    "      res = 0x075Au;\n"
1504    "   if (index == 3)\n"
1505    "      res = 0x0629u;\n"
1506    "   if (index == 4)\n"
1507    "      res = 0x051Au;\n"
1508    "   if (index == 5)\n"
1509    "      res = 0x0429u;\n"
1510    "   if (index == 6)\n"
1511    "      res = 0x0356u;\n"
1512    "   if (index == 7)\n"
1513    "      res = 0x029Eu;\n"
1514    "   if (index == 8)\n"
1515    "      res = 0x0200u;\n"
1516    "   if (index == 9)\n"
1517    "      res = 0x0179u;\n"
1518    "   if (index == 10)\n"
1519    "      res = 0x0109u;\n"
1520    "   if (index == 11)\n"
1521    "      res = 0x00AFu;\n"
1522    "   if (index == 12)\n"
1523    "      res = 0x0068u;\n"
1524    "   if (index == 13)\n"
1525    "      res = 0x0034u;\n"
1526    "   if (index == 14)\n"
1527    "      res = 0x0012u;\n"
1528    "   if (index == 15)\n"
1529    "      res = 0x0002u;\n"
1530    "\n"
1531    "   return res;\n"
1532    "}\n"
1533    "\n"
1534    "/* Returns an approximation to the square root of the 32-bit significand given\n"
1535    " * by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of\n"
1536    " * `aExp' (the least significant bit) is 1, the integer returned approximates\n"
1537    " * 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'\n"
1538    " * is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either\n"
1539    " * case, the approximation returned lies strictly within +/-2 of the exact\n"
1540    " * value.\n"
1541    " */\n"
1542    "uint\n"
1543    "__estimateSqrt32(int aExp, uint a)\n"
1544    "{\n"
1545    "   uint z;\n"
1546    "\n"
1547    "   int index = int(a>>27 & 15u);\n"
1548    "   if ((aExp & 1) != 0) {\n"
1549    "      z = 0x4000u + (a>>17) - __sqrtOddAdjustments(index);\n"
1550    "      z = ((a / z)<<14) + (z<<15);\n"
1551    "      a >>= 1;\n"
1552    "   } else {\n"
1553    "      z = 0x8000u + (a>>17) - __sqrtEvenAdjustments(index);\n"
1554    "      z = a / z + z;\n"
1555    "      z = (0x20000u <= z) ? 0xFFFF8000u : (z<<15);\n"
1556    "      if (z <= a)\n"
1557    "         return uint(int(a)>>1);\n"
1558    "   }\n"
1559    "   return ((__estimateDiv64To32(a, 0u, z))>>1) + (z>>1);\n"
1560    "}\n"
1561    "\n"
1562    "/* Returns the square root of the double-precision floating-point value `a'.\n"
1563    " * The operation is performed according to the IEEE Standard for Floating-Point\n"
1564    " * Arithmetic.\n"
1565    " */\n"
1566    "uint64_t\n"
1567    "__fsqrt64(uint64_t a)\n"
1568    "{\n"
1569    "   uint zFrac0 = 0u;\n"
1570    "   uint zFrac1 = 0u;\n"
1571    "   uint zFrac2 = 0u;\n"
1572    "   uint doubleZFrac0 = 0u;\n"
1573    "   uint rem0 = 0u;\n"
1574    "   uint rem1 = 0u;\n"
1575    "   uint rem2 = 0u;\n"
1576    "   uint rem3 = 0u;\n"
1577    "   uint term0 = 0u;\n"
1578    "   uint term1 = 0u;\n"
1579    "   uint term2 = 0u;\n"
1580    "   uint term3 = 0u;\n"
1581    "   uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;\n"
1582    "\n"
1583    "   uint aFracLo = __extractFloat64FracLo(a);\n"
1584    "   uint aFracHi = __extractFloat64FracHi(a);\n"
1585    "   int aExp = __extractFloat64Exp(a);\n"
1586    "   uint aSign = __extractFloat64Sign(a);\n"
1587    "   if (aExp == 0x7FF) {\n"
1588    "      if ((aFracHi | aFracLo) != 0u)\n"
1589    "         return __propagateFloat64NaN(a, a);\n"
1590    "      if (aSign == 0u)\n"
1591    "         return a;\n"
1592    "      return default_nan;\n"
1593    "   }\n"
1594    "   if (aSign != 0u) {\n"
1595    "      if ((uint(aExp) | aFracHi | aFracLo) == 0u)\n"
1596    "         return a;\n"
1597    "      return default_nan;\n"
1598    "   }\n"
1599    "   if (aExp == 0) {\n"
1600    "      if ((aFracHi | aFracLo) == 0u)\n"
1601    "         return __packFloat64(0u, 0, 0u, 0u);\n"
1602    "      __normalizeFloat64Subnormal(aFracHi, aFracLo, aExp, aFracHi, aFracLo);\n"
1603    "   }\n"
1604    "   int zExp = ((aExp - 0x3FF)>>1) + 0x3FE;\n"
1605    "   aFracHi |= 0x00100000u;\n"
1606    "   __shortShift64Left(aFracHi, aFracLo, 11, term0, term1);\n"
1607    "   zFrac0 = (__estimateSqrt32(aExp, term0)>>1) + 1u;\n"
1608    "   if (zFrac0 == 0u)\n"
1609    "      zFrac0 = 0x7FFFFFFFu;\n"
1610    "   doubleZFrac0 = zFrac0 + zFrac0;\n"
1611    "   __shortShift64Left(aFracHi, aFracLo, 9 - (aExp & 1), aFracHi, aFracLo);\n"
1612    "   __mul32To64(zFrac0, zFrac0, term0, term1);\n"
1613    "   __sub64(aFracHi, aFracLo, term0, term1, rem0, rem1);\n"
1614    "   while (int(rem0) < 0) {\n"
1615    "      --zFrac0;\n"
1616    "      doubleZFrac0 -= 2u;\n"
1617    "      __add64(rem0, rem1, 0u, doubleZFrac0 | 1u, rem0, rem1);\n"
1618    "   }\n"
1619    "   zFrac1 = __estimateDiv64To32(rem1, 0u, doubleZFrac0);\n"
1620    "   if ((zFrac1 & 0x1FFu) <= 5u) {\n"
1621    "      if (zFrac1 == 0u)\n"
1622    "         zFrac1 = 1u;\n"
1623    "      __mul32To64(doubleZFrac0, zFrac1, term1, term2);\n"
1624    "      __sub64(rem1, 0u, term1, term2, rem1, rem2);\n"
1625    "      __mul32To64(zFrac1, zFrac1, term2, term3);\n"
1626    "      __sub96(rem1, rem2, 0u, 0u, term2, term3, rem1, rem2, rem3);\n"
1627    "      while (int(rem1) < 0) {\n"
1628    "         --zFrac1;\n"
1629    "         __shortShift64Left(0u, zFrac1, 1, term2, term3);\n"
1630    "         term3 |= 1u;\n"
1631    "         term2 |= doubleZFrac0;\n"
1632    "         __add96(rem1, rem2, rem3, 0u, term2, term3, rem1, rem2, rem3);\n"
1633    "      }\n"
1634    "      zFrac1 |= uint((rem1 | rem2 | rem3) != 0u);\n"
1635    "   }\n"
1636    "   __shift64ExtraRightJamming(zFrac0, zFrac1, 0u, 10, zFrac0, zFrac1, zFrac2);\n"
1637    "   return __roundAndPackFloat64(0u, zExp, zFrac0, zFrac1, zFrac2);\n"
1638    "}\n"
1639    "\n"
1640    "uint64_t\n"
1641    "__ftrunc64(uint64_t __a)\n"
1642    "{\n"
1643    "   uvec2 a = unpackUint2x32(__a);\n"
1644    "   int aExp = __extractFloat64Exp(__a);\n"
1645    "   uint zLo;\n"
1646    "   uint zHi;\n"
1647    "\n"
1648    "   int unbiasedExp = aExp - 1023;\n"
1649    "   int fracBits = 52 - unbiasedExp;\n"
1650    "   uint maskLo = mix(~0u << fracBits, 0u, fracBits >= 32);\n"
1651    "   uint maskHi = mix(~0u << (fracBits - 32), ~0u, fracBits < 33);\n"
1652    "   zLo = maskLo & a.x;\n"
1653    "   zHi = maskHi & a.y;\n"
1654    "\n"
1655    "   zLo = mix(zLo, 0u, unbiasedExp < 0);\n"
1656    "   zHi = mix(zHi, 0u, unbiasedExp < 0);\n"
1657    "   zLo = mix(zLo, a.x, unbiasedExp > 52);\n"
1658    "   zHi = mix(zHi, a.y, unbiasedExp > 52);\n"
1659    "   return packUint2x32(uvec2(zLo, zHi));\n"
1660    "}\n"
1661    "\n"
1662    "uint64_t\n"
1663    "__ffloor64(uint64_t a)\n"
1664    "{\n"
1665    "   bool is_positive = __fge64(a, 0ul);\n"
1666    "   uint64_t tr = __ftrunc64(a);\n"
1667    "\n"
1668    "   if (is_positive || __feq64(tr, a)) {\n"
1669    "      return tr;\n"
1670    "   } else {\n"
1671    "      return __fadd64(tr, 0xbff0000000000000ul /* -1.0 */);\n"
1672    "   }\n"
1673    "}\n"
1674    "\n"
1675    "uint64_t\n"
1676    "__fround64(uint64_t __a)\n"
1677    "{\n"
1678    "   uvec2 a = unpackUint2x32(__a);\n"
1679    "   int unbiasedExp = __extractFloat64Exp(__a) - 1023;\n"
1680    "   uint aHi = a.y;\n"
1681    "   uint aLo = a.x;\n"
1682    "\n"
1683    "   if (unbiasedExp < 20) {\n"
1684    "      if (unbiasedExp < 0) {\n"
1685    "         if ((aHi & 0x80000000u) != 0u && aLo == 0u) {\n"
1686    "            return 0;\n"
1687    "         }\n"
1688    "         aHi &= 0x80000000u;\n"
1689    "         if ((a.y & 0x000FFFFFu) == 0u && a.x == 0u) {\n"
1690    "            aLo = 0u;\n"
1691    "            return packUint2x32(uvec2(aLo, aHi));\n"
1692    "         }\n"
1693    "         aHi = mix(aHi, (aHi | 0x3FF00000u), unbiasedExp == -1);\n"
1694    "         aLo = 0u;\n"
1695    "      } else {\n"
1696    "         uint maskExp = 0x000FFFFFu >> unbiasedExp;\n"
1697    "         uint lastBit = maskExp + 1;\n"
1698    "         aHi += 0x00080000u >> unbiasedExp;\n"
1699    "         if ((aHi & maskExp) == 0u)\n"
1700    "            aHi &= ~lastBit;\n"
1701    "         aHi &= ~maskExp;\n"
1702    "         aLo = 0u;\n"
1703    "      }\n"
1704    "   } else if (unbiasedExp > 51 || unbiasedExp == 1024) {\n"
1705    "      return __a;\n"
1706    "   } else {\n"
1707    "      uint maskExp = 0xFFFFFFFFu >> (unbiasedExp - 20);\n"
1708    "      if ((aLo & maskExp) == 0u)\n"
1709    "         return __a;\n"
1710    "      uint tmp = aLo + (1u << (51 - unbiasedExp));\n"
1711    "      if(tmp < aLo)\n"
1712    "         aHi += 1u;\n"
1713    "      aLo = tmp;\n"
1714    "      aLo &= ~maskExp;\n"
1715    "   }\n"
1716    "\n"
1717    "   return packUint2x32(uvec2(aLo, aHi));\n"
1718    "}\n"
1719    "\n"
1720    "uint64_t\n"
1721    "__fmin64(uint64_t a, uint64_t b)\n"
1722    "{\n"
1723    "   if (__is_nan(a)) return b;\n"
1724    "   if (__is_nan(b)) return a;\n"
1725    "\n"
1726    "   if (__flt64_nonnan(a, b)) return a;\n"
1727    "   return b;\n"
1728    "}\n"
1729    "\n"
1730    "uint64_t\n"
1731    "__fmax64(uint64_t a, uint64_t b)\n"
1732    "{\n"
1733    "   if (__is_nan(a)) return b;\n"
1734    "   if (__is_nan(b)) return a;\n"
1735    "\n"
1736    "   if (__flt64_nonnan(a, b)) return b;\n"
1737    "   return a;\n"
1738    "}\n"
1739    "\n"
1740    "uint64_t\n"
1741    "__ffract64(uint64_t a)\n"
1742    "{\n"
1743    "   return __fadd64(a, __fneg64(__ffloor64(a)));\n"
1744    "}\n"
1745    ""
1746    ;
1747