1/*
2 * The implementations contained in this file are heavily based on the
3 * implementations found in the Berkeley SoftFloat library. As such, they are
4 * licensed under the same 3-clause BSD license:
5 *
6 * License for Berkeley SoftFloat Release 3e
7 *
8 * John R. Hauser
9 * 2018 January 20
10 *
11 * The following applies to the whole of SoftFloat Release 3e as well as to
12 * each source file individually.
13 *
14 * Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 The Regents of the
15 * University of California.  All rights reserved.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are met:
19 *
20 *  1. Redistributions of source code must retain the above copyright notice,
21 *     this list of conditions, and the following disclaimer.
22 *
23 *  2. Redistributions in binary form must reproduce the above copyright
24 *     notice, this list of conditions, and the following disclaimer in the
25 *     documentation and/or other materials provided with the distribution.
26 *
27 *  3. Neither the name of the University nor the names of its contributors
28 *     may be used to endorse or promote products derived from this software
29 *     without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
32 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
33 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
34 * DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
35 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
36 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
38 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
39 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
40 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41*/
42
43#version 430
44#extension GL_ARB_gpu_shader_int64 : enable
45#extension GL_ARB_shader_bit_encoding : enable
46#extension GL_EXT_shader_integer_mix : enable
47#extension GL_MESA_shader_integer_functions : enable
48
49#pragma warning(off)
50
51/* Software IEEE floating-point rounding mode.
52 * GLSL spec section "4.7.1 Range and Precision":
53 * The rounding mode cannot be set and is undefined.
54 * But here, we are able to define the rounding mode at the compilation time.
55 */
56#define FLOAT_ROUND_NEAREST_EVEN    0
57#define FLOAT_ROUND_TO_ZERO         1
58#define FLOAT_ROUND_DOWN            2
59#define FLOAT_ROUND_UP              3
60#define FLOAT_ROUNDING_MODE         FLOAT_ROUND_NEAREST_EVEN
61
62/* Absolute value of a Float64 :
63 * Clear the sign bit
64 */
uint64_t
__fabs64(uint64_t __a)
{
   uvec2 parts = unpackUint2x32(__a);
   /* The sign lives in the MSB of the high word; mask it off. */
   parts.y = parts.y & 0x7FFFFFFFu;
   return packUint2x32(parts);
}
72
73/* Returns 1 if the double-precision floating-point value `a' is a NaN;
74 * otherwise returns 0.
75 */
bool
__is_nan(uint64_t __a)
{
   uvec2 a = unpackUint2x32(__a);
   /* NaN: exponent field all ones (checked with the sign bit shifted out)
    * and a non-zero fraction.
    */
   bool expAllOnes = (a.y << 1) >= 0xFFE00000u;
   bool fracNonZero = (a.x != 0u) || ((a.y & 0x000FFFFFu) != 0u);
   return expAllOnes && fracNonZero;
}
83
84/* Negate value of a Float64 :
85 * Toggle the sign bit
86 */
uint64_t
__fneg64(uint64_t __a)
{
   uvec2 a = unpackUint2x32(__a);

   /* NaNs are returned unchanged; everything else gets its sign flipped. */
   if (!__is_nan(__a))
      a.y ^= 0x80000000u;
   return packUint2x32(a);
}
97
uint64_t
__fsign64(uint64_t __a)
{
   uvec2 a = unpackUint2x32(__a);
   /* ±0 (all bits below the sign zero) yields +0; anything else yields
    * ±1.0 carrying the input's sign bit.
    */
   bool isZero = ((a.y << 1) | a.x) == 0u;
   uvec2 result;
   result.x = 0u;
   result.y = isZero ? 0u : ((a.y & 0x80000000u) | 0x3FF00000u);
   return packUint2x32(result);
}
107
108/* Returns the fraction bits of the double-precision floating-point value `a'.*/
uint
__extractFloat64FracLo(uint64_t a)
{
   uvec2 parts = unpackUint2x32(a);
   /* Low 32 bits of the 52-bit fraction. */
   return parts.x;
}
114
uint
__extractFloat64FracHi(uint64_t a)
{
   /* Top 20 bits of the fraction, stripped of sign and exponent. */
   uvec2 parts = unpackUint2x32(a);
   return parts.y & 0x000FFFFFu;
}
120
121/* Returns the exponent bits of the double-precision floating-point value `a'.*/
int
__extractFloat64Exp(uint64_t __a)
{
   /* 11-bit biased exponent sits in bits 20..30 of the high word. */
   uint hi = unpackUint2x32(__a).y;
   return int((hi >> 20) & 0x7FFu);
}
128
bool
__feq64_nonnan(uint64_t __a, uint64_t __b)
{
   uvec2 a = unpackUint2x32(__a);
   uvec2 b = unpackUint2x32(__b);

   /* +0.0 and -0.0 compare equal: low words both zero and high words
    * differ at most in the sign bit.
    */
   bool bothZero = (a.x == 0u) && (((a.y | b.y) << 1) == 0u);

   if (a.x != b.x)
      return false;
   return (a.y == b.y) || bothZero;
}
137
138/* Returns true if the double-precision floating-point value `a' is equal to the
139 * corresponding value `b', and false otherwise.  The comparison is performed
140 * according to the IEEE Standard for Floating-Point Arithmetic.
141 */
bool
__feq64(uint64_t a, uint64_t b)
{
   /* IEEE: any comparison involving a NaN is unordered, so == is false. */
   bool ordered = !__is_nan(a) && !__is_nan(b);
   return ordered && __feq64_nonnan(a, b);
}
150
151/* Returns true if the double-precision floating-point value `a' is not equal
152 * to the corresponding value `b', and false otherwise.  The comparison is
153 * performed according to the IEEE Standard for Floating-Point Arithmetic.
154 */
bool
__fne64(uint64_t a, uint64_t b)
{
   /* IEEE: NaN is unequal to everything, including itself. */
   bool unordered = __is_nan(a) || __is_nan(b);
   return unordered || !__feq64_nonnan(a, b);
}
163
164/* Returns the sign bit of the double-precision floating-point value `a'.*/
uint
__extractFloat64Sign(uint64_t a)
{
   /* 1 for negative, 0 for positive. */
   uint hi = unpackUint2x32(a).y;
   return hi >> 31;
}
170
171/* Returns true if the 64-bit value formed by concatenating `a0' and `a1' is less
172 * than the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
173 * returns false.
174 */
bool
lt64(uint a0, uint a1, uint b0, uint b1)
{
   /* Compare high words first; the low words only decide ties. */
   if (a0 != b0)
      return a0 < b0;
   return a1 < b1;
}
180
bool
__flt64_nonnan(uint64_t __a, uint64_t __b)
{
   uvec2 a = unpackUint2x32(__a);
   uvec2 b = unpackUint2x32(__b);
   uint aSign = __extractFloat64Sign(__a);
   uint bSign = __extractFloat64Sign(__b);

   if (aSign != bSign) {
      /* Different signs: a < b iff a is the negative one, except that
       * +0 and -0 compare equal.
       */
      bool bothZero = ((((a.y | b.y) << 1) | a.x | b.x) == 0u);
      return (aSign != 0u) && !bothZero;
   }

   /* Same sign: magnitude comparison, reversed for negative values. */
   if (aSign != 0u)
      return lt64(b.y, b.x, a.y, a.x);
   return lt64(a.y, a.x, b.y, b.x);
}
193
194/* Returns true if the double-precision floating-point value `a' is less than
195 * the corresponding value `b', and false otherwise.  The comparison is performed
196 * according to the IEEE Standard for Floating-Point Arithmetic.
197 */
bool
__flt64(uint64_t a, uint64_t b)
{
   /* Unordered (NaN) operands make every ordered comparison false. */
   bool ordered = !__is_nan(a) && !__is_nan(b);
   return ordered && __flt64_nonnan(a, b);
}
206
/* Returns true if the double-precision floating-point value `a' is greater
 * than or equal to the corresponding value `b', and false otherwise.  The
 * comparison is performed according to the IEEE Standard for Floating-Point
 * Arithmetic.
 */
bool
__fge64(uint64_t a, uint64_t b)
{
   /* a >= b is !(a < b), but only for ordered (non-NaN) operands. */
   bool ordered = !__is_nan(a) && !__is_nan(b);
   return ordered && !__flt64_nonnan(a, b);
}
220
221/* Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit
222 * value formed by concatenating `b0' and `b1'.  Addition is modulo 2^64, so
223 * any carry out is lost.  The result is broken into two 32-bit pieces which
224 * are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
225 */
void
__add64(uint a0, uint a1, uint b0, uint b1,
        out uint z0Ptr,
        out uint z1Ptr)
{
   uint lo = a1 + b1;
   /* The low-word sum wrapped iff it is smaller than an addend. */
   uint carry = uint(lo < a1);
   z1Ptr = lo;
   z0Ptr = a0 + b0 + carry;
}
235
236
237/* Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the
238 * 64-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
239 * 2^64, so any borrow out (carry out) is lost.  The result is broken into two
240 * 32-bit pieces which are stored at the locations pointed to by `z0Ptr' and
241 * `z1Ptr'.
242 */
void
__sub64(uint a0, uint a1, uint b0, uint b1,
        out uint z0Ptr,
        out uint z1Ptr)
{
   /* Borrow from the high word when the low-word subtraction underflows. */
   uint borrow = uint(a1 < b1);
   z1Ptr = a1 - b1;
   z0Ptr = (a0 - b0) - borrow;
}
251
252/* Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
253 * number of bits given in `count'.  If any nonzero bits are shifted off, they
254 * are "jammed" into the least significant bit of the result by setting the
255 * least significant bit to 1.  The value of `count' can be arbitrarily large;
256 * in particular, if `count' is greater than 64, the result will be either 0
257 * or 1, depending on whether the concatenation of `a0' and `a1' is zero or
258 * nonzero.  The result is broken into two 32-bit pieces which are stored at
259 * the locations pointed to by `z0Ptr' and `z1Ptr'.
260 */
void
__shift64RightJamming(uint a0,
                      uint a1,
                      int count,
                      out uint z0Ptr,
                      out uint z1Ptr)
{
   uint z0;
   uint z1;
   int negCount = (-count) & 31;

   /* High word: a0 survives only for count == 0, is shifted for
    * count < 32, and is zero otherwise.
    */
   z0 = mix(0u, a0, count == 0);
   z0 = mix(z0, (a0 >> count), count < 32);

   /* Low word: built from the widest count range down to count == 0.
    * Each mix overrides the previous value when its range test holds, so
    * the order of these statements is significant.  The "jam" ORs a 1
    * into the LSB whenever non-zero bits were shifted off.
    */
   z1 = uint((a0 | a1) != 0u); /* count >= 64 */
   uint z1_lt64 = (a0>>(count & 31)) | uint(((a0<<negCount) | a1) != 0u);
   z1 = mix(z1, z1_lt64, count < 64);
   z1 = mix(z1, (a0 | uint(a1 != 0u)), count == 32);
   uint z1_lt32 = (a0<<negCount) | (a1>>count) | uint ((a1<<negCount) != 0u);
   z1 = mix(z1, z1_lt32, count < 32);
   z1 = mix(z1, a1, count == 0);
   z1Ptr = z1;
   z0Ptr = z0;
}
285
286/* Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right
287 * by 32 _plus_ the number of bits given in `count'.  The shifted result is
288 * at most 64 nonzero bits; these are broken into two 32-bit pieces which are
289 * stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
290 * off form a third 32-bit result as follows:  The _last_ bit shifted off is
291 * the most-significant bit of the extra result, and the other 31 bits of the
292 * extra result are all zero if and only if _all_but_the_last_ bits shifted off
293 * were all zero.  This extra result is stored in the location pointed to by
294 * `z2Ptr'.  The value of `count' can be arbitrarily large.
295 *     (This routine makes more sense if `a0', `a1', and `a2' are considered
296 * to form a fixed-point value with binary point between `a1' and `a2'.  This
297 * fixed-point value is shifted right by the number of bits given in `count',
298 * and the integer part of the result is returned at the locations pointed to
299 * by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
300 * corrupted as described above, and is returned at the location pointed to by
301 * `z2Ptr'.)
302 */
void
__shift64ExtraRightJamming(uint a0, uint a1, uint a2,
                           int count,
                           out uint z0Ptr,
                           out uint z1Ptr,
                           out uint z2Ptr)
{
   uint z0 = 0u;
   uint z1;
   uint z2;
   int negCount = (-count) & 31;

   /* z2 collects the bits shifted out of (a0:a1); later mixes override
    * earlier ones as the count range narrows, so statement order matters.
    */
   z2 = mix(uint(a0 != 0u), a0, count == 64);
   z2 = mix(z2, a0 << negCount, count < 64);
   z2 = mix(z2, a1 << negCount, count < 32);

   z1 = mix(0u, (a0 >> (count & 31)), count < 64);
   z1 = mix(z1, (a0<<negCount) | (a1>>count), count < 32);

   /* For count >= 32 the whole of a1 is shifted out too; fold it into the
    * sticky test below.
    */
   a2 = mix(a2 | a1, a2, count < 32);
   z0 = mix(z0, a0 >> count, count < 32);
   /* Jam: any bits shifted past z2 stick in its least significant bit. */
   z2 |= uint(a2 != 0u);

   /* Exact boundary cases override the generic range results. */
   z0 = mix(z0, 0u, (count == 32));
   z1 = mix(z1, a0, (count == 32));
   z2 = mix(z2, a1, (count == 32));
   z0 = mix(z0, a0, (count == 0));
   z1 = mix(z1, a1, (count == 0));
   z2 = mix(z2, a2, (count == 0));
   z2Ptr = z2;
   z1Ptr = z1;
   z0Ptr = z0;
}
336
337/* Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the
338 * number of bits given in `count'.  Any bits shifted off are lost.  The value
339 * of `count' must be less than 32.  The result is broken into two 32-bit
340 * pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
341 */
void
__shortShift64Left(uint a0, uint a1,
                   int count,
                   out uint z0Ptr,
                   out uint z1Ptr)
{
   z1Ptr = a1<<count;
   /* The bits of a1 that cross into the high word are a1 >> (32 - count),
    * written as (-count) & 31 to keep the shift amount in 0..31.  count == 0
    * is special-cased because that expression would otherwise shift by 0 and
    * OR in all of a1.
    */
   z0Ptr = mix((a0 << count | (a1 >> ((-count) & 31))), a0, count == 0);
}
351
352/* Packs the sign `zSign', the exponent `zExp', and the significand formed by
353 * the concatenation of `zFrac0' and `zFrac1' into a double-precision floating-
354 * point value, returning the result.  After being shifted into the proper
355 * positions, the three fields `zSign', `zExp', and `zFrac0' are simply added
356 * together to form the most significant 32 bits of the result.  This means
357 * that any integer portion of `zFrac0' will be added into the exponent.  Since
358 * a properly normalized significand will have an integer portion equal to 1,
359 * the `zExp' input should be 1 less than the desired result exponent whenever
360 * `zFrac0' and `zFrac1' concatenated form a complete, normalized significand.
361 */
uint64_t
__packFloat64(uint zSign, int zExp, uint zFrac0, uint zFrac1)
{
   /* Sign, exponent and fraction-high are combined with addition so that a
    * carry out of the fraction's integer bit intentionally propagates into
    * the exponent field (see the comment above this function).
    */
   uint hi = (zSign << 31) + (uint(zExp) << 20) + zFrac0;
   return packUint2x32(uvec2(zFrac1, hi));
}
371
372/* Takes an abstract floating-point value having sign `zSign', exponent `zExp',
373 * and extended significand formed by the concatenation of `zFrac0', `zFrac1',
374 * and `zFrac2', and returns the proper double-precision floating-point value
375 * corresponding to the abstract input.  Ordinarily, the abstract value is
376 * simply rounded and packed into the double-precision format, with the inexact
377 * exception raised if the abstract input cannot be represented exactly.
378 * However, if the abstract value is too large, the overflow and inexact
379 * exceptions are raised and an infinity or maximal finite value is returned.
380 * If the abstract value is too small, the input value is rounded to a
381 * subnormal number, and the underflow and inexact exceptions are raised if the
382 * abstract input cannot be represented exactly as a subnormal double-precision
383 * floating-point number.
384 *     The input significand must be normalized or smaller.  If the input
385 * significand is not normalized, `zExp' must be 0; in that case, the result
386 * returned is a subnormal number, and it must not require rounding.  In the
387 * usual case that the input significand is normalized, `zExp' must be 1 less
388 * than the "true" floating-point exponent.  The handling of underflow and
389 * overflow follows the IEEE Standard for Floating-Point Arithmetic.
390 */
uint64_t
__roundAndPackFloat64(uint zSign,
                      int zExp,
                      uint zFrac0,
                      uint zFrac1,
                      uint zFrac2)
{
   bool roundNearestEven;
   bool increment;

   roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;
   /* Round up when the most significant discarded bit (MSB of zFrac2)
    * is set.
    */
   increment = int(zFrac2) < 0;
   if (!roundNearestEven) {
      if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) {
         increment = false;
      } else {
         if (zSign != 0u) {
            increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&
               (zFrac2 != 0u);
         } else {
            increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&
               (zFrac2 != 0u);
         }
      }
   }
   if (0x7FD <= zExp) {
      /* Overflow: return infinity, or the maximal finite value in the
       * rounding modes that never round away from zero.
       */
      if ((0x7FD < zExp) ||
         ((zExp == 0x7FD) &&
            (0x001FFFFFu == zFrac0 && 0xFFFFFFFFu == zFrac1) &&
               increment)) {
         if ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) ||
            ((zSign != 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP)) ||
               ((zSign == 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN))) {
            return __packFloat64(zSign, 0x7FE, 0x000FFFFFu, 0xFFFFFFFFu);
         }
         return __packFloat64(zSign, 0x7FF, 0u, 0u);
      }
   }
   /* BUGFIX: this subnormal path was nested inside the `0x7FD <= zExp'
    * overflow check above and therefore unreachable (zExp cannot be both
    * >= 0x7FD and negative).  It must be a sibling branch, as in the
    * SoftFloat original (which reached it via an unsigned cast), so that
    * results with a negative exponent are shifted into the subnormal
    * range and re-rounded.
    */
   if (zExp < 0) {
      __shift64ExtraRightJamming(
         zFrac0, zFrac1, zFrac2, -zExp, zFrac0, zFrac1, zFrac2);
      zExp = 0;
      if (roundNearestEven) {
         /* BUGFIX: was `zFrac2 < 0u', which is always false for a uint;
          * test the sign bit the same way as at the top of the function.
          */
         increment = int(zFrac2) < 0;
      } else {
         if (zSign != 0u) {
            increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&
               (zFrac2 != 0u);
         } else {
            increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&
               (zFrac2 != 0u);
         }
      }
   }
   if (increment) {
      __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);
      /* BUGFIX: ties-to-even.  SoftFloat clears the result LSB when the
       * discarded bits were exactly one half, i.e. (zFrac2 + zFrac2) == 0
       * with the increment condition already holding; the previous mask
       * `(zFrac2 + uint(zFrac2 == 0u))' never fired on a tie.
       */
      zFrac1 &= ~(uint((zFrac2 + zFrac2) == 0u) & uint(roundNearestEven));
   } else {
      /* An all-zero significand must be packed with a zero exponent. */
      zExp = mix(zExp, 0, (zFrac0 | zFrac1) == 0u);
   }
   return __packFloat64(zSign, zExp, zFrac0, zFrac1);
}
453
/* Rounds the unsigned 64-bit result (zFrac0:zFrac1) using the discarded
 * bits in zFrac2; negative non-zero inputs (zSign set) produce the default
 * all-ones NaN pattern.
 */
uint64_t
__roundAndPackUInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2)
{
   bool roundNearestEven;
   bool increment;
   uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;

   roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;

   /* BUGFIX: `increment' was read uninitialized on most paths and the only
    * assignment under round-to-nearest set it to `false' exactly when it
    * should round up.  Round up when the MSB of the discarded bits is set,
    * as in __roundAndPackFloat64.
    */
   increment = int(zFrac2) < 0;

   if (!roundNearestEven) {
      if (zSign != 0u) {
         /* FLOAT_ROUND_TO_ZERO and FLOAT_ROUND_UP both yield false here. */
         increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&
            (zFrac2 != 0u);
      } else {
         increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&
            (zFrac2 != 0u);
      }
   }

   if (increment) {
      __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);
      if ((zFrac0 | zFrac1) != 0u) {
         /* BUGFIX: precedence — the old `~(1u) + ... & ...' evaluated as
          * `(~1u + x) & y' and masked the whole low word down to one bit.
          * Intended behavior (SoftFloat): on an exact tie, clear the LSB
          * to round to even.
          */
         zFrac1 &= ~(uint((zFrac2 + zFrac2) == 0u) & uint(roundNearestEven));
      }
   }
   /* A negative non-zero value cannot be represented as uint64. */
   return mix(packUint2x32(uvec2(zFrac1, zFrac0)), default_nan,
              (zSign !=0u && (zFrac0 | zFrac1) != 0u));
}
485
/* Rounds the 64-bit magnitude (zFrac0:zFrac1) using the discarded bits in
 * zFrac2, applies the sign, and substitutes a default NaN pattern when the
 * signed result overflows the int64 range.
 */
int64_t
__roundAndPackInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2)
{
   bool roundNearestEven;
   bool increment;
   int64_t default_NegNaN = -0x7FFFFFFFFFFFFFFEL;
   int64_t default_PosNaN = 0xFFFFFFFFFFFFFFFFL;

   roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;

   /* BUGFIX: `increment' was read uninitialized under round-to-nearest
    * when the top discarded bit was clear, and set to `false' when it
    * should round up.  Round up when the MSB of zFrac2 is set, as in
    * __roundAndPackFloat64.
    */
   increment = int(zFrac2) < 0;

   if (!roundNearestEven) {
      if (zSign != 0u) {
         increment = ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&
            (zFrac2 != 0u));
      } else {
         increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&
            (zFrac2 != 0u);
      }
   }

   if (increment) {
      __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);
      if ((zFrac0 | zFrac1) != 0u) {
         /* BUGFIX: precedence — `~(1u) + ... & ...' evaluated as
          * `(~1u + x) & y' and wiped out the low word.  Intended behavior
          * (SoftFloat): on an exact tie, clear the LSB to round to even.
          */
         zFrac1 &= ~(uint((zFrac2 + zFrac2) == 0u) & uint(roundNearestEven));
      }
   }

   int64_t absZ = mix(int64_t(packUint2x32(uvec2(zFrac1, zFrac0))),
                      -int64_t(packUint2x32(uvec2(zFrac1, zFrac0))),
                      (zSign != 0u));
   int64_t nan = mix(default_PosNaN, default_NegNaN, bool(zSign));
   /* Overflow is detected when the sign of absZ disagrees with zSign. */
   return mix(absZ, nan, bool(zSign ^ uint(absZ < 0)) && bool(absZ));
}
521
522/* Returns the number of leading 0 bits before the most-significant 1 bit of
523 * `a'.  If `a' is zero, 32 is returned.
524 */
int
__countLeadingZeros32(uint a)
{
   /* findMSB gives the index of the highest set bit; zero is special-cased
    * since the function promises 32 for a == 0.
    */
   return mix(31 - findMSB(a), 32, a == 0u);
}
532
533/* Takes an abstract floating-point value having sign `zSign', exponent `zExp',
534 * and significand formed by the concatenation of `zSig0' and `zSig1', and
535 * returns the proper double-precision floating-point value corresponding
536 * to the abstract input.  This routine is just like `__roundAndPackFloat64'
537 * except that the input significand has fewer bits and does not have to be
538 * normalized.  In all cases, `zExp' must be 1 less than the "true" floating-
539 * point exponent.
540 */
uint64_t
__normalizeRoundAndPackFloat64(uint zSign,
                               int zExp,
                               uint zFrac0,
                               uint zFrac1)
{
   int shiftCount;
   uint zFrac2;

   /* If the high word is empty, promote the low word and account for the
    * full 32-bit shift in the exponent.
    */
   if (zFrac0 == 0u) {
      zExp -= 32;
      zFrac0 = zFrac1;
      zFrac1 = 0u;
   }

   /* Position the most significant set bit as the integer bit (bit 20 of
    * zFrac0, below the 11 sign+exponent header bits).
    */
   shiftCount = __countLeadingZeros32(zFrac0) - 11;
   if (0 <= shiftCount) {
      zFrac2 = 0u;
      __shortShift64Left(zFrac0, zFrac1, shiftCount, zFrac0, zFrac1);
   } else {
      /* Too many significant bits: shift right, jamming the lost bits
       * into zFrac2 so rounding still sees them.
       */
      __shift64ExtraRightJamming(
         zFrac0, zFrac1, 0u, -shiftCount, zFrac0, zFrac1, zFrac2);
   }
   zExp -= shiftCount;
   return __roundAndPackFloat64(zSign, zExp, zFrac0, zFrac1, zFrac2);
}
567
568/* Takes two double-precision floating-point values `a' and `b', one of which
569 * is a NaN, and returns the appropriate NaN result.
570 */
uint64_t
__propagateFloat64NaN(uint64_t __a, uint64_t __b)
{
   bool aIsNaN = __is_nan(__a);
   bool bIsNaN = __is_nan(__b);
   uvec2 a = unpackUint2x32(__a);
   uvec2 b = unpackUint2x32(__b);

   /* Quiet both candidates by setting the top fraction bit. */
   a.y |= 0x00080000u;
   b.y |= 0x00080000u;

   /* Prefer `a' only when it alone is the NaN; otherwise return `b'. */
   uvec2 result = (aIsNaN && !bIsNaN) ? a : b;
   return packUint2x32(result);
}
583
584/* Returns the result of adding the double-precision floating-point values
585 * `a' and `b'.  The operation is performed according to the IEEE Standard for
586 * Floating-Point Arithmetic.
587 */
uint64_t
__fadd64(uint64_t a, uint64_t b)
{
   uint aSign = __extractFloat64Sign(a);
   uint bSign = __extractFloat64Sign(b);
   uint aFracLo = __extractFloat64FracLo(a);
   uint aFracHi = __extractFloat64FracHi(a);
   uint bFracLo = __extractFloat64FracLo(b);
   uint bFracHi = __extractFloat64FracHi(b);
   int aExp = __extractFloat64Exp(a);
   int bExp = __extractFloat64Exp(b);
   uint zFrac0 = 0u;
   uint zFrac1 = 0u;
   int expDiff = aExp - bExp;
   /* Same signs: add the magnitudes.  Different signs: subtract the
    * smaller magnitude from the larger (second branch below).
    */
   if (aSign == bSign) {
      uint zFrac2 = 0u;
      int zExp;
      bool orig_exp_diff_is_zero = (expDiff == 0);

      if (orig_exp_diff_is_zero) {
         /* Exponents equal.  0x7FF means Inf or NaN: Inf + Inf of the same
          * sign is Inf; any NaN fraction propagates.
          */
         if (aExp == 0x7FF) {
            bool propagate = (aFracHi | aFracLo | bFracHi | bFracLo) != 0u;
            return mix(a, __propagateFloat64NaN(a, b), propagate);
         }
         __add64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
         /* Subnormal + subnormal needs no rounding; pack directly. */
         if (aExp == 0)
            return __packFloat64(aSign, 0, zFrac0, zFrac1);
         zFrac2 = 0u;
         /* Both hidden bits contributed, so the sum has its integer bit at
          * position 21; shift right by one to renormalize.
          */
         zFrac0 |= 0x00200000u;
         zExp = aExp;
         __shift64ExtraRightJamming(
            zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2);
      } else if (0 < expDiff) {
         /* a has the larger exponent; align b's significand to it. */
         if (aExp == 0x7FF) {
            bool propagate = (aFracHi | aFracLo) != 0u;
            return mix(a, __propagateFloat64NaN(a, b), propagate);
         }

         /* Subnormal b: no hidden bit, and its effective exponent is 1. */
         expDiff = mix(expDiff, expDiff - 1, bExp == 0);
         bFracHi = mix(bFracHi | 0x00100000u, bFracHi, bExp == 0);
         __shift64ExtraRightJamming(
            bFracHi, bFracLo, 0u, expDiff, bFracHi, bFracLo, zFrac2);
         zExp = aExp;
      } else if (expDiff < 0) {
         /* b has the larger exponent; align a's significand to it. */
         if (bExp == 0x7FF) {
            bool propagate = (bFracHi | bFracLo) != 0u;
            return mix(__packFloat64(aSign, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate);
         }
         expDiff = mix(expDiff, expDiff + 1, aExp == 0);
         aFracHi = mix(aFracHi | 0x00100000u, aFracHi, aExp == 0);
         __shift64ExtraRightJamming(
            aFracHi, aFracLo, 0u, - expDiff, aFracHi, aFracLo, zFrac2);
         zExp = bExp;
      }
      if (!orig_exp_diff_is_zero) {
         /* Make a's hidden bit explicit, add, and renormalize if the sum
          * carried past the integer bit.
          */
         aFracHi |= 0x00100000u;
         __add64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
         --zExp;
         if (!(zFrac0 < 0x00200000u)) {
            __shift64ExtraRightJamming(zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2);
            ++zExp;
         }
      }
      return __roundAndPackFloat64(aSign, zExp, zFrac0, zFrac1, zFrac2);

   } else {
      int zExp;

      /* Magnitude subtraction: pre-shift both significands left by 10 for
       * extra precision; the 10 is subtracted back out of the exponent
       * before normalizing.
       */
      __shortShift64Left(aFracHi, aFracLo, 10, aFracHi, aFracLo);
      __shortShift64Left(bFracHi, bFracLo, 10, bFracHi, bFracLo);
      if (0 < expDiff) {
         /* |a| dominates; align b and subtract it. */
         if (aExp == 0x7FF) {
            bool propagate = (aFracHi | aFracLo) != 0u;
            return mix(a, __propagateFloat64NaN(a, b), propagate);
         }
         expDiff = mix(expDiff, expDiff - 1, bExp == 0);
         bFracHi = mix(bFracHi | 0x40000000u, bFracHi, bExp == 0);
         __shift64RightJamming(bFracHi, bFracLo, expDiff, bFracHi, bFracLo);
         aFracHi |= 0x40000000u;
         __sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
         zExp = aExp;
         --zExp;
         return __normalizeRoundAndPackFloat64(aSign, zExp - 10, zFrac0, zFrac1);
      }
      if (expDiff < 0) {
         /* |b| dominates; the result takes b's sign (aSign flipped). */
         if (bExp == 0x7FF) {
            bool propagate = (bFracHi | bFracLo) != 0u;
            return mix(__packFloat64(aSign ^ 1u, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate);
         }
         expDiff = mix(expDiff, expDiff + 1, aExp == 0);
         aFracHi = mix(aFracHi | 0x40000000u, aFracHi, aExp == 0);
         __shift64RightJamming(aFracHi, aFracLo, - expDiff, aFracHi, aFracLo);
         bFracHi |= 0x40000000u;
         __sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);
         zExp = bExp;
         aSign ^= 1u;
         --zExp;
         return __normalizeRoundAndPackFloat64(aSign, zExp - 10, zFrac0, zFrac1);
      }
      /* Equal exponents.  Inf - Inf is invalid: default NaN (all ones). */
      if (aExp == 0x7FF) {
          bool propagate = (aFracHi | aFracLo | bFracHi | bFracLo) != 0u;
         return mix(0xFFFFFFFFFFFFFFFFUL, __propagateFloat64NaN(a, b), propagate);
      }
      /* Treat both subnormals as having exponent 1 (they have no hidden
       * bit, and expDiff == 0 implies bExp == aExp here).
       */
      bExp = mix(bExp, 1, aExp == 0);
      aExp = mix(aExp, 1, aExp == 0);
      /* blta: |b| < |a|; zexp_normal stays false only when the magnitudes
       * are exactly equal, in which case the result is a signed zero.
       */
      bool zexp_normal = false;
      bool blta = true;
      if (bFracHi < aFracHi) {
         __sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
         zexp_normal = true;
      }
      else if (aFracHi < bFracHi) {
         __sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);
         blta = false;
         zexp_normal = true;
      }
      else if (bFracLo < aFracLo) {
         __sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
         zexp_normal = true;
      }
      else if (aFracLo < bFracLo) {
         __sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);
          blta = false;
          zexp_normal = true;
      }
      zExp = mix(bExp, aExp, blta);
      aSign = mix(aSign ^ 1u, aSign, blta);
      /* Exact cancellation yields -0 only under round-down, per IEEE. */
      uint64_t retval_0 = __packFloat64(uint(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN), 0, 0u, 0u);
      uint64_t retval_1 = __normalizeRoundAndPackFloat64(aSign, zExp - 11, zFrac0, zFrac1);
      return mix(retval_0, retval_1, zexp_normal);
   }
}
720
721/* Multiplies `a' by `b' to obtain a 64-bit product.  The product is broken
722 * into two 32-bit pieces which are stored at the locations pointed to by
723 * `z0Ptr' and `z1Ptr'.
724 */
void
__mul32To64(uint a, uint b, out uint z0Ptr, out uint z1Ptr)
{
   /* Classic 16x16 partial-product multiply with explicit carry handling. */
   uint aLo = a & 0xFFFFu;
   uint aHi = a >> 16;
   uint bLo = b & 0xFFFFu;
   uint bHi = b >> 16;

   uint lo = aLo * bLo;
   uint hi = aHi * bHi;
   uint mid = aLo * bHi;
   uint midB = aHi * bLo;

   /* Fold the two middle products together; a wrap contributes 2^16 to
    * the high word, and the upper halves land there too.
    */
   mid += midB;
   hi += (uint(mid < midB) << 16) + (mid >> 16);

   /* The lower halves of the middle products join the low word. */
   uint midShifted = mid << 16;
   lo += midShifted;
   hi += uint(lo < midShifted);

   z1Ptr = lo;
   z0Ptr = hi;
}
744
745/* Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the
746 * 64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit
747 * product.  The product is broken into four 32-bit pieces which are stored at
748 * the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
749 */
void
__mul64To128(uint a0, uint a1, uint b0, uint b1,
             out uint z0Ptr,
             out uint z1Ptr,
             out uint z2Ptr,
             out uint z3Ptr)
{
   uint z0 = 0u;
   uint z1 = 0u;
   uint z2 = 0u;
   uint z3 = 0u;
   uint more1 = 0u;
   uint more2 = 0u;

   /* Schoolbook multiply with four 32x32->64 partial products.  Each
    * __add64 folds a partial product (or an accumulated carry word) into
    * the running 128-bit total; the statement order carries intermediate
    * carries upward, so it must not be changed.
    */
   __mul32To64(a1, b1, z2, z3);
   __mul32To64(a1, b0, z1, more2);
   __add64(z1, more2, 0u, z2, z1, z2);
   __mul32To64(a0, b0, z0, more1);
   __add64(z0, more1, 0u, z1, z0, z1);
   __mul32To64(a0, b1, more1, more2);
   __add64(more1, more2, 0u, z2, more1, z2);
   __add64(z0, z1, 0u, more1, z0, z1);
   z3Ptr = z3;
   z2Ptr = z2;
   z1Ptr = z1;
   z0Ptr = z0;
}
777
778/* Normalizes the subnormal double-precision floating-point value represented
779 * by the denormalized significand formed by the concatenation of `aFrac0' and
780 * `aFrac1'.  The normalized exponent is stored at the location pointed to by
781 * `zExpPtr'.  The most significant 21 bits of the normalized significand are
782 * stored at the location pointed to by `zFrac0Ptr', and the least significant
783 * 32 bits of the normalized significand are stored at the location pointed to
784 * by `zFrac1Ptr'.
785 */
void
__normalizeFloat64Subnormal(uint aFrac0, uint aFrac1,
                            out int zExpPtr,
                            out uint zFrac0Ptr,
                            out uint zFrac1Ptr)
{
   int shiftCount;
   uint temp_zfrac0, temp_zfrac1;
   /* Count leading zeros of the most significant non-zero word; the leading
    * 1 must end up at bit 20 of the high word (11 header bits above it).
    */
   shiftCount = __countLeadingZeros32(mix(aFrac0, aFrac1, aFrac0 == 0u)) - 11;
   zExpPtr = mix(1 - shiftCount, -shiftCount - 31, aFrac0 == 0u);

   /* Precomputed results for the aFrac0 == 0 case, where the entire
    * significand lives in aFrac1 and may cross the word boundary in either
    * direction.
    */
   temp_zfrac0 = mix(aFrac1<<shiftCount, aFrac1>>(-shiftCount), shiftCount < 0);
   temp_zfrac1 = mix(0u, aFrac1<<(shiftCount & 31), shiftCount < 0);

   __shortShift64Left(aFrac0, aFrac1, shiftCount, zFrac0Ptr, zFrac1Ptr);

   /* Consistency fix: aFrac0 is a uint, so compare against the unsigned
    * literal `0u' like everywhere else in this file (the previous signed
    * literal relied on implicit int->uint conversion).
    */
   zFrac0Ptr = mix(zFrac0Ptr, temp_zfrac0, aFrac0 == 0u);
   zFrac1Ptr = mix(zFrac1Ptr, temp_zfrac1, aFrac0 == 0u);
}
805
806/* Returns the result of multiplying the double-precision floating-point values
807 * `a' and `b'.  The operation is performed according to the IEEE Standard for
808 * Floating-Point Arithmetic.
809 */
uint64_t
__fmul64(uint64_t a, uint64_t b)
{
   uint zFrac0 = 0u;
   uint zFrac1 = 0u;
   uint zFrac2 = 0u;
   uint zFrac3 = 0u;
   int zExp;

   uint aFracLo = __extractFloat64FracLo(a);
   uint aFracHi = __extractFloat64FracHi(a);
   uint bFracLo = __extractFloat64FracLo(b);
   uint bFracHi = __extractFloat64FracHi(b);
   int aExp = __extractFloat64Exp(a);
   uint aSign = __extractFloat64Sign(a);
   int bExp = __extractFloat64Exp(b);
   uint bSign = __extractFloat64Sign(b);
   uint zSign = aSign ^ bSign;
   /* a is Inf or NaN. */
   if (aExp == 0x7FF) {
      if (((aFracHi | aFracLo) != 0u) ||
         ((bExp == 0x7FF) && ((bFracHi | bFracLo) != 0u))) {
         return __propagateFloat64NaN(a, b);
      }
      /* Inf * 0 is invalid: return the default NaN pattern (all ones). */
      if ((uint(bExp) | bFracHi | bFracLo) == 0u)
            return 0xFFFFFFFFFFFFFFFFUL;
      return __packFloat64(zSign, 0x7FF, 0u, 0u);
   }
   /* b is Inf or NaN. */
   if (bExp == 0x7FF) {
      if ((bFracHi | bFracLo) != 0u)
         return __propagateFloat64NaN(a, b);
      /* 0 * Inf is likewise invalid. */
      if ((uint(aExp) | aFracHi | aFracLo) == 0u)
         return 0xFFFFFFFFFFFFFFFFUL;
      return __packFloat64(zSign, 0x7FF, 0u, 0u);
   }
   /* Zero operands produce a signed zero; subnormals are normalized so the
    * core multiply can assume a leading integer bit.
    */
   if (aExp == 0) {
      if ((aFracHi | aFracLo) == 0u)
         return __packFloat64(zSign, 0, 0u, 0u);
      __normalizeFloat64Subnormal(aFracHi, aFracLo, aExp, aFracHi, aFracLo);
   }
   if (bExp == 0) {
      if ((bFracHi | bFracLo) == 0u)
         return __packFloat64(zSign, 0, 0u, 0u);
      __normalizeFloat64Subnormal(bFracHi, bFracLo, bExp, bFracHi, bFracLo);
   }
   zExp = aExp + bExp - 0x400;
   /* Make a's hidden integer bit explicit; b's fraction is pre-shifted by
    * 12 to align the 128-bit product, and b's implicit leading 1 is
    * accounted for by the extra __add64 of a's significand below.
    */
   aFracHi |= 0x00100000u;
   __shortShift64Left(bFracHi, bFracLo, 12, bFracHi, bFracLo);
   __mul64To128(
      aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1, zFrac2, zFrac3);
   __add64(zFrac0, zFrac1, aFracHi, aFracLo, zFrac0, zFrac1);
   /* Any bits in the lowest product word become sticky for rounding. */
   zFrac2 |= uint(zFrac3 != 0u);
   if (0x00200000u <= zFrac0) {
      /* Product carried past the integer bit; renormalize by one. */
      __shift64ExtraRightJamming(
         zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2);
      ++zExp;
   }
   return __roundAndPackFloat64(zSign, zExp, zFrac0, zFrac1, zFrac2);
}
868
/* Fused multiply-add emulation: a * b + c.
 * NOTE(review): implemented as a separate multiply then add, so the
 * product is rounded before the addition — results can differ from a
 * true fused IEEE fma by one double rounding. */
uint64_t
__ffma64(uint64_t a, uint64_t b, uint64_t c)
{
   return __fadd64(__fmul64(a, b), c);
}
874
875/* Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
876 * number of bits given in `count'.  Any bits shifted off are lost.  The value
877 * of `count' can be arbitrarily large; in particular, if `count' is greater
878 * than 64, the result will be 0.  The result is broken into two 32-bit pieces
879 * which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
880 */
void
__shift64Right(uint a0, uint a1,
               int count,
               out uint z0Ptr,
               out uint z1Ptr)
{
   uint z0;
   uint z1;
   int negCount = (-count) & 31;

   /* High word: shifted down for count < 32, zero otherwise. */
   z0 = 0u;
   z0 = mix(z0, (a0 >> count), count < 32);
   z0 = mix(z0, a0, count == 0);

   /* Low word: receives bits from both input words for count < 32. */
   z1 = mix(0u, (a0 >> (count & 31)), count < 64);
   z1 = mix(z1, (a0<<negCount) | (a1>>count), count < 32);
   /* Bug fix: a zero shift must return the input unchanged.  The low
    * word of the input is a1; the previous code selected a0 here,
    * corrupting the low half whenever count == 0. */
   z1 = mix(z1, a1, count == 0);

   z1Ptr = z1;
   z0Ptr = z0;
}
902
903/* Returns the result of converting the double-precision floating-point value
904 * `a' to the unsigned integer format.  The conversion is performed according
905 * to the IEEE Standard for Floating-Point Arithmetic.
906 */
uint
__fp64_to_uint(uint64_t a)
{
   uint aFracLo = __extractFloat64FracLo(a);
   uint aFracHi = __extractFloat64FracHi(a);
   int aExp = __extractFloat64Exp(a);
   uint aSign = __extractFloat64Sign(a);

   /* NaN converts to all-ones. */
   if ((aExp == 0x7FF) && ((aFracHi | aFracLo) != 0u))
      return 0xFFFFFFFFu;

   /* Restore the implicit leading 1 for normalized inputs. */
   aFracHi |= mix(0u, 0x00100000u, aExp != 0);

   /* 0x427 = bias (0x3FF) + 52 - 12: aligns the integer part so that
    * 12 fraction bits remain below it. */
   int shiftDist = 0x427 - aExp;
   if (0 < shiftDist)
      __shift64RightJamming(aFracHi, aFracLo, shiftDist, aFracHi, aFracLo);

   /* Bits above the low 12 of the high word mean the magnitude does not
    * fit in 32 bits: saturate (all-ones if positive, zero if negative). */
   if ((aFracHi & 0xFFFFF000u) != 0u)
      return mix(~0u, 0u, (aSign != 0u));

   uint z = 0u;
   uint zero = 0u;
   /* Drop the remaining 12 fraction bits to get the integer result. */
   __shift64Right(aFracHi, aFracLo, 12, zero, z);

   uint expt = mix(~0u, 0u, (aSign != 0u));

   /* A negative non-zero value clamps to 0. */
   return mix(z, expt, (aSign != 0u) && (z != 0u));
}
935
/* Converts the unsigned 32-bit integer `a' to double precision.  The
 * conversion is always exact since a uint fits in the 52-bit fraction. */
uint64_t
__uint_to_fp64(uint a)
{
   if (a == 0u)
      return 0ul;

   /* Position of the leading 1 determines both the exponent and how far
    * the value must be shifted into the fraction field. */
   int shiftDist = __countLeadingZeros32(a) + 21;

   uint aHigh = 0u;
   uint aLow = 0u;
   int negCount = (- shiftDist) & 31;

   /* Case 32 <= shiftDist < 64: the value lands entirely in the high
    * word (later mixes override smaller shift counts). */
   aHigh = mix(0u, a<< shiftDist - 32, shiftDist < 64);
   aLow = 0u;
   /* Case shiftDist == 0: the value stays in the low word unshifted. */
   aHigh = mix(aHigh, 0u, shiftDist == 0);
   aLow = mix(aLow, a, shiftDist ==0);
   /* Case 0 < shiftDist < 32: the value straddles both words. */
   aHigh = mix(aHigh, a >> negCount, shiftDist < 32);
   aLow = mix(aLow, a << shiftDist, shiftDist < 32);

   return __packFloat64(0u, 0x432 - shiftDist, aHigh, aLow);
}
957
/* Converts the unsigned 64-bit integer `a' to double precision, rounding
 * when more than 53 significant bits are present. */
uint64_t
__uint64_to_fp64(uint64_t a)
{
   if (a == 0u)
      return 0ul;

   uvec2 aFrac = unpackUint2x32(a);
   uint aFracLo = __extractFloat64FracLo(a);
   uint aFracHi = __extractFloat64FracHi(a);

   /* NOTE(review): this tests the top bit of the value returned by the
    * fraction extractor — confirm __extractFloat64FracHi returns the raw
    * high word here rather than a 20-bit-masked fraction. */
   if ((aFracHi & 0x80000000u) != 0u) {
      /* Top bit set: shift right by one (jamming the lost bit into the
       * sticky position) and round at the 2^52 scale (exponent 0x433). */
      __shift64RightJamming(aFracHi, aFracLo, 1, aFracHi, aFracLo);
      return __roundAndPackFloat64(0, 0x433, aFracHi, aFracLo, 0u);
   } else {
      /* Otherwise let normalization locate the leading 1. */
      return __normalizeRoundAndPackFloat64(0, 0x432, aFrac.y, aFrac.x);
   }
}
975
/* Converts the double-precision value `a' to an unsigned 64-bit integer,
 * with rounding delegated to __roundAndPackUInt64. */
uint64_t
__fp64_to_uint64(uint64_t a)
{
   uint aFracLo = __extractFloat64FracLo(a);
   uint aFracHi = __extractFloat64FracHi(a);
   int aExp = __extractFloat64Exp(a);
   uint aSign = __extractFloat64Sign(a);
   uint zFrac2 = 0u;
   uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;

   /* Restore the implicit leading 1 for normalized inputs. */
   aFracHi = mix(aFracHi, aFracHi | 0x00100000u, aExp != 0);
   /* 0x433 = bias + 52: right-shift count that places the binary point
    * after the integer part. */
   int shiftCount = 0x433 - aExp;

   if ( shiftCount <= 0 ) {
      /* Large magnitude; aExp == 0x7FF means Inf or NaN. */
      if (shiftCount < -11 && aExp == 0x7FF) {
         if ((aFracHi | aFracLo) != 0u)
            return __propagateFloat64NaN(a, a);
         /* Negative infinity yields the default NaN pattern; positive
          * infinity returns the input bits unchanged. */
         return mix(default_nan, a, aSign == 0u);
      }
      __shortShift64Left(aFracHi, aFracLo, -shiftCount, aFracHi, aFracLo);
   } else {
      /* Shift the fraction bits out, keeping round/sticky in zFrac2. */
      __shift64ExtraRightJamming(aFracHi, aFracLo, zFrac2, shiftCount,
                                 aFracHi, aFracLo, zFrac2);
   }
   return __roundAndPackUInt64(aSign, aFracHi, aFracLo, zFrac2);
}
1002
/* Converts the double-precision value `a' to a signed 64-bit integer,
 * with rounding delegated to __roundAndPackInt64. */
int64_t
__fp64_to_int64(uint64_t a)
{
   uint zFrac2 = 0u;
   uint aFracLo = __extractFloat64FracLo(a);
   uint aFracHi = __extractFloat64FracHi(a);
   int aExp = __extractFloat64Exp(a);
   uint aSign = __extractFloat64Sign(a);
   int64_t default_NegNaN = -0x7FFFFFFFFFFFFFFEL;
   int64_t default_PosNaN = 0xFFFFFFFFFFFFFFFFL;

   /* Restore the implicit leading 1 for normalized inputs. */
   aFracHi = mix(aFracHi, aFracHi | 0x00100000u, aExp != 0);
   /* 0x433 = bias + 52: aligns the binary point after the integer part. */
   int shiftCount = 0x433 - aExp;

   if (shiftCount <= 0) {
      /* Large magnitude; aExp == 0x7FF means Inf or NaN. */
      if (shiftCount < -11 && aExp == 0x7FF) {
         if ((aFracHi | aFracLo) != 0u)
            return default_NegNaN;
         /* +/-Inf map to the positive/negative default values. */
         return mix(default_NegNaN, default_PosNaN, aSign == 0u);
      }
      __shortShift64Left(aFracHi, aFracLo, -shiftCount, aFracHi, aFracLo);
   } else {
      /* Shift the fraction bits out, keeping round/sticky in zFrac2. */
      __shift64ExtraRightJamming(aFracHi, aFracLo, zFrac2, shiftCount,
                                 aFracHi, aFracLo, zFrac2);
   }

   return __roundAndPackInt64(aSign, aFracHi, aFracLo, zFrac2);
}
1031
/* Converts the single-precision value `f' to an unsigned 64-bit integer,
 * with rounding delegated to __roundAndPackUInt64. */
uint64_t
__fp32_to_uint64(float f)
{
   uint a = floatBitsToUint(f);
   uint aFrac = a & 0x007FFFFFu;
   int aExp = int((a>>23) & 0xFFu);
   uint aSign = a>>31;
   uint zFrac0 = 0u;
   uint zFrac1 = 0u;
   uint zFrac2 = 0u;
   uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;
   /* 0xBE = single-precision bias (0x7F) + 63. */
   int shiftCount = 0xBE - aExp;

   if (shiftCount <0) {
      /* Inf or NaN input: all-ones. */
      if (aExp == 0xFF)
         return default_nan;
   }

   /* Restore the implicit leading 1 and left-align the 24-bit
    * significand at the top of a 64-bit pair. */
   aFrac = mix(aFrac, aFrac | 0x00800000u, aExp != 0);
   __shortShift64Left(aFrac, 0, 40, zFrac0, zFrac1);

   if (shiftCount != 0) {
      /* Shift down to the integer position, keeping round/sticky bits. */
      __shift64ExtraRightJamming(zFrac0, zFrac1, zFrac2, shiftCount,
                                 zFrac0, zFrac1, zFrac2);
   }

   return __roundAndPackUInt64(aSign, zFrac0, zFrac1, zFrac2);
}
1060
/* Converts the single-precision value `f' to a signed 64-bit integer,
 * with rounding delegated to __roundAndPackInt64. */
int64_t
__fp32_to_int64(float f)
{
   uint a = floatBitsToUint(f);
   uint aFrac = a & 0x007FFFFFu;
   int aExp = int((a>>23) & 0xFFu);
   uint aSign = a>>31;
   uint zFrac0 = 0u;
   uint zFrac1 = 0u;
   uint zFrac2 = 0u;
   int64_t default_NegNaN = -0x7FFFFFFFFFFFFFFEL;
   int64_t default_PosNaN = 0xFFFFFFFFFFFFFFFFL;
   /* 0xBE = single-precision bias (0x7F) + 63. */
   int shiftCount = 0xBE - aExp;

   if (shiftCount <0) {
      /* Magnitude too large for int64, or Inf/NaN: saturate. */
      if (aExp == 0xFF && aFrac != 0u)
         return default_NegNaN;
      return mix(default_NegNaN, default_PosNaN, aSign == 0u);
   }

   /* Restore the implicit leading 1 and left-align the 24-bit
    * significand at the top of a 64-bit pair. */
   aFrac = mix(aFrac, aFrac | 0x00800000u, aExp != 0);
   __shortShift64Left(aFrac, 0, 40, zFrac0, zFrac1);

   if (shiftCount != 0) {
      /* Shift down to the integer position, keeping round/sticky bits. */
      __shift64ExtraRightJamming(zFrac0, zFrac1, zFrac2, shiftCount,
                                 zFrac0, zFrac1, zFrac2);
   }

   return __roundAndPackInt64(aSign, zFrac0, zFrac1, zFrac2);
}
1091
/* Converts the signed 64-bit integer `a' to double precision, rounding
 * when more than 53 significant bits are present. */
uint64_t
__int64_to_fp64(int64_t a)
{
   if (a==0)
      return 0ul;

   /* Work on the magnitude; the sign is reattached when packing. */
   uint64_t absA = mix(uint64_t(a), uint64_t(-a), a < 0);
   uint aFracHi = __extractFloat64FracHi(absA);
   uvec2 aFrac = unpackUint2x32(absA);
   uint zSign = uint(a < 0);

   /* NOTE(review): this tests the top bit of the value returned by the
    * fraction extractor — confirm __extractFloat64FracHi returns the raw
    * high word of absA here rather than a 20-bit-masked fraction. */
   if ((aFracHi & 0x80000000u) != 0u) {
      /* This magnitude bit is only reachable for a == INT64_MIN, whose
       * value is exactly -2^63 (packed as sign=1, exponent 0x434). */
      return mix(0ul, __packFloat64(1, 0x434, 0u, 0u), a < 0);
   }

   return __normalizeRoundAndPackFloat64(zSign, 0x432, aFrac.y, aFrac.x);
}
1109
1110/* Returns the result of converting the double-precision floating-point value
1111 * `a' to the 32-bit two's complement integer format.  The conversion is
1112 * performed according to the IEEE Standard for Floating-Point Arithmetic---
1113 * which means in particular that the conversion is rounded according to the
1114 * current rounding mode.  If `a' is a NaN, the largest positive integer is
1115 * returned.  Otherwise, if the conversion overflows, the largest integer with
1116 * the same sign as `a' is returned.
1117 */
int
__fp64_to_int(uint64_t a)
{
   uint aFracLo = __extractFloat64FracLo(a);
   uint aFracHi = __extractFloat64FracHi(a);
   int aExp = __extractFloat64Exp(a);
   uint aSign = __extractFloat64Sign(a);

   uint absZ = 0u;
   uint aFracExtra = 0u;
   /* 0x413 = bias + 20: exponent at which the integer part fills the
    * 21 bits of the high fraction word. */
   int shiftCount = aExp - 0x413;

   if (0 <= shiftCount) {
      /* aExp > 0x41E means |a| >= 2^32: overflow, Inf, or NaN. */
      if (0x41E < aExp) {
         /* NaN converts as if positive, yielding INT_MAX. */
         if ((aExp == 0x7FF) && bool(aFracHi | aFracLo))
            aSign = 0u;
         return mix(0x7FFFFFFF, 0x80000000, bool(aSign));
      }
      __shortShift64Left(aFracHi | 0x00100000u, aFracLo, shiftCount, absZ, aFracExtra);
   } else {
      /* |a| < 1 truncates to zero. */
      if (aExp < 0x3FF)
         return 0;

      /* Restore the implicit leading 1 and split the value at the
       * binary point: absZ is the integer part. */
      aFracHi |= 0x00100000u;
      aFracExtra = ( aFracHi << (shiftCount & 31)) | aFracLo;
      absZ = aFracHi >> (- shiftCount);
   }

   int z = mix(int(absZ), -int(absZ), (aSign != 0u));
   int nan = mix(0x7FFFFFFF, 0x80000000, bool(aSign));
   /* A sign mismatch after negation indicates overflow: saturate to the
    * largest integer of the input's sign. */
   return mix(z, nan, bool(aSign ^ uint(z < 0)) && bool(z));
}
1150
1151/* Returns the result of converting the 32-bit two's complement integer `a'
1152 * to the double-precision floating-point format.  The conversion is performed
1153 * according to the IEEE Standard for Floating-Point Arithmetic.
1154 */
uint64_t
__int_to_fp64(int a)
{
   /* Exact conversion: any 32-bit integer fits in the 52-bit fraction. */
   if (a == 0)
      return __packFloat64(0u, 0, 0u, 0u);

   uint sign = uint(a < 0);
   /* Magnitude of `a'; the sign is reattached when packing. */
   uint magnitude = mix(uint(a), uint(-a), a < 0);
   /* Left-shift that places the leading 1 at fraction bit 52. */
   int shiftCount = __countLeadingZeros32(magnitude) - 11;

   uint fracHi = 0u;
   uint fracLo = 0u;
   if (shiftCount < 0) {
      /* Leading 1 above bit 20: spill the low bits into the low word. */
      __shift64Right(magnitude, 0u, -shiftCount, fracHi, fracLo);
   } else {
      fracHi = magnitude << shiftCount;
      fracLo = 0u;
   }
   return __packFloat64(sign, 0x412 - shiftCount, fracHi, fracLo);
}
1173
bool
__fp64_to_bool(uint64_t a)
{
   /* True iff the magnitude of `a' is not zero (sign bit ignored). */
   uint64_t magnitude = __fabs64(a);
   return !__feq64_nonnan(magnitude, 0ul);
}
1179
uint64_t
__bool_to_fp64(bool a)
{
   /* true maps to 1.0, false to 0.0. */
   int one_or_zero = a ? 1 : 0;
   return __int_to_fp64(one_or_zero);
}
1185
1186/* Packs the sign `zSign', exponent `zExp', and significand `zFrac' into a
1187 * single-precision floating-point value, returning the result.  After being
1188 * shifted into the proper positions, the three fields are simply added
1189 * together to form the result.  This means that any integer portion of `zSig'
1190 * will be added into the exponent.  Since a properly normalized significand
1191 * will have an integer portion equal to 1, the `zExp' input should be 1 less
1192 * than the desired result exponent whenever `zFrac' is a complete, normalized
1193 * significand.
1194 */
float
__packFloat32(uint zSign, int zExp, uint zFrac)
{
   /* Assemble sign, exponent, and fraction by addition; any integer
    * part of zFrac deliberately carries into the exponent field, so
    * zExp must be one less than the true exponent for a normalized
    * significand (see the comment above). */
   uint bits = (zSign<<31) + (uint(zExp)<<23) + zFrac;
   return uintBitsToFloat(bits);
}
1200
1201/* Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1202 * and significand `zFrac', and returns the proper single-precision floating-
1203 * point value corresponding to the abstract input.  Ordinarily, the abstract
1204 * value is simply rounded and packed into the single-precision format, with
1205 * the inexact exception raised if the abstract input cannot be represented
1206 * exactly.  However, if the abstract value is too large, the overflow and
1207 * inexact exceptions are raised and an infinity or maximal finite value is
1208 * returned.  If the abstract value is too small, the input value is rounded to
1209 * a subnormal number, and the underflow and inexact exceptions are raised if
1210 * the abstract input cannot be represented exactly as a subnormal single-
1211 * precision floating-point number.
1212 *     The input significand `zFrac' has its binary point between bits 30
1213 * and 29, which is 7 bits to the left of the usual location.  This shifted
1214 * significand must be normalized or smaller.  If `zFrac' is not normalized,
1215 * `zExp' must be 0; in that case, the result returned is a subnormal number,
1216 * and it must not require rounding.  In the usual case that `zFrac' is
1217 * normalized, `zExp' must be 1 less than the "true" floating-point exponent.
1218 * The handling of underflow and overflow follows the IEEE Standard for
1219 * Floating-Point Arithmetic.
1220 */
float
__roundAndPackFloat32(uint zSign, int zExp, uint zFrac)
{
   bool roundNearestEven;
   int roundIncrement;
   int roundBits;

   roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;
   /* Half of the 7 discarded bits' range: nearest-even increment. */
   roundIncrement = 0x40;
   if (!roundNearestEven) {
      if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) {
         roundIncrement = 0;
      } else {
         /* Directed rounding: increment only when rounding away from
          * zero for this sign. */
         roundIncrement = 0x7F;
         if (zSign != 0u) {
            if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP)
               roundIncrement = 0;
         } else {
            if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN)
               roundIncrement = 0;
         }
      }
   }
   roundBits = int(zFrac & 0x7Fu);
   /* Unsigned compare catches both overflow (zExp large) and
    * underflow (zExp negative). */
   if (0xFDu <= uint(zExp)) {
      if ((0xFD < zExp) || ((zExp == 0xFD) && (int(zFrac) + roundIncrement) < 0))
         /* Overflow: infinity, or largest finite value when the
          * rounding mode does not round away from zero. */
         return __packFloat32(zSign, 0xFF, 0u) - float(roundIncrement == 0);
      /* Underflow: shift into the subnormal range, jamming any lost
       * bits into the sticky position. */
      int count = -zExp;
      bool zexp_lt0 = zExp < 0;
      uint zFrac_lt0 = mix(uint(zFrac != 0u), (zFrac>>count) | uint((zFrac<<((-count) & 31)) != 0u), (-zExp) < 32);
      zFrac = mix(zFrac, zFrac_lt0, zexp_lt0);
      roundBits = mix(roundBits, int(zFrac) & 0x7f, zexp_lt0);
      zExp = mix(zExp, 0, zexp_lt0);
   }
   zFrac = (zFrac + uint(roundIncrement))>>7;
   /* On an exact tie, clear the LSB so the result is even. */
   zFrac &= ~uint(((roundBits ^ 0x40) == 0) && roundNearestEven);

   return __packFloat32(zSign, mix(zExp, 0, zFrac == 0u), zFrac);
}
1260
1261/* Returns the result of converting the double-precision floating-point value
1262 * `a' to the single-precision floating-point format.  The conversion is
1263 * performed according to the IEEE Standard for Floating-Point Arithmetic.
1264 */
float
__fp64_to_fp32(uint64_t __a)
{
   uvec2 a = unpackUint2x32(__a);
   uint zFrac = 0u;
   uint allZero = 0u;

   uint aFracLo = __extractFloat64FracLo(__a);
   uint aFracHi = __extractFloat64FracHi(__a);
   int aExp = __extractFloat64Exp(__a);
   uint aSign = __extractFloat64Sign(__a);
   if (aExp == 0x7FF) {
      /* Inf or NaN.  For NaN, carry the top fraction bits over and set
       * the quiet bit; for Inf, pack a correctly signed infinity. */
      __shortShift64Left(a.y, a.x, 12, a.y, a.x);
      float rval = uintBitsToFloat((aSign<<31) | 0x7FC00000u | (a.y>>9));
      rval = mix(__packFloat32(aSign, 0xFF, 0u), rval, (aFracHi | aFracLo) != 0u);
      return rval;
   }
   /* Squeeze the 52-bit fraction down by 22 bits, jamming lost bits
    * into the sticky position, then restore the leading 1. */
   __shift64RightJamming(aFracHi, aFracLo, 22, allZero, zFrac);
   zFrac = mix(zFrac, zFrac | 0x40000000u, aExp != 0);
   /* 0x381 rebias: double bias minus single bias, adjusted for the
    * shifted significand position expected by __roundAndPackFloat32. */
   return __roundAndPackFloat32(aSign, aExp - 0x381, zFrac);
}
1286
/* Converts the unsigned 64-bit integer `__a' to single precision. */
float
__uint64_to_fp32(uint64_t __a)
{
   uint zFrac = 0u;
   uvec2 aFrac = unpackUint2x32(__a);
   /* Leading-zero count of the most significant non-zero word selects
    * the shift that aligns the value for packing. */
   int shiftCount = __countLeadingZeros32(mix(aFrac.y, aFrac.x, aFrac.y == 0u));
   shiftCount -= mix(40, 8, aFrac.y == 0u);

   if (0 <= shiftCount) {
      /* Value is small enough to convert exactly: no rounding needed. */
      __shortShift64Left(aFrac.y, aFrac.x, shiftCount, aFrac.y, aFrac.x);
      bool is_zero = (aFrac.y | aFrac.x) == 0u;
      return mix(__packFloat32(0u, 0x95 - shiftCount, aFrac.x), 0, is_zero);
   }

   shiftCount += 7;
   /* Too many significant bits: shift right with sticky, then round. */
   __shift64RightJamming(aFrac.y, aFrac.x, -shiftCount, aFrac.y, aFrac.x);
   zFrac = mix(aFrac.x<<shiftCount, aFrac.x, shiftCount < 0);
   return __roundAndPackFloat32(0u, 0x9C - shiftCount, zFrac);
}
1306
/* Converts the signed 64-bit integer `__a' to single precision. */
float
__int64_to_fp32(int64_t __a)
{
   uint zFrac = 0u;
   uint aSign = uint(__a < 0);
   /* Work on the magnitude; the sign is reattached when packing. */
   uint64_t absA = mix(uint64_t(__a), uint64_t(-__a), __a < 0);
   uvec2 aFrac = unpackUint2x32(absA);
   /* Leading-zero count of the most significant non-zero word selects
    * the shift that aligns the value for packing. */
   int shiftCount = __countLeadingZeros32(mix(aFrac.y, aFrac.x, aFrac.y == 0u));
   shiftCount -= mix(40, 8, aFrac.y == 0u);

   if (0 <= shiftCount) {
      /* Magnitude small enough to convert exactly: no rounding. */
      __shortShift64Left(aFrac.y, aFrac.x, shiftCount, aFrac.y, aFrac.x);
      bool is_zero = (aFrac.y | aFrac.x) == 0u;
      return mix(__packFloat32(aSign, 0x95 - shiftCount, aFrac.x), 0, absA == 0u);
   }

   shiftCount += 7;
   /* Too many significant bits: shift right with sticky, then round. */
   __shift64RightJamming(aFrac.y, aFrac.x, -shiftCount, aFrac.y, aFrac.x);
   zFrac = mix(aFrac.x<<shiftCount, aFrac.x, shiftCount < 0);
   return __roundAndPackFloat32(aSign, 0x9C - shiftCount, zFrac);
}
1328
1329/* Returns the result of converting the single-precision floating-point value
1330 * `a' to the double-precision floating-point format.
1331 */
uint64_t
__fp32_to_fp64(float f)
{
   uint a = floatBitsToUint(f);
   uint aFrac = a & 0x007FFFFFu;
   int aExp = int((a>>23) & 0xFFu);
   uint aSign = a>>31;
   uint zFrac0 = 0u;
   uint zFrac1 = 0u;

   if (aExp == 0xFF) {
      if (aFrac != 0u) {
         /* NaN: move the payload into the double's fraction field and
          * force the quiet bit (0x7FF8...). */
         uint nanLo = 0u;
         uint nanHi = a<<9;
         __shift64Right(nanHi, nanLo, 12, nanHi, nanLo);
         nanHi |= ((aSign<<31) | 0x7FF80000u);
         return packUint2x32(uvec2(nanLo, nanHi));
      }
      /* Infinity converts to a correctly signed double infinity. */
      return __packFloat64(aSign, 0x7FF, 0u, 0u);
    }

   if (aExp == 0) {
      if (aFrac == 0u)
         return __packFloat64(aSign, 0, 0u, 0u);
      /* Normalize subnormal */
      int shiftCount = __countLeadingZeros32(aFrac) - 8;
      aFrac <<= shiftCount;
      aExp = 1 - shiftCount;
      --aExp;
   }

   /* Widen the 23-bit fraction to the double's 52-bit field and rebias
    * the exponent (0x380 = 0x3FF - 0x7F). */
   __shift64Right(aFrac, 0u, 3, zFrac0, zFrac1);
   return __packFloat64(aSign, aExp + 0x380, zFrac0, zFrac1);
}
1366
1367/* Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the
1368 * 96-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
1369 * modulo 2^96, so any carry out is lost.  The result is broken into three
1370 * 32-bit pieces which are stored at the locations pointed to by `z0Ptr',
1371 * `z1Ptr', and `z2Ptr'.
1372 */
void
__add96(uint a0, uint a1, uint a2,
        uint b0, uint b1, uint b2,
        out uint z0Ptr,
        out uint z1Ptr,
        out uint z2Ptr)
{
   /* Sum the three words from least to most significant; a carry is
    * detected by unsigned wrap-around (sum < addend). */
   uint sum2 = a2 + b2;
   uint carryInto1 = uint(sum2 < a2);
   uint sum1 = a1 + b1;
   uint carryInto0 = uint(sum1 < a1);
   uint sum0 = a0 + b0;
   sum1 += carryInto1;
   /* Adding the carry into sum1 can itself wrap. */
   sum0 += uint(sum1 < carryInto1);
   sum0 += carryInto0;
   z2Ptr = sum2;
   z1Ptr = sum1;
   z0Ptr = sum0;
}
1392
1393/* Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from
1394 * the 96-bit value formed by concatenating `a0', `a1', and `a2'.  Subtraction
1395 * is modulo 2^96, so any borrow out (carry out) is lost.  The result is broken
1396 * into three 32-bit pieces which are stored at the locations pointed to by
1397 * `z0Ptr', `z1Ptr', and `z2Ptr'.
1398 */
void
__sub96(uint a0, uint a1, uint a2,
        uint b0, uint b1, uint b2,
        out uint z0Ptr,
        out uint z1Ptr,
        out uint z2Ptr)
{
   /* Subtract word-wise from least to most significant; a borrow is
    * detected by comparing the operands before wrap-around. */
   uint diff2 = a2 - b2;
   uint borrowFrom1 = uint(a2 < b2);
   uint diff1 = a1 - b1;
   uint borrowFrom0 = uint(a1 < b1);
   uint diff0 = a0 - b0;
   /* Check the middle word against its incoming borrow before applying
    * it, so a second-level borrow is caught. */
   diff0 -= uint(diff1 < borrowFrom1);
   diff1 -= borrowFrom1;
   diff0 -= borrowFrom0;
   z2Ptr = diff2;
   z1Ptr = diff1;
   z0Ptr = diff0;
}
1418
1419/* Returns an approximation to the 32-bit integer quotient obtained by dividing
1420 * `b' into the 64-bit value formed by concatenating `a0' and `a1'.  The
1421 * divisor `b' must be at least 2^31.  If q is the exact quotient truncated
1422 * toward zero, the approximation returned lies between q and q + 2 inclusive.
1423 * If the exact quotient q is larger than 32 bits, the maximum positive 32-bit
1424 * unsigned integer is returned.
1425 */
uint
__estimateDiv64To32(uint a0, uint a1, uint b)
{
   uint b0;
   uint b1;
   uint rem0 = 0u;
   uint rem1 = 0u;
   uint term0 = 0u;
   uint term1 = 0u;
   uint z;

   /* The quotient would need more than 32 bits: saturate. */
   if (b <= a0)
      return 0xFFFFFFFFu;
   b0 = b>>16;
   /* First guess for the high 16 quotient bits. */
   z = (b0<<16 <= a0) ? 0xFFFF0000u : (a0 / b0)<<16;
   __mul32To64(b, z, term0, term1);
   __sub64(a0, a1, term0, term1, rem0, rem1);
   /* The guess may overshoot; back off by 2^16 and restore b<<16
    * (as the pair b0:b1) until the remainder is non-negative. */
   while (int(rem0) < 0) {
      z -= 0x10000u;
      b1 = b<<16;
      __add64(rem0, rem1, b0, b1, rem0, rem1);
   }
   rem0 = (rem0<<16) | (rem1>>16);
   /* Low 16 quotient bits estimated from the reduced remainder. */
   z |= (b0<<16 <= rem0) ? 0xFFFFu : rem0 / b0;
   return z;
}
1452
/* Adjustment table used by __estimateSqrt32 for odd exponents, written
 * as a selection chain (this file avoids indexed constant arrays).
 * Indexes outside 0..15 yield 0, matching the original fallback. */
uint
__sqrtOddAdjustments(int index)
{
   return index == 0  ? 0x0004u :
          index == 1  ? 0x0022u :
          index == 2  ? 0x005Du :
          index == 3  ? 0x00B1u :
          index == 4  ? 0x011Du :
          index == 5  ? 0x019Fu :
          index == 6  ? 0x0236u :
          index == 7  ? 0x02E0u :
          index == 8  ? 0x039Cu :
          index == 9  ? 0x0468u :
          index == 10 ? 0x0545u :
          index == 11 ? 0x0631u :
          index == 12 ? 0x072Bu :
          index == 13 ? 0x0832u :
          index == 14 ? 0x0946u :
          index == 15 ? 0x0A67u :
          0u;
}
1492
/* Adjustment table used by __estimateSqrt32 for even exponents, written
 * as a selection chain (this file avoids indexed constant arrays).
 * Indexes outside 0..15 yield 0, matching the original fallback. */
uint
__sqrtEvenAdjustments(int index)
{
   return index == 0  ? 0x0A2Du :
          index == 1  ? 0x08AFu :
          index == 2  ? 0x075Au :
          index == 3  ? 0x0629u :
          index == 4  ? 0x051Au :
          index == 5  ? 0x0429u :
          index == 6  ? 0x0356u :
          index == 7  ? 0x029Eu :
          index == 8  ? 0x0200u :
          index == 9  ? 0x0179u :
          index == 10 ? 0x0109u :
          index == 11 ? 0x00AFu :
          index == 12 ? 0x0068u :
          index == 13 ? 0x0034u :
          index == 14 ? 0x0012u :
          index == 15 ? 0x0002u :
          0u;
}
1532
1533/* Returns an approximation to the square root of the 32-bit significand given
1534 * by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
1535 * `aExp' (the least significant bit) is 1, the integer returned approximates
1536 * 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
1537 * is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
1538 * case, the approximation returned lies strictly within +/-2 of the exact
1539 * value.
1540 */
uint
__estimateSqrt32(int aExp, uint a)
{
   uint z;

   /* Table index from the four bits below the leading 1. */
   int index = int(a>>27 & 15u);
   if ((aExp & 1) != 0) {
      /* Odd exponent: `a' is treated as a/2^31. */
      z = 0x4000u + (a>>17) - __sqrtOddAdjustments(index);
      z = ((a / z)<<14) + (z<<15);
      a >>= 1;
   } else {
      /* Even exponent: `a' is treated as a/2^30. */
      z = 0x8000u + (a>>17) - __sqrtEvenAdjustments(index);
      z = a / z + z;
      z = (0x20000u <= z) ? 0xFFFF8000u : (z<<15);
      if (z <= a)
         return uint(int(a)>>1);
   }
   /* One refinement step averaging the estimate with a/z. */
   return ((__estimateDiv64To32(a, 0u, z))>>1) + (z>>1);
}
1560
1561/* Returns the square root of the double-precision floating-point value `a'.
1562 * The operation is performed according to the IEEE Standard for Floating-Point
1563 * Arithmetic.
1564 */
uint64_t
__fsqrt64(uint64_t a)
{
   uint zFrac0 = 0u;
   uint zFrac1 = 0u;
   uint zFrac2 = 0u;
   uint doubleZFrac0 = 0u;
   uint rem0 = 0u;
   uint rem1 = 0u;
   uint rem2 = 0u;
   uint rem3 = 0u;
   uint term0 = 0u;
   uint term1 = 0u;
   uint term2 = 0u;
   uint term3 = 0u;
   uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;

   uint aFracLo = __extractFloat64FracLo(a);
   uint aFracHi = __extractFloat64FracHi(a);
   int aExp = __extractFloat64Exp(a);
   uint aSign = __extractFloat64Sign(a);
   if (aExp == 0x7FF) {
      /* NaN propagates; sqrt(+Inf) = +Inf; sqrt(-Inf) is invalid. */
      if ((aFracHi | aFracLo) != 0u)
         return __propagateFloat64NaN(a, a);
      if (aSign == 0u)
         return a;
      return default_nan;
   }
   if (aSign != 0u) {
      /* sqrt of a negative number is invalid, except sqrt(-0) = -0. */
      if ((uint(aExp) | aFracHi | aFracLo) == 0u)
         return a;
      return default_nan;
   }
   if (aExp == 0) {
      /* Zero or subnormal input. */
      if ((aFracHi | aFracLo) == 0u)
         return __packFloat64(0u, 0, 0u, 0u);
      __normalizeFloat64Subnormal(aFracHi, aFracLo, aExp, aFracHi, aFracLo);
   }
   /* Result exponent is half of the unbiased input exponent, rebiased. */
   int zExp = ((aExp - 0x3FF)>>1) + 0x3FE;
   aFracHi |= 0x00100000u;
   __shortShift64Left(aFracHi, aFracLo, 11, term0, term1);
   /* Initial 32-bit estimate of the root's high word. */
   zFrac0 = (__estimateSqrt32(aExp, term0)>>1) + 1u;
   if (zFrac0 == 0u)
      zFrac0 = 0x7FFFFFFFu;
   doubleZFrac0 = zFrac0 + zFrac0;
   __shortShift64Left(aFracHi, aFracLo, 9 - (aExp & 1), aFracHi, aFracLo);
   /* rem = a - zFrac0^2; step the estimate down while it overshoots. */
   __mul32To64(zFrac0, zFrac0, term0, term1);
   __sub64(aFracHi, aFracLo, term0, term1, rem0, rem1);
   while (int(rem0) < 0) {
      --zFrac0;
      doubleZFrac0 -= 2u;
      __add64(rem0, rem1, 0u, doubleZFrac0 | 1u, rem0, rem1);
   }
   /* Estimate the low 32 bits of the root from the remainder. */
   zFrac1 = __estimateDiv64To32(rem1, 0u, doubleZFrac0);
   if ((zFrac1 & 0x1FFu) <= 5u) {
      /* Near a rounding boundary: compute the exact remainder of the
       * 64-bit root to decide the final bits. */
      if (zFrac1 == 0u)
         zFrac1 = 1u;
      __mul32To64(doubleZFrac0, zFrac1, term1, term2);
      __sub64(rem1, 0u, term1, term2, rem1, rem2);
      __mul32To64(zFrac1, zFrac1, term2, term3);
      __sub96(rem1, rem2, 0u, 0u, term2, term3, rem1, rem2, rem3);
      while (int(rem1) < 0) {
         --zFrac1;
         __shortShift64Left(0u, zFrac1, 1, term2, term3);
         term3 |= 1u;
         term2 |= doubleZFrac0;
         __add96(rem1, rem2, rem3, 0u, term2, term3, rem1, rem2, rem3);
      }
      /* Sticky bit: set iff the remainder is non-zero. */
      zFrac1 |= uint((rem1 | rem2 | rem3) != 0u);
   }
   /* Shift into rounding position and pack. */
   __shift64ExtraRightJamming(zFrac0, zFrac1, 0u, 10, zFrac0, zFrac1, zFrac2);
   return __roundAndPackFloat64(0u, zExp, zFrac0, zFrac1, zFrac2);
}
1638
/* Truncates the double-precision value `__a' toward zero by masking off
 * all fraction bits below the binary point. */
uint64_t
__ftrunc64(uint64_t __a)
{
   uvec2 a = unpackUint2x32(__a);
   int aExp = __extractFloat64Exp(__a);
   uint zLo;
   uint zHi;

   int unbiasedExp = aExp - 1023;
   /* Number of fraction bits below the binary point. */
   int fracBits = 52 - unbiasedExp;
   /* Per-word masks clearing every fractional bit. */
   uint maskLo = mix(~0u << fracBits, 0u, fracBits >= 32);
   uint maskHi = mix(~0u << (fracBits - 32), ~0u, fracBits < 33);
   zLo = maskLo & a.x;
   zHi = maskHi & a.y;

   /* |a| < 1 truncates to zero.
    * NOTE(review): the sign bit is cleared too, so e.g. -0.5 yields +0
    * — confirm callers do not depend on signed zero here. */
   zLo = mix(zLo, 0u, unbiasedExp < 0);
   zHi = mix(zHi, 0u, unbiasedExp < 0);
   /* Exponent beyond the fraction width: the value is already integral
    * (this also passes Inf/NaN bit patterns through unchanged). */
   zLo = mix(zLo, a.x, unbiasedExp > 52);
   zHi = mix(zHi, a.y, unbiasedExp > 52);
   return packUint2x32(uvec2(zLo, zHi));
}
1660
uint64_t
__ffloor64(uint64_t a)
{
   /* floor(a) equals trunc(a) except for negative values with a
    * fractional part, where one must be subtracted. */
   uint64_t truncated = __ftrunc64(a);
   bool negative = !__fge64(a, 0ul);

   if (negative && !__feq64(truncated, a))
      return __fadd64(truncated, 0xbff0000000000000ul /* -1.0 */);
   return truncated;
}
1673
/* Rounds the double-precision value `__a' to the nearest integer. */
uint64_t
__fround64(uint64_t __a)
{
   uvec2 a = unpackUint2x32(__a);
   int unbiasedExp = __extractFloat64Exp(__a) - 1023;
   uint aHi = a.y;
   uint aLo = a.x;

   if (unbiasedExp < 20) {
      if (unbiasedExp < 0) {
         /* |a| < 1: the result is zero or +/-1. */
         /* NOTE(review): any value with the sign bit set and a zero low
          * word returns +0 here, before the sign-preserving path below
          * — confirm this is the intended handling of small negative
          * values. */
         if ((aHi & 0x80000000u) != 0u && aLo == 0u) {
            return 0;
         }
         /* Keep only the sign bit. */
         aHi &= 0x80000000u;
         /* Input was exactly +/-0. */
         if ((a.y & 0x000FFFFFu) == 0u && a.x == 0u) {
            aLo = 0u;
            return packUint2x32(uvec2(aLo, aHi));
         }
         /* 0.5 <= |a| < 1 rounds to +/-1; smaller magnitudes to +/-0. */
         aHi = mix(aHi, (aHi | 0x3FF00000u), unbiasedExp == -1);
         aLo = 0u;
      } else {
         /* Rounding bit lies in the high word: add one half, then
          * clear every fraction bit below the integer part. */
         uint maskExp = 0x000FFFFFu >> unbiasedExp;
         uint lastBit = maskExp + 1;
         aHi += 0x00080000u >> unbiasedExp;
         /* Exact tie (all lower bits zero after the add): clear the
          * last kept bit so the result is even. */
         if ((aHi & maskExp) == 0u)
            aHi &= ~lastBit;
         aHi &= ~maskExp;
         aLo = 0u;
      }
   } else if (unbiasedExp > 51 || unbiasedExp == 1024) {
      /* Already integral, or Inf/NaN: return unchanged. */
      return __a;
   } else {
      /* Rounding bit lies in the low word. */
      uint maskExp = 0xFFFFFFFFu >> (unbiasedExp - 20);
      if ((aLo & maskExp) == 0u)
         return __a;
      uint tmp = aLo + (1u << (51 - unbiasedExp));
      /* Propagate a carry out of the low word into the high word. */
      if(tmp < aLo)
         aHi += 1u;
      aLo = tmp;
      aLo &= ~maskExp;
   }

   return packUint2x32(uvec2(aLo, aHi));
}
1718
uint64_t
__fmin64(uint64_t a, uint64_t b)
{
   /* Quiet-NaN semantics: a NaN operand yields the other operand. */
   if (__is_nan(a))
      return b;
   if (__is_nan(b))
      return a;

   /* Both operands are numbers: ordinary ordered comparison. */
   return __flt64_nonnan(a, b) ? a : b;
}
1728
uint64_t
__fmax64(uint64_t a, uint64_t b)
{
   /* Quiet-NaN semantics: a NaN operand yields the other operand. */
   if (__is_nan(a))
      return b;
   if (__is_nan(b))
      return a;

   /* Both operands are numbers: ordinary ordered comparison. */
   return __flt64_nonnan(a, b) ? b : a;
}
1738
uint64_t
__ffract64(uint64_t a)
{
   /* fract(a) = a - floor(a). */
   uint64_t negFloor = __fneg64(__ffloor64(a));
   return __fadd64(a, negFloor);
}
1744