Home | History | Annotate | Line # | Download | only in src
      1 /* mpfr_set_d -- convert a machine double precision float to
      2                  a multiple precision floating-point number
      3 
      4 Copyright 1999-2004, 2006-2023 Free Software Foundation, Inc.
      5 Contributed by the AriC and Caramba projects, INRIA.
      6 
      7 This file is part of the GNU MPFR Library.
      8 
      9 The GNU MPFR Library is free software; you can redistribute it and/or modify
     10 it under the terms of the GNU Lesser General Public License as published by
     11 the Free Software Foundation; either version 3 of the License, or (at your
     12 option) any later version.
     13 
     14 The GNU MPFR Library is distributed in the hope that it will be useful, but
     15 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     16 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     17 License for more details.
     18 
     19 You should have received a copy of the GNU Lesser General Public License
     20 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
     21 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
     22 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
     23 
     24 #include <float.h>  /* For DOUBLE_ISINF and DOUBLE_ISNAN */
     25 
     26 #define MPFR_NEED_LONGLONG_H
     27 #include "mpfr-impl.h"
     28 
     29 /* Extracts the bits of |d| in rp[0..n-1] where n=ceil(53/GMP_NUMB_BITS).
     30    Assumes d finite and <> 0.
     31    Returns the corresponding exponent such that |d| = {rp, n} * 2^exp,
     32    with the value of {rp, n} in [1/2, 1).
     33    The int type should be sufficient for exp.
     34 */
     35 static int
     36 extract_double (mpfr_limb_ptr rp, double d)
     37 {
     38   int exp;
     39   mp_limb_t man[MPFR_LIMBS_PER_DOUBLE];
     40 
     41   /* FIXME: Generalize to handle GMP_NUMB_BITS < 16. */
     42 
     43   MPFR_ASSERTD(!DOUBLE_ISNAN(d));
     44   MPFR_ASSERTD(!DOUBLE_ISINF(d));
     45   MPFR_ASSERTD(d != 0.0);
     46 
     47 #if _MPFR_IEEE_FLOATS
     48 
     49   {
     50     union mpfr_ieee_double_extract x;
     51     x.d = d;
     52 
     53     exp = x.s.exp;
     54     if (exp)
     55       {
     56         /* x.s.manh has 20 bits (in its low bits), x.s.manl has 32 bits */
     57 #if GMP_NUMB_BITS >= 64
     58         man[0] = ((MPFR_LIMB_ONE << (GMP_NUMB_BITS - 1)) |
     59                   ((mp_limb_t) x.s.manh << (GMP_NUMB_BITS - 21)) |
     60                   ((mp_limb_t) x.s.manl << (GMP_NUMB_BITS - 53)));
     61 #elif GMP_NUMB_BITS == 32
     62         man[1] = (MPFR_LIMB_ONE << 31) | (x.s.manh << 11) | (x.s.manl >> 21);
     63         man[0] = x.s.manl << 11;
     64 #elif GMP_NUMB_BITS == 16
     65         MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 16);
     66         man[3] = (MPFR_LIMB_ONE << 15) | (x.s.manh >> 5);
     67         man[2] = (x.s.manh << 11) | (x.s.manl >> 21);
     68         man[1] = x.s.manl >> 5;
     69         man[0] = MPFR_LIMB_LSHIFT(x.s.manl,11);
     70 #else
     71         MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 8);
     72         man[6] = (MPFR_LIMB_ONE << 7) | (x.s.manh >> 13);
     73         man[5] = (mp_limb_t) (x.s.manh >> 5);
     74         man[4] = MPFR_LIMB_LSHIFT(x.s.manh, 3) | (mp_limb_t) (x.s.manl >> 29);
     75         man[3] = (mp_limb_t) (x.s.manl >> 21);
     76         man[2] = (mp_limb_t) (x.s.manl >> 13);
     77         man[1] = (mp_limb_t) (x.s.manl >> 5);
     78         man[0] = MPFR_LIMB_LSHIFT(x.s.manl,3);
     79 #endif
     80         exp -= 1022;
     81       }
     82     else /* subnormal number */
     83       {
     84         int cnt;
     85         exp = -1021;
     86 #if GMP_NUMB_BITS >= 64
     87         man[0] = (((mp_limb_t) x.s.manh << (GMP_NUMB_BITS - 21)) |
     88                   ((mp_limb_t) x.s.manl << (GMP_NUMB_BITS - 53)));
     89         count_leading_zeros (cnt, man[0]);
     90 #elif GMP_NUMB_BITS == 32
     91         man[1] = (x.s.manh << 11) /* high 21 bits */
     92           | (x.s.manl >> 21); /* middle 11 bits */
     93         man[0] = x.s.manl << 11; /* low 21 bits */
     94         if (man[1] == 0)
     95           {
     96             man[1] = man[0];
     97             man[0] = 0;
     98             exp -= GMP_NUMB_BITS;
     99           }
    100         count_leading_zeros (cnt, man[1]);
    101         man[1] = (man[1] << cnt) |
    102           (cnt != 0 ? man[0] >> (GMP_NUMB_BITS - cnt) : 0);
    103 #elif GMP_NUMB_BITS == 16
    104         man[3] = x.s.manh >> 5;
    105         man[2] = (x.s.manh << 11) | (x.s.manl >> 21);
    106         man[1] = x.s.manl >> 5;
    107         man[0] = x.s.manl << 11;
    108         while (man[3] == 0) /* d is assumed <> 0 */
    109           {
    110             man[3] = man[2];
    111             man[2] = man[1];
    112             man[1] = man[0];
    113             man[0] = 0;
    114             exp -= GMP_NUMB_BITS;
    115           }
    116         count_leading_zeros (cnt, man[3]);
    117         if (cnt)
    118           {
    119             man[3] = (man[3] << cnt) | (man[2] >> (GMP_NUMB_BITS - cnt));
    120             man[2] = (man[2] << cnt) | (man[1] >> (GMP_NUMB_BITS - cnt));
    121             man[1] = (man[1] << cnt) | (man[0] >> (GMP_NUMB_BITS - cnt));
    122           }
    123 #else
    124         MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 8);
    125         man[6] = x.s.manh >> 13;
    126         man[5] = x.s.manh >> 5;
    127         man[4] = (x.s.manh << 3) | (x.s.manl >> 29);
    128         man[3] = x.s.manl >> 21;
    129         man[2] = x.s.manl >> 13;
    130         man[1] = x.s.manl >> 5;
    131         man[0] = x.s.manl << 3;
    132         while (man[6] == 0) /* d is assumed <> 0 */
    133           {
    134             man[6] = man[5];
    135             man[5] = man[4];
    136             man[4] = man[3];
    137             man[3] = man[2];
    138             man[2] = man[1];
    139             man[1] = man[0];
    140             man[0] = 0;
    141             exp -= GMP_NUMB_BITS;
    142           }
    143         count_leading_zeros (cnt, man[6]);
    144         if (cnt)
    145           {
    146             int i;
    147             for (i = 6; i > 0; i--)
    148               man[i] = (man[i] << cnt) | (man[i-1] >> (GMP_NUMB_BITS - cnt));
    149           }
    150 #endif
    151         man[0] <<= cnt;
    152         exp -= cnt;
    153       }
    154   }
    155 
    156 #else /* _MPFR_IEEE_FLOATS */
    157 
    158   {
    159     /* Unknown (or known to be non-IEEE) double format.  */
    160     exp = 0;
    161     d = ABS (d);
    162     if (d >= 1.0)
    163       {
    164         while (d >= 32768.0)
    165           {
    166             d *= (1.0 / 65536.0);
    167             exp += 16;
    168           }
    169         while (d >= 1.0)
    170           {
    171             d *= 0.5;
    172             exp += 1;
    173           }
    174       }
    175     else if (d < 0.5)
    176       {
    177         while (d < (1.0 / 65536.0))
    178           {
    179             d *=  65536.0;
    180             exp -= 16;
    181           }
    182         while (d < 0.5)
    183           {
    184             d *= 2.0;
    185             exp -= 1;
    186           }
    187       }
    188 
    189     d *= MP_BASE_AS_DOUBLE;
    190 #if GMP_NUMB_BITS >= 64
    191 #ifndef __clang__
    192     man[0] = d;
    193 #else
    194     /* clang up to version 11 produces an invalid exception when d >= 2^63,
    195        see <https://github.com/llvm/llvm-project/issues/18060>
    196        (old URL: <https://bugs.llvm.org/show_bug.cgi?id=17686>).
    197        Since this is always the case, here, we use the following patch. */
    198     MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 64);
    199     man[0] = 0x8000000000000000 + (mp_limb_t) (d - 0x8000000000000000);
    200 #endif /* __clang__ */
    201 #elif GMP_NUMB_BITS == 32
    202     man[1] = (mp_limb_t) d;
    203     man[0] = (mp_limb_t) ((d - man[1]) * MP_BASE_AS_DOUBLE);
    204 #else
    205     MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 16);
    206     {
    207       double r = d;
    208       man[3] = (mp_limb_t) r;
    209       r = (r - man[3]) * MP_BASE_AS_DOUBLE;
    210       man[2] = (mp_limb_t) r;
    211       r = (r - man[2]) * MP_BASE_AS_DOUBLE;
    212       man[1] = (mp_limb_t) r;
    213       r = (r - man[1]) * MP_BASE_AS_DOUBLE;
    214       man[0] = (mp_limb_t) r;
    215     }
    216 #endif
    217   }
    218 
    219 #endif /* _MPFR_IEEE_FLOATS */
    220 
    221   rp[0] = man[0];
    222 #if GMP_NUMB_BITS <= 32
    223   rp[1] = man[1];
    224 #endif
    225 #if GMP_NUMB_BITS <= 16
    226   rp[2] = man[2];
    227   rp[3] = man[3];
    228 #endif
    229 #if GMP_NUMB_BITS <= 8
    230   rp[4] = man[4];
    231   rp[5] = man[5];
    232   rp[6] = man[6];
    233 #endif
    234 
    235   MPFR_ASSERTD((rp[MPFR_LIMBS_PER_DOUBLE - 1] & MPFR_LIMB_HIGHBIT) != 0);
    236 
    237   return exp;
    238 }
    239 
    240 /* End of part included from gmp-2.0.2 */
    241 
    242 int
    243 mpfr_set_d (mpfr_ptr r, double d, mpfr_rnd_t rnd_mode)
    244 {
    245   int inexact;
    246   mpfr_t tmp;
    247   mp_limb_t tmpmant[MPFR_LIMBS_PER_DOUBLE];
    248   MPFR_SAVE_EXPO_DECL (expo);
    249 
    250   if (MPFR_UNLIKELY(DOUBLE_ISNAN(d)))
    251     {
    252       /* we don't propagate the sign bit */
    253       MPFR_SET_NAN(r);
    254       MPFR_RET_NAN;
    255     }
    256   else if (MPFR_UNLIKELY(d == 0))
    257     {
    258 #if _MPFR_IEEE_FLOATS
    259       union mpfr_ieee_double_extract x;
    260 
    261       MPFR_SET_ZERO(r);
    262       /* set correct sign */
    263       x.d = d;
    264       if (x.s.sig == 1)
    265         MPFR_SET_NEG(r);
    266       else
    267         MPFR_SET_POS(r);
    268 #else /* _MPFR_IEEE_FLOATS */
    269       MPFR_SET_ZERO(r);
    270       {
    271         /* This is to get the sign of zero on non-IEEE hardware
    272            Some systems support +0.0, -0.0, and unsigned zero.
    273            Some other systems may just have an unsigned zero.
    274            We can't use d == +0.0 since it should be always true,
    275            so we check that the memory representation of d is the
    276            same as +0.0, etc.
    277            Note: r is set to -0 only if d is detected as a negative zero
    278            *and*, for the double type, -0 has a different representation
    279            from +0. If -0.0 has several representations, the code below
    280            may not work as expected, but this is hardly fixable in a
    281            portable way (without depending on a math library) and only
    282            the sign could be incorrect. Such systems should be taken
    283            into account on a case-by-case basis. If the code is changed
    284            here, set_d64.c code should be updated too. */
    285         double poszero = +0.0, negzero = DBL_NEG_ZERO;
    286         if (memcmp(&d, &poszero, sizeof(double)) == 0)
    287           MPFR_SET_POS(r);
    288         else if (memcmp(&d, &negzero, sizeof(double)) == 0)
    289           MPFR_SET_NEG(r);
    290         else
    291           MPFR_SET_POS(r);
    292       }
    293 #endif /* _MPFR_IEEE_FLOATS */
    294       return 0; /* 0 is exact */
    295     }
    296   else if (MPFR_UNLIKELY(DOUBLE_ISINF(d)))
    297     {
    298       MPFR_SET_INF(r);
    299       if (d > 0)
    300         MPFR_SET_POS(r);
    301       else
    302         MPFR_SET_NEG(r);
    303       return 0; /* infinity is exact */
    304     }
    305 
    306   /* now d is neither 0, nor NaN nor Inf */
    307 
    308   MPFR_SAVE_EXPO_MARK (expo);
    309 
    310   /* warning: don't use tmp=r here, even if SIZE(r) >= MPFR_LIMBS_PER_DOUBLE,
    311      since PREC(r) may be different from PREC(tmp), and then both variables
    312      would have same precision in the mpfr_set4 call below. */
    313   MPFR_MANT(tmp) = tmpmant;
    314   MPFR_PREC(tmp) = IEEE_DBL_MANT_DIG;
    315 
    316   /* don't use MPFR_SET_EXP here since the exponent may be out of range */
    317   MPFR_EXP(tmp) = extract_double (tmpmant, d);
    318 
    319   /* tmp is exact since PREC(tmp)=53 */
    320   inexact = mpfr_set4 (r, tmp, rnd_mode,
    321                        (d < 0) ? MPFR_SIGN_NEG : MPFR_SIGN_POS);
    322 
    323   MPFR_SAVE_EXPO_FREE (expo);
    324   return mpfr_check_range (r, inexact, rnd_mode);
    325 }
    326 
    327 
    328 
    329