1 /* mpfr_set_ld -- convert a machine long double to 2 a multiple precision floating-point number 3 4 Copyright 2002-2023 Free Software Foundation, Inc. 5 Contributed by the AriC and Caramba projects, INRIA. 6 7 This file is part of the GNU MPFR Library. 8 9 The GNU MPFR Library is free software; you can redistribute it and/or modify 10 it under the terms of the GNU Lesser General Public License as published by 11 the Free Software Foundation; either version 3 of the License, or (at your 12 option) any later version. 13 14 The GNU MPFR Library is distributed in the hope that it will be useful, but 15 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 17 License for more details. 18 19 You should have received a copy of the GNU Lesser General Public License 20 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 21 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 22 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 23 24 #include <float.h> /* needed so that MPFR_LDBL_MANT_DIG is correctly defined */ 25 26 #define MPFR_NEED_LONGLONG_H 27 #include "mpfr-impl.h" 28 29 /* To check for +inf, one can use the test x > LDBL_MAX, as LDBL_MAX is 30 the maximum finite number representable in a long double, according 31 to DR 467; see 32 https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2092.htm 33 If this fails on some platform, a test x - x != 0 might be used. */ 34 35 #if defined(HAVE_LDOUBLE_IS_DOUBLE) 36 37 /* the "long double" format is the same as "double" */ 38 int 39 mpfr_set_ld (mpfr_ptr r, long double d, mpfr_rnd_t rnd_mode) 40 { 41 return mpfr_set_d (r, (double) d, rnd_mode); 42 } 43 44 #elif defined(HAVE_LDOUBLE_IEEE_EXT_LITTLE) 45 46 #if GMP_NUMB_BITS >= 64 47 # define MPFR_LIMBS_PER_LONG_DOUBLE 1 48 #elif GMP_NUMB_BITS == 32 49 # define MPFR_LIMBS_PER_LONG_DOUBLE 2 50 #elif GMP_NUMB_BITS == 16 51 # define MPFR_LIMBS_PER_LONG_DOUBLE 4 52 #elif GMP_NUMB_BITS == 8 53 # define MPFR_LIMBS_PER_LONG_DOUBLE 8 54 #else 55 #error "GMP_NUMB_BITS is assumed to be 8, 16, 32 or >= 64" 56 #endif 57 /* The hypothetical GMP_NUMB_BITS == 16 is not supported. It will trigger 58 an error below. */ 59 60 /* IEEE Extended Little Endian Code */ 61 int 62 mpfr_set_ld (mpfr_ptr r, long double d, mpfr_rnd_t rnd_mode) 63 { 64 int inexact, k, cnt; 65 mpfr_t tmp; 66 mp_limb_t tmpmant[MPFR_LIMBS_PER_LONG_DOUBLE]; 67 mpfr_long_double_t x; 68 mpfr_exp_t exp; 69 int signd; 70 MPFR_SAVE_EXPO_DECL (expo); 71 72 /* Check for NAN */ 73 if (MPFR_UNLIKELY (DOUBLE_ISNAN (d))) 74 { 75 /* we don't propagate the sign bit */ 76 MPFR_SET_NAN (r); 77 MPFR_RET_NAN; 78 } 79 /* Check for INF */ 80 else if (MPFR_UNLIKELY (d > LDBL_MAX)) 81 { 82 MPFR_SET_INF (r); 83 MPFR_SET_POS (r); 84 return 0; 85 } 86 else if (MPFR_UNLIKELY (d < -LDBL_MAX)) 87 { 88 MPFR_SET_INF (r); 89 MPFR_SET_NEG (r); 90 return 0; 91 } 92 /* Check for ZERO */ 93 else if (MPFR_UNLIKELY (d == 0.0)) 94 { 95 x.ld = d; 96 MPFR_SET_ZERO (r); 97 if (x.s.sign == 1) 98 MPFR_SET_NEG(r); 99 else 100 MPFR_SET_POS(r); 101 return 0; 102 } 103 104 /* now d is neither 0, nor NaN nor Inf */ 105 MPFR_SAVE_EXPO_MARK (expo); 106 107 MPFR_MANT (tmp) = tmpmant; 108 MPFR_PREC (tmp) = 64; 109 110 /* Extract sign */ 111 x.ld = d; 112 signd = MPFR_SIGN_POS; 113 if (x.ld < 0.0) 114 { 115 signd = MPFR_SIGN_NEG; 116 x.ld = -x.ld; 117 } 118 119 /* Extract and normalize the significand */ 120 #if MPFR_LIMBS_PER_LONG_DOUBLE == 1 121 tmpmant[0] = ((mp_limb_t) x.s.manh << 32) | ((mp_limb_t) x.s.manl); 122 count_leading_zeros (cnt, tmpmant[0]); 123 tmpmant[0] <<= cnt; 124 k = 0; /* number of limbs shifted */ 125 #else /* MPFR_LIMBS_PER_LONG_DOUBLE >= 2 */ 126 #if MPFR_LIMBS_PER_LONG_DOUBLE == 2 127 tmpmant[0] = (mp_limb_t) x.s.manl; 128 tmpmant[1] = (mp_limb_t) x.s.manh; 129 #elif MPFR_LIMBS_PER_LONG_DOUBLE == 4 130 tmpmant[0] = (mp_limb_t) x.s.manl; 131 tmpmant[1] = (mp_limb_t) (x.s.manl >> 16); 132 tmpmant[2] = (mp_limb_t) x.s.manh; 133 tmpmant[3] = (mp_limb_t) (x.s.manh >> 16); 134 #elif MPFR_LIMBS_PER_LONG_DOUBLE == 8 135 tmpmant[0] = (mp_limb_t) x.s.manl; 136 tmpmant[1] = (mp_limb_t) (x.s.manl >> 8); 137 tmpmant[2] = (mp_limb_t) (x.s.manl >> 16); 138 tmpmant[3] = (mp_limb_t) (x.s.manl >> 24); 139 tmpmant[4] = (mp_limb_t) x.s.manh; 140 tmpmant[5] = (mp_limb_t) (x.s.manh >> 8); 141 tmpmant[6] = (mp_limb_t) (x.s.manh >> 16); 142 tmpmant[7] = (mp_limb_t) (x.s.manh >> 24); 143 #else 144 #error "MPFR_LIMBS_PER_LONG_DOUBLE should be 1, 2, 4 or 8" 145 #endif /* MPFR_LIMBS_PER_LONG_DOUBLE >= 2 */ 146 { 147 int i = MPFR_LIMBS_PER_LONG_DOUBLE; 148 MPN_NORMALIZE_NOT_ZERO (tmpmant, i); 149 k = MPFR_LIMBS_PER_LONG_DOUBLE - i; 150 count_leading_zeros (cnt, tmpmant[i - 1]); 151 if (cnt != 0) 152 mpn_lshift (tmpmant + k, tmpmant, i, cnt); 153 else if (k != 0) 154 /* since we copy {tmpmant, i} into {tmpmant + k, i}, we should work 155 decreasingly, thus call mpn_copyd */ 156 mpn_copyd (tmpmant + k, tmpmant, i); 157 if (k != 0) 158 MPN_ZERO (tmpmant, k); 159 } 160 #endif /* MPFR_LIMBS_PER_LONG_DOUBLE == 1 */ 161 162 /* Set exponent */ 163 exp = (mpfr_exp_t) ((x.s.exph << 8) + x.s.expl); /* 15-bit unsigned int */ 164 if (MPFR_UNLIKELY (exp == 0)) 165 exp -= 0x3FFD; 166 else 167 exp -= 0x3FFE; 168 169 MPFR_SET_EXP (tmp, exp - cnt - k * GMP_NUMB_BITS); 170 171 /* tmp is exact */ 172 inexact = mpfr_set4 (r, tmp, rnd_mode, signd); 173 174 MPFR_SAVE_EXPO_FREE (expo); 175 return mpfr_check_range (r, inexact, rnd_mode); 176 } 177 178 #elif defined(HAVE_LDOUBLE_MAYBE_DOUBLE_DOUBLE) 179 180 /* double-double code, see 181 https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=libgcc/config/rs6000/ibm-ldouble-format;h=e8ada17f7696cd942e710d5b67d4149f5fcccf45;hb=HEAD */ 182 int 183 mpfr_set_ld (mpfr_ptr r, long double d, mpfr_rnd_t rnd_mode) 184 { 185 mpfr_t t, u; 186 int inexact; 187 double h, l; 188 MPFR_SAVE_EXPO_DECL (expo); 189 190 /* Check for NAN. Since we can't use isnan(), we rely on the 191 LONGDOUBLE_NAN_ACTION macro. The sign bit is not propagated. */ 192 LONGDOUBLE_NAN_ACTION (d, { MPFR_SET_NAN(r); MPFR_RET_NAN; }); 193 194 /* Check for INF */ 195 if (d > LDBL_MAX) 196 { 197 mpfr_set_inf (r, 1); 198 return 0; 199 } 200 else if (d < -LDBL_MAX) 201 { 202 mpfr_set_inf (r, -1); 203 return 0; 204 } 205 /* Check for ZERO */ 206 else if (d == 0.0) 207 return mpfr_set_d (r, (double) d, rnd_mode); 208 209 if (d >= LDBL_MAX || d <= -LDBL_MAX) 210 h = (d >= LDBL_MAX) ? LDBL_MAX : -LDBL_MAX; 211 else 212 h = (double) d; /* should not overflow */ 213 l = (double) (d - (long double) h); 214 215 MPFR_SAVE_EXPO_MARK (expo); 216 217 mpfr_init2 (t, IEEE_DBL_MANT_DIG); 218 mpfr_init2 (u, IEEE_DBL_MANT_DIG); 219 220 inexact = mpfr_set_d (t, h, MPFR_RNDN); 221 MPFR_ASSERTN(inexact == 0); 222 inexact = mpfr_set_d (u, l, MPFR_RNDN); 223 MPFR_ASSERTN(inexact == 0); 224 inexact = mpfr_add (r, t, u, rnd_mode); 225 226 mpfr_clear (t); 227 mpfr_clear (u); 228 229 MPFR_SAVE_EXPO_FREE (expo); 230 inexact = mpfr_check_range (r, inexact, rnd_mode); 231 return inexact; 232 } 233 234 #else 235 236 /* Generic code */ 237 int 238 mpfr_set_ld (mpfr_ptr r, long double d, mpfr_rnd_t rnd_mode) 239 { 240 mpfr_t t, u; 241 int inexact, shift_exp; 242 long double x; 243 MPFR_SAVE_EXPO_DECL (expo); 244 245 /* Check for NAN. Since we can't use isnan(), we rely on the 246 LONGDOUBLE_NAN_ACTION macro. The sign bit is not propagated. */ 247 LONGDOUBLE_NAN_ACTION (d, { MPFR_SET_NAN(r); MPFR_RET_NAN; }); 248 249 /* Check for INF */ 250 if (d > LDBL_MAX) 251 { 252 mpfr_set_inf (r, 1); 253 return 0; 254 } 255 else if (d < -LDBL_MAX) 256 { 257 mpfr_set_inf (r, -1); 258 return 0; 259 } 260 /* Check for ZERO */ 261 else if (d == 0.0) 262 return mpfr_set_d (r, (double) d, rnd_mode); 263 264 mpfr_init2 (t, MPFR_LDBL_MANT_DIG); 265 mpfr_init2 (u, IEEE_DBL_MANT_DIG); 266 267 MPFR_SAVE_EXPO_MARK (expo); 268 269 convert: 270 x = d; 271 MPFR_SET_ZERO (t); /* The sign doesn't matter. */ 272 shift_exp = 0; /* invariant: remainder to deal with is d*2^shift_exp */ 273 while (x != (long double) 0.0) 274 { 275 /* Check overflow of double */ 276 if (x > (long double) DBL_MAX || (-x) > (long double) DBL_MAX) 277 { 278 long double div9, div10, div11, div12, div13; 279 280 #define TWO_64 18446744073709551616.0 /* 2^64 */ 281 #define TWO_128 (TWO_64 * TWO_64) 282 #define TWO_256 (TWO_128 * TWO_128) 283 div9 = (long double) (double) (TWO_256 * TWO_256); /* 2^(2^9) */ 284 div10 = div9 * div9; 285 div11 = div10 * div10; /* 2^(2^11) */ 286 div12 = div11 * div11; /* 2^(2^12) */ 287 div13 = div12 * div12; /* 2^(2^13) */ 288 if (ABS (x) >= div13) 289 { 290 x /= div13; /* exact */ 291 shift_exp += 8192; 292 mpfr_div_2si (t, t, 8192, MPFR_RNDZ); 293 } 294 if (ABS (x) >= div12) 295 { 296 x /= div12; /* exact */ 297 shift_exp += 4096; 298 mpfr_div_2si (t, t, 4096, MPFR_RNDZ); 299 } 300 if (ABS (x) >= div11) 301 { 302 x /= div11; /* exact */ 303 shift_exp += 2048; 304 mpfr_div_2si (t, t, 2048, MPFR_RNDZ); 305 } 306 if (ABS (x) >= div10) 307 { 308 x /= div10; /* exact */ 309 shift_exp += 1024; 310 mpfr_div_2si (t, t, 1024, MPFR_RNDZ); 311 } 312 /* warning: we may have DBL_MAX=2^1024*(1-2^(-53)) < x < 2^1024, 313 therefore we have one extra exponent reduction step */ 314 if (ABS (x) >= div9) 315 { 316 x /= div9; /* exact */ 317 shift_exp += 512; 318 mpfr_div_2si (t, t, 512, MPFR_RNDZ); 319 } 320 } /* Check overflow of double */ 321 else /* no overflow on double */ 322 { 323 long double div9, div10, div11; 324 325 div9 = (long double) (double) 7.4583407312002067432909653e-155; 326 /* div9 = 2^(-2^9) */ 327 div10 = div9 * div9; /* 2^(-2^10) */ 328 div11 = div10 * div10; /* 2^(-2^11) if extended precision */ 329 /* since -DBL_MAX <= x <= DBL_MAX, the cast to double should not 330 overflow here */ 331 if (ABS(x) < div10 && 332 div11 != (long double) 0.0 && 333 div11 / div10 == div10) /* possible underflow */ 334 { 335 long double div12, div13; 336 /* After the divisions, any bit of x must be >= div10, 337 hence the possible division by div9. */ 338 div12 = div11 * div11; /* 2^(-2^12) */ 339 div13 = div12 * div12; /* 2^(-2^13) */ 340 if (ABS (x) <= div13) 341 { 342 x /= div13; /* exact */ 343 shift_exp -= 8192; 344 mpfr_mul_2si (t, t, 8192, MPFR_RNDZ); 345 } 346 if (ABS (x) <= div12) 347 { 348 x /= div12; /* exact */ 349 shift_exp -= 4096; 350 mpfr_mul_2si (t, t, 4096, MPFR_RNDZ); 351 } 352 if (ABS (x) <= div11) 353 { 354 x /= div11; /* exact */ 355 shift_exp -= 2048; 356 mpfr_mul_2si (t, t, 2048, MPFR_RNDZ); 357 } 358 if (ABS (x) <= div10) 359 { 360 x /= div10; /* exact */ 361 shift_exp -= 1024; 362 mpfr_mul_2si (t, t, 1024, MPFR_RNDZ); 363 } 364 if (ABS(x) <= div9) 365 { 366 x /= div9; /* exact */ 367 shift_exp -= 512; 368 mpfr_mul_2si (t, t, 512, MPFR_RNDZ); 369 } 370 } 371 else /* no underflow */ 372 { 373 inexact = mpfr_set_d (u, (double) x, MPFR_RNDZ); 374 MPFR_ASSERTD (inexact == 0); 375 if (mpfr_add (t, t, u, MPFR_RNDZ) != 0) 376 { 377 if (!mpfr_number_p (t)) 378 break; 379 /* Inexact. This cannot happen unless the C implementation 380 "lies" on the precision or when long doubles are 381 implemented with FP expansions like double-double on 382 PowerPC. */ 383 if (MPFR_PREC (t) != MPFR_PREC (r) + 1) 384 { 385 /* We assume that MPFR_PREC (r) < MPFR_PREC_MAX. 386 The precision MPFR_PREC (r) + 1 allows us to 387 deduce the rounding bit and the sticky bit. */ 388 mpfr_set_prec (t, MPFR_PREC (r) + 1); 389 goto convert; 390 } 391 else 392 { 393 mp_limb_t *tp; 394 int rb_mask; 395 396 /* Since mpfr_add was inexact, the sticky bit is 1. */ 397 tp = MPFR_MANT (t); 398 rb_mask = MPFR_LIMB_ONE << 399 (GMP_NUMB_BITS - 1 - 400 (MPFR_PREC (r) & (GMP_NUMB_BITS - 1))); 401 if (rnd_mode == MPFR_RNDN) 402 rnd_mode = (*tp & rb_mask) ^ MPFR_IS_NEG (t) ? 403 MPFR_RNDU : MPFR_RNDD; 404 *tp |= rb_mask; 405 break; 406 } 407 } 408 x -= (long double) mpfr_get_d1 (u); /* exact */ 409 } 410 } 411 } 412 inexact = mpfr_mul_2si (r, t, shift_exp, rnd_mode); 413 mpfr_clear (t); 414 mpfr_clear (u); 415 416 MPFR_SAVE_EXPO_FREE (expo); 417 return mpfr_check_range (r, inexact, rnd_mode); 418 } 419 420 #endif 421