1 /* mpfr_agm -- arithmetic-geometric mean of two floating-point numbers 2 3 Copyright 1999-2023 Free Software Foundation, Inc. 4 Contributed by the AriC and Caramba projects, INRIA. 5 6 This file is part of the GNU MPFR Library. 7 8 The GNU MPFR Library is free software; you can redistribute it and/or modify 9 it under the terms of the GNU Lesser General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or (at your 11 option) any later version. 12 13 The GNU MPFR Library is distributed in the hope that it will be useful, but 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16 License for more details. 17 18 You should have received a copy of the GNU Lesser General Public License 19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23 #define MPFR_NEED_LONGLONG_H 24 #include "mpfr-impl.h" 25 26 /* agm(x,y) is between x and y, so we don't need to save exponent range */ 27 int 28 mpfr_agm (mpfr_ptr r, mpfr_srcptr op2, mpfr_srcptr op1, mpfr_rnd_t rnd_mode) 29 { 30 int compare, inexact; 31 mp_size_t s; 32 mpfr_prec_t p, q; 33 mp_limb_t *up, *vp, *ufp, *vfp; 34 mpfr_t u, v, uf, vf, sc1, sc2; 35 mpfr_exp_t scaleop = 0, scaleit; 36 unsigned long n; /* number of iterations */ 37 MPFR_ZIV_DECL (loop); 38 MPFR_TMP_DECL(marker); 39 MPFR_SAVE_EXPO_DECL (expo); 40 41 MPFR_LOG_FUNC 42 (("op2[%Pd]=%.*Rg op1[%Pd]=%.*Rg rnd=%d", 43 mpfr_get_prec (op2), mpfr_log_prec, op2, 44 mpfr_get_prec (op1), mpfr_log_prec, op1, rnd_mode), 45 ("r[%Pd]=%.*Rg inexact=%d", 46 mpfr_get_prec (r), mpfr_log_prec, r, inexact)); 47 48 /* Deal with special values */ 49 if (MPFR_ARE_SINGULAR (op1, op2)) 50 { 51 /* If a or b is NaN, the result is NaN */ 52 if (MPFR_IS_NAN(op1) || MPFR_IS_NAN(op2)) 53 { 54 MPFR_SET_NAN(r); 55 MPFR_RET_NAN; 56 } 57 /* now one of a or b is Inf or 0 */ 58 /* If a and b is +Inf, the result is +Inf. 59 Otherwise if a or b is -Inf or 0, the result is NaN */ 60 else if (MPFR_IS_INF(op1) || MPFR_IS_INF(op2)) 61 { 62 if (MPFR_IS_STRICTPOS(op1) && MPFR_IS_STRICTPOS(op2)) 63 { 64 MPFR_SET_INF(r); 65 MPFR_SET_SAME_SIGN(r, op1); 66 MPFR_RET(0); /* exact */ 67 } 68 else 69 { 70 MPFR_SET_NAN(r); 71 MPFR_RET_NAN; 72 } 73 } 74 else /* a and b are neither NaN nor Inf, and one is zero */ 75 { /* If a or b is 0, the result is +0, in particular because the 76 result is always >= 0 with our definition (Maple sometimes 77 chooses a different sign for GaussAGM, but it uses another 78 definition, with possible negative results). */ 79 MPFR_ASSERTD (MPFR_IS_ZERO (op1) || MPFR_IS_ZERO (op2)); 80 MPFR_SET_POS (r); 81 MPFR_SET_ZERO (r); 82 MPFR_RET (0); /* exact */ 83 } 84 } 85 86 /* If a or b is negative (excluding -Infinity), the result is NaN */ 87 if (MPFR_UNLIKELY(MPFR_IS_NEG(op1) || MPFR_IS_NEG(op2))) 88 { 89 MPFR_SET_NAN(r); 90 MPFR_RET_NAN; 91 } 92 93 /* Precision of the following calculus */ 94 q = MPFR_PREC(r); 95 p = q + MPFR_INT_CEIL_LOG2(q) + 15; 96 MPFR_ASSERTD (p >= 7); /* see algorithms.tex */ 97 s = MPFR_PREC2LIMBS (p); 98 99 /* b (op2) and a (op1) are the 2 operands but we want b >= a */ 100 compare = mpfr_cmp (op1, op2); 101 if (MPFR_UNLIKELY( compare == 0 )) 102 return mpfr_set (r, op1, rnd_mode); 103 else if (compare > 0) 104 { 105 mpfr_srcptr t = op1; 106 op1 = op2; 107 op2 = t; 108 } 109 110 /* Now b (=op2) > a (=op1) */ 111 112 MPFR_SAVE_EXPO_MARK (expo); 113 114 MPFR_TMP_MARK(marker); 115 116 /* Main loop */ 117 MPFR_ZIV_INIT (loop, p); 118 for (;;) 119 { 120 mpfr_prec_t eq; 121 unsigned long err = 0; /* must be set to 0 at each Ziv iteration */ 122 MPFR_BLOCK_DECL (flags); 123 124 /* Init temporary vars */ 125 MPFR_TMP_INIT (up, u, p, s); 126 MPFR_TMP_INIT (vp, v, p, s); 127 MPFR_TMP_INIT (ufp, uf, p, s); 128 MPFR_TMP_INIT (vfp, vf, p, s); 129 130 /* Calculus of un and vn */ 131 retry: 132 MPFR_BLOCK (flags, 133 mpfr_mul (u, op1, op2, MPFR_RNDN); 134 /* mpfr_mul(...): faster since PREC(op) < PREC(u) */ 135 mpfr_add (v, op1, op2, MPFR_RNDN); 136 /* mpfr_add with !=prec is still good */); 137 if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags) || MPFR_UNDERFLOW (flags))) 138 { 139 mpfr_exp_t e1, e2; 140 141 MPFR_ASSERTN (scaleop == 0); 142 e1 = MPFR_GET_EXP (op1); 143 e2 = MPFR_GET_EXP (op2); 144 145 /* Let's determine scaleop to avoid an overflow/underflow. */ 146 if (MPFR_OVERFLOW (flags)) 147 { 148 /* Let's recall that emin <= e1 <= e2 <= emax. 149 There has been an overflow. Thus e2 >= emax/2. 150 If the mpfr_mul overflowed, then e1 + e2 > emax. 151 If the mpfr_add overflowed, then e2 = emax. 152 We want: (e1 + scale) + (e2 + scale) <= emax, 153 i.e. scale <= (emax - e1 - e2) / 2. Let's take 154 scale = min(floor((emax - e1 - e2) / 2), -1). 155 This is OK, as: 156 1. emin <= scale <= -1. 157 2. e1 + scale >= emin. Indeed: 158 * If e1 + e2 > emax, then 159 e1 + scale >= e1 + (emax - e1 - e2) / 2 - 1 160 >= (emax + e1 - emax) / 2 - 1 161 >= e1 / 2 - 1 >= emin. 162 * Otherwise, mpfr_mul didn't overflow, therefore 163 mpfr_add overflowed and e2 = emax, so that 164 e1 > emin (see restriction below). 165 e1 + scale > emin - 1, thus e1 + scale >= emin. 166 3. e2 + scale <= emax, since scale < 0. */ 167 if (e1 + e2 > MPFR_EMAX_MAX) 168 { 169 scaleop = - (((e1 + e2) - MPFR_EMAX_MAX + 1) / 2); 170 MPFR_ASSERTN (scaleop < 0); 171 } 172 else 173 { 174 /* The addition necessarily overflowed. */ 175 MPFR_ASSERTN (e2 == MPFR_EMAX_MAX); 176 /* The case where e1 = emin and e2 = emax is not supported 177 here. This would mean that the precision of e2 would be 178 huge (and possibly not supported in practice anyway). */ 179 MPFR_ASSERTN (e1 > MPFR_EMIN_MIN); 180 /* Note: this case is probably impossible to have in practice 181 since we need e2 = emax, and no overflow in the product. 182 Since the product is >= 2^(e1+e2-2), it implies 183 e1 + e2 - 2 <= emax, thus e1 <= 2. Now to get an overflow 184 we need op1 >= 1/2 ulp(op2), which implies that the 185 precision of op2 should be at least emax-2. On a 64-bit 186 computer this is impossible to have, and would require 187 a huge amount of memory on a 32-bit computer. */ 188 scaleop = -1; 189 } 190 191 } 192 else /* underflow only (in the multiplication) */ 193 { 194 /* We have e1 + e2 <= emin (so, e1 <= e2 <= 0). 195 We want: (e1 + scale) + (e2 + scale) >= emin + 1, 196 i.e. scale >= (emin + 1 - e1 - e2) / 2. let's take 197 scale = ceil((emin + 1 - e1 - e2) / 2). This is OK, as: 198 1. 1 <= scale <= emax. 199 2. e1 + scale >= emin + 1 >= emin. 200 3. e2 + scale <= scale <= emax. */ 201 MPFR_ASSERTN (e1 <= e2 && e2 <= 0); 202 scaleop = (MPFR_EMIN_MIN + 2 - e1 - e2) / 2; 203 MPFR_ASSERTN (scaleop > 0); 204 } 205 206 MPFR_ALIAS (sc1, op1, MPFR_SIGN (op1), e1 + scaleop); 207 MPFR_ALIAS (sc2, op2, MPFR_SIGN (op2), e2 + scaleop); 208 op1 = sc1; 209 op2 = sc2; 210 MPFR_LOG_MSG (("Exception in pre-iteration, scale = %" 211 MPFR_EXP_FSPEC "d\n", scaleop)); 212 goto retry; 213 } 214 215 MPFR_CLEAR_FLAGS (); 216 mpfr_sqrt (u, u, MPFR_RNDN); 217 mpfr_div_2ui (v, v, 1, MPFR_RNDN); 218 219 scaleit = 0; 220 n = 1; 221 while (mpfr_cmp2 (u, v, &eq) != 0 && eq <= p - 2) 222 { 223 MPFR_BLOCK_DECL (flags2); 224 225 MPFR_LOG_MSG (("Iteration n = %lu\n", n)); 226 227 retry2: 228 mpfr_add (vf, u, v, MPFR_RNDN); /* No overflow? */ 229 mpfr_div_2ui (vf, vf, 1, MPFR_RNDN); 230 /* See proof in algorithms.tex */ 231 if (eq > p / 4) 232 { 233 mpfr_t w; 234 MPFR_BLOCK_DECL (flags3); 235 236 MPFR_LOG_MSG (("4*eq > p\n", 0)); 237 238 /* vf = V(k) */ 239 mpfr_init2 (w, (p + 1) / 2); 240 MPFR_BLOCK 241 (flags3, 242 mpfr_sub (w, v, u, MPFR_RNDN); /* e = V(k-1)-U(k-1) */ 243 mpfr_sqr (w, w, MPFR_RNDN); /* e = e^2 */ 244 mpfr_div_2ui (w, w, 4, MPFR_RNDN); /* e*= (1/2)^2*1/4 */ 245 mpfr_div (w, w, vf, MPFR_RNDN); /* 1/4*e^2/V(k) */ 246 ); 247 if (MPFR_LIKELY (! MPFR_UNDERFLOW (flags3))) 248 { 249 mpfr_sub (v, vf, w, MPFR_RNDN); 250 err = MPFR_GET_EXP (vf) - MPFR_GET_EXP (v); /* 0 or 1 */ 251 mpfr_clear (w); 252 break; 253 } 254 /* There has been an underflow because of the cancellation 255 between V(k-1) and U(k-1). Let's use the conventional 256 method. */ 257 MPFR_LOG_MSG (("4*eq > p -> underflow\n", 0)); 258 mpfr_clear (w); 259 MPFR_CLEAR_UNDERFLOW (); 260 } 261 /* U(k) increases, so that U.V can overflow (but not underflow). */ 262 MPFR_BLOCK (flags2, mpfr_mul (uf, u, v, MPFR_RNDN);); 263 if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags2))) 264 { 265 mpfr_exp_t scale2; 266 267 scale2 = - (((MPFR_GET_EXP (u) + MPFR_GET_EXP (v)) 268 - MPFR_EMAX_MAX + 1) / 2); 269 MPFR_EXP (u) += scale2; 270 MPFR_EXP (v) += scale2; 271 scaleit += scale2; 272 MPFR_LOG_MSG (("Overflow in iteration n = %lu, scaleit = %" 273 MPFR_EXP_FSPEC "d (%" MPFR_EXP_FSPEC "d)\n", 274 n, scaleit, scale2)); 275 MPFR_CLEAR_OVERFLOW (); 276 goto retry2; 277 } 278 mpfr_sqrt (u, uf, MPFR_RNDN); 279 mpfr_swap (v, vf); 280 n ++; 281 } 282 283 MPFR_LOG_MSG (("End of iterations (n = %lu)\n", n)); 284 285 /* the error on v is bounded by (18n+51) ulps, or twice if there 286 was an exponent loss in the final subtraction */ 287 err += MPFR_INT_CEIL_LOG2(18 * n + 51); /* 18n+51 should not overflow 288 since n is about log(p) */ 289 /* we should have n+2 <= 2^(p/4) [see algorithms.tex] */ 290 if (MPFR_LIKELY (MPFR_INT_CEIL_LOG2(n + 2) <= p / 4 && 291 MPFR_CAN_ROUND (v, p - err, q, rnd_mode))) 292 break; /* Stop the loop */ 293 294 /* Next iteration */ 295 MPFR_ZIV_NEXT (loop, p); 296 s = MPFR_PREC2LIMBS (p); 297 } 298 MPFR_ZIV_FREE (loop); 299 300 if (MPFR_UNLIKELY ((__gmpfr_flags & (MPFR_FLAGS_ALL ^ MPFR_FLAGS_INEXACT)) 301 != 0)) 302 { 303 MPFR_ASSERTN (! mpfr_overflow_p ()); /* since mpfr_clear_flags */ 304 MPFR_ASSERTN (! mpfr_underflow_p ()); /* since mpfr_clear_flags */ 305 MPFR_ASSERTN (! mpfr_divby0_p ()); /* since mpfr_clear_flags */ 306 MPFR_ASSERTN (! mpfr_nanflag_p ()); /* since mpfr_clear_flags */ 307 } 308 309 /* Setting of the result */ 310 inexact = mpfr_set (r, v, rnd_mode); 311 MPFR_EXP (r) -= scaleop + scaleit; 312 313 /* Let's clean */ 314 MPFR_TMP_FREE(marker); 315 316 MPFR_SAVE_EXPO_FREE (expo); 317 /* From the definition of the AGM, underflow and overflow 318 are not possible. */ 319 return mpfr_check_range (r, inexact, rnd_mode); 320 /* agm(u,v) can be exact for u, v rational only for u=v. 321 Proof (due to Nicolas Brisebarre): it suffices to consider 322 u=1 and v<1. Then 1/AGM(1,v) = 2F1(1/2,1/2,1;1-v^2), 323 and a theorem due to G.V. Chudnovsky states that for x a 324 non-zero algebraic number with |x|<1, then 325 2F1(1/2,1/2,1;x) and 2F1(-1/2,1/2,1;x) are algebraically 326 independent over Q. */ 327 } 328