Home | History | Annotate | Line # | Download | only in src
      1 /* mpfr_agm -- arithmetic-geometric mean of two floating-point numbers
      2 
      3 Copyright 1999-2023 Free Software Foundation, Inc.
      4 Contributed by the AriC and Caramba projects, INRIA.
      5 
      6 This file is part of the GNU MPFR Library.
      7 
      8 The GNU MPFR Library is free software; you can redistribute it and/or modify
      9 it under the terms of the GNU Lesser General Public License as published by
     10 the Free Software Foundation; either version 3 of the License, or (at your
     11 option) any later version.
     12 
     13 The GNU MPFR Library is distributed in the hope that it will be useful, but
     14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     16 License for more details.
     17 
     18 You should have received a copy of the GNU Lesser General Public License
     19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
     20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
     21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
     22 
     23 #define MPFR_NEED_LONGLONG_H
     24 #include "mpfr-impl.h"
     25 
     26 /* agm(x,y) is between x and y, so we don't need to save exponent range */
     27 int
     28 mpfr_agm (mpfr_ptr r, mpfr_srcptr op2, mpfr_srcptr op1, mpfr_rnd_t rnd_mode)
     29 {
     30   int compare, inexact;
     31   mp_size_t s;
     32   mpfr_prec_t p, q;
     33   mp_limb_t *up, *vp, *ufp, *vfp;
     34   mpfr_t u, v, uf, vf, sc1, sc2;
     35   mpfr_exp_t scaleop = 0, scaleit;
     36   unsigned long n; /* number of iterations */
     37   MPFR_ZIV_DECL (loop);
     38   MPFR_TMP_DECL(marker);
     39   MPFR_SAVE_EXPO_DECL (expo);
     40 
     41   MPFR_LOG_FUNC
     42     (("op2[%Pd]=%.*Rg op1[%Pd]=%.*Rg rnd=%d",
     43       mpfr_get_prec (op2), mpfr_log_prec, op2,
     44       mpfr_get_prec (op1), mpfr_log_prec, op1, rnd_mode),
     45      ("r[%Pd]=%.*Rg inexact=%d",
     46       mpfr_get_prec (r), mpfr_log_prec, r, inexact));
     47 
     48   /* Deal with special values */
     49   if (MPFR_ARE_SINGULAR (op1, op2))
     50     {
     51       /* If a or b is NaN, the result is NaN */
     52       if (MPFR_IS_NAN(op1) || MPFR_IS_NAN(op2))
     53         {
     54           MPFR_SET_NAN(r);
     55           MPFR_RET_NAN;
     56         }
     57       /* now one of a or b is Inf or 0 */
     58       /* If a and b is +Inf, the result is +Inf.
     59          Otherwise if a or b is -Inf or 0, the result is NaN */
     60       else if (MPFR_IS_INF(op1) || MPFR_IS_INF(op2))
     61         {
     62           if (MPFR_IS_STRICTPOS(op1) && MPFR_IS_STRICTPOS(op2))
     63             {
     64               MPFR_SET_INF(r);
     65               MPFR_SET_SAME_SIGN(r, op1);
     66               MPFR_RET(0); /* exact */
     67             }
     68           else
     69             {
     70               MPFR_SET_NAN(r);
     71               MPFR_RET_NAN;
     72             }
     73         }
     74       else /* a and b are neither NaN nor Inf, and one is zero */
     75         {  /* If a or b is 0, the result is +0, in particular because the
     76               result is always >= 0 with our definition (Maple sometimes
     77               chooses a different sign for GaussAGM, but it uses another
     78               definition, with possible negative results). */
     79           MPFR_ASSERTD (MPFR_IS_ZERO (op1) || MPFR_IS_ZERO (op2));
     80           MPFR_SET_POS (r);
     81           MPFR_SET_ZERO (r);
     82           MPFR_RET (0); /* exact */
     83         }
     84     }
     85 
     86   /* If a or b is negative (excluding -Infinity), the result is NaN */
     87   if (MPFR_UNLIKELY(MPFR_IS_NEG(op1) || MPFR_IS_NEG(op2)))
     88     {
     89       MPFR_SET_NAN(r);
     90       MPFR_RET_NAN;
     91     }
     92 
     93   /* Precision of the following calculus */
     94   q = MPFR_PREC(r);
     95   p = q + MPFR_INT_CEIL_LOG2(q) + 15;
     96   MPFR_ASSERTD (p >= 7); /* see algorithms.tex */
     97   s = MPFR_PREC2LIMBS (p);
     98 
     99   /* b (op2) and a (op1) are the 2 operands but we want b >= a */
    100   compare = mpfr_cmp (op1, op2);
    101   if (MPFR_UNLIKELY( compare == 0 ))
    102     return mpfr_set (r, op1, rnd_mode);
    103   else if (compare > 0)
    104     {
    105       mpfr_srcptr t = op1;
    106       op1 = op2;
    107       op2 = t;
    108     }
    109 
    110   /* Now b (=op2) > a (=op1) */
    111 
    112   MPFR_SAVE_EXPO_MARK (expo);
    113 
    114   MPFR_TMP_MARK(marker);
    115 
    116   /* Main loop */
    117   MPFR_ZIV_INIT (loop, p);
    118   for (;;)
    119     {
    120       mpfr_prec_t eq;
    121       unsigned long err = 0;  /* must be set to 0 at each Ziv iteration */
    122       MPFR_BLOCK_DECL (flags);
    123 
    124       /* Init temporary vars */
    125       MPFR_TMP_INIT (up, u, p, s);
    126       MPFR_TMP_INIT (vp, v, p, s);
    127       MPFR_TMP_INIT (ufp, uf, p, s);
    128       MPFR_TMP_INIT (vfp, vf, p, s);
    129 
    130       /* Calculus of un and vn */
    131     retry:
    132       MPFR_BLOCK (flags,
    133                   mpfr_mul (u, op1, op2, MPFR_RNDN);
    134                   /* mpfr_mul(...): faster since PREC(op) < PREC(u) */
    135                   mpfr_add (v, op1, op2, MPFR_RNDN);
    136                   /* mpfr_add with !=prec is still good */);
    137       if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags) || MPFR_UNDERFLOW (flags)))
    138         {
    139           mpfr_exp_t e1, e2;
    140 
    141           MPFR_ASSERTN (scaleop == 0);
    142           e1 = MPFR_GET_EXP (op1);
    143           e2 = MPFR_GET_EXP (op2);
    144 
    145           /* Let's determine scaleop to avoid an overflow/underflow. */
    146           if (MPFR_OVERFLOW (flags))
    147             {
    148               /* Let's recall that emin <= e1 <= e2 <= emax.
    149                  There has been an overflow. Thus e2 >= emax/2.
    150                  If the mpfr_mul overflowed, then e1 + e2 > emax.
    151                  If the mpfr_add overflowed, then e2 = emax.
    152                  We want: (e1 + scale) + (e2 + scale) <= emax,
    153                  i.e. scale <= (emax - e1 - e2) / 2. Let's take
    154                  scale = min(floor((emax - e1 - e2) / 2), -1).
    155                  This is OK, as:
    156                  1. emin <= scale <= -1.
    157                  2. e1 + scale >= emin. Indeed:
    158                     * If e1 + e2 > emax, then
    159                       e1 + scale >= e1 + (emax - e1 - e2) / 2 - 1
    160                                  >= (emax + e1 - emax) / 2 - 1
    161                                  >= e1 / 2 - 1 >= emin.
    162                     * Otherwise, mpfr_mul didn't overflow, therefore
    163                       mpfr_add overflowed and e2 = emax, so that
    164                       e1 > emin (see restriction below).
    165                       e1 + scale > emin - 1, thus e1 + scale >= emin.
    166                  3. e2 + scale <= emax, since scale < 0. */
    167               if (e1 + e2 > MPFR_EMAX_MAX)
    168                 {
    169                   scaleop = - (((e1 + e2) - MPFR_EMAX_MAX + 1) / 2);
    170                   MPFR_ASSERTN (scaleop < 0);
    171                 }
    172               else
    173                 {
    174                   /* The addition necessarily overflowed. */
    175                   MPFR_ASSERTN (e2 == MPFR_EMAX_MAX);
    176                   /* The case where e1 = emin and e2 = emax is not supported
    177                      here. This would mean that the precision of e2 would be
    178                      huge (and possibly not supported in practice anyway). */
    179                   MPFR_ASSERTN (e1 > MPFR_EMIN_MIN);
    180                   /* Note: this case is probably impossible to have in practice
    181                      since we need e2 = emax, and no overflow in the product.
    182                      Since the product is >= 2^(e1+e2-2), it implies
    183                      e1 + e2 - 2 <= emax, thus e1 <= 2. Now to get an overflow
    184                      we need op1 >= 1/2 ulp(op2), which implies that the
    185                      precision of op2 should be at least emax-2. On a 64-bit
    186                      computer this is impossible to have, and would require
    187                      a huge amount of memory on a 32-bit computer. */
    188                   scaleop = -1;
    189                 }
    190 
    191             }
    192           else  /* underflow only (in the multiplication) */
    193             {
    194               /* We have e1 + e2 <= emin (so, e1 <= e2 <= 0).
    195                  We want: (e1 + scale) + (e2 + scale) >= emin + 1,
    196                  i.e. scale >= (emin + 1 - e1 - e2) / 2. let's take
    197                  scale = ceil((emin + 1 - e1 - e2) / 2). This is OK, as:
    198                  1. 1 <= scale <= emax.
    199                  2. e1 + scale >= emin + 1 >= emin.
    200                  3. e2 + scale <= scale <= emax. */
    201               MPFR_ASSERTN (e1 <= e2 && e2 <= 0);
    202               scaleop = (MPFR_EMIN_MIN + 2 - e1 - e2) / 2;
    203               MPFR_ASSERTN (scaleop > 0);
    204             }
    205 
    206           MPFR_ALIAS (sc1, op1, MPFR_SIGN (op1), e1 + scaleop);
    207           MPFR_ALIAS (sc2, op2, MPFR_SIGN (op2), e2 + scaleop);
    208           op1 = sc1;
    209           op2 = sc2;
    210           MPFR_LOG_MSG (("Exception in pre-iteration, scale = %"
    211                          MPFR_EXP_FSPEC "d\n", scaleop));
    212           goto retry;
    213         }
    214 
    215       MPFR_CLEAR_FLAGS ();
    216       mpfr_sqrt (u, u, MPFR_RNDN);
    217       mpfr_div_2ui (v, v, 1, MPFR_RNDN);
    218 
    219       scaleit = 0;
    220       n = 1;
    221       while (mpfr_cmp2 (u, v, &eq) != 0 && eq <= p - 2)
    222         {
    223           MPFR_BLOCK_DECL (flags2);
    224 
    225           MPFR_LOG_MSG (("Iteration n = %lu\n", n));
    226 
    227         retry2:
    228           mpfr_add (vf, u, v, MPFR_RNDN);  /* No overflow? */
    229           mpfr_div_2ui (vf, vf, 1, MPFR_RNDN);
    230           /* See proof in algorithms.tex */
    231           if (eq > p / 4)
    232             {
    233               mpfr_t w;
    234               MPFR_BLOCK_DECL (flags3);
    235 
    236               MPFR_LOG_MSG (("4*eq > p\n", 0));
    237 
    238               /* vf = V(k) */
    239               mpfr_init2 (w, (p + 1) / 2);
    240               MPFR_BLOCK
    241                 (flags3,
    242                  mpfr_sub (w, v, u, MPFR_RNDN);       /* e = V(k-1)-U(k-1) */
    243                  mpfr_sqr (w, w, MPFR_RNDN);          /* e = e^2 */
    244                  mpfr_div_2ui (w, w, 4, MPFR_RNDN);   /* e*= (1/2)^2*1/4  */
    245                  mpfr_div (w, w, vf, MPFR_RNDN);      /* 1/4*e^2/V(k) */
    246                  );
    247               if (MPFR_LIKELY (! MPFR_UNDERFLOW (flags3)))
    248                 {
    249                   mpfr_sub (v, vf, w, MPFR_RNDN);
    250                   err = MPFR_GET_EXP (vf) - MPFR_GET_EXP (v); /* 0 or 1 */
    251                   mpfr_clear (w);
    252                   break;
    253                 }
    254               /* There has been an underflow because of the cancellation
    255                  between V(k-1) and U(k-1). Let's use the conventional
    256                  method. */
    257               MPFR_LOG_MSG (("4*eq > p -> underflow\n", 0));
    258               mpfr_clear (w);
    259               MPFR_CLEAR_UNDERFLOW ();
    260             }
    261           /* U(k) increases, so that U.V can overflow (but not underflow). */
    262           MPFR_BLOCK (flags2, mpfr_mul (uf, u, v, MPFR_RNDN););
    263           if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags2)))
    264             {
    265               mpfr_exp_t scale2;
    266 
    267               scale2 = - (((MPFR_GET_EXP (u) + MPFR_GET_EXP (v))
    268                            - MPFR_EMAX_MAX + 1) / 2);
    269               MPFR_EXP (u) += scale2;
    270               MPFR_EXP (v) += scale2;
    271               scaleit += scale2;
    272               MPFR_LOG_MSG (("Overflow in iteration n = %lu, scaleit = %"
    273                              MPFR_EXP_FSPEC "d (%" MPFR_EXP_FSPEC "d)\n",
    274                              n, scaleit, scale2));
    275               MPFR_CLEAR_OVERFLOW ();
    276               goto retry2;
    277             }
    278           mpfr_sqrt (u, uf, MPFR_RNDN);
    279           mpfr_swap (v, vf);
    280           n ++;
    281         }
    282 
    283       MPFR_LOG_MSG (("End of iterations (n = %lu)\n", n));
    284 
    285       /* the error on v is bounded by (18n+51) ulps, or twice if there
    286          was an exponent loss in the final subtraction */
    287       err += MPFR_INT_CEIL_LOG2(18 * n + 51); /* 18n+51 should not overflow
    288                                                  since n is about log(p) */
    289       /* we should have n+2 <= 2^(p/4) [see algorithms.tex] */
    290       if (MPFR_LIKELY (MPFR_INT_CEIL_LOG2(n + 2) <= p / 4 &&
    291                        MPFR_CAN_ROUND (v, p - err, q, rnd_mode)))
    292         break; /* Stop the loop */
    293 
    294       /* Next iteration */
    295       MPFR_ZIV_NEXT (loop, p);
    296       s = MPFR_PREC2LIMBS (p);
    297     }
    298   MPFR_ZIV_FREE (loop);
    299 
    300   if (MPFR_UNLIKELY ((__gmpfr_flags & (MPFR_FLAGS_ALL ^ MPFR_FLAGS_INEXACT))
    301                      != 0))
    302     {
    303       MPFR_ASSERTN (! mpfr_overflow_p ());   /* since mpfr_clear_flags */
    304       MPFR_ASSERTN (! mpfr_underflow_p ());  /* since mpfr_clear_flags */
    305       MPFR_ASSERTN (! mpfr_divby0_p ());     /* since mpfr_clear_flags */
    306       MPFR_ASSERTN (! mpfr_nanflag_p ());    /* since mpfr_clear_flags */
    307     }
    308 
    309   /* Setting of the result */
    310   inexact = mpfr_set (r, v, rnd_mode);
    311   MPFR_EXP (r) -= scaleop + scaleit;
    312 
    313   /* Let's clean */
    314   MPFR_TMP_FREE(marker);
    315 
    316   MPFR_SAVE_EXPO_FREE (expo);
    317   /* From the definition of the AGM, underflow and overflow
    318      are not possible. */
    319   return mpfr_check_range (r, inexact, rnd_mode);
    320   /* agm(u,v) can be exact for u, v rational only for u=v.
    321      Proof (due to Nicolas Brisebarre): it suffices to consider
    322      u=1 and v<1. Then 1/AGM(1,v) = 2F1(1/2,1/2,1;1-v^2),
    323      and a theorem due to G.V. Chudnovsky states that for x a
    324      non-zero algebraic number with |x|<1, then
    325      2F1(1/2,1/2,1;x) and 2F1(-1/2,1/2,1;x) are algebraically
    326      independent over Q. */
    327 }
    328