Home | History | Annotate | Line # | Download | only in src
      1 /* mpfr_lngamma -- lngamma function
      2 
      3 Copyright 2005-2023 Free Software Foundation, Inc.
      4 Contributed by the AriC and Caramba projects, INRIA.
      5 
      6 This file is part of the GNU MPFR Library.
      7 
      8 The GNU MPFR Library is free software; you can redistribute it and/or modify
      9 it under the terms of the GNU Lesser General Public License as published by
     10 the Free Software Foundation; either version 3 of the License, or (at your
     11 option) any later version.
     12 
     13 The GNU MPFR Library is distributed in the hope that it will be useful, but
     14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     16 License for more details.
     17 
     18 You should have received a copy of the GNU Lesser General Public License
     19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
     20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
     21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
     22 
     23 #define MPFR_NEED_LONGLONG_H
     24 #include "mpfr-impl.h"
     25 
     26 /* given a precision p, return alpha, such that the argument reduction
     27    will use k = alpha*p*log(2).
     28 
     29    Warning: we should always have alpha >= log(2)/(2Pi) ~ 0.11,
     30    and the smallest value of alpha multiplied by the smallest working
     31    precision should be >= 4.
     32 */
     33 static void
     34 mpfr_gamma_alpha (mpfr_ptr s, mpfr_prec_t p)
     35 {
     36   MPFR_LOG_FUNC
     37     (("p=%Pd", p),
     38      ("s[%Pd]=%.*Rg", mpfr_get_prec (s), mpfr_log_prec, s));
     39 
     40   if (p <= 100)
     41     mpfr_set_ui_2exp (s, 614, -10, MPFR_RNDN); /* about 0.6 */
     42   else if (p <= 500)
     43     mpfr_set_ui_2exp (s, 819, -10, MPFR_RNDN); /* about 0.8 */
     44   else if (p <= 1000)
     45     mpfr_set_ui_2exp (s, 1331, -10, MPFR_RNDN); /* about 1.3 */
     46   else if (p <= 2000)
     47     mpfr_set_ui_2exp (s, 1741, -10, MPFR_RNDN); /* about 1.7 */
     48   else if (p <= 5000)
     49     mpfr_set_ui_2exp (s, 2253, -10, MPFR_RNDN); /* about 2.2 */
     50   else if (p <= 10000)
     51     mpfr_set_ui_2exp (s, 3482, -10, MPFR_RNDN); /* about 3.4 */
     52   else
     53     mpfr_set_ui_2exp (s, 9, -1, MPFR_RNDN); /* 4.5 */
     54 }
     55 
     56 #ifdef IS_GAMMA
     57 
     58 /* This function is called in case of intermediate overflow/underflow.
     59    The s1 and s2 arguments are temporary MPFR numbers, having the
     60    working precision. If the result could be determined, then the
     61    flags are updated via pexpo, y is set to the result, and the
     62    (non-zero) ternary value is returned. Otherwise 0 is returned
     63    in order to perform the next Ziv iteration. */
     64 static int
     65 mpfr_explgamma (mpfr_ptr y, mpfr_srcptr x, mpfr_save_expo_t *pexpo,
     66                 mpfr_ptr s1, mpfr_ptr s2, mpfr_rnd_t rnd)
     67 {
     68   mpfr_t t1, t2;
     69   int inex1, inex2, sign;
     70   MPFR_BLOCK_DECL (flags1);
     71   MPFR_BLOCK_DECL (flags2);
     72   MPFR_GROUP_DECL (group);
     73 
     74   MPFR_BLOCK (flags1, inex1 = mpfr_lgamma (s1, &sign, x, MPFR_RNDD));
     75   MPFR_ASSERTN (inex1 != 0);
     76   /* s1 = RNDD(lngamma(x)), inexact */
     77   if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags1)))
     78     {
     79       if (MPFR_IS_POS (s1))
     80         {
     81           MPFR_SAVE_EXPO_UPDATE_FLAGS (*pexpo, MPFR_FLAGS_OVERFLOW);
     82           return mpfr_overflow (y, rnd, sign);
     83         }
     84       else
     85         {
     86           MPFR_SAVE_EXPO_UPDATE_FLAGS (*pexpo, MPFR_FLAGS_UNDERFLOW);
     87           return mpfr_underflow (y, rnd == MPFR_RNDN ? MPFR_RNDZ : rnd, sign);
     88         }
     89     }
     90 
     91   mpfr_set (s2, s1, MPFR_RNDN);     /* exact */
     92   mpfr_nextabove (s2);              /* v = RNDU(lngamma(z0)) */
     93 
     94   if (sign < 0)
     95     rnd = MPFR_INVERT_RND (rnd);  /* since the result with be negated */
     96   MPFR_GROUP_INIT_2 (group, MPFR_PREC (y), t1, t2);
     97   MPFR_BLOCK (flags1, inex1 = mpfr_exp (t1, s1, rnd));
     98   MPFR_BLOCK (flags2, inex2 = mpfr_exp (t2, s2, rnd));
     99   /* t1 is the rounding with mode 'rnd' of a lower bound on |Gamma(x)|,
    100      t2 is the rounding with mode 'rnd' of an upper bound, thus if both
    101      are equal, so is the wanted result. If t1 and t2 differ or the flags
    102      differ, at some point of Ziv's loop they should agree. */
    103   if (mpfr_equal_p (t1, t2) && flags1 == flags2)
    104     {
    105       MPFR_ASSERTN ((inex1 > 0 && inex2 > 0) || (inex1 < 0 && inex2 < 0));
    106       mpfr_set4 (y, t1, MPFR_RNDN, sign);  /* exact */
    107       if (sign < 0)
    108         inex1 = - inex1;
    109       MPFR_SAVE_EXPO_UPDATE_FLAGS (*pexpo, flags1);
    110     }
    111   else
    112     inex1 = 0;  /* couldn't determine the result */
    113   MPFR_GROUP_CLEAR (group);
    114 
    115   return inex1;
    116 }
    117 
    118 #else
    119 
    120 static int
    121 unit_bit (mpfr_srcptr x)
    122 {
    123   mpfr_exp_t expo;
    124   mpfr_prec_t prec;
    125   mp_limb_t x0;
    126 
    127   expo = MPFR_GET_EXP (x);
    128   if (expo <= 0)
    129     return 0;  /* |x| < 1 */
    130 
    131   prec = MPFR_PREC (x);
    132   if (expo > prec)
    133     return 0;  /* y is a multiple of 2^(expo-prec), thus an even integer */
    134 
    135   /* Now, the unit bit is represented. */
    136 
    137   prec = MPFR_PREC2LIMBS (prec) * GMP_NUMB_BITS - expo;
    138   /* number of represented fractional bits (including the trailing 0's) */
    139 
    140   x0 = *(MPFR_MANT (x) + prec / GMP_NUMB_BITS);
    141   /* limb containing the unit bit */
    142 
    143   return (x0 >> (prec % GMP_NUMB_BITS)) & 1;
    144 }
    145 
    146 #endif
    147 
    148 /* FIXME: There is an internal overflow when z is very large.
    149    Simple overflow detection with possible false negatives?
    150    For the particular cases near the overflow boundary,
    151    scaling by a power of two?
    152 */
    153 
    154 /* lngamma(x) = log(gamma(x)).
    155    We use formula [6.1.40] from Abramowitz&Stegun:
    156    lngamma(z) = (z-1/2)*log(z) - z + 1/2*log(2*Pi)
    157               + sum (Bernoulli[2m]/(2m)/(2m-1)/z^(2m-1),m=1..infinity)
    158    According to [6.1.42], if the sum is truncated after m=n, the error
    159    R_n(z) is bounded by |B[2n+2]|*K(z)/(2n+1)/(2n+2)/|z|^(2n+1)
    160    where K(z) = max (z^2/(u^2+z^2)) for u >= 0.
    161    For z real, |K(z)| <= 1 thus R_n(z) is bounded by the first neglected term.
    162  */
    163 #ifdef IS_GAMMA
    164 #define GAMMA_FUNC mpfr_gamma_aux
    165 #else
    166 #define GAMMA_FUNC mpfr_lngamma_aux
    167 #endif
    168 
    169 static int
    170 GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mpfr_rnd_t rnd)
    171 {
    172   mpfr_prec_t precy, w; /* working precision */
    173   mpfr_t s, t, u, v, z;
    174   unsigned long m, k, maxm, l;
    175   int compared, inexact;
    176   mpfr_exp_t err_s, err_t;
    177   double d;
    178   MPFR_SAVE_EXPO_DECL (expo);
    179   MPFR_ZIV_DECL (loop);
    180 
    181   MPFR_LOG_FUNC
    182     (("x[%Pd]=%.*Rg rnd=%d", mpfr_get_prec (z0), mpfr_log_prec, z0, rnd),
    183      ("y[%Pd]=%.*Rg inexact=%d",
    184       mpfr_get_prec (y), mpfr_log_prec, y, inexact));
    185 
    186   compared = mpfr_cmp_ui (z0, 1);
    187 
    188   MPFR_SAVE_EXPO_MARK (expo);
    189 
    190 #ifndef IS_GAMMA /* lngamma or lgamma */
    191   if (compared == 0 || (compared > 0 && mpfr_cmp_ui (z0, 2) == 0))
    192     {
    193       MPFR_SAVE_EXPO_FREE (expo);
    194       return mpfr_set_ui (y, 0, MPFR_RNDN);  /* lngamma(1 or 2) = +0 */
    195     }
    196 
    197   /* Deal with very large inputs: according to [6.1.42], if we denote
    198      R_n(z) = lngamma(z) - (z-1/2)*log(z) + z - 1/2*log(2*Pi), we have
    199      |R_n(z)| <= B_2/2/z, thus for z >= 2 we have
    200      |lngamma(z) - [z*(log(z) - 1)]| < 1/2*log(z) + 1. */
    201   if (compared > 0 && MPFR_GET_EXP (z0) >= (mpfr_exp_t) MPFR_PREC(y) + 2)
    202     {
    203       /* Since PREC(y) >= 2, this ensures EXP(z0) >= 4, thus |z0| >= 8,
    204          thus 1/2*log(z0) + 1 < log(z0).
    205          Since the largest possible z is < 2^(2^62) on a 64-bit machine,
    206          the largest value of log(z) is 2^62*log(2.) < 3.2e18 < 2^62,
    207          thus if we use at least 62 bits of precision, then log(t)-1 will
    208          be exact */
    209       mpfr_init2 (t, MPFR_PREC(y) >= 52 ? MPFR_PREC(y) + 10 : 62);
    210       mpfr_log (t, z0, MPFR_RNDU); /* error < 1 ulp */
    211       inexact = mpfr_sub_ui (t, t, 1, MPFR_RNDU); /* err < 2 ulps, since the
    212                                                      exponent of t might have
    213                                                      decreased */
    214       MPFR_ASSERTD(inexact == 0);
    215       mpfr_mul (t, z0, t, MPFR_RNDU); /* err < 1+2*2=5 ulps according to
    216                                         "Generic error on multiplication"
    217                                         in algorithms.tex */
    218       if (MPFR_IS_INF(t))
    219         {
    220           mpfr_clear (t);
    221           MPFR_SAVE_EXPO_FREE (expo);
    222           inexact = mpfr_overflow (y, rnd, 1);
    223           return inexact;
    224         }
    225       if (MPFR_GET_EXP(t) - MPFR_PREC(t) >= 62)
    226         {
    227           /* then ulp(t) >= 2^62 > log(z0) thus the total error is bounded
    228              by 6 ulp(t) */
    229           if (MPFR_CAN_ROUND (t, MPFR_PREC(t) - 3, MPFR_PREC(y), rnd))
    230             {
    231               inexact = mpfr_set (y, t, rnd);
    232               mpfr_clear (t);
    233               MPFR_SAVE_EXPO_FREE (expo);
    234               return mpfr_check_range (y, inexact, rnd);
    235             }
    236         }
    237       mpfr_clear (t);
    238     }
    239 
    240   /* Deal here with tiny inputs. We have for -0.3 <= x <= 0.3:
    241      - log|x| - gamma*x <= log|gamma(x)| <= - log|x| - gamma*x + x^2 */
    242   if (MPFR_GET_EXP (z0) <= - (mpfr_exp_t) MPFR_PREC(y))
    243     {
    244       mpfr_t l, h, g;
    245       int ok, inex1, inex2;
    246       mpfr_prec_t prec = MPFR_PREC(y) + 14;
    247       MPFR_ZIV_DECL (loop);
    248 
    249       MPFR_ZIV_INIT (loop, prec);
    250       do
    251         {
    252           mpfr_init2 (l, prec);
    253           if (MPFR_IS_POS(z0))
    254             {
    255               mpfr_log (l, z0, MPFR_RNDU); /* upper bound for log(z0) */
    256               mpfr_init2 (h, MPFR_PREC(l));
    257             }
    258           else
    259             {
    260               mpfr_init2 (h, MPFR_PREC(z0));
    261               mpfr_neg (h, z0, MPFR_RNDN); /* exact */
    262               mpfr_log (l, h, MPFR_RNDU); /* upper bound for log(-z0) */
    263               mpfr_set_prec (h, MPFR_PREC(l));
    264             }
    265           mpfr_neg (l, l, MPFR_RNDD); /* lower bound for -log(|z0|) */
    266           mpfr_set (h, l, MPFR_RNDD); /* exact */
    267           mpfr_nextabove (h); /* upper bound for -log(|z0|), avoids two calls
    268                                  to mpfr_log */
    269           mpfr_init2 (g, MPFR_PREC(l));
    270           /* if z0>0, we need an upper approximation of Euler's constant
    271              for the left bound */
    272           mpfr_const_euler (g, MPFR_IS_POS(z0) ? MPFR_RNDU : MPFR_RNDD);
    273           mpfr_mul (g, g, z0, MPFR_RNDD);
    274           mpfr_sub (l, l, g, MPFR_RNDD);
    275           mpfr_const_euler (g, MPFR_IS_POS(z0) ? MPFR_RNDD : MPFR_RNDU); /* cached */
    276           mpfr_mul (g, g, z0, MPFR_RNDU);
    277           mpfr_sub (h, h, g, MPFR_RNDD);
    278           mpfr_sqr (g, z0, MPFR_RNDU);
    279           mpfr_add (h, h, g, MPFR_RNDU);
    280           inex1 = mpfr_prec_round (l, MPFR_PREC(y), rnd);
    281           inex2 = mpfr_prec_round (h, MPFR_PREC(y), rnd);
    282           /* Caution: we not only need l = h, but both inexact flags should
    283              agree. Indeed, one of the inexact flags might be zero. In that
    284              case if we assume lngamma(z0) cannot be exact, the other flag
    285              should be correct. We are conservative here and request that both
    286              inexact flags agree. */
    287           ok = SAME_SIGN (inex1, inex2) && mpfr_equal_p (l, h);
    288           if (ok)
    289             mpfr_set (y, h, rnd); /* exact */
    290           mpfr_clear (l);
    291           mpfr_clear (h);
    292           mpfr_clear (g);
    293           if (ok)
    294             {
    295               MPFR_ZIV_FREE (loop);
    296               MPFR_SAVE_EXPO_FREE (expo);
    297               return mpfr_check_range (y, inex1, rnd);
    298             }
    299           /* since we have log|gamma(x)| = - log|x| - gamma*x + O(x^2),
    300              if x ~ 2^(-n), then we have a n-bit approximation, thus
    301              we can try again with a working precision of n bits,
    302              especially when n >> PREC(y).
    303              Otherwise we would use the reflection formula evaluating x-1,
    304              which would need precision n. */
    305           MPFR_ZIV_NEXT (loop, prec);
    306         }
    307       while (prec <= - MPFR_GET_EXP (z0));
    308       MPFR_ZIV_FREE (loop);
    309     }
    310 #endif
    311 
    312   precy = MPFR_PREC(y);
    313 
    314   mpfr_init2 (s, MPFR_PREC_MIN);
    315   mpfr_init2 (t, MPFR_PREC_MIN);
    316   mpfr_init2 (u, MPFR_PREC_MIN);
    317   mpfr_init2 (v, MPFR_PREC_MIN);
    318   mpfr_init2 (z, MPFR_PREC_MIN);
    319 
    320   inexact = 0; /* 0 means: result y not set yet */
    321 
    322   if (compared < 0)
    323     {
    324       mpfr_exp_t err_u;
    325 
    326       /* use reflection formula:
    327          gamma(x) = Pi*(x-1)/sin(Pi*(2-x))/gamma(2-x)
    328          thus lngamma(x) = log(Pi*(x-1)/sin(Pi*(2-x))) - lngamma(2-x) */
    329 
    330       w = precy + MPFR_INT_CEIL_LOG2 (precy);
    331       w += MPFR_INT_CEIL_LOG2 (w) + 14;
    332       MPFR_ZIV_INIT (loop, w);
    333       while (1)
    334         {
    335           MPFR_ASSERTD(w >= 3);
    336           mpfr_set_prec (s, w);
    337           mpfr_set_prec (t, w);
    338           mpfr_set_prec (u, w);
    339           mpfr_set_prec (v, w);
    340           /* In the following, we write r for a real of absolute value
    341              at most 2^(-w). Different instances of r may represent different
    342              values. */
    343           mpfr_ui_sub (s, 2, z0, MPFR_RNDD); /* s = (2-z0) * (1+2r) >= 1 */
    344           mpfr_const_pi (t, MPFR_RNDN);      /* t = Pi * (1+r) */
    345           mpfr_lngamma (u, s, MPFR_RNDN); /* lngamma(2-x) */
    346           /* Let s = (2-z0) + h. By construction, -(2-z0)*2^(1-w) <= h <= 0.
    347              We have lngamma(s) = lngamma(2-z0) + h*Psi(z), z in [2-z0+h,2-z0].
    348              Since 2-z0+h = s >= 1 and |Psi(x)| <= max(1,log(x)) for x >= 1,
    349              the error on u is bounded by
    350              ulp(u)/2 + (2-z0)*max(1,log(2-z0))*2^(1-w)
    351              = (1/2 + (2-z0)*max(1,log(2-z0))*2^(1-E(u))) ulp(u) */
    352           d = (double) MPFR_GET_EXP(s) * 0.694; /* upper bound for log(2-z0) */
    353           if (MPFR_IS_ZERO(u)) /* in that case the error on u is zero */
    354             err_u = 0;
    355           else
    356             err_u = MPFR_GET_EXP(s) + __gmpfr_ceil_log2 (d) + 1 - MPFR_GET_EXP(u);
    357           err_u = (err_u >= 0) ? err_u + 1 : 0;
    358           /* now the error on u is bounded by 2^err_u ulps */
    359 
    360           mpfr_mul (s, s, t, MPFR_RNDN); /* Pi*(2-x) * (1+r)^4 */
    361           err_s = MPFR_GET_EXP(s); /* 2-x <= 2^err_s */
    362           mpfr_sin (s, s, MPFR_RNDN); /* sin(Pi*(2-x)) */
    363           /* the error on s is bounded by 1/2*ulp(s) + [(1+2^(-w))^4-1]*(2-x)
    364              <= 1/2*ulp(s) + 5*2^(-w)*(2-x) for w >= 3
    365              <= (1/2 + 5 * 2^(-E(s)) * (2-x)) ulp(s) */
    366           err_s += 3 - MPFR_GET_EXP(s);
    367           err_s = (err_s >= 0) ? err_s + 1 : 0;
    368           /* the error on s is bounded by 2^err_s ulp(s), thus by
    369              2^(err_s+1)*2^(-w)*|s| since ulp(s) <= 2^(1-w)*|s|.
    370              Now n*2^(-w) can always be written |(1+r)^n-1| for some
    371              |r|<=2^(-w), thus taking n=2^(err_s+1) we see that
    372              |S - s| <= |(1+r)^(2^(err_s+1))-1| * |s|, where S is the
    373              true value.
    374              In fact if ulp(s) <= ulp(S) the same inequality holds for
    375              |S| instead of |s| in the right hand side, i.e., we can
    376              write s = (1+r)^(2^(err_s+1)) * S.
    377              But if ulp(S) < ulp(s), we need to add one ``bit'' to the error,
    378              to get s = (1+r)^(2^(err_s+2)) * S. This is true since with
    379              E = n*2^(-w) we have |s - S| <= E * |s|, thus
    380              |s - S| <= E/(1-E) * |S|.
    381              Now E/(1-E) is bounded by 2E as long as E<=1/2,
    382              and 2E can be written (1+r)^(2n)-1 as above.
    383           */
    384           err_s += 2; /* exponent of relative error */
    385 
    386           mpfr_sub_ui (v, z0, 1, MPFR_RNDN); /* v = (x-1) * (1+r) */
    387           mpfr_mul (v, v, t, MPFR_RNDN); /* v = Pi*(x-1) * (1+r)^3 */
    388           mpfr_div (v, v, s, MPFR_RNDN); /* Pi*(x-1)/sin(Pi*(2-x)) */
    389           mpfr_abs (v, v, MPFR_RNDN);
    390           /* (1+r)^(3+2^err_s+1) */
    391           err_s = (err_s <= 1) ? 3 : err_s + 1;
    392           /* now (1+r)^M with M <= 2^err_s */
    393           mpfr_log (v, v, MPFR_RNDN);
    394           /* log(v*(1+e)) = log(v)+log(1+e) where |e| <= 2^(err_s-w).
    395              Since |log(1+e)| <= 2*e for |e| <= 1/4, the error on v is
    396              bounded by ulp(v)/2 + 2^(err_s+1-w). */
    397           if (err_s + 2 > w)
    398             {
    399               w += err_s + 2;
    400             }
    401           else
    402             {
    403               /* if v = 0 here, it was 1 before the call to mpfr_log,
    404                  thus the error on v was zero */
    405               if (!MPFR_IS_ZERO(v))
    406                 err_s += 1 - MPFR_GET_EXP(v);
    407               err_s = (err_s >= 0) ? err_s + 1 : 0;
    408               /* the error on v is bounded by 2^err_s ulps */
    409               err_u += MPFR_GET_EXP(u); /* absolute error on u */
    410               if (!MPFR_IS_ZERO(v)) /* same as above */
    411                 err_s += MPFR_GET_EXP(v); /* absolute error on v */
    412               mpfr_sub (s, v, u, MPFR_RNDN);
    413               /* the total error on s is bounded by ulp(s)/2 + 2^(err_u-w)
    414                  + 2^(err_s-w) <= ulp(s)/2 + 2^(max(err_u,err_s)+1-w) */
    415               err_s = (err_s >= err_u) ? err_s : err_u;
    416               err_s += 1 - MPFR_GET_EXP(s); /* error is 2^err_s ulp(s) */
    417               err_s = (err_s >= 0) ? err_s + 1 : 0;
    418               if (MPFR_CAN_ROUND (s, w - err_s, precy, rnd))
    419                 goto end;
    420             }
    421           MPFR_ZIV_NEXT (loop, w);
    422         }
    423       MPFR_ZIV_FREE (loop);
    424     }
    425 
    426   /* now z0 > 1 */
    427 
    428   MPFR_ASSERTD (compared > 0);
    429 
    430   /* since k is O(w), the value of log(z0*...*(z0+k-1)) is about w*log(w),
    431      so there is a cancellation of ~log(w) in the argument reconstruction */
    432   w = precy + MPFR_INT_CEIL_LOG2 (precy);
    433   w += MPFR_INT_CEIL_LOG2 (w) + 13;
    434   MPFR_ZIV_INIT (loop, w);
    435   while (1)
    436     {
    437       MPFR_ASSERTD (w >= 3);
    438 
    439       /* argument reduction: we compute gamma(z0 + k), where the series
    440          has error term B_{2n}/(z0+k)^(2n) ~ (n/(Pi*e*(z0+k)))^(2n)
    441          and we need k steps of argument reconstruction. Assuming k is large
    442          with respect to z0, and k = n, we get 1/(Pi*e)^(2n) ~ 2^(-w), i.e.,
    443          k ~ w*log(2)/2/log(Pi*e) ~ 0.1616 * w.
    444          However, since the series is slightly more expensive to compute,
    445          the optimal value seems to be k ~ 0.25 * w experimentally (with
    446          caching of Bernoulli numbers).
    447          For only one computation of gamma with large precision, it is better
    448          to set k to a larger value, say k ~ w. */
    449       mpfr_set_prec (s, 53);
    450       mpfr_gamma_alpha (s, w);
    451       mpfr_set_ui_2exp (s, 4, -4, MPFR_RNDU);
    452       mpfr_mul_ui (s, s, w, MPFR_RNDU);
    453       if (mpfr_cmp (z0, s) < 0)
    454         {
    455           mpfr_sub (s, s, z0, MPFR_RNDU);
    456           k = mpfr_get_ui (s, MPFR_RNDU);
    457           if (k < 3)
    458             k = 3;
    459         }
    460       else
    461         k = 3;
    462 
    463       mpfr_set_prec (s, w);
    464       mpfr_set_prec (t, w);
    465       mpfr_set_prec (u, w);
    466       mpfr_set_prec (v, w);
    467       mpfr_set_prec (z, w);
    468 
    469       mpfr_add_ui (z, z0, k, MPFR_RNDN);
    470       /* z = (z0+k)*(1+t1) with |t1| <= 2^(-w) */
    471 
    472       /* z >= 4 ensures the relative error on log(z) is small,
    473          and also (z-1/2)*log(z)-z >= 0 */
    474       MPFR_ASSERTD (mpfr_cmp_ui (z, 4) >= 0);
    475 
    476       mpfr_log (s, z, MPFR_RNDN); /* log(z) */
    477       /* we have s = log((z0+k)*(1+t1))*(1+t2) with |t1|, |t2| <= 2^(-w).
    478          Since w >= 2 and z0+k >= 4, we can write log((z0+k)*(1+t1))
    479          = log(z0+k) * (1+t3) with |t3| <= 2^(-w), thus we have
    480          s = log(z0+k) * (1+t4)^2 with |t4| <= 2^(-w) */
    481       mpfr_mul_2ui (t, z, 1, MPFR_RNDN); /* t = 2z * (1+t5) */
    482       mpfr_sub_ui (t, t, 1, MPFR_RNDN); /* t = 2z-1 * (1+t6)^3 */
    483       /* since we can write 2z*(1+t5) = (2z-1)*(1+t5') with
    484          t5' = 2z/(2z-1) * t5, thus |t5'| <= 8/7 * t5 */
    485       mpfr_mul (s, s, t, MPFR_RNDN); /* (2z-1)*log(z) * (1+t7)^6 */
    486       mpfr_div_2ui (s, s, 1, MPFR_RNDN); /* (z-1/2)*log(z) * (1+t7)^6 */
    487       mpfr_sub (s, s, z, MPFR_RNDN); /* (z-1/2)*log(z)-z */
    488       /* s = [(z-1/2)*log(z)-z]*(1+u)^14, s >= 1/2 */
    489 
    490       mpfr_ui_div (u, 1, z, MPFR_RNDN); /* 1/z * (1+u), u <= 1/4 since z >= 4 */
    491 
    492       /* the first term is B[2]/2/z = 1/12/z: t=1/12/z, C[2]=1 */
    493       mpfr_div_ui (t, u, 12, MPFR_RNDN); /* 1/(12z) * (1+u)^2, t <= 3/128 */
    494       mpfr_set (v, t, MPFR_RNDN);        /* (1+u)^2, v < 2^(-5) */
    495       mpfr_add (s, s, v, MPFR_RNDN);     /* (1+u)^15 */
    496 
    497       mpfr_sqr (u, u, MPFR_RNDN);        /* 1/z^2 * (1+u)^3 */
    498 
    499       /* m <= maxm ensures that 2*m*(2*m+1) <= ULONG_MAX */
    500       maxm = 1UL << (sizeof(unsigned long) * CHAR_BIT / 2 - 1);
    501 
    502       /* s:(1+u)^15, t:(1+u)^2, t <= 3/128 */
    503 
    504       for (m = 2; MPFR_GET_EXP(v) + (mpfr_exp_t) w >= MPFR_GET_EXP(s); m++)
    505         {
    506           mpfr_mul (t, t, u, MPFR_RNDN); /* (1+u)^(10m-14) */
    507           if (m <= maxm)
    508             {
    509               mpfr_mul_ui (t, t, 2*(m-1)*(2*m-3), MPFR_RNDN);
    510               mpfr_div_ui (t, t, 2*m*(2*m-1), MPFR_RNDN);
    511               mpfr_div_ui (t, t, 2*m*(2*m+1), MPFR_RNDN);
    512             }
    513           else
    514             {
    515               mpfr_mul_ui (t, t, 2*(m-1), MPFR_RNDN);
    516               mpfr_mul_ui (t, t, 2*m-3, MPFR_RNDN);
    517               mpfr_div_ui (t, t, 2*m, MPFR_RNDN);
    518               mpfr_div_ui (t, t, 2*m-1, MPFR_RNDN);
    519               mpfr_div_ui (t, t, 2*m, MPFR_RNDN);
    520               mpfr_div_ui (t, t, 2*m+1, MPFR_RNDN);
    521             }
    522           /* (1+u)^(10m-8) */
    523           /* invariant: t=1/(2m)/(2m-1)/z^(2m-1)/(2m+1)! */
    524           mpfr_mul_z (v, t, mpfr_bernoulli_cache(m), MPFR_RNDN); /* (1+u)^(10m-7) */
    525           MPFR_ASSERTD(MPFR_GET_EXP(v) <= - (2 * m + 3));
    526           mpfr_add (s, s, v, MPFR_RNDN);
    527         }
    528       /* m <= 1/2*Pi*e*z ensures that |v[m]| < 1/2^(2m+3) */
    529       MPFR_ASSERTD ((double) m <= 4.26 * mpfr_get_d (z, MPFR_RNDZ));
    530 
    531       /* We have sum([(1+u)^(10m-7)-1]*1/2^(2m+3), m=2..infinity)
    532          <= 1.46*u for u <= 2^(-3).
    533          We have 0 < lngamma(z) - [(z - 1/2) ln(z) - z + 1/2 ln(2 Pi)] < 0.021
    534          for z >= 4, thus since the initial s >= 0.85, the different values of
    535          s differ by at most one binade, and the total rounding error on s
    536          in the for-loop is bounded by 2*(m-1)*ulp(final_s).
    537          The error coming from the v's is bounded by
    538          1.46*2^(-w) <= 2*ulp(final_s).
    539          Thus the total error so far is bounded by [(1+u)^15-1]*s+2m*ulp(s)
    540          <= (2m+47)*ulp(s).
    541          Taking into account the truncation error (which is bounded by the last
    542          term v[] according to 6.1.42 in A&S), the bound is (2m+48)*ulp(s).
    543       */
    544 
    545       /* add 1/2*log(2*Pi) and subtract log(z0*(z0+1)*...*(z0+k-1)) */
    546       mpfr_const_pi (v, MPFR_RNDN); /* v = Pi*(1+u) */
    547       mpfr_mul_2ui (v, v, 1, MPFR_RNDN); /* v = 2*Pi * (1+u) */
    548       /* k >= 3 */
    549       mpfr_set (t, z0, MPFR_RNDN); /* t = z0*(1+u) */
    550       l = 1;
    551 
    552 /* replace #if 1 by #if 0 for the naive argument reconstruction */
    553 #if 1
    554 
    555       /* We multiply by (z0+1)*(z0+2)*...*(z0+k-1) by blocks of j consecutive
    556          terms where j ~ sqrt(k).
    557          If we multiply naively by z0+1, then by z0+2, ..., then by z0+j,
    558          the multiplicative term for the rounding error is (1+u)^(2j).
    559          The multiplicative term is not larger when we multiply by
    560          Z[j] + c[j-1]*Z[j-1] + ... + c[2]*Z[2] + c[1]*z0 + c[0]
    561          with c[p] integers, and Z[p] = z0^p * (1+u)^(p-1).
    562          Note that all terms are positive.
    563          Indeed, since c[1] is exact, c[1]*z0 corresponds to (1+u),
    564          then c[1]*z0 + c[0] corresponds to (1+u)^2,
    565          c[2]*Z[2] + c[1]*z0 + c[0] to (1+u)^3, ...,
    566          c[j-1]*Z[j-1] + ... + c[0] to (1+u)^j,
    567          and Z[j] + c[j-1]*Z[j-1] + ... + c[1]*z0 + c[0] to (1+u)^(j+1).
    568          With the accumulation in t, we get (1+u)^(j+2) and j+2 <= 2j. */
    569       {
    570         unsigned long j, i, p;
    571         mpfr_t *Z;
    572         mpz_t *c;
    573         for (j = 2; (j + 1) * (j + 1) < k; j++);
    574         /* Z[i] stores z0^i for i <= j */
    575         Z = (mpfr_t *) mpfr_allocate_func ((j + 1) * sizeof (mpfr_t));
    576         for (i = 2; i <= j; i++)
    577           mpfr_init2 (Z[i], w);
    578         mpfr_sqr (Z[2], z0, MPFR_RNDN);
    579         for (i = 3; i <= j; i++)
    580           if ((i & 1) == 0)
    581             mpfr_sqr (Z[i], Z[i >> 1], MPFR_RNDN);
    582           else
    583             mpfr_mul (Z[i], Z[i-1], z0, MPFR_RNDN);
    584         c = (mpz_t *) mpfr_allocate_func ((j + 1) * sizeof (mpz_t));
    585         for (i = 0; i <= j; i++)
    586           mpz_init (c[i]);
    587         for (; l + j <= k; l += j)
    588           {
    589             /* c[i] is the coefficient of x^i in (x+l)*...*(x+l+j-1) */
    590             mpz_set_ui (c[0], 1);
    591             for (i = 0; i < j; i++)
    592               /* multiply (x+l)*(x+l+1)*...*(x+l+i-1) by x+l+i:
    593                  (b[i]*x^i + b[i-1]*x^(i-1) + ... + b[0])*(x+l+i) =
    594                  b[i]*x^(i+1) + (b[i-1]+(l+i)*b[i])*x^i + ...
    595                  + (b[0]+(l+i)*b[1])*x + i*b[0] */
    596               {
    597                 mpz_set (c[i+1], c[i]); /* b[i]*x^(i+1) */
    598                 for (p = i; p > 0; p--)
    599                   {
    600                     mpz_mul_ui (c[p], c[p], l + i);
    601                     mpz_add (c[p], c[p], c[p-1]); /* b[p-1]+(l+i)*b[p] */
    602                   }
    603                 mpz_mul_ui (c[0], c[0], l+i); /* i*b[0] */
    604               }
    605             /* now compute z0^j + c[j-1]*z0^(j-1) + ... + c[1]*z0 + c[0] */
    606             mpfr_set_z (u, c[0], MPFR_RNDN);
    607             for (i = 0; i < j; i++)
    608               {
    609                 mpfr_mul_z (z, (i == 0) ? z0 : Z[i+1], c[i+1], MPFR_RNDN);
    610                 mpfr_add (u, u, z, MPFR_RNDN);
    611               }
    612             mpfr_mul (t, t, u, MPFR_RNDN);
    613           }
    614         for (i = 0; i <= j; i++)
    615           mpz_clear (c[i]);
    616         mpfr_free_func (c, (j + 1) * sizeof (mpz_t));
    617         for (i = 2; i <= j; i++)
    618           mpfr_clear (Z[i]);
    619         mpfr_free_func (Z, (j + 1) * sizeof (mpfr_t));
    620       }
    621 #endif /* end of fast argument reconstruction */
    622 
    623       for (; l < k; l++)
    624         {
    625           mpfr_add_ui (u, z0, l, MPFR_RNDN); /* u = (z0+l)*(1+u) */
    626           mpfr_mul (t, t, u, MPFR_RNDN);     /* (1+u)^(2l+1) */
    627         }
    628       /* now t: (1+u)^(2k-1) */
    629       /* instead of computing log(sqrt(2*Pi)/t), we compute
    630          1/2*log(2*Pi/t^2), which trades a square root for a square */
    631       mpfr_sqr (t, t, MPFR_RNDN); /* (z0*...*(z0+k-1))^2, (1+u)^(4k-1) */
    632       mpfr_div (v, v, t, MPFR_RNDN);
    633       /* 2*Pi/(z0*...*(z0+k-1))^2 (1+u)^(4k+1) */
    634 #ifdef IS_GAMMA
    635       err_s = MPFR_GET_EXP(s);
    636       mpfr_exp (s, s, MPFR_RNDN);
    637       /* If s is +Inf, we compute exp(lngamma(z0)). */
    638       if (mpfr_inf_p (s))
    639         {
    640           inexact = mpfr_explgamma (y, z0, &expo, s, t, rnd);
    641           if (inexact)
    642             goto end0;
    643           else
    644             goto ziv_next;
    645         }
    646       /* before the exponential, we have s = s0 + h where
    647          |h| <= (2m+48)*ulp(s), thus exp(s0) = exp(s) * exp(-h).
    648          For |h| <= 1/4, we have |exp(h)-1| <= 1.2*|h| thus
    649          |exp(s) - exp(s0)| <= 1.2 * exp(s) * (2m+48)* 2^(EXP(s)-w). */
    650       /* d = 1.2 * (2.0 * (double) m + 48.0); */
    651       /* the error on s is bounded by d*2^err_s * 2^(-w) */
    652       mpfr_sqrt (t, v, MPFR_RNDN);
    653       /* let v0 be the exact value of v. We have v = v0*(1+u)^(4k+1),
    654          thus t = sqrt(v0)*(1+u)^(2k+3/2). */
    655       mpfr_mul (s, s, t, MPFR_RNDN);
    656       /* the error on input s is bounded by (1+u)^(d*2^err_s),
    657          and that on t is (1+u)^(2k+3/2), thus the
    658          total error is (1+u)^(d*2^err_s+2k+5/2) */
    659       /* err_s += __gmpfr_ceil_log2 (d); */
    660       /* since d = 1.2 * (2m+48), ceil(log2(d)) = 2 + ceil(log2(0.6*m+14.4))
    661          <= 2 + ceil(log2(0.6*m+15)) */
    662       {
    663         unsigned long mm = (1 + m / 5) * 3; /* 0.6*m <= mm */
    664         err_s += 2 + __gmpfr_int_ceil_log2 (mm + 15);
    665       }
    666       err_t = __gmpfr_ceil_log2 (2.0 * (double) k + 2.5);
    667       err_s = (err_s >= err_t) ? err_s + 1 : err_t + 1;
    668 #else
    669       mpfr_log (t, v, MPFR_RNDN);
    670       /* let v0 be the exact value of v. We have v = v0*(1+u)^(4k+1),
    671          thus log(v) = log(v0) + (4k+1)*log(1+u). Since |log(1+u)/u| <= 1.07
    672          for |u| <= 2^(-3), the absolute error on log(v) is bounded by
    673          1.07*(4k+1)*u, and the rounding error by ulp(t). */
    674       mpfr_div_2ui (t, t, 1, MPFR_RNDN);
    675       /* the error on t is now bounded by ulp(t) + 0.54*(4k+1)*2^(-w).
    676          We have sqrt(2*Pi)/(z0*(z0+1)*...*(z0+k-1)) <= sqrt(2*Pi)/k! <= 0.5
    677          since k>=3, thus t <= -0.5 and ulp(t) >= 2^(-w).
    678          Thus the error on t is bounded by (2.16*k+1.54)*ulp(t). */
    679       err_t = MPFR_GET_EXP(t) + (mpfr_exp_t)
    680         __gmpfr_ceil_log2 (2.2 * (double) k + 1.6);
    681       err_s = MPFR_GET_EXP(s) + (mpfr_exp_t)
    682         __gmpfr_ceil_log2 (2.0 * (double) m + 48.0);
    683       mpfr_add (s, s, t, MPFR_RNDN); /* this is a subtraction in fact */
    684       /* the final error in ulp(s) is
    685          <= 1 + 2^(err_t-EXP(s)) + 2^(err_s-EXP(s))
    686          <= 2^(1+max(err_t,err_s)-EXP(s)) if err_t <> err_s
    687          <= 2^(2+max(err_t,err_s)-EXP(s)) if err_t = err_s */
    688       err_s = (err_t == err_s) ? 1 + err_s : ((err_t > err_s) ? err_t : err_s);
    689       err_s += 1 - MPFR_GET_EXP(s);
    690 #endif
    691       if (MPFR_LIKELY (MPFR_CAN_ROUND (s, w - err_s, precy, rnd)))
    692         break;
    693 #ifdef IS_GAMMA
    694     ziv_next:
    695 #endif
    696       MPFR_ZIV_NEXT (loop, w);
    697     }
    698 
    699 #ifdef IS_GAMMA
    700  end0:
    701 #endif
    702 
    703  end:
    704   if (inexact == 0)
    705     inexact = mpfr_set (y, s, rnd);
    706   MPFR_ZIV_FREE (loop);
    707 
    708   mpfr_clear (s);
    709   mpfr_clear (t);
    710   mpfr_clear (u);
    711   mpfr_clear (v);
    712   mpfr_clear (z);
    713 
    714   MPFR_SAVE_EXPO_FREE (expo);
    715   return mpfr_check_range (y, inexact, rnd);
    716 }
    717 
    718 #ifndef IS_GAMMA
    719 
    720 int
    721 mpfr_lngamma (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd)
    722 {
    723   int inex;
    724 
    725   MPFR_LOG_FUNC
    726     (("x[%Pd]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd),
    727      ("y[%Pd]=%.*Rg inexact=%d",
    728       mpfr_get_prec (y), mpfr_log_prec, y, inex));
    729 
    730   /* special cases */
    731   if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x) ||
    732                      (MPFR_IS_NEG (x) && mpfr_integer_p (x))))
    733     {
    734       if (MPFR_IS_NAN (x))
    735         {
    736           MPFR_SET_NAN (y);
    737           MPFR_RET_NAN;
    738         }
    739       else /* lngamma(+/-Inf) = lngamma(non-positive integer) = +Inf */
    740         {
    741           if (!MPFR_IS_INF (x))
    742             MPFR_SET_DIVBY0 ();
    743           MPFR_SET_INF (y);
    744           MPFR_SET_POS (y);
    745           MPFR_RET (0);  /* exact */
    746         }
    747     }
    748 
    749   /* if -2k-1 < x < -2k <= 0, then lngamma(x) = NaN */
    750   if (MPFR_IS_NEG (x) && unit_bit (x) == 0)
    751     {
    752       MPFR_SET_NAN (y);
    753       MPFR_RET_NAN;
    754     }
    755 
    756   inex = mpfr_lngamma_aux (y, x, rnd);
    757   return inex;
    758 }
    759 
    760 int
    761 mpfr_lgamma (mpfr_ptr y, int *signp, mpfr_srcptr x, mpfr_rnd_t rnd)
    762 {
    763   int inex;
    764 
    765   MPFR_LOG_FUNC
    766     (("x[%Pd]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd),
    767      ("y[%Pd]=%.*Rg signp=%d inexact=%d",
    768       mpfr_get_prec (y), mpfr_log_prec, y, *signp, inex));
    769 
    770   *signp = 1;  /* most common case */
    771 
    772   if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
    773     {
    774       if (MPFR_IS_NAN (x))
    775         {
    776           MPFR_SET_NAN (y);
    777           MPFR_RET_NAN;
    778         }
    779       else
    780         {
    781           if (MPFR_IS_ZERO (x))
    782             MPFR_SET_DIVBY0 ();
    783           *signp = MPFR_INT_SIGN (x);
    784           MPFR_SET_INF (y);
    785           MPFR_SET_POS (y);
    786           MPFR_RET (0);
    787         }
    788     }
    789 
    790   if (MPFR_IS_NEG (x))
    791     {
    792       if (mpfr_integer_p (x))
    793         {
    794           MPFR_SET_INF (y);
    795           MPFR_SET_POS (y);
    796           MPFR_SET_DIVBY0 ();
    797           MPFR_RET (0);
    798         }
    799 
    800       if (unit_bit (x) == 0)
    801         *signp = -1;
    802 
    803       /* For tiny negative x, we have gamma(x) = 1/x - euler + O(x),
    804          thus |gamma(x)| = -1/x + euler + O(x), and
    805          log |gamma(x)| = -log(-x) - euler*x + O(x^2).
    806          More precisely we have for -0.4 <= x < 0:
    807          -log(-x) <= log |gamma(x)| <= -log(-x) - x.
    808          Since log(x) is not representable, we may have an instance of the
    809          Table Maker Dilemma. The only way to ensure correct rounding is to
    810          compute an interval [l,h] such that l <= -log(-x) and
    811          -log(-x) - x <= h, and check whether l and h round to the same number
    812          for the target precision and rounding modes. */
    813       if (MPFR_EXP(x) + 1 <= - (mpfr_exp_t) MPFR_PREC(y))
    814         /* since PREC(y) >= 1, this ensures EXP(x) <= -2,
    815            thus |x| <= 0.25 < 0.4 */
    816         {
    817           mpfr_t l, h;
    818           int ok, inex2;
    819           mpfr_prec_t w = MPFR_PREC (y) + 14;
    820           mpfr_exp_t expl;
    821           MPFR_SAVE_EXPO_DECL (expo);
    822 
    823           MPFR_SAVE_EXPO_MARK (expo);
    824 
    825           while (1)
    826             {
    827               mpfr_init2 (l, w);
    828               mpfr_init2 (h, w);
    829               /* we want a lower bound on -log(-x), thus an upper bound
    830                  on log(-x), thus an upper bound on -x. */
    831               mpfr_neg (l, x, MPFR_RNDU); /* upper bound on -x */
    832               mpfr_log (l, l, MPFR_RNDU); /* upper bound for log(-x) */
    833               mpfr_neg (l, l, MPFR_RNDD); /* lower bound for -log(-x) */
    834               mpfr_neg (h, x, MPFR_RNDD); /* lower bound on -x */
    835               mpfr_log (h, h, MPFR_RNDD); /* lower bound on log(-x) */
    836               mpfr_neg (h, h, MPFR_RNDU); /* upper bound for -log(-x) */
    837               mpfr_sub (h, h, x, MPFR_RNDU); /* upper bound for -log(-x) - x */
    838               inex = mpfr_prec_round (l, MPFR_PREC (y), rnd);
    839               inex2 = mpfr_prec_round (h, MPFR_PREC (y), rnd);
    840               /* Caution: we not only need l = h, but both inexact flags
    841                  should agree. Indeed, one of the inexact flags might be
    842                  zero. In that case if we assume ln|gamma(x)| cannot be
    843                  exact, the other flag should be correct. We are conservative
    844                  here and request that both inexact flags agree. */
    845               ok = SAME_SIGN (inex, inex2) && mpfr_equal_p (l, h);
    846               if (ok)
    847                 mpfr_set (y, h, rnd); /* exact */
    848               else
    849                 expl = MPFR_EXP (l);
    850               mpfr_clear (l);
    851               mpfr_clear (h);
    852               if (ok)
    853                 {
    854                   MPFR_SAVE_EXPO_FREE (expo);
    855                   return mpfr_check_range (y, inex, rnd);
    856                 }
    857               /* if ulp(log(-x)) <= |x| there is no reason to loop,
    858                  since the width of [l, h] will be at least |x| */
    859               if (expl < MPFR_EXP (x) + w)
    860                 break;
    861               w += MPFR_INT_CEIL_LOG2(w) + 3;
    862             }
    863 
    864           MPFR_SAVE_EXPO_FREE (expo);
    865         }
    866     }
    867 
    868   inex = mpfr_lngamma_aux (y, x, rnd);
    869   return inex;
    870 }
    871 
    872 #endif
    873