1 /* mpfr_log_ui -- compute natural logarithm of an unsigned long 2 3 Copyright 2014-2023 Free Software Foundation, Inc. 4 Contributed by the AriC and Caramba projects, INRIA. 5 6 This file is part of the GNU MPFR Library. 7 8 The GNU MPFR Library is free software; you can redistribute it and/or modify 9 it under the terms of the GNU Lesser General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or (at your 11 option) any later version. 12 13 The GNU MPFR Library is distributed in the hope that it will be useful, but 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16 License for more details. 17 18 You should have received a copy of the GNU Lesser General Public License 19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23 #define MPFR_NEED_LONGLONG_H 24 #include "mpfr-impl.h" 25 26 /* FIXME: mpfr_log_ui is much slower than mpfr_log on some values of n, 27 e.g. about 4 times as slow for n around ULONG_MAX/3 on an 28 x86_64 Linux machine, for 10^6 bits of precision. The reason is that 29 for say n=6148914691236517205 and prec=10^6, the value of T computed 30 has more than 50M bits, which is much more than needed. Indeed the 31 binary splitting algorithm for series with a finite radius of convergence 32 gives rationals of size n*log(n) for a target precision n. One might 33 truncate the rationals inside the algorithm, but then the error analysis 34 should be redone. */ 35 36 /* Cf https://www.ginac.de/CLN/binsplit.pdf - the Taylor series of log(1+x) 37 up to order N for x=p/2^k is T/(B*Q). 38 P[0] <- (-p)^(n2-n1) [with opposite sign when n1=1] 39 q <- k*(n2-n1) [corresponding to Q[0] = 2^q] 40 B[0] <- n1 * (n1+1) * ... * (n2-1) 41 T[0] <- B[0]*Q[0] * S(n1,n2) 42 where S(n1,n2) = -sum((-x)^(i-n1+1)/i, i=n1..n2-1) 43 Assumes p is odd or zero, and -1/3 <= x = p/2^k <= 1/3. 44 */ 45 static void 46 S (mpz_t *P, unsigned long *q, mpz_t *B, mpz_t *T, unsigned long n1, 47 unsigned long n2, long p, unsigned long k, int need_P) 48 { 49 MPFR_ASSERTD (n1 < n2); 50 MPFR_ASSERTD (p == 0 || ((unsigned long) p & 1) != 0); 51 if (n2 == n1 + 1) 52 { 53 mpz_set_si (P[0], (n1 == 1) ? p : -p); 54 *q = k; 55 mpz_set_ui (B[0], n1); 56 /* T = B*Q*S where S = P/(B*Q) thus T = P */ 57 mpz_set (T[0], P[0]); 58 /* since p is odd (or zero), there is no common factor 2 between 59 P and Q, or T and B */ 60 } 61 else 62 { 63 unsigned long m = (n1 / 2) + (n2 / 2) + (n1 & 1UL & n2), q1; 64 /* m = floor((n1+n2)/2) */ 65 66 MPFR_ASSERTD (n1 < m && m < n2); 67 S (P, q, B, T, n1, m, p, k, 1); 68 S (P + 1, &q1, B + 1, T + 1, m, n2, p, k, need_P); 69 70 /* T0 <- T0*B1*Q1 + P0*B0*T1 */ 71 mpz_mul (T[1], T[1], P[0]); 72 mpz_mul (T[1], T[1], B[0]); 73 mpz_mul (T[0], T[0], B[1]); 74 /* Q[1] = 2^q1 */ 75 mpz_mul_2exp (T[0], T[0], q1); /* mpz_mul (T[0], T[0], Q[1]) */ 76 mpz_add (T[0], T[0], T[1]); 77 if (need_P) 78 mpz_mul (P[0], P[0], P[1]); 79 *q += q1; /* mpz_mul (Q[0], Q[0], Q[1]) */ 80 mpz_mul (B[0], B[0], B[1]); 81 82 /* there should be no common factors 2 between P, Q and T, 83 since P is odd (or zero) */ 84 } 85 } 86 87 int 88 mpfr_log_ui (mpfr_ptr x, unsigned long n, mpfr_rnd_t rnd_mode) 89 { 90 unsigned long k; 91 mpfr_prec_t w; /* working precision */ 92 mpz_t three_n, *P, *B, *T; 93 mpfr_t t, q; 94 int inexact; 95 unsigned long N, lgN, i, kk; 96 long p; 97 MPFR_GROUP_DECL(group); 98 MPFR_TMP_DECL(marker); 99 MPFR_ZIV_DECL(loop); 100 MPFR_SAVE_EXPO_DECL (expo); 101 102 if (n <= 2) 103 { 104 if (n == 0) 105 { 106 MPFR_SET_INF (x); 107 MPFR_SET_NEG (x); 108 MPFR_SET_DIVBY0 (); 109 MPFR_RET (0); /* log(0) is an exact -infinity */ 110 } 111 else if (n == 1) 112 { 113 MPFR_SET_ZERO (x); 114 MPFR_SET_POS (x); 115 MPFR_RET (0); /* only "normal" case where the result is exact */ 116 } 117 /* now n=2 */ 118 return mpfr_const_log2 (x, rnd_mode); 119 } 120 121 /* here n >= 3 */ 122 123 /* Argument reduction: compute k such that 2/3 < n/2^k < 4/3, 124 i.e., 2^(k+1) < 3n < 2^(k+2). 125 126 FIXME: we could do better by considering n/(2^k*3^i*5^j), 127 which reduces the maximal distance to 1 from 1/3 to 1/8, 128 thus needing about 1.89 less terms in the Taylor expansion of 129 the reduced argument. Then log(2^k*3^i*5^j) can be computed 130 using a combination of log(16/15), log(25/24) and log(81/80), 131 see Section 6.5 of "A Fortran Multiple-Precision Arithmetic Package", 132 Richard P. Brent, ACM Transactions on Mathematical Software, 1978. */ 133 134 mpz_init_set_ui (three_n, n); 135 mpz_mul_ui (three_n, three_n, 3); 136 k = mpz_sizeinbase (three_n, 2) - 2; 137 MPFR_ASSERTD (k >= 2); 138 mpz_clear (three_n); 139 140 /* The reduced argument is n/2^k - 1 = (n-2^k)/2^k. 141 Compute p = n-2^k. One has: |p| = |n-2^k| < 2^k/3 < n/2 <= LONG_MAX, 142 so that p and -p both fit in a long. */ 143 if (k < sizeof (unsigned long) * CHAR_BIT) /* assume no padding bits */ 144 n -= 1UL << k; 145 /* n is now the value of p mod ULONG_MAX+1. 146 Since |p| <= LONG_MAX, if n > LONG_MAX, this means that p < 0 and 147 -n as an unsigned long value is at most LONG_MAX, thus fits in a 148 long. */ 149 p = ULONG2LONG (n); 150 151 MPFR_TMP_MARK(marker); 152 w = MPFR_PREC(x) + MPFR_INT_CEIL_LOG2 (MPFR_PREC(x)) + 10; 153 MPFR_GROUP_INIT_2(group, w, t, q); 154 MPFR_SAVE_EXPO_MARK (expo); 155 156 kk = k; 157 if (p != 0) 158 while ((p % 2) == 0) /* replace p/2^kk by (p/2)/2^(kk-1) */ 159 { 160 p /= 2; 161 kk --; 162 } 163 164 MPFR_ZIV_INIT (loop, w); 165 for (;;) 166 { 167 mpfr_t tmp; 168 unsigned int err; 169 unsigned long q0; 170 171 /* we need at most w/log2(2^kk/|p|) terms for an accuracy of w bits */ 172 mpfr_init2 (tmp, 32); 173 mpfr_set_ui (tmp, (p > 0) ? p : -p, MPFR_RNDU); 174 mpfr_log2 (tmp, tmp, MPFR_RNDU); 175 mpfr_ui_sub (tmp, kk, tmp, MPFR_RNDD); 176 MPFR_ASSERTN (w <= ULONG_MAX); 177 mpfr_ui_div (tmp, w, tmp, MPFR_RNDU); 178 N = mpfr_get_ui (tmp, MPFR_RNDU); 179 if (N < 2) 180 N = 2; 181 lgN = MPFR_INT_CEIL_LOG2 (N) + 1; 182 mpfr_clear (tmp); 183 P = (mpz_t *) MPFR_TMP_ALLOC (3 * lgN * sizeof (mpz_t)); 184 B = P + lgN; 185 T = B + lgN; 186 for (i = 0; i < lgN; i++) 187 { 188 mpz_init (P[i]); 189 mpz_init (B[i]); 190 mpz_init (T[i]); 191 } 192 193 S (P, &q0, B, T, 1, N, p, kk, 0); 194 /* mpz_mul (Q[0], B[0], Q[0]); */ 195 /* mpz_mul_2exp (B[0], B[0], q0); */ 196 197 mpfr_set_z (t, T[0], MPFR_RNDN); /* t = P[0] * (1 + theta_1) */ 198 mpfr_set_z (q, B[0], MPFR_RNDN); /* q = B[0] * (1 + theta_2) */ 199 mpfr_mul_2ui (q, q, q0, MPFR_RNDN); /* B[0]*Q[0] */ 200 mpfr_div (t, t, q, MPFR_RNDN); /* t = T[0]/(B[0]*Q[0])*(1 + theta_3)^3 201 = log(n/2^k) * (1 + theta_4)^4 202 for |theta_i| < 2^(-w) */ 203 204 /* argument reconstruction: add k*log(2) */ 205 mpfr_const_log2 (q, MPFR_RNDN); 206 mpfr_mul_ui (q, q, k, MPFR_RNDN); 207 mpfr_add (t, t, q, MPFR_RNDN); 208 for (i = 0; i < lgN; i++) 209 { 210 mpz_clear (P[i]); 211 mpz_clear (B[i]); 212 mpz_clear (T[i]); 213 } 214 /* The maximal error is 5 ulps for P/Q, since |(1+/-u)^4 - 1| < 5*u 215 for u < 2^(-12), k ulps for k*log(2), and 1 ulp for the addition, 216 thus at most k+6 ulps. 217 Note that there might be some cancellation in the addition: the worst 218 case is when log(1 + p/2^kk) = log(2/3) ~ -0.405, and with n=3 which 219 gives k=2, thus we add 2*log(2) = 1.386. Thus in the worst case we 220 have an exponent decrease of 1, which accounts for +1 in the error. */ 221 err = MPFR_INT_CEIL_LOG2 (k + 6) + 1; 222 if (MPFR_LIKELY (MPFR_CAN_ROUND (t, w - err, MPFR_PREC(x), rnd_mode))) 223 break; 224 225 MPFR_ZIV_NEXT (loop, w); 226 MPFR_GROUP_REPREC_2(group, w, t, q); 227 } 228 MPFR_ZIV_FREE (loop); 229 230 inexact = mpfr_set (x, t, rnd_mode); 231 232 MPFR_GROUP_CLEAR(group); 233 MPFR_TMP_FREE(marker); 234 235 MPFR_SAVE_EXPO_FREE (expo); 236 return mpfr_check_range (x, inexact, rnd_mode); 237 } 238