oddfac_1.c revision 1.1.1.3 1 1.1 mrg /* mpz_oddfac_1(RESULT, N) -- Set RESULT to the odd factor of N!.
2 1.1 mrg
3 1.1 mrg Contributed to the GNU project by Marco Bodrato.
4 1.1 mrg
5 1.1 mrg THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
6 1.1 mrg IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
7 1.1 mrg IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
8 1.1 mrg DISAPPEAR IN A FUTURE GNU MP RELEASE.
9 1.1 mrg
10 1.1.1.3 mrg Copyright 2010-2012, 2015-2017 Free Software Foundation, Inc.
11 1.1 mrg
12 1.1 mrg This file is part of the GNU MP Library.
13 1.1 mrg
14 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify
15 1.1.1.2 mrg it under the terms of either:
16 1.1.1.2 mrg
17 1.1.1.2 mrg * the GNU Lesser General Public License as published by the Free
18 1.1.1.2 mrg Software Foundation; either version 3 of the License, or (at your
19 1.1.1.2 mrg option) any later version.
20 1.1.1.2 mrg
21 1.1.1.2 mrg or
22 1.1.1.2 mrg
23 1.1.1.2 mrg * the GNU General Public License as published by the Free Software
24 1.1.1.2 mrg Foundation; either version 2 of the License, or (at your option) any
25 1.1.1.2 mrg later version.
26 1.1.1.2 mrg
27 1.1.1.2 mrg or both in parallel, as here.
28 1.1 mrg
29 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but
30 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31 1.1.1.2 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
32 1.1.1.2 mrg for more details.
33 1.1 mrg
34 1.1.1.2 mrg You should have received copies of the GNU General Public License and the
35 1.1.1.2 mrg GNU Lesser General Public License along with the GNU MP Library. If not,
36 1.1.1.2 mrg see https://www.gnu.org/licenses/. */
37 1.1 mrg
38 1.1 mrg #include "gmp-impl.h"
39 1.1 mrg #include "longlong.h"
40 1.1 mrg
41 1.1 mrg /* TODO:
42 1.1 mrg - split this file in smaller parts with functions that can be recycled for different computations.
43 1.1 mrg */
44 1.1 mrg
45 1.1 mrg /**************************************************************/
46 1.1 mrg /* Section macros: common macros, for mswing/fac/bin (&sieve) */
47 1.1 mrg /**************************************************************/
48 1.1 mrg
49 1.1 mrg #define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I) \
50 1.1 mrg if ((PR) > (MAX_PR)) { \
51 1.1 mrg (VEC)[(I)++] = (PR); \
52 1.1 mrg (PR) = 1; \
53 1.1 mrg }
54 1.1 mrg
55 1.1 mrg #define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \
56 1.1 mrg do { \
57 1.1 mrg if ((PR) > (MAX_PR)) { \
58 1.1 mrg (VEC)[(I)++] = (PR); \
59 1.1 mrg (PR) = (P); \
60 1.1 mrg } else \
61 1.1 mrg (PR) *= (P); \
62 1.1 mrg } while (0)
63 1.1 mrg
64 1.1 mrg #define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) \
65 1.1 mrg __max_i = (end); \
66 1.1 mrg \
67 1.1 mrg do { \
68 1.1 mrg ++__i; \
69 1.1 mrg if (((sieve)[__index] & __mask) == 0) \
70 1.1 mrg { \
71 1.1.1.3 mrg mp_limb_t prime; \
72 1.1.1.3 mrg prime = id_to_n(__i)
73 1.1 mrg
74 1.1 mrg #define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \
75 1.1 mrg do { \
76 1.1 mrg mp_limb_t __mask, __index, __max_i, __i; \
77 1.1 mrg \
78 1.1 mrg __i = (start)-(off); \
79 1.1 mrg __index = __i / GMP_LIMB_BITS; \
80 1.1 mrg __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \
81 1.1 mrg __i += (off); \
82 1.1 mrg \
83 1.1 mrg LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
84 1.1 mrg
85 1.1 mrg #define LOOP_ON_SIEVE_STOP \
86 1.1 mrg } \
87 1.1 mrg __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \
88 1.1 mrg __index += __mask & 1; \
89 1.1.1.3 mrg } while (__i <= __max_i)
90 1.1 mrg
91 1.1 mrg #define LOOP_ON_SIEVE_END \
92 1.1 mrg LOOP_ON_SIEVE_STOP; \
93 1.1 mrg } while (0)
94 1.1 mrg
95 1.1 mrg /*********************************************************/
96 1.1 mrg /* Section sieve: sieving functions and tools for primes */
97 1.1 mrg /*********************************************************/
98 1.1 mrg
99 1.1 mrg #if WANT_ASSERT
100 1.1 mrg static mp_limb_t
101 1.1 mrg bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
102 1.1 mrg #endif
103 1.1 mrg
104 1.1 mrg /* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/
105 1.1 mrg static mp_limb_t
106 1.1 mrg id_to_n (mp_limb_t id) { return id*3+1+(id&1); }
107 1.1 mrg
108 1.1 mrg /* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
109 1.1 mrg static mp_limb_t
110 1.1 mrg n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
111 1.1 mrg
112 1.1 mrg #if WANT_ASSERT
113 1.1 mrg static mp_size_t
114 1.1 mrg primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
115 1.1 mrg #endif
116 1.1 mrg
117 1.1 mrg /*********************************************************/
118 1.1.1.3 mrg /* Section mswing: 2-multiswing factorial */
119 1.1 mrg /*********************************************************/
120 1.1 mrg
121 1.1.1.3 mrg /* Returns an approximation of the sqare root of x.
122 1.1.1.3 mrg * It gives:
123 1.1.1.3 mrg * limb_apprsqrt (x) ^ 2 <= x < (limb_apprsqrt (x)+1) ^ 2
124 1.1.1.3 mrg * or
125 1.1.1.3 mrg * x <= limb_apprsqrt (x) ^ 2 <= x * 9/8
126 1.1.1.3 mrg */
127 1.1 mrg static mp_limb_t
128 1.1 mrg limb_apprsqrt (mp_limb_t x)
129 1.1 mrg {
130 1.1 mrg int s;
131 1.1 mrg
132 1.1 mrg ASSERT (x > 2);
133 1.1.1.3 mrg count_leading_zeros (s, x);
134 1.1.1.3 mrg s = (GMP_LIMB_BITS - s) >> 1;
135 1.1.1.3 mrg return ((CNST_LIMB(1) << s) + (x >> s)) >> 1;
136 1.1 mrg }
137 1.1 mrg
138 1.1 mrg #if 0
139 1.1 mrg /* A count-then-exponentiate variant for SWING_A_PRIME */
140 1.1 mrg #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
141 1.1 mrg do { \
142 1.1 mrg mp_limb_t __q, __prime; \
143 1.1 mrg int __exp; \
144 1.1 mrg __prime = (P); \
145 1.1 mrg __exp = 0; \
146 1.1 mrg __q = (N); \
147 1.1 mrg do { \
148 1.1 mrg __q /= __prime; \
149 1.1 mrg __exp += __q & 1; \
150 1.1 mrg } while (__q >= __prime); \
151 1.1 mrg if (__exp) { /* Store $prime^{exp}$ */ \
152 1.1 mrg for (__q = __prime; --__exp; __q *= __prime); \
153 1.1 mrg FACTOR_LIST_STORE(__q, PR, MAX_PR, VEC, I); \
154 1.1 mrg }; \
155 1.1 mrg } while (0)
156 1.1 mrg #else
157 1.1 mrg #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
158 1.1 mrg do { \
159 1.1 mrg mp_limb_t __q, __prime; \
160 1.1 mrg __prime = (P); \
161 1.1 mrg FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \
162 1.1 mrg __q = (N); \
163 1.1 mrg do { \
164 1.1 mrg __q /= __prime; \
165 1.1 mrg if ((__q & 1) != 0) (PR) *= __prime; \
166 1.1 mrg } while (__q >= __prime); \
167 1.1 mrg } while (0)
168 1.1 mrg #endif
169 1.1 mrg
170 1.1 mrg #define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
171 1.1 mrg do { \
172 1.1 mrg mp_limb_t __prime; \
173 1.1 mrg __prime = (P); \
174 1.1 mrg if ((((N) / __prime) & 1) != 0) \
175 1.1 mrg FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I); \
176 1.1 mrg } while (0)
177 1.1 mrg
178 1.1 mrg /* mpz_2multiswing_1 computes the odd part of the 2-multiswing
179 1.1 mrg factorial of the parameter n. The result x is an odd positive
180 1.1 mrg integer so that multiswing(n,2) = x 2^a.
181 1.1 mrg
182 1.1 mrg Uses the algorithm described by Peter Luschny in "Divide, Swing and
183 1.1 mrg Conquer the Factorial!".
184 1.1 mrg
185 1.1 mrg The pointer sieve points to primesieve_size(n) limbs containing a
186 1.1 mrg bit-array where primes are marked as 0.
187 1.1 mrg Enough (FIXME: explain :-) limbs must be pointed by factors.
188 1.1 mrg */
189 1.1 mrg
190 1.1 mrg static void
191 1.1 mrg mpz_2multiswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors)
192 1.1 mrg {
193 1.1 mrg mp_limb_t prod, max_prod;
194 1.1 mrg mp_size_t j;
195 1.1 mrg
196 1.1.1.3 mrg ASSERT (n > 25);
197 1.1 mrg
198 1.1 mrg j = 0;
199 1.1 mrg prod = -(n & 1);
200 1.1 mrg n &= ~ CNST_LIMB(1); /* n-1, if n is odd */
201 1.1 mrg
202 1.1 mrg prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */
203 1.1 mrg max_prod = GMP_NUMB_MAX / (n-1);
204 1.1 mrg
205 1.1 mrg /* Handle prime = 3 separately. */
206 1.1 mrg SWING_A_PRIME (3, n, prod, max_prod, factors, j);
207 1.1 mrg
208 1.1 mrg /* Swing primes from 5 to n/3 */
209 1.1 mrg {
210 1.1.1.3 mrg mp_limb_t s, l_max_prod;
211 1.1 mrg
212 1.1.1.3 mrg s = limb_apprsqrt(n);
213 1.1.1.3 mrg ASSERT (s >= 5);
214 1.1.1.3 mrg s = n_to_bit (s);
215 1.1.1.3 mrg ASSERT (bit_to_n (s+1) * bit_to_n (s+1) > n);
216 1.1.1.3 mrg ASSERT (s < n_to_bit (n / 3));
217 1.1.1.3 mrg LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
218 1.1.1.3 mrg SWING_A_PRIME (prime, n, prod, max_prod, factors, j);
219 1.1.1.3 mrg LOOP_ON_SIEVE_STOP;
220 1.1 mrg
221 1.1 mrg ASSERT (max_prod <= GMP_NUMB_MAX / 3);
222 1.1 mrg
223 1.1.1.3 mrg l_max_prod = max_prod * 3;
224 1.1 mrg
225 1.1.1.3 mrg LOOP_ON_SIEVE_CONTINUE (prime, n_to_bit (n/3), sieve);
226 1.1.1.3 mrg SH_SWING_A_PRIME (prime, n, prod, l_max_prod, factors, j);
227 1.1 mrg LOOP_ON_SIEVE_END;
228 1.1 mrg }
229 1.1 mrg
230 1.1.1.3 mrg /* Store primes from (n+1)/2 to n */
231 1.1.1.3 mrg LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n >> 1) + 1, n_to_bit (n), 0,sieve);
232 1.1.1.3 mrg FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
233 1.1.1.3 mrg LOOP_ON_SIEVE_END;
234 1.1.1.3 mrg
235 1.1 mrg if (LIKELY (j != 0))
236 1.1 mrg {
237 1.1 mrg factors[j++] = prod;
238 1.1 mrg mpz_prodlimbs (x, factors, j);
239 1.1 mrg }
240 1.1 mrg else
241 1.1 mrg {
242 1.1.1.3 mrg ASSERT (ALLOC (x) > 0);
243 1.1 mrg PTR (x)[0] = prod;
244 1.1 mrg SIZ (x) = 1;
245 1.1 mrg }
246 1.1 mrg }
247 1.1 mrg
248 1.1 mrg #undef SWING_A_PRIME
249 1.1 mrg #undef SH_SWING_A_PRIME
250 1.1 mrg #undef LOOP_ON_SIEVE_END
251 1.1 mrg #undef LOOP_ON_SIEVE_STOP
252 1.1 mrg #undef LOOP_ON_SIEVE_BEGIN
253 1.1 mrg #undef LOOP_ON_SIEVE_CONTINUE
254 1.1 mrg #undef FACTOR_LIST_APPEND
255 1.1 mrg
256 1.1 mrg /*********************************************************/
257 1.1 mrg /* Section oddfac: odd factorial, needed also by binomial*/
258 1.1 mrg /*********************************************************/
259 1.1 mrg
260 1.1 mrg #if TUNE_PROGRAM_BUILD
261 1.1 mrg #define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD_LIMIT-1)+1))
262 1.1 mrg #else
263 1.1 mrg #define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD-1)+1))
264 1.1 mrg #endif
265 1.1 mrg
266 1.1 mrg /* mpz_oddfac_1 computes the odd part of the factorial of the
267 1.1 mrg parameter n. I.e. n! = x 2^a, where x is the returned value: an
268 1.1 mrg odd positive integer.
269 1.1 mrg
270 1.1 mrg If flag != 0 a square is skipped in the DSC part, e.g.
271 1.1 mrg if n is odd, n > FAC_DSC_THRESHOLD and flag = 1, x is set to n!!.
272 1.1 mrg
273 1.1 mrg If n is too small, flag is ignored, and an ASSERT can be triggered.
274 1.1 mrg
275 1.1 mrg TODO: FAC_DSC_THRESHOLD is used here with two different roles:
276 1.1 mrg - to decide when prime factorisation is needed,
277 1.1 mrg - to stop the recursion, once sieving is done.
278 1.1 mrg Maybe two thresholds can do a better job.
279 1.1 mrg */
280 1.1 mrg void
281 1.1 mrg mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag)
282 1.1 mrg {
283 1.1 mrg ASSERT (n <= GMP_NUMB_MAX);
284 1.1.1.3 mrg ASSERT (flag == 0 || (flag == 1 && n > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1 && ABOVE_THRESHOLD (n, FAC_DSC_THRESHOLD)));
285 1.1 mrg
286 1.1 mrg if (n <= ODD_FACTORIAL_TABLE_LIMIT)
287 1.1 mrg {
288 1.1.1.3 mrg MPZ_NEWALLOC (x, 1)[0] = __gmp_oddfac_table[n];
289 1.1 mrg SIZ (x) = 1;
290 1.1 mrg }
291 1.1 mrg else if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1)
292 1.1 mrg {
293 1.1 mrg mp_ptr px;
294 1.1 mrg
295 1.1 mrg px = MPZ_NEWALLOC (x, 2);
296 1.1 mrg umul_ppmm (px[1], px[0], __gmp_odd2fac_table[(n - 1) >> 1], __gmp_oddfac_table[n >> 1]);
297 1.1 mrg SIZ (x) = 2;
298 1.1 mrg }
299 1.1 mrg else
300 1.1 mrg {
301 1.1 mrg unsigned s;
302 1.1 mrg mp_ptr factors;
303 1.1 mrg
304 1.1 mrg s = 0;
305 1.1 mrg {
306 1.1 mrg mp_limb_t tn;
307 1.1 mrg mp_limb_t prod, max_prod, i;
308 1.1 mrg mp_size_t j;
309 1.1 mrg TMP_SDECL;
310 1.1 mrg
311 1.1 mrg #if TUNE_PROGRAM_BUILD
312 1.1 mrg ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);
313 1.1 mrg ASSERT (FAC_DSC_THRESHOLD >= 2 * (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2));
314 1.1 mrg #endif
315 1.1 mrg
316 1.1 mrg /* Compute the number of recursive steps for the DSC algorithm. */
317 1.1 mrg for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++)
318 1.1 mrg tn >>= 1;
319 1.1 mrg
320 1.1 mrg j = 0;
321 1.1 mrg
322 1.1 mrg TMP_SMARK;
323 1.1 mrg factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB);
324 1.1 mrg ASSERT (tn >= FACTORS_PER_LIMB);
325 1.1 mrg
326 1.1 mrg prod = 1;
327 1.1 mrg #if TUNE_PROGRAM_BUILD
328 1.1 mrg max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD_LIMIT;
329 1.1 mrg #else
330 1.1 mrg max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD;
331 1.1 mrg #endif
332 1.1 mrg
333 1.1 mrg ASSERT (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
334 1.1 mrg do {
335 1.1 mrg i = ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2;
336 1.1 mrg factors[j++] = ODD_DOUBLEFACTORIAL_TABLE_MAX;
337 1.1 mrg do {
338 1.1 mrg FACTOR_LIST_STORE (i, prod, max_prod, factors, j);
339 1.1 mrg i += 2;
340 1.1 mrg } while (i <= tn);
341 1.1 mrg max_prod <<= 1;
342 1.1 mrg tn >>= 1;
343 1.1 mrg } while (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
344 1.1 mrg
345 1.1 mrg factors[j++] = prod;
346 1.1 mrg factors[j++] = __gmp_odd2fac_table[(tn - 1) >> 1];
347 1.1 mrg factors[j++] = __gmp_oddfac_table[tn >> 1];
348 1.1 mrg mpz_prodlimbs (x, factors, j);
349 1.1 mrg
350 1.1 mrg TMP_SFREE;
351 1.1 mrg }
352 1.1 mrg
353 1.1 mrg if (s != 0)
354 1.1 mrg /* Use the algorithm described by Peter Luschny in "Divide,
355 1.1 mrg Swing and Conquer the Factorial!".
356 1.1 mrg
357 1.1 mrg Improvement: there are two temporary buffers, factors and
358 1.1 mrg square, that are never used together; with a good estimate
359 1.1 mrg of the maximal needed size, they could share a single
360 1.1 mrg allocation.
361 1.1 mrg */
362 1.1 mrg {
363 1.1 mrg mpz_t mswing;
364 1.1 mrg mp_ptr sieve;
365 1.1 mrg mp_size_t size;
366 1.1 mrg TMP_DECL;
367 1.1 mrg
368 1.1 mrg TMP_MARK;
369 1.1 mrg
370 1.1 mrg flag--;
371 1.1 mrg size = n / GMP_NUMB_BITS + 4;
372 1.1 mrg ASSERT (primesieve_size (n - 1) <= size - (size / 2 + 1));
373 1.1 mrg /* 2-multiswing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS);
374 1.1 mrg one more can be overwritten by mul, another for the sieve */
375 1.1 mrg MPZ_TMP_INIT (mswing, size);
376 1.1 mrg /* Initialize size, so that ASSERT can check it correctly. */
377 1.1 mrg ASSERT_CODE (SIZ (mswing) = 0);
378 1.1 mrg
379 1.1 mrg /* Put the sieve on the second half, it will be overwritten by the last mswing. */
380 1.1 mrg sieve = PTR (mswing) + size / 2 + 1;
381 1.1 mrg
382 1.1 mrg size = (gmp_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1;
383 1.1 mrg
384 1.1 mrg factors = TMP_ALLOC_LIMBS (size);
385 1.1 mrg do {
386 1.1 mrg mp_ptr square, px;
387 1.1 mrg mp_size_t nx, ns;
388 1.1 mrg mp_limb_t cy;
389 1.1 mrg TMP_DECL;
390 1.1 mrg
391 1.1 mrg s--;
392 1.1 mrg ASSERT (ABSIZ (mswing) < ALLOC (mswing) / 2); /* Check: sieve has not been overwritten */
393 1.1 mrg mpz_2multiswing_1 (mswing, n >> s, sieve, factors);
394 1.1 mrg
395 1.1 mrg TMP_MARK;
396 1.1 mrg nx = SIZ (x);
397 1.1 mrg if (s == flag) {
398 1.1 mrg size = nx;
399 1.1 mrg square = TMP_ALLOC_LIMBS (size);
400 1.1 mrg MPN_COPY (square, PTR (x), nx);
401 1.1 mrg } else {
402 1.1 mrg size = nx << 1;
403 1.1 mrg square = TMP_ALLOC_LIMBS (size);
404 1.1 mrg mpn_sqr (square, PTR (x), nx);
405 1.1 mrg size -= (square[size - 1] == 0);
406 1.1 mrg }
407 1.1 mrg ns = SIZ (mswing);
408 1.1 mrg nx = size + ns;
409 1.1 mrg px = MPZ_NEWALLOC (x, nx);
410 1.1 mrg ASSERT (ns <= size);
411 1.1 mrg cy = mpn_mul (px, square, size, PTR(mswing), ns); /* n!= n$ * floor(n/2)!^2 */
412 1.1 mrg
413 1.1 mrg SIZ(x) = nx - (cy == 0);
414 1.1.1.3 mrg TMP_FREE;
415 1.1 mrg } while (s != 0);
416 1.1 mrg TMP_FREE;
417 1.1 mrg }
418 1.1 mrg }
419 1.1 mrg }
420 1.1 mrg
421 1.1 mrg #undef FACTORS_PER_LIMB
422 1.1 mrg #undef FACTOR_LIST_STORE
423