oddfac_1.c revision 1.1.1.2 1 1.1 mrg /* mpz_oddfac_1(RESULT, N) -- Set RESULT to the odd factor of N!.
2 1.1 mrg
3 1.1 mrg Contributed to the GNU project by Marco Bodrato.
4 1.1 mrg
5 1.1 mrg THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
6 1.1 mrg IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
7 1.1 mrg IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
8 1.1 mrg DISAPPEAR IN A FUTURE GNU MP RELEASE.
9 1.1 mrg
10 1.1.1.2 mrg Copyright 2010-2012 Free Software Foundation, Inc.
11 1.1 mrg
12 1.1 mrg This file is part of the GNU MP Library.
13 1.1 mrg
14 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify
15 1.1.1.2 mrg it under the terms of either:
16 1.1.1.2 mrg
17 1.1.1.2 mrg * the GNU Lesser General Public License as published by the Free
18 1.1.1.2 mrg Software Foundation; either version 3 of the License, or (at your
19 1.1.1.2 mrg option) any later version.
20 1.1.1.2 mrg
21 1.1.1.2 mrg or
22 1.1.1.2 mrg
23 1.1.1.2 mrg * the GNU General Public License as published by the Free Software
24 1.1.1.2 mrg Foundation; either version 2 of the License, or (at your option) any
25 1.1.1.2 mrg later version.
26 1.1.1.2 mrg
27 1.1.1.2 mrg or both in parallel, as here.
28 1.1 mrg
29 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but
30 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31 1.1.1.2 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
32 1.1.1.2 mrg for more details.
33 1.1 mrg
34 1.1.1.2 mrg You should have received copies of the GNU General Public License and the
35 1.1.1.2 mrg GNU Lesser General Public License along with the GNU MP Library. If not,
36 1.1.1.2 mrg see https://www.gnu.org/licenses/. */
37 1.1 mrg
38 1.1 mrg #include "gmp.h"
39 1.1 mrg #include "gmp-impl.h"
40 1.1 mrg #include "longlong.h"
41 1.1 mrg
42 1.1 mrg /* TODO:
43 1.1 mrg - split this file in smaller parts with functions that can be recycled for different computations.
44 1.1 mrg */
45 1.1 mrg
46 1.1 mrg /**************************************************************/
47 1.1 mrg /* Section macros: common macros, for mswing/fac/bin (&sieve) */
48 1.1 mrg /**************************************************************/
49 1.1 mrg
50 1.1 mrg #define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I) \
51 1.1 mrg if ((PR) > (MAX_PR)) { \
52 1.1 mrg (VEC)[(I)++] = (PR); \
53 1.1 mrg (PR) = 1; \
54 1.1 mrg }
55 1.1 mrg
56 1.1 mrg #define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \
57 1.1 mrg do { \
58 1.1 mrg if ((PR) > (MAX_PR)) { \
59 1.1 mrg (VEC)[(I)++] = (PR); \
60 1.1 mrg (PR) = (P); \
61 1.1 mrg } else \
62 1.1 mrg (PR) *= (P); \
63 1.1 mrg } while (0)
64 1.1 mrg
65 1.1 mrg #define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) \
66 1.1 mrg __max_i = (end); \
67 1.1 mrg \
68 1.1 mrg do { \
69 1.1 mrg ++__i; \
70 1.1 mrg if (((sieve)[__index] & __mask) == 0) \
71 1.1 mrg { \
72 1.1 mrg (prime) = id_to_n(__i)
73 1.1 mrg
74 1.1 mrg #define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \
75 1.1 mrg do { \
76 1.1 mrg mp_limb_t __mask, __index, __max_i, __i; \
77 1.1 mrg \
78 1.1 mrg __i = (start)-(off); \
79 1.1 mrg __index = __i / GMP_LIMB_BITS; \
80 1.1 mrg __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \
81 1.1 mrg __i += (off); \
82 1.1 mrg \
83 1.1 mrg LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
84 1.1 mrg
85 1.1 mrg #define LOOP_ON_SIEVE_STOP \
86 1.1 mrg } \
87 1.1 mrg __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \
88 1.1 mrg __index += __mask & 1; \
89 1.1 mrg } while (__i <= __max_i) \
90 1.1 mrg
91 1.1 mrg #define LOOP_ON_SIEVE_END \
92 1.1 mrg LOOP_ON_SIEVE_STOP; \
93 1.1 mrg } while (0)
94 1.1 mrg
95 1.1 mrg /*********************************************************/
96 1.1 mrg /* Section sieve: sieving functions and tools for primes */
97 1.1 mrg /*********************************************************/
98 1.1 mrg
99 1.1 mrg #if WANT_ASSERT
100 1.1 mrg static mp_limb_t
101 1.1 mrg bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
102 1.1 mrg #endif
103 1.1 mrg
104 1.1 mrg /* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/
105 1.1 mrg static mp_limb_t
106 1.1 mrg id_to_n (mp_limb_t id) { return id*3+1+(id&1); }
107 1.1 mrg
108 1.1 mrg /* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
109 1.1 mrg static mp_limb_t
110 1.1 mrg n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
111 1.1 mrg
112 1.1 mrg #if WANT_ASSERT
113 1.1 mrg static mp_size_t
114 1.1 mrg primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
115 1.1 mrg #endif
116 1.1 mrg
117 1.1 mrg /*********************************************************/
118 1.1 mrg /* Section mswing: 2-multiswing factorial */
119 1.1 mrg /*********************************************************/
120 1.1 mrg
121 1.1 mrg /* Returns an approximation of the sqare root of x. *
122 1.1 mrg * It gives: x <= limb_apprsqrt (x) ^ 2 < x * 9/4 */
123 1.1 mrg static mp_limb_t
124 1.1 mrg limb_apprsqrt (mp_limb_t x)
125 1.1 mrg {
126 1.1 mrg int s;
127 1.1 mrg
128 1.1 mrg ASSERT (x > 2);
129 1.1 mrg count_leading_zeros (s, x - 1);
130 1.1 mrg s = GMP_LIMB_BITS - 1 - s;
131 1.1 mrg return (CNST_LIMB(1) << (s >> 1)) + (CNST_LIMB(1) << ((s - 1) >> 1));
132 1.1 mrg }
133 1.1 mrg
134 1.1 mrg #if 0
135 1.1 mrg /* A count-then-exponentiate variant for SWING_A_PRIME */
136 1.1 mrg #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
137 1.1 mrg do { \
138 1.1 mrg mp_limb_t __q, __prime; \
139 1.1 mrg int __exp; \
140 1.1 mrg __prime = (P); \
141 1.1 mrg __exp = 0; \
142 1.1 mrg __q = (N); \
143 1.1 mrg do { \
144 1.1 mrg __q /= __prime; \
145 1.1 mrg __exp += __q & 1; \
146 1.1 mrg } while (__q >= __prime); \
147 1.1 mrg if (__exp) { /* Store $prime^{exp}$ */ \
148 1.1 mrg for (__q = __prime; --__exp; __q *= __prime); \
149 1.1 mrg FACTOR_LIST_STORE(__q, PR, MAX_PR, VEC, I); \
150 1.1 mrg }; \
151 1.1 mrg } while (0)
152 1.1 mrg #else
153 1.1 mrg #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
154 1.1 mrg do { \
155 1.1 mrg mp_limb_t __q, __prime; \
156 1.1 mrg __prime = (P); \
157 1.1 mrg FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \
158 1.1 mrg __q = (N); \
159 1.1 mrg do { \
160 1.1 mrg __q /= __prime; \
161 1.1 mrg if ((__q & 1) != 0) (PR) *= __prime; \
162 1.1 mrg } while (__q >= __prime); \
163 1.1 mrg } while (0)
164 1.1 mrg #endif
165 1.1 mrg
166 1.1 mrg #define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
167 1.1 mrg do { \
168 1.1 mrg mp_limb_t __prime; \
169 1.1 mrg __prime = (P); \
170 1.1 mrg if ((((N) / __prime) & 1) != 0) \
171 1.1 mrg FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I); \
172 1.1 mrg } while (0)
173 1.1 mrg
174 1.1 mrg /* mpz_2multiswing_1 computes the odd part of the 2-multiswing
175 1.1 mrg factorial of the parameter n. The result x is an odd positive
176 1.1 mrg integer so that multiswing(n,2) = x 2^a.
177 1.1 mrg
178 1.1 mrg Uses the algorithm described by Peter Luschny in "Divide, Swing and
179 1.1 mrg Conquer the Factorial!".
180 1.1 mrg
181 1.1 mrg The pointer sieve points to primesieve_size(n) limbs containing a
182 1.1 mrg bit-array where primes are marked as 0.
183 1.1 mrg Enough (FIXME: explain :-) limbs must be pointed by factors.
184 1.1 mrg */
185 1.1 mrg
186 1.1 mrg static void
187 1.1 mrg mpz_2multiswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors)
188 1.1 mrg {
189 1.1 mrg mp_limb_t prod, max_prod;
190 1.1 mrg mp_size_t j;
191 1.1 mrg
192 1.1 mrg ASSERT (n >= 26);
193 1.1 mrg
194 1.1 mrg j = 0;
195 1.1 mrg prod = -(n & 1);
196 1.1 mrg n &= ~ CNST_LIMB(1); /* n-1, if n is odd */
197 1.1 mrg
198 1.1 mrg prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */
199 1.1 mrg max_prod = GMP_NUMB_MAX / (n-1);
200 1.1 mrg
201 1.1 mrg /* Handle prime = 3 separately. */
202 1.1 mrg SWING_A_PRIME (3, n, prod, max_prod, factors, j);
203 1.1 mrg
204 1.1 mrg /* Swing primes from 5 to n/3 */
205 1.1 mrg {
206 1.1 mrg mp_limb_t s;
207 1.1 mrg
208 1.1 mrg {
209 1.1 mrg mp_limb_t prime;
210 1.1 mrg
211 1.1 mrg s = limb_apprsqrt(n);
212 1.1 mrg ASSERT (s >= 5);
213 1.1 mrg s = n_to_bit (s);
214 1.1 mrg LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
215 1.1 mrg SWING_A_PRIME (prime, n, prod, max_prod, factors, j);
216 1.1 mrg LOOP_ON_SIEVE_END;
217 1.1 mrg s++;
218 1.1 mrg }
219 1.1 mrg
220 1.1 mrg ASSERT (max_prod <= GMP_NUMB_MAX / 3);
221 1.1 mrg ASSERT (bit_to_n (s) * bit_to_n (s) > n);
222 1.1 mrg ASSERT (s <= n_to_bit (n / 3));
223 1.1 mrg {
224 1.1 mrg mp_limb_t prime;
225 1.1 mrg mp_limb_t l_max_prod = max_prod * 3;
226 1.1 mrg
227 1.1 mrg LOOP_ON_SIEVE_BEGIN (prime, s, n_to_bit (n/3), 0, sieve);
228 1.1 mrg SH_SWING_A_PRIME (prime, n, prod, l_max_prod, factors, j);
229 1.1 mrg LOOP_ON_SIEVE_END;
230 1.1 mrg }
231 1.1 mrg }
232 1.1 mrg
233 1.1 mrg /* Store primes from (n+1)/2 to n */
234 1.1 mrg {
235 1.1 mrg mp_limb_t prime;
236 1.1 mrg LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n >> 1) + 1, n_to_bit (n), 0,sieve);
237 1.1 mrg FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
238 1.1 mrg LOOP_ON_SIEVE_END;
239 1.1 mrg }
240 1.1 mrg
241 1.1 mrg if (LIKELY (j != 0))
242 1.1 mrg {
243 1.1 mrg factors[j++] = prod;
244 1.1 mrg mpz_prodlimbs (x, factors, j);
245 1.1 mrg }
246 1.1 mrg else
247 1.1 mrg {
248 1.1 mrg PTR (x)[0] = prod;
249 1.1 mrg SIZ (x) = 1;
250 1.1 mrg }
251 1.1 mrg }
252 1.1 mrg
253 1.1 mrg #undef SWING_A_PRIME
254 1.1 mrg #undef SH_SWING_A_PRIME
255 1.1 mrg #undef LOOP_ON_SIEVE_END
256 1.1 mrg #undef LOOP_ON_SIEVE_STOP
257 1.1 mrg #undef LOOP_ON_SIEVE_BEGIN
258 1.1 mrg #undef LOOP_ON_SIEVE_CONTINUE
259 1.1 mrg #undef FACTOR_LIST_APPEND
260 1.1 mrg
261 1.1 mrg /*********************************************************/
262 1.1 mrg /* Section oddfac: odd factorial, needed also by binomial*/
263 1.1 mrg /*********************************************************/
264 1.1 mrg
265 1.1 mrg #if TUNE_PROGRAM_BUILD
266 1.1 mrg #define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD_LIMIT-1)+1))
267 1.1 mrg #else
268 1.1 mrg #define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD-1)+1))
269 1.1 mrg #endif
270 1.1 mrg
271 1.1 mrg /* mpz_oddfac_1 computes the odd part of the factorial of the
272 1.1 mrg parameter n. I.e. n! = x 2^a, where x is the returned value: an
273 1.1 mrg odd positive integer.
274 1.1 mrg
275 1.1 mrg If flag != 0 a square is skipped in the DSC part, e.g.
276 1.1 mrg if n is odd, n > FAC_DSC_THRESHOLD and flag = 1, x is set to n!!.
277 1.1 mrg
278 1.1 mrg If n is too small, flag is ignored, and an ASSERT can be triggered.
279 1.1 mrg
280 1.1 mrg TODO: FAC_DSC_THRESHOLD is used here with two different roles:
281 1.1 mrg - to decide when prime factorisation is needed,
282 1.1 mrg - to stop the recursion, once sieving is done.
283 1.1 mrg Maybe two thresholds can do a better job.
284 1.1 mrg */
285 1.1 mrg void
286 1.1 mrg mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag)
287 1.1 mrg {
288 1.1 mrg ASSERT (n <= GMP_NUMB_MAX);
289 1.1 mrg ASSERT (flag == 0 || (flag == 1 && n > ODD_FACTORIAL_TABLE_LIMIT && ABOVE_THRESHOLD (n, FAC_DSC_THRESHOLD)));
290 1.1 mrg
291 1.1 mrg if (n <= ODD_FACTORIAL_TABLE_LIMIT)
292 1.1 mrg {
293 1.1 mrg PTR (x)[0] = __gmp_oddfac_table[n];
294 1.1 mrg SIZ (x) = 1;
295 1.1 mrg }
296 1.1 mrg else if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1)
297 1.1 mrg {
298 1.1 mrg mp_ptr px;
299 1.1 mrg
300 1.1 mrg px = MPZ_NEWALLOC (x, 2);
301 1.1 mrg umul_ppmm (px[1], px[0], __gmp_odd2fac_table[(n - 1) >> 1], __gmp_oddfac_table[n >> 1]);
302 1.1 mrg SIZ (x) = 2;
303 1.1 mrg }
304 1.1 mrg else
305 1.1 mrg {
306 1.1 mrg unsigned s;
307 1.1 mrg mp_ptr factors;
308 1.1 mrg
309 1.1 mrg s = 0;
310 1.1 mrg {
311 1.1 mrg mp_limb_t tn;
312 1.1 mrg mp_limb_t prod, max_prod, i;
313 1.1 mrg mp_size_t j;
314 1.1 mrg TMP_SDECL;
315 1.1 mrg
316 1.1 mrg #if TUNE_PROGRAM_BUILD
317 1.1 mrg ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);
318 1.1 mrg ASSERT (FAC_DSC_THRESHOLD >= 2 * (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2));
319 1.1 mrg #endif
320 1.1 mrg
321 1.1 mrg /* Compute the number of recursive steps for the DSC algorithm. */
322 1.1 mrg for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++)
323 1.1 mrg tn >>= 1;
324 1.1 mrg
325 1.1 mrg j = 0;
326 1.1 mrg
327 1.1 mrg TMP_SMARK;
328 1.1 mrg factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB);
329 1.1 mrg ASSERT (tn >= FACTORS_PER_LIMB);
330 1.1 mrg
331 1.1 mrg prod = 1;
332 1.1 mrg #if TUNE_PROGRAM_BUILD
333 1.1 mrg max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD_LIMIT;
334 1.1 mrg #else
335 1.1 mrg max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD;
336 1.1 mrg #endif
337 1.1 mrg
338 1.1 mrg ASSERT (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
339 1.1 mrg do {
340 1.1 mrg i = ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2;
341 1.1 mrg factors[j++] = ODD_DOUBLEFACTORIAL_TABLE_MAX;
342 1.1 mrg do {
343 1.1 mrg FACTOR_LIST_STORE (i, prod, max_prod, factors, j);
344 1.1 mrg i += 2;
345 1.1 mrg } while (i <= tn);
346 1.1 mrg max_prod <<= 1;
347 1.1 mrg tn >>= 1;
348 1.1 mrg } while (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
349 1.1 mrg
350 1.1 mrg factors[j++] = prod;
351 1.1 mrg factors[j++] = __gmp_odd2fac_table[(tn - 1) >> 1];
352 1.1 mrg factors[j++] = __gmp_oddfac_table[tn >> 1];
353 1.1 mrg mpz_prodlimbs (x, factors, j);
354 1.1 mrg
355 1.1 mrg TMP_SFREE;
356 1.1 mrg }
357 1.1 mrg
358 1.1 mrg if (s != 0)
359 1.1 mrg /* Use the algorithm described by Peter Luschny in "Divide,
360 1.1 mrg Swing and Conquer the Factorial!".
361 1.1 mrg
362 1.1 mrg Improvement: there are two temporary buffers, factors and
363 1.1 mrg square, that are never used together; with a good estimate
364 1.1 mrg of the maximal needed size, they could share a single
365 1.1 mrg allocation.
366 1.1 mrg */
367 1.1 mrg {
368 1.1 mrg mpz_t mswing;
369 1.1 mrg mp_ptr sieve;
370 1.1 mrg mp_size_t size;
371 1.1 mrg TMP_DECL;
372 1.1 mrg
373 1.1 mrg TMP_MARK;
374 1.1 mrg
375 1.1 mrg flag--;
376 1.1 mrg size = n / GMP_NUMB_BITS + 4;
377 1.1 mrg ASSERT (primesieve_size (n - 1) <= size - (size / 2 + 1));
378 1.1 mrg /* 2-multiswing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS);
379 1.1 mrg one more can be overwritten by mul, another for the sieve */
380 1.1 mrg MPZ_TMP_INIT (mswing, size);
381 1.1 mrg /* Initialize size, so that ASSERT can check it correctly. */
382 1.1 mrg ASSERT_CODE (SIZ (mswing) = 0);
383 1.1 mrg
384 1.1 mrg /* Put the sieve on the second half, it will be overwritten by the last mswing. */
385 1.1 mrg sieve = PTR (mswing) + size / 2 + 1;
386 1.1 mrg
387 1.1 mrg size = (gmp_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1;
388 1.1 mrg
389 1.1 mrg factors = TMP_ALLOC_LIMBS (size);
390 1.1 mrg do {
391 1.1 mrg mp_ptr square, px;
392 1.1 mrg mp_size_t nx, ns;
393 1.1 mrg mp_limb_t cy;
394 1.1 mrg TMP_DECL;
395 1.1 mrg
396 1.1 mrg s--;
397 1.1 mrg ASSERT (ABSIZ (mswing) < ALLOC (mswing) / 2); /* Check: sieve has not been overwritten */
398 1.1 mrg mpz_2multiswing_1 (mswing, n >> s, sieve, factors);
399 1.1 mrg
400 1.1 mrg TMP_MARK;
401 1.1 mrg nx = SIZ (x);
402 1.1 mrg if (s == flag) {
403 1.1 mrg size = nx;
404 1.1 mrg square = TMP_ALLOC_LIMBS (size);
405 1.1 mrg MPN_COPY (square, PTR (x), nx);
406 1.1 mrg } else {
407 1.1 mrg size = nx << 1;
408 1.1 mrg square = TMP_ALLOC_LIMBS (size);
409 1.1 mrg mpn_sqr (square, PTR (x), nx);
410 1.1 mrg size -= (square[size - 1] == 0);
411 1.1 mrg }
412 1.1 mrg ns = SIZ (mswing);
413 1.1 mrg nx = size + ns;
414 1.1 mrg px = MPZ_NEWALLOC (x, nx);
415 1.1 mrg ASSERT (ns <= size);
416 1.1 mrg cy = mpn_mul (px, square, size, PTR(mswing), ns); /* n!= n$ * floor(n/2)!^2 */
417 1.1 mrg
418 1.1 mrg TMP_FREE;
419 1.1 mrg SIZ(x) = nx - (cy == 0);
420 1.1 mrg } while (s != 0);
421 1.1 mrg TMP_FREE;
422 1.1 mrg }
423 1.1 mrg }
424 1.1 mrg }
425 1.1 mrg
426 1.1 mrg #undef FACTORS_PER_LIMB
427 1.1 mrg #undef FACTOR_LIST_STORE
428