oddfac_1.c revision 1.1 1 1.1 mrg /* mpz_oddfac_1(RESULT, N) -- Set RESULT to the odd factor of N!.
2 1.1 mrg
3 1.1 mrg Contributed to the GNU project by Marco Bodrato.
4 1.1 mrg
5 1.1 mrg THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
6 1.1 mrg IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
7 1.1 mrg IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
8 1.1 mrg DISAPPEAR IN A FUTURE GNU MP RELEASE.
9 1.1 mrg
10 1.1 mrg Copyright 2010, 2011, 2012 Free Software Foundation, Inc.
11 1.1 mrg
12 1.1 mrg This file is part of the GNU MP Library.
13 1.1 mrg
14 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify
15 1.1 mrg it under the terms of the GNU Lesser General Public License as published by
16 1.1 mrg the Free Software Foundation; either version 3 of the License, or (at your
17 1.1 mrg option) any later version.
18 1.1 mrg
19 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but
20 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
21 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
22 1.1 mrg License for more details.
23 1.1 mrg
24 1.1 mrg You should have received a copy of the GNU Lesser General Public License
25 1.1 mrg along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
26 1.1 mrg
27 1.1 mrg #include "gmp.h"
28 1.1 mrg #include "gmp-impl.h"
29 1.1 mrg #include "longlong.h"
30 1.1 mrg
31 1.1 mrg /* TODO:
32 1.1 mrg - split this file in smaller parts with functions that can be recycled for different computations.
33 1.1 mrg */
34 1.1 mrg
35 1.1 mrg /**************************************************************/
36 1.1 mrg /* Section macros: common macros, for mswing/fac/bin (&sieve) */
37 1.1 mrg /**************************************************************/
38 1.1 mrg
39 1.1 mrg #define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I) \
40 1.1 mrg if ((PR) > (MAX_PR)) { \
41 1.1 mrg (VEC)[(I)++] = (PR); \
42 1.1 mrg (PR) = 1; \
43 1.1 mrg }
44 1.1 mrg
45 1.1 mrg #define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \
46 1.1 mrg do { \
47 1.1 mrg if ((PR) > (MAX_PR)) { \
48 1.1 mrg (VEC)[(I)++] = (PR); \
49 1.1 mrg (PR) = (P); \
50 1.1 mrg } else \
51 1.1 mrg (PR) *= (P); \
52 1.1 mrg } while (0)
53 1.1 mrg
54 1.1 mrg #define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) \
55 1.1 mrg __max_i = (end); \
56 1.1 mrg \
57 1.1 mrg do { \
58 1.1 mrg ++__i; \
59 1.1 mrg if (((sieve)[__index] & __mask) == 0) \
60 1.1 mrg { \
61 1.1 mrg (prime) = id_to_n(__i)
62 1.1 mrg
63 1.1 mrg #define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \
64 1.1 mrg do { \
65 1.1 mrg mp_limb_t __mask, __index, __max_i, __i; \
66 1.1 mrg \
67 1.1 mrg __i = (start)-(off); \
68 1.1 mrg __index = __i / GMP_LIMB_BITS; \
69 1.1 mrg __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \
70 1.1 mrg __i += (off); \
71 1.1 mrg \
72 1.1 mrg LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
73 1.1 mrg
74 1.1 mrg #define LOOP_ON_SIEVE_STOP \
75 1.1 mrg } \
76 1.1 mrg __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \
77 1.1 mrg __index += __mask & 1; \
78 1.1 mrg } while (__i <= __max_i) \
79 1.1 mrg
80 1.1 mrg #define LOOP_ON_SIEVE_END \
81 1.1 mrg LOOP_ON_SIEVE_STOP; \
82 1.1 mrg } while (0)
83 1.1 mrg
84 1.1 mrg /*********************************************************/
85 1.1 mrg /* Section sieve: sieving functions and tools for primes */
86 1.1 mrg /*********************************************************/
87 1.1 mrg
88 1.1 mrg #if WANT_ASSERT
89 1.1 mrg static mp_limb_t
90 1.1 mrg bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
91 1.1 mrg #endif
92 1.1 mrg
93 1.1 mrg /* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/
94 1.1 mrg static mp_limb_t
95 1.1 mrg id_to_n (mp_limb_t id) { return id*3+1+(id&1); }
96 1.1 mrg
97 1.1 mrg /* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
98 1.1 mrg static mp_limb_t
99 1.1 mrg n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
100 1.1 mrg
101 1.1 mrg #if WANT_ASSERT
102 1.1 mrg static mp_size_t
103 1.1 mrg primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
104 1.1 mrg #endif
105 1.1 mrg
106 1.1 mrg /*********************************************************/
107 1.1 mrg /* Section mswing: 2-multiswing factorial */
108 1.1 mrg /*********************************************************/
109 1.1 mrg
110 1.1 mrg /* Returns an approximation of the sqare root of x. *
111 1.1 mrg * It gives: x <= limb_apprsqrt (x) ^ 2 < x * 9/4 */
112 1.1 mrg static mp_limb_t
113 1.1 mrg limb_apprsqrt (mp_limb_t x)
114 1.1 mrg {
115 1.1 mrg int s;
116 1.1 mrg
117 1.1 mrg ASSERT (x > 2);
118 1.1 mrg count_leading_zeros (s, x - 1);
119 1.1 mrg s = GMP_LIMB_BITS - 1 - s;
120 1.1 mrg return (CNST_LIMB(1) << (s >> 1)) + (CNST_LIMB(1) << ((s - 1) >> 1));
121 1.1 mrg }
122 1.1 mrg
123 1.1 mrg #if 0
124 1.1 mrg /* A count-then-exponentiate variant for SWING_A_PRIME */
125 1.1 mrg #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
126 1.1 mrg do { \
127 1.1 mrg mp_limb_t __q, __prime; \
128 1.1 mrg int __exp; \
129 1.1 mrg __prime = (P); \
130 1.1 mrg __exp = 0; \
131 1.1 mrg __q = (N); \
132 1.1 mrg do { \
133 1.1 mrg __q /= __prime; \
134 1.1 mrg __exp += __q & 1; \
135 1.1 mrg } while (__q >= __prime); \
136 1.1 mrg if (__exp) { /* Store $prime^{exp}$ */ \
137 1.1 mrg for (__q = __prime; --__exp; __q *= __prime); \
138 1.1 mrg FACTOR_LIST_STORE(__q, PR, MAX_PR, VEC, I); \
139 1.1 mrg }; \
140 1.1 mrg } while (0)
141 1.1 mrg #else
142 1.1 mrg #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
143 1.1 mrg do { \
144 1.1 mrg mp_limb_t __q, __prime; \
145 1.1 mrg __prime = (P); \
146 1.1 mrg FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \
147 1.1 mrg __q = (N); \
148 1.1 mrg do { \
149 1.1 mrg __q /= __prime; \
150 1.1 mrg if ((__q & 1) != 0) (PR) *= __prime; \
151 1.1 mrg } while (__q >= __prime); \
152 1.1 mrg } while (0)
153 1.1 mrg #endif
154 1.1 mrg
155 1.1 mrg #define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
156 1.1 mrg do { \
157 1.1 mrg mp_limb_t __prime; \
158 1.1 mrg __prime = (P); \
159 1.1 mrg if ((((N) / __prime) & 1) != 0) \
160 1.1 mrg FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I); \
161 1.1 mrg } while (0)
162 1.1 mrg
163 1.1 mrg /* mpz_2multiswing_1 computes the odd part of the 2-multiswing
164 1.1 mrg factorial of the parameter n. The result x is an odd positive
165 1.1 mrg integer so that multiswing(n,2) = x 2^a.
166 1.1 mrg
167 1.1 mrg Uses the algorithm described by Peter Luschny in "Divide, Swing and
168 1.1 mrg Conquer the Factorial!".
169 1.1 mrg
170 1.1 mrg The pointer sieve points to primesieve_size(n) limbs containing a
171 1.1 mrg bit-array where primes are marked as 0.
172 1.1 mrg Enough (FIXME: explain :-) limbs must be pointed by factors.
173 1.1 mrg */
174 1.1 mrg
175 1.1 mrg static void
176 1.1 mrg mpz_2multiswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors)
177 1.1 mrg {
178 1.1 mrg mp_limb_t prod, max_prod;
179 1.1 mrg mp_size_t j;
180 1.1 mrg
181 1.1 mrg ASSERT (n >= 26);
182 1.1 mrg
183 1.1 mrg j = 0;
184 1.1 mrg prod = -(n & 1);
185 1.1 mrg n &= ~ CNST_LIMB(1); /* n-1, if n is odd */
186 1.1 mrg
187 1.1 mrg prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */
188 1.1 mrg max_prod = GMP_NUMB_MAX / (n-1);
189 1.1 mrg
190 1.1 mrg /* Handle prime = 3 separately. */
191 1.1 mrg SWING_A_PRIME (3, n, prod, max_prod, factors, j);
192 1.1 mrg
193 1.1 mrg /* Swing primes from 5 to n/3 */
194 1.1 mrg {
195 1.1 mrg mp_limb_t s;
196 1.1 mrg
197 1.1 mrg {
198 1.1 mrg mp_limb_t prime;
199 1.1 mrg
200 1.1 mrg s = limb_apprsqrt(n);
201 1.1 mrg ASSERT (s >= 5);
202 1.1 mrg s = n_to_bit (s);
203 1.1 mrg LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
204 1.1 mrg SWING_A_PRIME (prime, n, prod, max_prod, factors, j);
205 1.1 mrg LOOP_ON_SIEVE_END;
206 1.1 mrg s++;
207 1.1 mrg }
208 1.1 mrg
209 1.1 mrg ASSERT (max_prod <= GMP_NUMB_MAX / 3);
210 1.1 mrg ASSERT (bit_to_n (s) * bit_to_n (s) > n);
211 1.1 mrg ASSERT (s <= n_to_bit (n / 3));
212 1.1 mrg {
213 1.1 mrg mp_limb_t prime;
214 1.1 mrg mp_limb_t l_max_prod = max_prod * 3;
215 1.1 mrg
216 1.1 mrg LOOP_ON_SIEVE_BEGIN (prime, s, n_to_bit (n/3), 0, sieve);
217 1.1 mrg SH_SWING_A_PRIME (prime, n, prod, l_max_prod, factors, j);
218 1.1 mrg LOOP_ON_SIEVE_END;
219 1.1 mrg }
220 1.1 mrg }
221 1.1 mrg
222 1.1 mrg /* Store primes from (n+1)/2 to n */
223 1.1 mrg {
224 1.1 mrg mp_limb_t prime;
225 1.1 mrg LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n >> 1) + 1, n_to_bit (n), 0,sieve);
226 1.1 mrg FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
227 1.1 mrg LOOP_ON_SIEVE_END;
228 1.1 mrg }
229 1.1 mrg
230 1.1 mrg if (LIKELY (j != 0))
231 1.1 mrg {
232 1.1 mrg factors[j++] = prod;
233 1.1 mrg mpz_prodlimbs (x, factors, j);
234 1.1 mrg }
235 1.1 mrg else
236 1.1 mrg {
237 1.1 mrg PTR (x)[0] = prod;
238 1.1 mrg SIZ (x) = 1;
239 1.1 mrg }
240 1.1 mrg }
241 1.1 mrg
242 1.1 mrg #undef SWING_A_PRIME
243 1.1 mrg #undef SH_SWING_A_PRIME
244 1.1 mrg #undef LOOP_ON_SIEVE_END
245 1.1 mrg #undef LOOP_ON_SIEVE_STOP
246 1.1 mrg #undef LOOP_ON_SIEVE_BEGIN
247 1.1 mrg #undef LOOP_ON_SIEVE_CONTINUE
248 1.1 mrg #undef FACTOR_LIST_APPEND
249 1.1 mrg
250 1.1 mrg /*********************************************************/
251 1.1 mrg /* Section oddfac: odd factorial, needed also by binomial*/
252 1.1 mrg /*********************************************************/
253 1.1 mrg
254 1.1 mrg #if TUNE_PROGRAM_BUILD
255 1.1 mrg #define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD_LIMIT-1)+1))
256 1.1 mrg #else
257 1.1 mrg #define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD-1)+1))
258 1.1 mrg #endif
259 1.1 mrg
260 1.1 mrg /* mpz_oddfac_1 computes the odd part of the factorial of the
261 1.1 mrg parameter n. I.e. n! = x 2^a, where x is the returned value: an
262 1.1 mrg odd positive integer.
263 1.1 mrg
264 1.1 mrg If flag != 0 a square is skipped in the DSC part, e.g.
265 1.1 mrg if n is odd, n > FAC_DSC_THRESHOLD and flag = 1, x is set to n!!.
266 1.1 mrg
267 1.1 mrg If n is too small, flag is ignored, and an ASSERT can be triggered.
268 1.1 mrg
269 1.1 mrg TODO: FAC_DSC_THRESHOLD is used here with two different roles:
270 1.1 mrg - to decide when prime factorisation is needed,
271 1.1 mrg - to stop the recursion, once sieving is done.
272 1.1 mrg Maybe two thresholds can do a better job.
273 1.1 mrg */
274 1.1 mrg void
275 1.1 mrg mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag)
276 1.1 mrg {
277 1.1 mrg ASSERT (n <= GMP_NUMB_MAX);
278 1.1 mrg ASSERT (flag == 0 || (flag == 1 && n > ODD_FACTORIAL_TABLE_LIMIT && ABOVE_THRESHOLD (n, FAC_DSC_THRESHOLD)));
279 1.1 mrg
280 1.1 mrg if (n <= ODD_FACTORIAL_TABLE_LIMIT)
281 1.1 mrg {
282 1.1 mrg PTR (x)[0] = __gmp_oddfac_table[n];
283 1.1 mrg SIZ (x) = 1;
284 1.1 mrg }
285 1.1 mrg else if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1)
286 1.1 mrg {
287 1.1 mrg mp_ptr px;
288 1.1 mrg
289 1.1 mrg px = MPZ_NEWALLOC (x, 2);
290 1.1 mrg umul_ppmm (px[1], px[0], __gmp_odd2fac_table[(n - 1) >> 1], __gmp_oddfac_table[n >> 1]);
291 1.1 mrg SIZ (x) = 2;
292 1.1 mrg }
293 1.1 mrg else
294 1.1 mrg {
295 1.1 mrg unsigned s;
296 1.1 mrg mp_ptr factors;
297 1.1 mrg
298 1.1 mrg s = 0;
299 1.1 mrg {
300 1.1 mrg mp_limb_t tn;
301 1.1 mrg mp_limb_t prod, max_prod, i;
302 1.1 mrg mp_size_t j;
303 1.1 mrg TMP_SDECL;
304 1.1 mrg
305 1.1 mrg #if TUNE_PROGRAM_BUILD
306 1.1 mrg ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);
307 1.1 mrg ASSERT (FAC_DSC_THRESHOLD >= 2 * (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2));
308 1.1 mrg #endif
309 1.1 mrg
310 1.1 mrg /* Compute the number of recursive steps for the DSC algorithm. */
311 1.1 mrg for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++)
312 1.1 mrg tn >>= 1;
313 1.1 mrg
314 1.1 mrg j = 0;
315 1.1 mrg
316 1.1 mrg TMP_SMARK;
317 1.1 mrg factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB);
318 1.1 mrg ASSERT (tn >= FACTORS_PER_LIMB);
319 1.1 mrg
320 1.1 mrg prod = 1;
321 1.1 mrg #if TUNE_PROGRAM_BUILD
322 1.1 mrg max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD_LIMIT;
323 1.1 mrg #else
324 1.1 mrg max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD;
325 1.1 mrg #endif
326 1.1 mrg
327 1.1 mrg ASSERT (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
328 1.1 mrg do {
329 1.1 mrg i = ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2;
330 1.1 mrg factors[j++] = ODD_DOUBLEFACTORIAL_TABLE_MAX;
331 1.1 mrg do {
332 1.1 mrg FACTOR_LIST_STORE (i, prod, max_prod, factors, j);
333 1.1 mrg i += 2;
334 1.1 mrg } while (i <= tn);
335 1.1 mrg max_prod <<= 1;
336 1.1 mrg tn >>= 1;
337 1.1 mrg } while (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
338 1.1 mrg
339 1.1 mrg factors[j++] = prod;
340 1.1 mrg factors[j++] = __gmp_odd2fac_table[(tn - 1) >> 1];
341 1.1 mrg factors[j++] = __gmp_oddfac_table[tn >> 1];
342 1.1 mrg mpz_prodlimbs (x, factors, j);
343 1.1 mrg
344 1.1 mrg TMP_SFREE;
345 1.1 mrg }
346 1.1 mrg
347 1.1 mrg if (s != 0)
348 1.1 mrg /* Use the algorithm described by Peter Luschny in "Divide,
349 1.1 mrg Swing and Conquer the Factorial!".
350 1.1 mrg
351 1.1 mrg Improvement: there are two temporary buffers, factors and
352 1.1 mrg square, that are never used together; with a good estimate
353 1.1 mrg of the maximal needed size, they could share a single
354 1.1 mrg allocation.
355 1.1 mrg */
356 1.1 mrg {
357 1.1 mrg mpz_t mswing;
358 1.1 mrg mp_ptr sieve;
359 1.1 mrg mp_size_t size;
360 1.1 mrg TMP_DECL;
361 1.1 mrg
362 1.1 mrg TMP_MARK;
363 1.1 mrg
364 1.1 mrg flag--;
365 1.1 mrg size = n / GMP_NUMB_BITS + 4;
366 1.1 mrg ASSERT (primesieve_size (n - 1) <= size - (size / 2 + 1));
367 1.1 mrg /* 2-multiswing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS);
368 1.1 mrg one more can be overwritten by mul, another for the sieve */
369 1.1 mrg MPZ_TMP_INIT (mswing, size);
370 1.1 mrg /* Initialize size, so that ASSERT can check it correctly. */
371 1.1 mrg ASSERT_CODE (SIZ (mswing) = 0);
372 1.1 mrg
373 1.1 mrg /* Put the sieve on the second half, it will be overwritten by the last mswing. */
374 1.1 mrg sieve = PTR (mswing) + size / 2 + 1;
375 1.1 mrg
376 1.1 mrg size = (gmp_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1;
377 1.1 mrg
378 1.1 mrg factors = TMP_ALLOC_LIMBS (size);
379 1.1 mrg do {
380 1.1 mrg mp_ptr square, px;
381 1.1 mrg mp_size_t nx, ns;
382 1.1 mrg mp_limb_t cy;
383 1.1 mrg TMP_DECL;
384 1.1 mrg
385 1.1 mrg s--;
386 1.1 mrg ASSERT (ABSIZ (mswing) < ALLOC (mswing) / 2); /* Check: sieve has not been overwritten */
387 1.1 mrg mpz_2multiswing_1 (mswing, n >> s, sieve, factors);
388 1.1 mrg
389 1.1 mrg TMP_MARK;
390 1.1 mrg nx = SIZ (x);
391 1.1 mrg if (s == flag) {
392 1.1 mrg size = nx;
393 1.1 mrg square = TMP_ALLOC_LIMBS (size);
394 1.1 mrg MPN_COPY (square, PTR (x), nx);
395 1.1 mrg } else {
396 1.1 mrg size = nx << 1;
397 1.1 mrg square = TMP_ALLOC_LIMBS (size);
398 1.1 mrg mpn_sqr (square, PTR (x), nx);
399 1.1 mrg size -= (square[size - 1] == 0);
400 1.1 mrg }
401 1.1 mrg ns = SIZ (mswing);
402 1.1 mrg nx = size + ns;
403 1.1 mrg px = MPZ_NEWALLOC (x, nx);
404 1.1 mrg ASSERT (ns <= size);
405 1.1 mrg cy = mpn_mul (px, square, size, PTR(mswing), ns); /* n!= n$ * floor(n/2)!^2 */
406 1.1 mrg
407 1.1 mrg TMP_FREE;
408 1.1 mrg SIZ(x) = nx - (cy == 0);
409 1.1 mrg } while (s != 0);
410 1.1 mrg TMP_FREE;
411 1.1 mrg }
412 1.1 mrg }
413 1.1 mrg }
414 1.1 mrg
415 1.1 mrg #undef FACTORS_PER_LIMB
416 1.1 mrg #undef FACTOR_LIST_STORE
417