oddfac_1.c revision 1.1 1 /* mpz_oddfac_1(RESULT, N) -- Set RESULT to the odd factor of N!.
2
3 Contributed to the GNU project by Marco Bodrato.
4
5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
6 IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
7 IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
8 DISAPPEAR IN A FUTURE GNU MP RELEASE.
9
10 Copyright 2010, 2011, 2012 Free Software Foundation, Inc.
11
12 This file is part of the GNU MP Library.
13
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of the GNU Lesser General Public License as published by
16 the Free Software Foundation; either version 3 of the License, or (at your
17 option) any later version.
18
19 The GNU MP Library is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
21 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
22 License for more details.
23
24 You should have received a copy of the GNU Lesser General Public License
25 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
26
27 #include "gmp.h"
28 #include "gmp-impl.h"
29 #include "longlong.h"
30
31 /* TODO:
32 - split this file in smaller parts with functions that can be recycled for different computations.
33 */
34
35 /**************************************************************/
36 /* Section macros: common macros, for mswing/fac/bin (&sieve) */
37 /**************************************************************/
38
39 #define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I) \
40 if ((PR) > (MAX_PR)) { \
41 (VEC)[(I)++] = (PR); \
42 (PR) = 1; \
43 }
44
45 #define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \
46 do { \
47 if ((PR) > (MAX_PR)) { \
48 (VEC)[(I)++] = (PR); \
49 (PR) = (P); \
50 } else \
51 (PR) *= (P); \
52 } while (0)
53
54 #define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) \
55 __max_i = (end); \
56 \
57 do { \
58 ++__i; \
59 if (((sieve)[__index] & __mask) == 0) \
60 { \
61 (prime) = id_to_n(__i)
62
63 #define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \
64 do { \
65 mp_limb_t __mask, __index, __max_i, __i; \
66 \
67 __i = (start)-(off); \
68 __index = __i / GMP_LIMB_BITS; \
69 __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \
70 __i += (off); \
71 \
72 LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
73
74 #define LOOP_ON_SIEVE_STOP \
75 } \
76 __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \
77 __index += __mask & 1; \
78 } while (__i <= __max_i) \
79
80 #define LOOP_ON_SIEVE_END \
81 LOOP_ON_SIEVE_STOP; \
82 } while (0)
83
84 /*********************************************************/
85 /* Section sieve: sieving functions and tools for primes */
86 /*********************************************************/
87
88 #if WANT_ASSERT
89 static mp_limb_t
90 bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
91 #endif
92
93 /* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/
94 static mp_limb_t
95 id_to_n (mp_limb_t id) { return id*3+1+(id&1); }
96
97 /* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
98 static mp_limb_t
99 n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
100
101 #if WANT_ASSERT
102 static mp_size_t
103 primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
104 #endif
105
106 /*********************************************************/
107 /* Section mswing: 2-multiswing factorial */
108 /*********************************************************/
109
110 /* Returns an approximation of the sqare root of x. *
111 * It gives: x <= limb_apprsqrt (x) ^ 2 < x * 9/4 */
112 static mp_limb_t
113 limb_apprsqrt (mp_limb_t x)
114 {
115 int s;
116
117 ASSERT (x > 2);
118 count_leading_zeros (s, x - 1);
119 s = GMP_LIMB_BITS - 1 - s;
120 return (CNST_LIMB(1) << (s >> 1)) + (CNST_LIMB(1) << ((s - 1) >> 1));
121 }
122
123 #if 0
124 /* A count-then-exponentiate variant for SWING_A_PRIME */
125 #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
126 do { \
127 mp_limb_t __q, __prime; \
128 int __exp; \
129 __prime = (P); \
130 __exp = 0; \
131 __q = (N); \
132 do { \
133 __q /= __prime; \
134 __exp += __q & 1; \
135 } while (__q >= __prime); \
136 if (__exp) { /* Store $prime^{exp}$ */ \
137 for (__q = __prime; --__exp; __q *= __prime); \
138 FACTOR_LIST_STORE(__q, PR, MAX_PR, VEC, I); \
139 }; \
140 } while (0)
141 #else
142 #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
143 do { \
144 mp_limb_t __q, __prime; \
145 __prime = (P); \
146 FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \
147 __q = (N); \
148 do { \
149 __q /= __prime; \
150 if ((__q & 1) != 0) (PR) *= __prime; \
151 } while (__q >= __prime); \
152 } while (0)
153 #endif
154
155 #define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
156 do { \
157 mp_limb_t __prime; \
158 __prime = (P); \
159 if ((((N) / __prime) & 1) != 0) \
160 FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I); \
161 } while (0)
162
163 /* mpz_2multiswing_1 computes the odd part of the 2-multiswing
164 factorial of the parameter n. The result x is an odd positive
165 integer so that multiswing(n,2) = x 2^a.
166
167 Uses the algorithm described by Peter Luschny in "Divide, Swing and
168 Conquer the Factorial!".
169
170 The pointer sieve points to primesieve_size(n) limbs containing a
171 bit-array where primes are marked as 0.
172 Enough (FIXME: explain :-) limbs must be pointed by factors.
173 */
174
175 static void
176 mpz_2multiswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors)
177 {
178 mp_limb_t prod, max_prod;
179 mp_size_t j;
180
181 ASSERT (n >= 26);
182
183 j = 0;
184 prod = -(n & 1);
185 n &= ~ CNST_LIMB(1); /* n-1, if n is odd */
186
187 prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */
188 max_prod = GMP_NUMB_MAX / (n-1);
189
190 /* Handle prime = 3 separately. */
191 SWING_A_PRIME (3, n, prod, max_prod, factors, j);
192
193 /* Swing primes from 5 to n/3 */
194 {
195 mp_limb_t s;
196
197 {
198 mp_limb_t prime;
199
200 s = limb_apprsqrt(n);
201 ASSERT (s >= 5);
202 s = n_to_bit (s);
203 LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
204 SWING_A_PRIME (prime, n, prod, max_prod, factors, j);
205 LOOP_ON_SIEVE_END;
206 s++;
207 }
208
209 ASSERT (max_prod <= GMP_NUMB_MAX / 3);
210 ASSERT (bit_to_n (s) * bit_to_n (s) > n);
211 ASSERT (s <= n_to_bit (n / 3));
212 {
213 mp_limb_t prime;
214 mp_limb_t l_max_prod = max_prod * 3;
215
216 LOOP_ON_SIEVE_BEGIN (prime, s, n_to_bit (n/3), 0, sieve);
217 SH_SWING_A_PRIME (prime, n, prod, l_max_prod, factors, j);
218 LOOP_ON_SIEVE_END;
219 }
220 }
221
222 /* Store primes from (n+1)/2 to n */
223 {
224 mp_limb_t prime;
225 LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n >> 1) + 1, n_to_bit (n), 0,sieve);
226 FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
227 LOOP_ON_SIEVE_END;
228 }
229
230 if (LIKELY (j != 0))
231 {
232 factors[j++] = prod;
233 mpz_prodlimbs (x, factors, j);
234 }
235 else
236 {
237 PTR (x)[0] = prod;
238 SIZ (x) = 1;
239 }
240 }
241
242 #undef SWING_A_PRIME
243 #undef SH_SWING_A_PRIME
244 #undef LOOP_ON_SIEVE_END
245 #undef LOOP_ON_SIEVE_STOP
246 #undef LOOP_ON_SIEVE_BEGIN
247 #undef LOOP_ON_SIEVE_CONTINUE
248 #undef FACTOR_LIST_APPEND
249
250 /*********************************************************/
251 /* Section oddfac: odd factorial, needed also by binomial*/
252 /*********************************************************/
253
254 #if TUNE_PROGRAM_BUILD
255 #define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD_LIMIT-1)+1))
256 #else
257 #define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD-1)+1))
258 #endif
259
260 /* mpz_oddfac_1 computes the odd part of the factorial of the
261 parameter n. I.e. n! = x 2^a, where x is the returned value: an
262 odd positive integer.
263
264 If flag != 0 a square is skipped in the DSC part, e.g.
265 if n is odd, n > FAC_DSC_THRESHOLD and flag = 1, x is set to n!!.
266
267 If n is too small, flag is ignored, and an ASSERT can be triggered.
268
269 TODO: FAC_DSC_THRESHOLD is used here with two different roles:
270 - to decide when prime factorisation is needed,
271 - to stop the recursion, once sieving is done.
272 Maybe two thresholds can do a better job.
273 */
274 void
275 mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag)
276 {
277 ASSERT (n <= GMP_NUMB_MAX);
278 ASSERT (flag == 0 || (flag == 1 && n > ODD_FACTORIAL_TABLE_LIMIT && ABOVE_THRESHOLD (n, FAC_DSC_THRESHOLD)));
279
280 if (n <= ODD_FACTORIAL_TABLE_LIMIT)
281 {
282 PTR (x)[0] = __gmp_oddfac_table[n];
283 SIZ (x) = 1;
284 }
285 else if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1)
286 {
287 mp_ptr px;
288
289 px = MPZ_NEWALLOC (x, 2);
290 umul_ppmm (px[1], px[0], __gmp_odd2fac_table[(n - 1) >> 1], __gmp_oddfac_table[n >> 1]);
291 SIZ (x) = 2;
292 }
293 else
294 {
295 unsigned s;
296 mp_ptr factors;
297
298 s = 0;
299 {
300 mp_limb_t tn;
301 mp_limb_t prod, max_prod, i;
302 mp_size_t j;
303 TMP_SDECL;
304
305 #if TUNE_PROGRAM_BUILD
306 ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);
307 ASSERT (FAC_DSC_THRESHOLD >= 2 * (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2));
308 #endif
309
310 /* Compute the number of recursive steps for the DSC algorithm. */
311 for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++)
312 tn >>= 1;
313
314 j = 0;
315
316 TMP_SMARK;
317 factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB);
318 ASSERT (tn >= FACTORS_PER_LIMB);
319
320 prod = 1;
321 #if TUNE_PROGRAM_BUILD
322 max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD_LIMIT;
323 #else
324 max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD;
325 #endif
326
327 ASSERT (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
328 do {
329 i = ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2;
330 factors[j++] = ODD_DOUBLEFACTORIAL_TABLE_MAX;
331 do {
332 FACTOR_LIST_STORE (i, prod, max_prod, factors, j);
333 i += 2;
334 } while (i <= tn);
335 max_prod <<= 1;
336 tn >>= 1;
337 } while (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
338
339 factors[j++] = prod;
340 factors[j++] = __gmp_odd2fac_table[(tn - 1) >> 1];
341 factors[j++] = __gmp_oddfac_table[tn >> 1];
342 mpz_prodlimbs (x, factors, j);
343
344 TMP_SFREE;
345 }
346
347 if (s != 0)
348 /* Use the algorithm described by Peter Luschny in "Divide,
349 Swing and Conquer the Factorial!".
350
351 Improvement: there are two temporary buffers, factors and
352 square, that are never used together; with a good estimate
353 of the maximal needed size, they could share a single
354 allocation.
355 */
356 {
357 mpz_t mswing;
358 mp_ptr sieve;
359 mp_size_t size;
360 TMP_DECL;
361
362 TMP_MARK;
363
364 flag--;
365 size = n / GMP_NUMB_BITS + 4;
366 ASSERT (primesieve_size (n - 1) <= size - (size / 2 + 1));
367 /* 2-multiswing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS);
368 one more can be overwritten by mul, another for the sieve */
369 MPZ_TMP_INIT (mswing, size);
370 /* Initialize size, so that ASSERT can check it correctly. */
371 ASSERT_CODE (SIZ (mswing) = 0);
372
373 /* Put the sieve on the second half, it will be overwritten by the last mswing. */
374 sieve = PTR (mswing) + size / 2 + 1;
375
376 size = (gmp_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1;
377
378 factors = TMP_ALLOC_LIMBS (size);
379 do {
380 mp_ptr square, px;
381 mp_size_t nx, ns;
382 mp_limb_t cy;
383 TMP_DECL;
384
385 s--;
386 ASSERT (ABSIZ (mswing) < ALLOC (mswing) / 2); /* Check: sieve has not been overwritten */
387 mpz_2multiswing_1 (mswing, n >> s, sieve, factors);
388
389 TMP_MARK;
390 nx = SIZ (x);
391 if (s == flag) {
392 size = nx;
393 square = TMP_ALLOC_LIMBS (size);
394 MPN_COPY (square, PTR (x), nx);
395 } else {
396 size = nx << 1;
397 square = TMP_ALLOC_LIMBS (size);
398 mpn_sqr (square, PTR (x), nx);
399 size -= (square[size - 1] == 0);
400 }
401 ns = SIZ (mswing);
402 nx = size + ns;
403 px = MPZ_NEWALLOC (x, nx);
404 ASSERT (ns <= size);
405 cy = mpn_mul (px, square, size, PTR(mswing), ns); /* n!= n$ * floor(n/2)!^2 */
406
407 TMP_FREE;
408 SIZ(x) = nx - (cy == 0);
409 } while (s != 0);
410 TMP_FREE;
411 }
412 }
413 }
414
415 #undef FACTORS_PER_LIMB
416 #undef FACTOR_LIST_STORE
417