oddfac_1.c revision 1.1.1.2 1 /* mpz_oddfac_1(RESULT, N) -- Set RESULT to the odd factor of N!.
2
3 Contributed to the GNU project by Marco Bodrato.
4
5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
6 IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
7 IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
8 DISAPPEAR IN A FUTURE GNU MP RELEASE.
9
10 Copyright 2010-2012 Free Software Foundation, Inc.
11
12 This file is part of the GNU MP Library.
13
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of either:
16
17 * the GNU Lesser General Public License as published by the Free
18 Software Foundation; either version 3 of the License, or (at your
19 option) any later version.
20
21 or
22
23 * the GNU General Public License as published by the Free Software
24 Foundation; either version 2 of the License, or (at your option) any
25 later version.
26
27 or both in parallel, as here.
28
29 The GNU MP Library is distributed in the hope that it will be useful, but
30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
32 for more details.
33
34 You should have received copies of the GNU General Public License and the
35 GNU Lesser General Public License along with the GNU MP Library. If not,
36 see https://www.gnu.org/licenses/. */
37
38 #include "gmp.h"
39 #include "gmp-impl.h"
40 #include "longlong.h"
41
42 /* TODO:
43 - split this file in smaller parts with functions that can be recycled for different computations.
44 */
45
46 /**************************************************************/
47 /* Section macros: common macros, for mswing/fac/bin (&sieve) */
48 /**************************************************************/
49
50 #define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I) \
51 if ((PR) > (MAX_PR)) { \
52 (VEC)[(I)++] = (PR); \
53 (PR) = 1; \
54 }
55
56 #define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \
57 do { \
58 if ((PR) > (MAX_PR)) { \
59 (VEC)[(I)++] = (PR); \
60 (PR) = (P); \
61 } else \
62 (PR) *= (P); \
63 } while (0)
64
65 #define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) \
66 __max_i = (end); \
67 \
68 do { \
69 ++__i; \
70 if (((sieve)[__index] & __mask) == 0) \
71 { \
72 (prime) = id_to_n(__i)
73
74 #define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \
75 do { \
76 mp_limb_t __mask, __index, __max_i, __i; \
77 \
78 __i = (start)-(off); \
79 __index = __i / GMP_LIMB_BITS; \
80 __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \
81 __i += (off); \
82 \
83 LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
84
85 #define LOOP_ON_SIEVE_STOP \
86 } \
87 __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \
88 __index += __mask & 1; \
89 } while (__i <= __max_i) \
90
91 #define LOOP_ON_SIEVE_END \
92 LOOP_ON_SIEVE_STOP; \
93 } while (0)
94
95 /*********************************************************/
96 /* Section sieve: sieving functions and tools for primes */
97 /*********************************************************/
98
99 #if WANT_ASSERT
100 static mp_limb_t
101 bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
102 #endif
103
104 /* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/
105 static mp_limb_t
106 id_to_n (mp_limb_t id) { return id*3+1+(id&1); }
107
108 /* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
109 static mp_limb_t
110 n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
111
112 #if WANT_ASSERT
113 static mp_size_t
114 primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
115 #endif
116
117 /*********************************************************/
118 /* Section mswing: 2-multiswing factorial */
119 /*********************************************************/
120
121 /* Returns an approximation of the sqare root of x. *
122 * It gives: x <= limb_apprsqrt (x) ^ 2 < x * 9/4 */
123 static mp_limb_t
124 limb_apprsqrt (mp_limb_t x)
125 {
126 int s;
127
128 ASSERT (x > 2);
129 count_leading_zeros (s, x - 1);
130 s = GMP_LIMB_BITS - 1 - s;
131 return (CNST_LIMB(1) << (s >> 1)) + (CNST_LIMB(1) << ((s - 1) >> 1));
132 }
133
134 #if 0
135 /* A count-then-exponentiate variant for SWING_A_PRIME */
136 #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
137 do { \
138 mp_limb_t __q, __prime; \
139 int __exp; \
140 __prime = (P); \
141 __exp = 0; \
142 __q = (N); \
143 do { \
144 __q /= __prime; \
145 __exp += __q & 1; \
146 } while (__q >= __prime); \
147 if (__exp) { /* Store $prime^{exp}$ */ \
148 for (__q = __prime; --__exp; __q *= __prime); \
149 FACTOR_LIST_STORE(__q, PR, MAX_PR, VEC, I); \
150 }; \
151 } while (0)
152 #else
153 #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
154 do { \
155 mp_limb_t __q, __prime; \
156 __prime = (P); \
157 FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \
158 __q = (N); \
159 do { \
160 __q /= __prime; \
161 if ((__q & 1) != 0) (PR) *= __prime; \
162 } while (__q >= __prime); \
163 } while (0)
164 #endif
165
166 #define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
167 do { \
168 mp_limb_t __prime; \
169 __prime = (P); \
170 if ((((N) / __prime) & 1) != 0) \
171 FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I); \
172 } while (0)
173
174 /* mpz_2multiswing_1 computes the odd part of the 2-multiswing
175 factorial of the parameter n. The result x is an odd positive
176 integer so that multiswing(n,2) = x 2^a.
177
178 Uses the algorithm described by Peter Luschny in "Divide, Swing and
179 Conquer the Factorial!".
180
181 The pointer sieve points to primesieve_size(n) limbs containing a
182 bit-array where primes are marked as 0.
183 Enough (FIXME: explain :-) limbs must be pointed by factors.
184 */
185
186 static void
187 mpz_2multiswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors)
188 {
189 mp_limb_t prod, max_prod;
190 mp_size_t j;
191
192 ASSERT (n >= 26);
193
194 j = 0;
195 prod = -(n & 1);
196 n &= ~ CNST_LIMB(1); /* n-1, if n is odd */
197
198 prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */
199 max_prod = GMP_NUMB_MAX / (n-1);
200
201 /* Handle prime = 3 separately. */
202 SWING_A_PRIME (3, n, prod, max_prod, factors, j);
203
204 /* Swing primes from 5 to n/3 */
205 {
206 mp_limb_t s;
207
208 {
209 mp_limb_t prime;
210
211 s = limb_apprsqrt(n);
212 ASSERT (s >= 5);
213 s = n_to_bit (s);
214 LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
215 SWING_A_PRIME (prime, n, prod, max_prod, factors, j);
216 LOOP_ON_SIEVE_END;
217 s++;
218 }
219
220 ASSERT (max_prod <= GMP_NUMB_MAX / 3);
221 ASSERT (bit_to_n (s) * bit_to_n (s) > n);
222 ASSERT (s <= n_to_bit (n / 3));
223 {
224 mp_limb_t prime;
225 mp_limb_t l_max_prod = max_prod * 3;
226
227 LOOP_ON_SIEVE_BEGIN (prime, s, n_to_bit (n/3), 0, sieve);
228 SH_SWING_A_PRIME (prime, n, prod, l_max_prod, factors, j);
229 LOOP_ON_SIEVE_END;
230 }
231 }
232
233 /* Store primes from (n+1)/2 to n */
234 {
235 mp_limb_t prime;
236 LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n >> 1) + 1, n_to_bit (n), 0,sieve);
237 FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
238 LOOP_ON_SIEVE_END;
239 }
240
241 if (LIKELY (j != 0))
242 {
243 factors[j++] = prod;
244 mpz_prodlimbs (x, factors, j);
245 }
246 else
247 {
248 PTR (x)[0] = prod;
249 SIZ (x) = 1;
250 }
251 }
252
253 #undef SWING_A_PRIME
254 #undef SH_SWING_A_PRIME
255 #undef LOOP_ON_SIEVE_END
256 #undef LOOP_ON_SIEVE_STOP
257 #undef LOOP_ON_SIEVE_BEGIN
258 #undef LOOP_ON_SIEVE_CONTINUE
259 #undef FACTOR_LIST_APPEND
260
261 /*********************************************************/
262 /* Section oddfac: odd factorial, needed also by binomial*/
263 /*********************************************************/
264
265 #if TUNE_PROGRAM_BUILD
266 #define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD_LIMIT-1)+1))
267 #else
268 #define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD-1)+1))
269 #endif
270
271 /* mpz_oddfac_1 computes the odd part of the factorial of the
272 parameter n. I.e. n! = x 2^a, where x is the returned value: an
273 odd positive integer.
274
275 If flag != 0 a square is skipped in the DSC part, e.g.
276 if n is odd, n > FAC_DSC_THRESHOLD and flag = 1, x is set to n!!.
277
278 If n is too small, flag is ignored, and an ASSERT can be triggered.
279
280 TODO: FAC_DSC_THRESHOLD is used here with two different roles:
281 - to decide when prime factorisation is needed,
282 - to stop the recursion, once sieving is done.
283 Maybe two thresholds can do a better job.
284 */
285 void
286 mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag)
287 {
288 ASSERT (n <= GMP_NUMB_MAX);
289 ASSERT (flag == 0 || (flag == 1 && n > ODD_FACTORIAL_TABLE_LIMIT && ABOVE_THRESHOLD (n, FAC_DSC_THRESHOLD)));
290
291 if (n <= ODD_FACTORIAL_TABLE_LIMIT)
292 {
293 PTR (x)[0] = __gmp_oddfac_table[n];
294 SIZ (x) = 1;
295 }
296 else if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1)
297 {
298 mp_ptr px;
299
300 px = MPZ_NEWALLOC (x, 2);
301 umul_ppmm (px[1], px[0], __gmp_odd2fac_table[(n - 1) >> 1], __gmp_oddfac_table[n >> 1]);
302 SIZ (x) = 2;
303 }
304 else
305 {
306 unsigned s;
307 mp_ptr factors;
308
309 s = 0;
310 {
311 mp_limb_t tn;
312 mp_limb_t prod, max_prod, i;
313 mp_size_t j;
314 TMP_SDECL;
315
316 #if TUNE_PROGRAM_BUILD
317 ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);
318 ASSERT (FAC_DSC_THRESHOLD >= 2 * (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2));
319 #endif
320
321 /* Compute the number of recursive steps for the DSC algorithm. */
322 for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++)
323 tn >>= 1;
324
325 j = 0;
326
327 TMP_SMARK;
328 factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB);
329 ASSERT (tn >= FACTORS_PER_LIMB);
330
331 prod = 1;
332 #if TUNE_PROGRAM_BUILD
333 max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD_LIMIT;
334 #else
335 max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD;
336 #endif
337
338 ASSERT (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
339 do {
340 i = ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2;
341 factors[j++] = ODD_DOUBLEFACTORIAL_TABLE_MAX;
342 do {
343 FACTOR_LIST_STORE (i, prod, max_prod, factors, j);
344 i += 2;
345 } while (i <= tn);
346 max_prod <<= 1;
347 tn >>= 1;
348 } while (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
349
350 factors[j++] = prod;
351 factors[j++] = __gmp_odd2fac_table[(tn - 1) >> 1];
352 factors[j++] = __gmp_oddfac_table[tn >> 1];
353 mpz_prodlimbs (x, factors, j);
354
355 TMP_SFREE;
356 }
357
358 if (s != 0)
359 /* Use the algorithm described by Peter Luschny in "Divide,
360 Swing and Conquer the Factorial!".
361
362 Improvement: there are two temporary buffers, factors and
363 square, that are never used together; with a good estimate
364 of the maximal needed size, they could share a single
365 allocation.
366 */
367 {
368 mpz_t mswing;
369 mp_ptr sieve;
370 mp_size_t size;
371 TMP_DECL;
372
373 TMP_MARK;
374
375 flag--;
376 size = n / GMP_NUMB_BITS + 4;
377 ASSERT (primesieve_size (n - 1) <= size - (size / 2 + 1));
378 /* 2-multiswing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS);
379 one more can be overwritten by mul, another for the sieve */
380 MPZ_TMP_INIT (mswing, size);
381 /* Initialize size, so that ASSERT can check it correctly. */
382 ASSERT_CODE (SIZ (mswing) = 0);
383
384 /* Put the sieve on the second half, it will be overwritten by the last mswing. */
385 sieve = PTR (mswing) + size / 2 + 1;
386
387 size = (gmp_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1;
388
389 factors = TMP_ALLOC_LIMBS (size);
390 do {
391 mp_ptr square, px;
392 mp_size_t nx, ns;
393 mp_limb_t cy;
394 TMP_DECL;
395
396 s--;
397 ASSERT (ABSIZ (mswing) < ALLOC (mswing) / 2); /* Check: sieve has not been overwritten */
398 mpz_2multiswing_1 (mswing, n >> s, sieve, factors);
399
400 TMP_MARK;
401 nx = SIZ (x);
402 if (s == flag) {
403 size = nx;
404 square = TMP_ALLOC_LIMBS (size);
405 MPN_COPY (square, PTR (x), nx);
406 } else {
407 size = nx << 1;
408 square = TMP_ALLOC_LIMBS (size);
409 mpn_sqr (square, PTR (x), nx);
410 size -= (square[size - 1] == 0);
411 }
412 ns = SIZ (mswing);
413 nx = size + ns;
414 px = MPZ_NEWALLOC (x, nx);
415 ASSERT (ns <= size);
416 cy = mpn_mul (px, square, size, PTR(mswing), ns); /* n!= n$ * floor(n/2)!^2 */
417
418 TMP_FREE;
419 SIZ(x) = nx - (cy == 0);
420 } while (s != 0);
421 TMP_FREE;
422 }
423 }
424 }
425
426 #undef FACTORS_PER_LIMB
427 #undef FACTOR_LIST_STORE
428