Home | History | Annotate | Line # | Download | only in mpz
oddfac_1.c revision 1.1.1.2
      1 /* mpz_oddfac_1(RESULT, N) -- Set RESULT to the odd factor of N!.
      2 
      3 Contributed to the GNU project by Marco Bodrato.
      4 
      5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
      6 IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
      7 IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
      8 DISAPPEAR IN A FUTURE GNU MP RELEASE.
      9 
     10 Copyright 2010-2012 Free Software Foundation, Inc.
     11 
     12 This file is part of the GNU MP Library.
     13 
     14 The GNU MP Library is free software; you can redistribute it and/or modify
     15 it under the terms of either:
     16 
     17   * the GNU Lesser General Public License as published by the Free
     18     Software Foundation; either version 3 of the License, or (at your
     19     option) any later version.
     20 
     21 or
     22 
     23   * the GNU General Public License as published by the Free Software
     24     Foundation; either version 2 of the License, or (at your option) any
     25     later version.
     26 
     27 or both in parallel, as here.
     28 
     29 The GNU MP Library is distributed in the hope that it will be useful, but
     30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     31 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     32 for more details.
     33 
     34 You should have received copies of the GNU General Public License and the
     35 GNU Lesser General Public License along with the GNU MP Library.  If not,
     36 see https://www.gnu.org/licenses/.  */
     37 
     38 #include "gmp.h"
     39 #include "gmp-impl.h"
     40 #include "longlong.h"
     41 
     42 /* TODO:
     43    - split this file in smaller parts with functions that can be recycled for different computations.
     44  */
     45 
     46 /**************************************************************/
     47 /* Section macros: common macros, for mswing/fac/bin (&sieve) */
     48 /**************************************************************/
     49 
     50 #define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I)			\
     51   if ((PR) > (MAX_PR)) {					\
     52     (VEC)[(I)++] = (PR);					\
     53     (PR) = 1;							\
     54   }
     55 
     56 #define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)		\
     57   do {								\
     58     if ((PR) > (MAX_PR)) {					\
     59       (VEC)[(I)++] = (PR);					\
     60       (PR) = (P);						\
     61     } else							\
     62       (PR) *= (P);						\
     63   } while (0)
     64 
     65 #define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)			\
     66     __max_i = (end);						\
     67 								\
     68     do {							\
     69       ++__i;							\
     70       if (((sieve)[__index] & __mask) == 0)			\
     71 	{							\
     72 	  (prime) = id_to_n(__i)
     73 
     74 #define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve)		\
     75   do {								\
     76     mp_limb_t __mask, __index, __max_i, __i;			\
     77 								\
     78     __i = (start)-(off);					\
     79     __index = __i / GMP_LIMB_BITS;				\
     80     __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS);		\
     81     __i += (off);						\
     82 								\
     83     LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
     84 
     85 #define LOOP_ON_SIEVE_STOP					\
     86 	}							\
     87       __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1);	\
     88       __index += __mask & 1;					\
     89     }  while (__i <= __max_i)					\
     90 
     91 #define LOOP_ON_SIEVE_END					\
     92     LOOP_ON_SIEVE_STOP;						\
     93   } while (0)
     94 
     95 /*********************************************************/
     96 /* Section sieve: sieving functions and tools for primes */
     97 /*********************************************************/
     98 
     99 #if WANT_ASSERT
    100 static mp_limb_t
    101 bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
    102 #endif
    103 
    104 /* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/
    105 static mp_limb_t
    106 id_to_n  (mp_limb_t id)  { return id*3+1+(id&1); }
    107 
    108 /* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
    109 static mp_limb_t
    110 n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
    111 
    112 #if WANT_ASSERT
    113 static mp_size_t
    114 primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
    115 #endif
    116 
    117 /*********************************************************/
    118 /* Section mswing: 2-multiswing factorial                 */
    119 /*********************************************************/
    120 
    121 /* Returns an approximation of the sqare root of x.  *
    122  * It gives: x <= limb_apprsqrt (x) ^ 2 < x * 9/4    */
    123 static mp_limb_t
    124 limb_apprsqrt (mp_limb_t x)
    125 {
    126   int s;
    127 
    128   ASSERT (x > 2);
    129   count_leading_zeros (s, x - 1);
    130   s = GMP_LIMB_BITS - 1 - s;
    131   return (CNST_LIMB(1) << (s >> 1)) + (CNST_LIMB(1) << ((s - 1) >> 1));
    132 }
    133 
    134 #if 0
    135 /* A count-then-exponentiate variant for SWING_A_PRIME */
    136 #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)		\
    137   do {							\
    138     mp_limb_t __q, __prime;				\
    139     int __exp;						\
    140     __prime = (P);					\
    141     __exp = 0;						\
    142     __q = (N);						\
    143     do {						\
    144       __q /= __prime;					\
    145       __exp += __q & 1;					\
    146     } while (__q >= __prime);				\
    147     if (__exp) { /* Store $prime^{exp}$ */		\
    148       for (__q = __prime; --__exp; __q *= __prime);	\
    149       FACTOR_LIST_STORE(__q, PR, MAX_PR, VEC, I);	\
    150     };							\
    151   } while (0)
    152 #else
    153 #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)	\
    154   do {						\
    155     mp_limb_t __q, __prime;			\
    156     __prime = (P);				\
    157     FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I);	\
    158     __q = (N);					\
    159     do {					\
    160       __q /= __prime;				\
    161       if ((__q & 1) != 0) (PR) *= __prime;	\
    162     } while (__q >= __prime);			\
    163   } while (0)
    164 #endif
    165 
    166 #define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)	\
    167   do {							\
    168     mp_limb_t __prime;					\
    169     __prime = (P);					\
    170     if ((((N) / __prime) & 1) != 0)			\
    171       FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I);	\
    172   } while (0)
    173 
    174 /* mpz_2multiswing_1 computes the odd part of the 2-multiswing
    175    factorial of the parameter n.  The result x is an odd positive
    176    integer so that multiswing(n,2) = x 2^a.
    177 
    178    Uses the algorithm described by Peter Luschny in "Divide, Swing and
    179    Conquer the Factorial!".
    180 
    181    The pointer sieve points to primesieve_size(n) limbs containing a
    182    bit-array where primes are marked as 0.
    183    Enough (FIXME: explain :-) limbs must be pointed by factors.
    184  */
    185 
    186 static void
    187 mpz_2multiswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors)
    188 {
    189   mp_limb_t prod, max_prod;
    190   mp_size_t j;
    191 
    192   ASSERT (n >= 26);
    193 
    194   j = 0;
    195   prod  = -(n & 1);
    196   n &= ~ CNST_LIMB(1); /* n-1, if n is odd */
    197 
    198   prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */
    199   max_prod = GMP_NUMB_MAX / (n-1);
    200 
    201   /* Handle prime = 3 separately. */
    202   SWING_A_PRIME (3, n, prod, max_prod, factors, j);
    203 
    204   /* Swing primes from 5 to n/3 */
    205   {
    206     mp_limb_t s;
    207 
    208     {
    209       mp_limb_t prime;
    210 
    211       s = limb_apprsqrt(n);
    212       ASSERT (s >= 5);
    213       s = n_to_bit (s);
    214       LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
    215       SWING_A_PRIME (prime, n, prod, max_prod, factors, j);
    216       LOOP_ON_SIEVE_END;
    217       s++;
    218     }
    219 
    220     ASSERT (max_prod <= GMP_NUMB_MAX / 3);
    221     ASSERT (bit_to_n (s) * bit_to_n (s) > n);
    222     ASSERT (s <= n_to_bit (n / 3));
    223     {
    224       mp_limb_t prime;
    225       mp_limb_t l_max_prod = max_prod * 3;
    226 
    227       LOOP_ON_SIEVE_BEGIN (prime, s, n_to_bit (n/3), 0, sieve);
    228       SH_SWING_A_PRIME (prime, n, prod, l_max_prod, factors, j);
    229       LOOP_ON_SIEVE_END;
    230     }
    231   }
    232 
    233   /* Store primes from (n+1)/2 to n */
    234   {
    235     mp_limb_t prime;
    236     LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n >> 1) + 1, n_to_bit (n), 0,sieve);
    237     FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
    238     LOOP_ON_SIEVE_END;
    239   }
    240 
    241   if (LIKELY (j != 0))
    242     {
    243       factors[j++] = prod;
    244       mpz_prodlimbs (x, factors, j);
    245     }
    246   else
    247     {
    248       PTR (x)[0] = prod;
    249       SIZ (x) = 1;
    250     }
    251 }
    252 
    253 #undef SWING_A_PRIME
    254 #undef SH_SWING_A_PRIME
    255 #undef LOOP_ON_SIEVE_END
    256 #undef LOOP_ON_SIEVE_STOP
    257 #undef LOOP_ON_SIEVE_BEGIN
    258 #undef LOOP_ON_SIEVE_CONTINUE
    259 #undef FACTOR_LIST_APPEND
    260 
    261 /*********************************************************/
    262 /* Section oddfac: odd factorial, needed also by binomial*/
    263 /*********************************************************/
    264 
    265 #if TUNE_PROGRAM_BUILD
    266 #define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD_LIMIT-1)+1))
    267 #else
    268 #define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD-1)+1))
    269 #endif
    270 
    271 /* mpz_oddfac_1 computes the odd part of the factorial of the
    272    parameter n.  I.e. n! = x 2^a, where x is the returned value: an
    273    odd positive integer.
    274 
    275    If flag != 0 a square is skipped in the DSC part, e.g.
    276    if n is odd, n > FAC_DSC_THRESHOLD and flag = 1, x is set to n!!.
    277 
    278    If n is too small, flag is ignored, and an ASSERT can be triggered.
    279 
    280    TODO: FAC_DSC_THRESHOLD is used here with two different roles:
    281     - to decide when prime factorisation is needed,
    282     - to stop the recursion, once sieving is done.
    283    Maybe two thresholds can do a better job.
    284  */
    285 void
    286 mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag)
    287 {
    288   ASSERT (n <= GMP_NUMB_MAX);
    289   ASSERT (flag == 0 || (flag == 1 && n > ODD_FACTORIAL_TABLE_LIMIT && ABOVE_THRESHOLD (n, FAC_DSC_THRESHOLD)));
    290 
    291   if (n <= ODD_FACTORIAL_TABLE_LIMIT)
    292     {
    293       PTR (x)[0] = __gmp_oddfac_table[n];
    294       SIZ (x) = 1;
    295     }
    296   else if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1)
    297     {
    298       mp_ptr   px;
    299 
    300       px = MPZ_NEWALLOC (x, 2);
    301       umul_ppmm (px[1], px[0], __gmp_odd2fac_table[(n - 1) >> 1], __gmp_oddfac_table[n >> 1]);
    302       SIZ (x) = 2;
    303     }
    304   else
    305     {
    306       unsigned s;
    307       mp_ptr   factors;
    308 
    309       s = 0;
    310       {
    311 	mp_limb_t tn;
    312 	mp_limb_t prod, max_prod, i;
    313 	mp_size_t j;
    314 	TMP_SDECL;
    315 
    316 #if TUNE_PROGRAM_BUILD
    317 	ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);
    318 	ASSERT (FAC_DSC_THRESHOLD >= 2 * (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2));
    319 #endif
    320 
    321 	/* Compute the number of recursive steps for the DSC algorithm. */
    322 	for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++)
    323 	  tn >>= 1;
    324 
    325 	j = 0;
    326 
    327 	TMP_SMARK;
    328 	factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB);
    329 	ASSERT (tn >= FACTORS_PER_LIMB);
    330 
    331 	prod = 1;
    332 #if TUNE_PROGRAM_BUILD
    333 	max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD_LIMIT;
    334 #else
    335 	max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD;
    336 #endif
    337 
    338 	ASSERT (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
    339 	do {
    340 	  i = ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2;
    341 	  factors[j++] = ODD_DOUBLEFACTORIAL_TABLE_MAX;
    342 	  do {
    343 	    FACTOR_LIST_STORE (i, prod, max_prod, factors, j);
    344 	    i += 2;
    345 	  } while (i <= tn);
    346 	  max_prod <<= 1;
    347 	  tn >>= 1;
    348 	} while (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
    349 
    350 	factors[j++] = prod;
    351 	factors[j++] = __gmp_odd2fac_table[(tn - 1) >> 1];
    352 	factors[j++] = __gmp_oddfac_table[tn >> 1];
    353 	mpz_prodlimbs (x, factors, j);
    354 
    355 	TMP_SFREE;
    356       }
    357 
    358       if (s != 0)
    359 	/* Use the algorithm described by Peter Luschny in "Divide,
    360 	   Swing and Conquer the Factorial!".
    361 
    362 	   Improvement: there are two temporary buffers, factors and
    363 	   square, that are never used together; with a good estimate
    364 	   of the maximal needed size, they could share a single
    365 	   allocation.
    366 	*/
    367 	{
    368 	  mpz_t mswing;
    369 	  mp_ptr sieve;
    370 	  mp_size_t size;
    371 	  TMP_DECL;
    372 
    373 	  TMP_MARK;
    374 
    375 	  flag--;
    376 	  size = n / GMP_NUMB_BITS + 4;
    377 	  ASSERT (primesieve_size (n - 1) <= size - (size / 2 + 1));
    378 	  /* 2-multiswing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS);
    379 	     one more can be overwritten by mul, another for the sieve */
    380 	  MPZ_TMP_INIT (mswing, size);
    381 	  /* Initialize size, so that ASSERT can check it correctly. */
    382 	  ASSERT_CODE (SIZ (mswing) = 0);
    383 
    384 	  /* Put the sieve on the second half, it will be overwritten by the last mswing. */
    385 	  sieve = PTR (mswing) + size / 2 + 1;
    386 
    387 	  size = (gmp_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1;
    388 
    389 	  factors = TMP_ALLOC_LIMBS (size);
    390 	  do {
    391 	    mp_ptr    square, px;
    392 	    mp_size_t nx, ns;
    393 	    mp_limb_t cy;
    394 	    TMP_DECL;
    395 
    396 	    s--;
    397 	    ASSERT (ABSIZ (mswing) < ALLOC (mswing) / 2); /* Check: sieve has not been overwritten */
    398 	    mpz_2multiswing_1 (mswing, n >> s, sieve, factors);
    399 
    400 	    TMP_MARK;
    401 	    nx = SIZ (x);
    402 	    if (s == flag) {
    403 	      size = nx;
    404 	      square = TMP_ALLOC_LIMBS (size);
    405 	      MPN_COPY (square, PTR (x), nx);
    406 	    } else {
    407 	      size = nx << 1;
    408 	      square = TMP_ALLOC_LIMBS (size);
    409 	      mpn_sqr (square, PTR (x), nx);
    410 	      size -= (square[size - 1] == 0);
    411 	    }
    412 	    ns = SIZ (mswing);
    413 	    nx = size + ns;
    414 	    px = MPZ_NEWALLOC (x, nx);
    415 	    ASSERT (ns <= size);
    416 	    cy = mpn_mul (px, square, size, PTR(mswing), ns); /* n!= n$ * floor(n/2)!^2 */
    417 
    418 	    TMP_FREE;
    419 	    SIZ(x) = nx - (cy == 0);
    420 	  } while (s != 0);
    421 	  TMP_FREE;
    422 	}
    423     }
    424 }
    425 
    426 #undef FACTORS_PER_LIMB
    427 #undef FACTOR_LIST_STORE
    428