Home | History | Annotate | Line # | Download | only in mpz
oddfac_1.c revision 1.1
      1 /* mpz_oddfac_1(RESULT, N) -- Set RESULT to the odd factor of N!.
      2 
      3 Contributed to the GNU project by Marco Bodrato.
      4 
      5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
      6 IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
      7 IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
      8 DISAPPEAR IN A FUTURE GNU MP RELEASE.
      9 
     10 Copyright 2010, 2011, 2012 Free Software Foundation, Inc.
     11 
     12 This file is part of the GNU MP Library.
     13 
     14 The GNU MP Library is free software; you can redistribute it and/or modify
     15 it under the terms of the GNU Lesser General Public License as published by
     16 the Free Software Foundation; either version 3 of the License, or (at your
     17 option) any later version.
     18 
     19 The GNU MP Library is distributed in the hope that it will be useful, but
     20 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     21 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     22 License for more details.
     23 
     24 You should have received a copy of the GNU Lesser General Public License
     25 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
     26 
     27 #include "gmp.h"
     28 #include "gmp-impl.h"
     29 #include "longlong.h"
     30 
     31 /* TODO:
     32    - split this file in smaller parts with functions that can be recycled for different computations.
     33  */
     34 
     35 /**************************************************************/
     36 /* Section macros: common macros, for mswing/fac/bin (&sieve) */
     37 /**************************************************************/
     38 
     39 #define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I)			\
     40   if ((PR) > (MAX_PR)) {					\
     41     (VEC)[(I)++] = (PR);					\
     42     (PR) = 1;							\
     43   }
     44 
     45 #define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)		\
     46   do {								\
     47     if ((PR) > (MAX_PR)) {					\
     48       (VEC)[(I)++] = (PR);					\
     49       (PR) = (P);						\
     50     } else							\
     51       (PR) *= (P);						\
     52   } while (0)
     53 
     54 #define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)			\
     55     __max_i = (end);						\
     56 								\
     57     do {							\
     58       ++__i;							\
     59       if (((sieve)[__index] & __mask) == 0)			\
     60 	{							\
     61 	  (prime) = id_to_n(__i)
     62 
     63 #define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve)		\
     64   do {								\
     65     mp_limb_t __mask, __index, __max_i, __i;			\
     66 								\
     67     __i = (start)-(off);					\
     68     __index = __i / GMP_LIMB_BITS;				\
     69     __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS);		\
     70     __i += (off);						\
     71 								\
     72     LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
     73 
     74 #define LOOP_ON_SIEVE_STOP					\
     75 	}							\
     76       __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1);	\
     77       __index += __mask & 1;					\
     78     }  while (__i <= __max_i)					\
     79 
     80 #define LOOP_ON_SIEVE_END					\
     81     LOOP_ON_SIEVE_STOP;						\
     82   } while (0)
     83 
     84 /*********************************************************/
     85 /* Section sieve: sieving functions and tools for primes */
     86 /*********************************************************/
     87 
     88 #if WANT_ASSERT
     89 static mp_limb_t
     90 bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
     91 #endif
     92 
     93 /* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/
     94 static mp_limb_t
     95 id_to_n  (mp_limb_t id)  { return id*3+1+(id&1); }
     96 
     97 /* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
     98 static mp_limb_t
     99 n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
    100 
    101 #if WANT_ASSERT
    102 static mp_size_t
    103 primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
    104 #endif
    105 
    106 /*********************************************************/
    107 /* Section mswing: 2-multiswing factorial                 */
    108 /*********************************************************/
    109 
    110 /* Returns an approximation of the sqare root of x.  *
    111  * It gives: x <= limb_apprsqrt (x) ^ 2 < x * 9/4    */
    112 static mp_limb_t
    113 limb_apprsqrt (mp_limb_t x)
    114 {
    115   int s;
    116 
    117   ASSERT (x > 2);
    118   count_leading_zeros (s, x - 1);
    119   s = GMP_LIMB_BITS - 1 - s;
    120   return (CNST_LIMB(1) << (s >> 1)) + (CNST_LIMB(1) << ((s - 1) >> 1));
    121 }
    122 
    123 #if 0
    124 /* A count-then-exponentiate variant for SWING_A_PRIME */
    125 #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)		\
    126   do {							\
    127     mp_limb_t __q, __prime;				\
    128     int __exp;						\
    129     __prime = (P);					\
    130     __exp = 0;						\
    131     __q = (N);						\
    132     do {						\
    133       __q /= __prime;					\
    134       __exp += __q & 1;					\
    135     } while (__q >= __prime);				\
    136     if (__exp) { /* Store $prime^{exp}$ */		\
    137       for (__q = __prime; --__exp; __q *= __prime);	\
    138       FACTOR_LIST_STORE(__q, PR, MAX_PR, VEC, I);	\
    139     };							\
    140   } while (0)
    141 #else
    142 #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)	\
    143   do {						\
    144     mp_limb_t __q, __prime;			\
    145     __prime = (P);				\
    146     FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I);	\
    147     __q = (N);					\
    148     do {					\
    149       __q /= __prime;				\
    150       if ((__q & 1) != 0) (PR) *= __prime;	\
    151     } while (__q >= __prime);			\
    152   } while (0)
    153 #endif
    154 
    155 #define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)	\
    156   do {							\
    157     mp_limb_t __prime;					\
    158     __prime = (P);					\
    159     if ((((N) / __prime) & 1) != 0)			\
    160       FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I);	\
    161   } while (0)
    162 
    163 /* mpz_2multiswing_1 computes the odd part of the 2-multiswing
    164    factorial of the parameter n.  The result x is an odd positive
    165    integer so that multiswing(n,2) = x 2^a.
    166 
    167    Uses the algorithm described by Peter Luschny in "Divide, Swing and
    168    Conquer the Factorial!".
    169 
    170    The pointer sieve points to primesieve_size(n) limbs containing a
    171    bit-array where primes are marked as 0.
    172    Enough (FIXME: explain :-) limbs must be pointed by factors.
    173  */
    174 
    175 static void
    176 mpz_2multiswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors)
    177 {
    178   mp_limb_t prod, max_prod;
    179   mp_size_t j;
    180 
    181   ASSERT (n >= 26);
    182 
    183   j = 0;
    184   prod  = -(n & 1);
    185   n &= ~ CNST_LIMB(1); /* n-1, if n is odd */
    186 
    187   prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */
    188   max_prod = GMP_NUMB_MAX / (n-1);
    189 
    190   /* Handle prime = 3 separately. */
    191   SWING_A_PRIME (3, n, prod, max_prod, factors, j);
    192 
    193   /* Swing primes from 5 to n/3 */
    194   {
    195     mp_limb_t s;
    196 
    197     {
    198       mp_limb_t prime;
    199 
    200       s = limb_apprsqrt(n);
    201       ASSERT (s >= 5);
    202       s = n_to_bit (s);
    203       LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
    204       SWING_A_PRIME (prime, n, prod, max_prod, factors, j);
    205       LOOP_ON_SIEVE_END;
    206       s++;
    207     }
    208 
    209     ASSERT (max_prod <= GMP_NUMB_MAX / 3);
    210     ASSERT (bit_to_n (s) * bit_to_n (s) > n);
    211     ASSERT (s <= n_to_bit (n / 3));
    212     {
    213       mp_limb_t prime;
    214       mp_limb_t l_max_prod = max_prod * 3;
    215 
    216       LOOP_ON_SIEVE_BEGIN (prime, s, n_to_bit (n/3), 0, sieve);
    217       SH_SWING_A_PRIME (prime, n, prod, l_max_prod, factors, j);
    218       LOOP_ON_SIEVE_END;
    219     }
    220   }
    221 
    222   /* Store primes from (n+1)/2 to n */
    223   {
    224     mp_limb_t prime;
    225     LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n >> 1) + 1, n_to_bit (n), 0,sieve);
    226     FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
    227     LOOP_ON_SIEVE_END;
    228   }
    229 
    230   if (LIKELY (j != 0))
    231     {
    232       factors[j++] = prod;
    233       mpz_prodlimbs (x, factors, j);
    234     }
    235   else
    236     {
    237       PTR (x)[0] = prod;
    238       SIZ (x) = 1;
    239     }
    240 }
    241 
    242 #undef SWING_A_PRIME
    243 #undef SH_SWING_A_PRIME
    244 #undef LOOP_ON_SIEVE_END
    245 #undef LOOP_ON_SIEVE_STOP
    246 #undef LOOP_ON_SIEVE_BEGIN
    247 #undef LOOP_ON_SIEVE_CONTINUE
    248 #undef FACTOR_LIST_APPEND
    249 
    250 /*********************************************************/
    251 /* Section oddfac: odd factorial, needed also by binomial*/
    252 /*********************************************************/
    253 
    254 #if TUNE_PROGRAM_BUILD
    255 #define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD_LIMIT-1)+1))
    256 #else
    257 #define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD-1)+1))
    258 #endif
    259 
    260 /* mpz_oddfac_1 computes the odd part of the factorial of the
    261    parameter n.  I.e. n! = x 2^a, where x is the returned value: an
    262    odd positive integer.
    263 
    264    If flag != 0 a square is skipped in the DSC part, e.g.
    265    if n is odd, n > FAC_DSC_THRESHOLD and flag = 1, x is set to n!!.
    266 
    267    If n is too small, flag is ignored, and an ASSERT can be triggered.
    268 
    269    TODO: FAC_DSC_THRESHOLD is used here with two different roles:
    270     - to decide when prime factorisation is needed,
    271     - to stop the recursion, once sieving is done.
    272    Maybe two thresholds can do a better job.
    273  */
    274 void
    275 mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag)
    276 {
    277   ASSERT (n <= GMP_NUMB_MAX);
    278   ASSERT (flag == 0 || (flag == 1 && n > ODD_FACTORIAL_TABLE_LIMIT && ABOVE_THRESHOLD (n, FAC_DSC_THRESHOLD)));
    279 
    280   if (n <= ODD_FACTORIAL_TABLE_LIMIT)
    281     {
    282       PTR (x)[0] = __gmp_oddfac_table[n];
    283       SIZ (x) = 1;
    284     }
    285   else if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1)
    286     {
    287       mp_ptr   px;
    288 
    289       px = MPZ_NEWALLOC (x, 2);
    290       umul_ppmm (px[1], px[0], __gmp_odd2fac_table[(n - 1) >> 1], __gmp_oddfac_table[n >> 1]);
    291       SIZ (x) = 2;
    292     }
    293   else
    294     {
    295       unsigned s;
    296       mp_ptr   factors;
    297 
    298       s = 0;
    299       {
    300 	mp_limb_t tn;
    301 	mp_limb_t prod, max_prod, i;
    302 	mp_size_t j;
    303 	TMP_SDECL;
    304 
    305 #if TUNE_PROGRAM_BUILD
    306 	ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);
    307 	ASSERT (FAC_DSC_THRESHOLD >= 2 * (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2));
    308 #endif
    309 
    310 	/* Compute the number of recursive steps for the DSC algorithm. */
    311 	for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++)
    312 	  tn >>= 1;
    313 
    314 	j = 0;
    315 
    316 	TMP_SMARK;
    317 	factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB);
    318 	ASSERT (tn >= FACTORS_PER_LIMB);
    319 
    320 	prod = 1;
    321 #if TUNE_PROGRAM_BUILD
    322 	max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD_LIMIT;
    323 #else
    324 	max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD;
    325 #endif
    326 
    327 	ASSERT (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
    328 	do {
    329 	  i = ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2;
    330 	  factors[j++] = ODD_DOUBLEFACTORIAL_TABLE_MAX;
    331 	  do {
    332 	    FACTOR_LIST_STORE (i, prod, max_prod, factors, j);
    333 	    i += 2;
    334 	  } while (i <= tn);
    335 	  max_prod <<= 1;
    336 	  tn >>= 1;
    337 	} while (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
    338 
    339 	factors[j++] = prod;
    340 	factors[j++] = __gmp_odd2fac_table[(tn - 1) >> 1];
    341 	factors[j++] = __gmp_oddfac_table[tn >> 1];
    342 	mpz_prodlimbs (x, factors, j);
    343 
    344 	TMP_SFREE;
    345       }
    346 
    347       if (s != 0)
    348 	/* Use the algorithm described by Peter Luschny in "Divide,
    349 	   Swing and Conquer the Factorial!".
    350 
    351 	   Improvement: there are two temporary buffers, factors and
    352 	   square, that are never used together; with a good estimate
    353 	   of the maximal needed size, they could share a single
    354 	   allocation.
    355 	*/
    356 	{
    357 	  mpz_t mswing;
    358 	  mp_ptr sieve;
    359 	  mp_size_t size;
    360 	  TMP_DECL;
    361 
    362 	  TMP_MARK;
    363 
    364 	  flag--;
    365 	  size = n / GMP_NUMB_BITS + 4;
    366 	  ASSERT (primesieve_size (n - 1) <= size - (size / 2 + 1));
    367 	  /* 2-multiswing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS);
    368 	     one more can be overwritten by mul, another for the sieve */
    369 	  MPZ_TMP_INIT (mswing, size);
    370 	  /* Initialize size, so that ASSERT can check it correctly. */
    371 	  ASSERT_CODE (SIZ (mswing) = 0);
    372 
    373 	  /* Put the sieve on the second half, it will be overwritten by the last mswing. */
    374 	  sieve = PTR (mswing) + size / 2 + 1;
    375 
    376 	  size = (gmp_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1;
    377 
    378 	  factors = TMP_ALLOC_LIMBS (size);
    379 	  do {
    380 	    mp_ptr    square, px;
    381 	    mp_size_t nx, ns;
    382 	    mp_limb_t cy;
    383 	    TMP_DECL;
    384 
    385 	    s--;
    386 	    ASSERT (ABSIZ (mswing) < ALLOC (mswing) / 2); /* Check: sieve has not been overwritten */
    387 	    mpz_2multiswing_1 (mswing, n >> s, sieve, factors);
    388 
    389 	    TMP_MARK;
    390 	    nx = SIZ (x);
    391 	    if (s == flag) {
    392 	      size = nx;
    393 	      square = TMP_ALLOC_LIMBS (size);
    394 	      MPN_COPY (square, PTR (x), nx);
    395 	    } else {
    396 	      size = nx << 1;
    397 	      square = TMP_ALLOC_LIMBS (size);
    398 	      mpn_sqr (square, PTR (x), nx);
    399 	      size -= (square[size - 1] == 0);
    400 	    }
    401 	    ns = SIZ (mswing);
    402 	    nx = size + ns;
    403 	    px = MPZ_NEWALLOC (x, nx);
    404 	    ASSERT (ns <= size);
    405 	    cy = mpn_mul (px, square, size, PTR(mswing), ns); /* n!= n$ * floor(n/2)!^2 */
    406 
    407 	    TMP_FREE;
    408 	    SIZ(x) = nx - (cy == 0);
    409 	  } while (s != 0);
    410 	  TMP_FREE;
    411 	}
    412     }
    413 }
    414 
    415 #undef FACTORS_PER_LIMB
    416 #undef FACTOR_LIST_STORE
    417