Home | History | Annotate | Line # | Download | only in generic
      1      1.1  mrg /* hgcd.c.
      2      1.1  mrg 
      3      1.1  mrg    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
      4      1.1  mrg    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
      5      1.1  mrg    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
      6      1.1  mrg 
      7  1.1.1.3  mrg Copyright 2003-2005, 2008, 2011, 2012 Free Software Foundation, Inc.
      8      1.1  mrg 
      9      1.1  mrg This file is part of the GNU MP Library.
     10      1.1  mrg 
     11      1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
     12  1.1.1.3  mrg it under the terms of either:
     13  1.1.1.3  mrg 
     14  1.1.1.3  mrg   * the GNU Lesser General Public License as published by the Free
     15  1.1.1.3  mrg     Software Foundation; either version 3 of the License, or (at your
     16  1.1.1.3  mrg     option) any later version.
     17  1.1.1.3  mrg 
     18  1.1.1.3  mrg or
     19  1.1.1.3  mrg 
     20  1.1.1.3  mrg   * the GNU General Public License as published by the Free Software
     21  1.1.1.3  mrg     Foundation; either version 2 of the License, or (at your option) any
     22  1.1.1.3  mrg     later version.
     23  1.1.1.3  mrg 
     24  1.1.1.3  mrg or both in parallel, as here.
     25      1.1  mrg 
     26      1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
     27      1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     28  1.1.1.3  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     29  1.1.1.3  mrg for more details.
     30      1.1  mrg 
     31  1.1.1.3  mrg You should have received copies of the GNU General Public License and the
     32  1.1.1.3  mrg GNU Lesser General Public License along with the GNU MP Library.  If not,
     33  1.1.1.3  mrg see https://www.gnu.org/licenses/.  */
     34      1.1  mrg 
     35      1.1  mrg #include "gmp-impl.h"
     36      1.1  mrg #include "longlong.h"
     37      1.1  mrg 
     38      1.1  mrg 
     39      1.1  mrg /* Size analysis for hgcd:
     40      1.1  mrg 
     41      1.1  mrg    For the recursive calls, we have n1 <= ceil(n / 2). Then the
     42      1.1  mrg    storage need is determined by the storage for the recursive call
     43      1.1  mrg    computing M1, and hgcd_matrix_adjust and hgcd_matrix_mul calls that use M1
     44      1.1  mrg    (after this, the storage needed for M1 can be recycled).
     45      1.1  mrg 
     46      1.1  mrg    Let S(r) denote the required storage. For M1 we need 4 * (ceil(n1/2) + 1)
     47      1.1  mrg    = 4 * (ceil(n/4) + 1), for the hgcd_matrix_adjust call, we need n + 2,
     48      1.1  mrg    and for the hgcd_matrix_mul, we may need 3 ceil(n/2) + 8. In total,
     49      1.1  mrg    4 * ceil(n/4) + 3 ceil(n/2) + 12 <= 10 ceil(n/4) + 12.
     50      1.1  mrg 
     51      1.1  mrg    For the recursive call, we need S(n1) = S(ceil(n/2)).
     52      1.1  mrg 
     53      1.1  mrg    S(n) <= 10*ceil(n/4) + 12 + S(ceil(n/2))
     54      1.1  mrg 	<= 10*(ceil(n/4) + ... + ceil(n/2^(1+k))) + 12k + S(ceil(n/2^k))
     55      1.1  mrg 	<= 10*(2 ceil(n/4) + k) + 12k + S(ceil(n/2^k))
     56      1.1  mrg 	<= 20 ceil(n/4) + 22k + S(ceil(n/2^k))
     57      1.1  mrg */
     58      1.1  mrg 
     59      1.1  mrg mp_size_t
     60      1.1  mrg mpn_hgcd_itch (mp_size_t n)
     61      1.1  mrg {
     62      1.1  mrg   unsigned k;
     63      1.1  mrg   int count;
     64      1.1  mrg   mp_size_t nscaled;
     65      1.1  mrg 
     66      1.1  mrg   if (BELOW_THRESHOLD (n, HGCD_THRESHOLD))
     67  1.1.1.2  mrg     return n;
     68      1.1  mrg 
     69      1.1  mrg   /* Get the recursion depth. */
     70      1.1  mrg   nscaled = (n - 1) / (HGCD_THRESHOLD - 1);
     71      1.1  mrg   count_leading_zeros (count, nscaled);
     72      1.1  mrg   k = GMP_LIMB_BITS - count;
     73      1.1  mrg 
     74  1.1.1.2  mrg   return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
     75      1.1  mrg }
     76      1.1  mrg 
     77      1.1  mrg /* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
     78      1.1  mrg    with elements of size at most (n+1)/2 - 1. Returns new size of a,
     79      1.1  mrg    b, or zero if no reduction is possible. */
     80      1.1  mrg 
     81      1.1  mrg mp_size_t
     82      1.1  mrg mpn_hgcd (mp_ptr ap, mp_ptr bp, mp_size_t n,
     83      1.1  mrg 	  struct hgcd_matrix *M, mp_ptr tp)
     84      1.1  mrg {
     85      1.1  mrg   mp_size_t s = n/2 + 1;
     86      1.1  mrg 
     87  1.1.1.2  mrg   mp_size_t nn;
     88      1.1  mrg   int success = 0;
     89      1.1  mrg 
     90      1.1  mrg   if (n <= s)
     91      1.1  mrg     /* Happens when n <= 2, a fairly uninteresting case but exercised
     92      1.1  mrg        by the random inputs of the testsuite. */
     93      1.1  mrg     return 0;
     94      1.1  mrg 
     95      1.1  mrg   ASSERT ((ap[n-1] | bp[n-1]) > 0);
     96      1.1  mrg 
     97      1.1  mrg   ASSERT ((n+1)/2 - 1 < M->alloc);
     98      1.1  mrg 
     99  1.1.1.2  mrg   if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD))
    100      1.1  mrg     {
    101  1.1.1.2  mrg       mp_size_t n2 = (3*n)/4 + 1;
    102  1.1.1.2  mrg       mp_size_t p = n/2;
    103      1.1  mrg 
    104  1.1.1.2  mrg       nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
    105  1.1.1.2  mrg       if (nn)
    106  1.1.1.2  mrg 	{
    107  1.1.1.2  mrg 	  n = nn;
    108  1.1.1.2  mrg 	  success = 1;
    109  1.1.1.2  mrg 	}
    110  1.1.1.2  mrg 
    111  1.1.1.3  mrg       /* NOTE: It appears this loop never runs more than once (at
    112  1.1.1.2  mrg 	 least when not recursing to hgcd_appr). */
    113  1.1.1.2  mrg       while (n > n2)
    114  1.1.1.2  mrg 	{
    115  1.1.1.2  mrg 	  /* Needs n + 1 storage */
    116  1.1.1.2  mrg 	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
    117  1.1.1.2  mrg 	  if (!nn)
    118  1.1.1.2  mrg 	    return success ? n : 0;
    119      1.1  mrg 
    120  1.1.1.2  mrg 	  n = nn;
    121  1.1.1.2  mrg 	  success = 1;
    122  1.1.1.2  mrg 	}
    123      1.1  mrg 
    124  1.1.1.2  mrg       if (n > s + 2)
    125      1.1  mrg 	{
    126  1.1.1.2  mrg 	  struct hgcd_matrix M1;
    127  1.1.1.2  mrg 	  mp_size_t scratch;
    128      1.1  mrg 
    129  1.1.1.2  mrg 	  p = 2*s - n + 1;
    130  1.1.1.2  mrg 	  scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
    131      1.1  mrg 
    132  1.1.1.2  mrg 	  mpn_hgcd_matrix_init(&M1, n - p, tp);
    133      1.1  mrg 
    134  1.1.1.2  mrg 	  /* FIXME: Should use hgcd_reduce, but that may require more
    135  1.1.1.2  mrg 	     scratch space, which requires review. */
    136      1.1  mrg 
    137  1.1.1.2  mrg 	  nn = mpn_hgcd (ap + p, bp + p, n - p, &M1, tp + scratch);
    138  1.1.1.2  mrg 	  if (nn > 0)
    139  1.1.1.2  mrg 	    {
    140  1.1.1.2  mrg 	      /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
    141  1.1.1.2  mrg 	      ASSERT (M->n + 2 >= M1.n);
    142      1.1  mrg 
    143  1.1.1.2  mrg 	      /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
    144  1.1.1.2  mrg 		 then either q or q + 1 is a correct quotient, and M1 will
    145  1.1.1.2  mrg 		 start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
    146  1.1.1.2  mrg 		 rules out the case that the size of M * M1 is much
    147  1.1.1.2  mrg 		 smaller than the expected M->n + M1->n. */
    148      1.1  mrg 
    149  1.1.1.2  mrg 	      ASSERT (M->n + M1.n < M->alloc);
    150      1.1  mrg 
    151  1.1.1.2  mrg 	      /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
    152  1.1.1.2  mrg 		 = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
    153  1.1.1.2  mrg 	      n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
    154      1.1  mrg 
    155  1.1.1.2  mrg 	      /* We need a bound for of M->n + M1.n. Let n be the original
    156  1.1.1.2  mrg 		 input size. Then
    157  1.1.1.2  mrg 
    158  1.1.1.2  mrg 		 ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
    159  1.1.1.2  mrg 
    160  1.1.1.2  mrg 		 and it follows that
    161  1.1.1.2  mrg 
    162  1.1.1.2  mrg 		 M.n + M1.n <= ceil(n/2) + 1
    163  1.1.1.2  mrg 
    164  1.1.1.2  mrg 		 Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
    165  1.1.1.2  mrg 		 amount of needed scratch space. */
    166  1.1.1.2  mrg 	      mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
    167  1.1.1.2  mrg 	      success = 1;
    168  1.1.1.2  mrg 	    }
    169      1.1  mrg 	}
    170      1.1  mrg     }
    171      1.1  mrg 
    172      1.1  mrg   for (;;)
    173      1.1  mrg     {
    174      1.1  mrg       /* Needs s+3 < n */
    175  1.1.1.2  mrg       nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
    176      1.1  mrg       if (!nn)
    177      1.1  mrg 	return success ? n : 0;
    178      1.1  mrg 
    179      1.1  mrg       n = nn;
    180      1.1  mrg       success = 1;
    181      1.1  mrg     }
    182      1.1  mrg }
    183