Home | History | Annotate | Line # | Download | only in generic
hgcd.c revision 1.1.1.2
      1      1.1  mrg /* hgcd.c.
      2      1.1  mrg 
      3      1.1  mrg    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
      4      1.1  mrg    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
      5      1.1  mrg    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
      6      1.1  mrg 
      7  1.1.1.2  mrg Copyright 2003, 2004, 2005, 2008, 2011, 2012 Free Software Foundation, Inc.
      8      1.1  mrg 
      9      1.1  mrg This file is part of the GNU MP Library.
     10      1.1  mrg 
     11      1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
     12      1.1  mrg it under the terms of the GNU Lesser General Public License as published by
     13      1.1  mrg the Free Software Foundation; either version 3 of the License, or (at your
     14      1.1  mrg option) any later version.
     15      1.1  mrg 
     16      1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
     17      1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     18      1.1  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     19      1.1  mrg License for more details.
     20      1.1  mrg 
     21      1.1  mrg You should have received a copy of the GNU Lesser General Public License
     22      1.1  mrg along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
     23      1.1  mrg 
     24      1.1  mrg #include "gmp.h"
     25      1.1  mrg #include "gmp-impl.h"
     26      1.1  mrg #include "longlong.h"
     27      1.1  mrg 
     28      1.1  mrg 
     29      1.1  mrg /* Size analysis for hgcd:
     30      1.1  mrg 
     31      1.1  mrg    For the recursive calls, we have n1 <= ceil(n / 2). Then the
     32      1.1  mrg    storage need is determined by the storage for the recursive call
     33      1.1  mrg    computing M1, and hgcd_matrix_adjust and hgcd_matrix_mul calls that use M1
     34      1.1  mrg    (after this, the storage needed for M1 can be recycled).
     35      1.1  mrg 
     36      1.1  mrg    Let S(r) denote the required storage. For M1 we need 4 * (ceil(n1/2) + 1)
     37      1.1  mrg    = 4 * (ceil(n/4) + 1), for the hgcd_matrix_adjust call, we need n + 2,
     38      1.1  mrg    and for the hgcd_matrix_mul, we may need 3 ceil(n/2) + 8. In total,
     39      1.1  mrg    4 * ceil(n/4) + 3 ceil(n/2) + 12 <= 10 ceil(n/4) + 12.
     40      1.1  mrg 
     41      1.1  mrg    For the recursive call, we need S(n1) = S(ceil(n/2)).
     42      1.1  mrg 
     43      1.1  mrg    S(n) <= 10*ceil(n/4) + 12 + S(ceil(n/2))
     44      1.1  mrg 	<= 10*(ceil(n/4) + ... + ceil(n/2^(1+k))) + 12k + S(ceil(n/2^k))
     45      1.1  mrg 	<= 10*(2 ceil(n/4) + k) + 12k + S(ceil(n/2^k))
     46      1.1  mrg 	<= 20 ceil(n/4) + 22k + S(ceil(n/2^k))
     47      1.1  mrg */
     48      1.1  mrg 
     49      1.1  mrg mp_size_t
     50      1.1  mrg mpn_hgcd_itch (mp_size_t n)
     51      1.1  mrg {
     52      1.1  mrg   unsigned k;
     53      1.1  mrg   int count;
     54      1.1  mrg   mp_size_t nscaled;
     55      1.1  mrg 
     56      1.1  mrg   if (BELOW_THRESHOLD (n, HGCD_THRESHOLD))
     57  1.1.1.2  mrg     return n;
     58      1.1  mrg 
     59      1.1  mrg   /* Get the recursion depth. */
     60      1.1  mrg   nscaled = (n - 1) / (HGCD_THRESHOLD - 1);
     61      1.1  mrg   count_leading_zeros (count, nscaled);
     62      1.1  mrg   k = GMP_LIMB_BITS - count;
     63      1.1  mrg 
     64  1.1.1.2  mrg   return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
     65      1.1  mrg }
     66      1.1  mrg 
     67      1.1  mrg /* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
     68      1.1  mrg    with elements of size at most (n+1)/2 - 1. Returns new size of a,
     69      1.1  mrg    b, or zero if no reduction is possible. */
     70      1.1  mrg 
     71      1.1  mrg mp_size_t
     72      1.1  mrg mpn_hgcd (mp_ptr ap, mp_ptr bp, mp_size_t n,
     73      1.1  mrg 	  struct hgcd_matrix *M, mp_ptr tp)
     74      1.1  mrg {
     75      1.1  mrg   mp_size_t s = n/2 + 1;
     76      1.1  mrg 
     77  1.1.1.2  mrg   mp_size_t nn;
     78      1.1  mrg   int success = 0;
     79      1.1  mrg 
     80      1.1  mrg   if (n <= s)
     81      1.1  mrg     /* Happens when n <= 2, a fairly uninteresting case but exercised
     82      1.1  mrg        by the random inputs of the testsuite. */
     83      1.1  mrg     return 0;
     84      1.1  mrg 
     85      1.1  mrg   ASSERT ((ap[n-1] | bp[n-1]) > 0);
     86      1.1  mrg 
     87      1.1  mrg   ASSERT ((n+1)/2 - 1 < M->alloc);
     88      1.1  mrg 
     89  1.1.1.2  mrg   if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD))
     90      1.1  mrg     {
     91  1.1.1.2  mrg       mp_size_t n2 = (3*n)/4 + 1;
     92  1.1.1.2  mrg       mp_size_t p = n/2;
     93      1.1  mrg 
     94  1.1.1.2  mrg       nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
     95  1.1.1.2  mrg       if (nn)
     96  1.1.1.2  mrg 	{
     97  1.1.1.2  mrg 	  n = nn;
     98  1.1.1.2  mrg 	  success = 1;
     99  1.1.1.2  mrg 	}
    100  1.1.1.2  mrg 
    101  1.1.1.2  mrg       /* NOTE: It apppears this loop never runs more than once (at
    102  1.1.1.2  mrg 	 least when not recursing to hgcd_appr). */
    103  1.1.1.2  mrg       while (n > n2)
    104  1.1.1.2  mrg 	{
    105  1.1.1.2  mrg 	  /* Needs n + 1 storage */
    106  1.1.1.2  mrg 	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
    107  1.1.1.2  mrg 	  if (!nn)
    108  1.1.1.2  mrg 	    return success ? n : 0;
    109      1.1  mrg 
    110  1.1.1.2  mrg 	  n = nn;
    111  1.1.1.2  mrg 	  success = 1;
    112  1.1.1.2  mrg 	}
    113      1.1  mrg 
    114  1.1.1.2  mrg       if (n > s + 2)
    115      1.1  mrg 	{
    116  1.1.1.2  mrg 	  struct hgcd_matrix M1;
    117  1.1.1.2  mrg 	  mp_size_t scratch;
    118      1.1  mrg 
    119  1.1.1.2  mrg 	  p = 2*s - n + 1;
    120  1.1.1.2  mrg 	  scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
    121      1.1  mrg 
    122  1.1.1.2  mrg 	  mpn_hgcd_matrix_init(&M1, n - p, tp);
    123      1.1  mrg 
    124  1.1.1.2  mrg 	  /* FIXME: Should use hgcd_reduce, but that may require more
    125  1.1.1.2  mrg 	     scratch space, which requires review. */
    126      1.1  mrg 
    127  1.1.1.2  mrg 	  nn = mpn_hgcd (ap + p, bp + p, n - p, &M1, tp + scratch);
    128  1.1.1.2  mrg 	  if (nn > 0)
    129  1.1.1.2  mrg 	    {
    130  1.1.1.2  mrg 	      /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
    131  1.1.1.2  mrg 	      ASSERT (M->n + 2 >= M1.n);
    132      1.1  mrg 
    133  1.1.1.2  mrg 	      /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
    134  1.1.1.2  mrg 		 then either q or q + 1 is a correct quotient, and M1 will
    135  1.1.1.2  mrg 		 start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
    136  1.1.1.2  mrg 		 rules out the case that the size of M * M1 is much
    137  1.1.1.2  mrg 		 smaller than the expected M->n + M1->n. */
    138      1.1  mrg 
    139  1.1.1.2  mrg 	      ASSERT (M->n + M1.n < M->alloc);
    140      1.1  mrg 
    141  1.1.1.2  mrg 	      /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
    142  1.1.1.2  mrg 		 = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
    143  1.1.1.2  mrg 	      n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
    144      1.1  mrg 
    145  1.1.1.2  mrg 	      /* We need a bound for of M->n + M1.n. Let n be the original
    146  1.1.1.2  mrg 		 input size. Then
    147  1.1.1.2  mrg 
    148  1.1.1.2  mrg 		 ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
    149  1.1.1.2  mrg 
    150  1.1.1.2  mrg 		 and it follows that
    151  1.1.1.2  mrg 
    152  1.1.1.2  mrg 		 M.n + M1.n <= ceil(n/2) + 1
    153  1.1.1.2  mrg 
    154  1.1.1.2  mrg 		 Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
    155  1.1.1.2  mrg 		 amount of needed scratch space. */
    156  1.1.1.2  mrg 	      mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
    157  1.1.1.2  mrg 	      success = 1;
    158  1.1.1.2  mrg 	    }
    159      1.1  mrg 	}
    160      1.1  mrg     }
    161      1.1  mrg 
    162      1.1  mrg   for (;;)
    163      1.1  mrg     {
    164      1.1  mrg       /* Needs s+3 < n */
    165  1.1.1.2  mrg       nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
    166      1.1  mrg       if (!nn)
    167      1.1  mrg 	return success ? n : 0;
    168      1.1  mrg 
    169      1.1  mrg       n = nn;
    170      1.1  mrg       success = 1;
    171      1.1  mrg     }
    172      1.1  mrg }
    173