Home | History | Annotate | Line # | Download | only in generic
      1 /* hgcd_appr.c.
      2 
      3    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
      4    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
      5    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
      6 
      7 Copyright 2011, 2012 Free Software Foundation, Inc.
      8 
      9 This file is part of the GNU MP Library.
     10 
     11 The GNU MP Library is free software; you can redistribute it and/or modify
     12 it under the terms of either:
     13 
     14   * the GNU Lesser General Public License as published by the Free
     15     Software Foundation; either version 3 of the License, or (at your
     16     option) any later version.
     17 
     18 or
     19 
     20   * the GNU General Public License as published by the Free Software
     21     Foundation; either version 2 of the License, or (at your option) any
     22     later version.
     23 
     24 or both in parallel, as here.
     25 
     26 The GNU MP Library is distributed in the hope that it will be useful, but
     27 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     28 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     29 for more details.
     30 
     31 You should have received copies of the GNU General Public License and the
     32 GNU Lesser General Public License along with the GNU MP Library.  If not,
     33 see https://www.gnu.org/licenses/.  */
     34 
     35 #include "gmp-impl.h"
     36 #include "longlong.h"
     37 
     38 /* Identical to mpn_hgcd_itch. FIXME: Do we really need to add
     39    HGCD_THRESHOLD at the end? */
     40 mp_size_t
     41 mpn_hgcd_appr_itch (mp_size_t n)
     42 {
     43   if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
     44     return n;
     45   else
     46     {
     47       unsigned k;
     48       int count;
     49       mp_size_t nscaled;
     50 
     51       /* Get the recursion depth. */
     52       nscaled = (n - 1) / (HGCD_APPR_THRESHOLD - 1);
     53       count_leading_zeros (count, nscaled);
     54       k = GMP_LIMB_BITS - count;
     55 
     56       return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
     57     }
     58 }
     59 
     60 /* Destroys inputs. */
     61 int
     62 mpn_hgcd_appr (mp_ptr ap, mp_ptr bp, mp_size_t n,
     63 	       struct hgcd_matrix *M, mp_ptr tp)
     64 {
     65   mp_size_t s;
     66   int success = 0;
     67 
     68   ASSERT (n > 0);
     69 
     70   ASSERT ((ap[n-1] | bp[n-1]) != 0);
     71 
     72   if (n <= 2)
     73     /* Implies s = n. A fairly uninteresting case but exercised by the
     74        random inputs of the testsuite. */
     75     return 0;
     76 
     77   ASSERT ((n+1)/2 - 1 < M->alloc);
     78 
     79   /* We aim for reduction of to GMP_NUMB_BITS * s bits. But each time
     80      we discard some of the least significant limbs, we must keep one
     81      additional bit to account for the truncation error. We maintain
     82      the GMP_NUMB_BITS * s - extra_bits as the current target size. */
     83 
     84   s = n/2 + 1;
     85   if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
     86     {
     87       unsigned extra_bits = 0;
     88 
     89       while (n > 2)
     90 	{
     91 	  mp_size_t nn;
     92 
     93 	  ASSERT (n > s);
     94 	  ASSERT (n <= 2*s);
     95 
     96 	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
     97 	  if (!nn)
     98 	    break;
     99 
    100 	  n = nn;
    101 	  success = 1;
    102 
    103 	  /* We can truncate and discard the lower p bits whenever nbits <=
    104 	     2*sbits - p. To account for the truncation error, we must
    105 	     adjust
    106 
    107 	     sbits <-- sbits + 1 - p,
    108 
    109 	     rather than just sbits <-- sbits - p. This adjustment makes
    110 	     the produced matrix slightly smaller than it could be. */
    111 
    112 	  if (GMP_NUMB_BITS * (n + 1) + 2 * extra_bits <= 2*GMP_NUMB_BITS * s)
    113 	    {
    114 	      mp_size_t p = (GMP_NUMB_BITS * (2*s - n) - 2*extra_bits) / GMP_NUMB_BITS;
    115 
    116 	      if (extra_bits == 0)
    117 		{
    118 		  /* We cross a limb boundary and bump s. We can't do that
    119 		     if the result is that it makes makes min(U, V)
    120 		     smaller than 2^{GMP_NUMB_BITS} s. */
    121 		  if (s + 1 == n
    122 		      || mpn_zero_p (ap + s + 1, n - s - 1)
    123 		      || mpn_zero_p (bp + s + 1, n - s - 1))
    124 		    continue;
    125 
    126 		  extra_bits = GMP_NUMB_BITS - 1;
    127 		  s++;
    128 		}
    129 	      else
    130 		{
    131 		  extra_bits--;
    132 		}
    133 
    134 	      /* Drop the p least significant limbs */
    135 	      ap += p; bp += p; n -= p; s -= p;
    136 	    }
    137 	}
    138 
    139       ASSERT (s > 0);
    140 
    141       if (extra_bits > 0)
    142 	{
    143 	  /* We can get here only of we have dropped at least one of the least
    144 	     significant bits, so we can decrement ap and bp. We can then shift
    145 	     left extra bits using mpn_rshift. */
    146 	  /* NOTE: In the unlikely case that n is large, it would be preferable
    147 	     to do an initial subdiv step to reduce the size before shifting,
    148 	     but that would mean duplicating mpn_gcd_subdiv_step with a bit
    149 	     count rather than a limb count. */
    150 	  ap--; bp--;
    151 	  ap[0] = mpn_rshift (ap+1, ap+1, n, GMP_NUMB_BITS - extra_bits);
    152 	  bp[0] = mpn_rshift (bp+1, bp+1, n, GMP_NUMB_BITS - extra_bits);
    153 	  n += (ap[n] | bp[n]) > 0;
    154 
    155 	  ASSERT (success);
    156 
    157 	  while (n > 2)
    158 	    {
    159 	      mp_size_t nn;
    160 
    161 	      ASSERT (n > s);
    162 	      ASSERT (n <= 2*s);
    163 
    164 	      nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
    165 
    166 	      if (!nn)
    167 		return 1;
    168 
    169 	      n = nn;
    170 	    }
    171 	}
    172 
    173       if (n == 2)
    174 	{
    175 	  struct hgcd_matrix1 M1;
    176 	  ASSERT (s == 1);
    177 
    178 	  if (mpn_hgcd2 (ap[1], ap[0], bp[1], bp[0], &M1))
    179 	    {
    180 	      /* Multiply M <- M * M1 */
    181 	      mpn_hgcd_matrix_mul_1 (M, &M1, tp);
    182 	      success = 1;
    183 	    }
    184 	}
    185       return success;
    186     }
    187   else
    188     {
    189       mp_size_t n2 = (3*n)/4 + 1;
    190       mp_size_t p = n/2;
    191       mp_size_t nn;
    192 
    193       nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
    194       if (nn)
    195 	{
    196 	  n = nn;
    197 	  /* FIXME: Discard some of the low limbs immediately? */
    198 	  success = 1;
    199 	}
    200 
    201       while (n > n2)
    202 	{
    203 	  mp_size_t nn;
    204 
    205 	  /* Needs n + 1 storage */
    206 	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
    207 	  if (!nn)
    208 	    return success;
    209 
    210 	  n = nn;
    211 	  success = 1;
    212 	}
    213       if (n > s + 2)
    214 	{
    215 	  struct hgcd_matrix M1;
    216 	  mp_size_t scratch;
    217 
    218 	  p = 2*s - n + 1;
    219 	  scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
    220 
    221 	  mpn_hgcd_matrix_init(&M1, n - p, tp);
    222 	  if (mpn_hgcd_appr (ap + p, bp + p, n - p, &M1, tp + scratch))
    223 	    {
    224 	      /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
    225 	      ASSERT (M->n + 2 >= M1.n);
    226 
    227 	      /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
    228 		 then either q or q + 1 is a correct quotient, and M1 will
    229 		 start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
    230 		 rules out the case that the size of M * M1 is much
    231 		 smaller than the expected M->n + M1->n. */
    232 
    233 	      ASSERT (M->n + M1.n < M->alloc);
    234 
    235 	      /* We need a bound for of M->n + M1.n. Let n be the original
    236 		 input size. Then
    237 
    238 		 ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
    239 
    240 		 and it follows that
    241 
    242 		 M.n + M1.n <= ceil(n/2) + 1
    243 
    244 		 Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
    245 		 amount of needed scratch space. */
    246 	      mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
    247 	      return 1;
    248 	    }
    249 	}
    250 
    251       for(;;)
    252 	{
    253 	  mp_size_t nn;
    254 
    255 	  ASSERT (n > s);
    256 	  ASSERT (n <= 2*s);
    257 
    258 	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
    259 
    260 	  if (!nn)
    261 	    return success;
    262 
    263 	  n = nn;
    264 	  success = 1;
    265 	}
    266     }
    267 }
    268