Home | History | Annotate | Line # | Download | only in generic
      1 /* mpn_gcdext -- Extended Greatest Common Divisor.
      2 
      3 Copyright 1996, 1998, 2000-2005, 2008, 2009 Free Software Foundation, Inc.
      4 
      5 This file is part of the GNU MP Library.
      6 
      7 The GNU MP Library is free software; you can redistribute it and/or modify
      8 it under the terms of either:
      9 
     10   * the GNU Lesser General Public License as published by the Free
     11     Software Foundation; either version 3 of the License, or (at your
     12     option) any later version.
     13 
     14 or
     15 
     16   * the GNU General Public License as published by the Free Software
     17     Foundation; either version 2 of the License, or (at your option) any
     18     later version.
     19 
     20 or both in parallel, as here.
     21 
     22 The GNU MP Library is distributed in the hope that it will be useful, but
     23 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     24 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     25 for more details.
     26 
     27 You should have received copies of the GNU General Public License and the
     28 GNU Lesser General Public License along with the GNU MP Library.  If not,
     29 see https://www.gnu.org/licenses/.  */
     30 
     31 #include "gmp-impl.h"
     32 #include "longlong.h"
     33 
     34 #ifndef GCDEXT_1_USE_BINARY
     35 #define GCDEXT_1_USE_BINARY 0
     36 #endif
     37 
     38 #ifndef GCDEXT_1_BINARY_METHOD
     39 #define GCDEXT_1_BINARY_METHOD 2
     40 #endif
     41 
     42 #if GCDEXT_1_USE_BINARY
     43 
     44 mp_limb_t
     45 mpn_gcdext_1 (mp_limb_signed_t *sp, mp_limb_signed_t *tp,
     46 	      mp_limb_t u, mp_limb_t v)
     47 {
     48   /* Maintain
     49 
     50      U = t1 u + t0 v
     51      V = s1 u + s0 v
     52 
     53      where U, V are the inputs (without any shared power of two),
     54      and the matrix has determinant  2^{shift}.
     55   */
     56   mp_limb_t s0 = 1;
     57   mp_limb_t t0 = 0;
     58   mp_limb_t s1 = 0;
     59   mp_limb_t t1 = 1;
     60   mp_limb_t ug;
     61   mp_limb_t vg;
     62   mp_limb_t ugh;
     63   mp_limb_t vgh;
     64   unsigned zero_bits;
     65   unsigned shift;
     66   unsigned i;
     67 #if GCDEXT_1_BINARY_METHOD == 2
     68   mp_limb_t det_sign;
     69 #endif
     70 
     71   ASSERT (u > 0);
     72   ASSERT (v > 0);
     73 
     74   count_trailing_zeros (zero_bits, u | v);
     75   u >>= zero_bits;
     76   v >>= zero_bits;
     77 
     78   if ((u & 1) == 0)
     79     {
     80       count_trailing_zeros (shift, u);
     81       u >>= shift;
     82       t1 <<= shift;
     83     }
     84   else if ((v & 1) == 0)
     85     {
     86       count_trailing_zeros (shift, v);
     87       v >>= shift;
     88       s0 <<= shift;
     89     }
     90   else
     91     shift = 0;
     92 
     93 #if GCDEXT_1_BINARY_METHOD == 1
     94   while (u != v)
     95     {
     96       unsigned count;
     97       if (u > v)
     98 	{
     99 	  u -= v;
    100 
    101 	  count_trailing_zeros (count, u);
    102 	  u >>= count;
    103 
    104 	  t0 += t1; t1 <<= count;
    105 	  s0 += s1; s1 <<= count;
    106 	}
    107       else
    108 	{
    109 	  v -= u;
    110 
    111 	  count_trailing_zeros (count, v);
    112 	  v >>= count;
    113 
    114 	  t1 += t0; t0 <<= count;
    115 	  s1 += s0; s0 <<= count;
    116 	}
    117       shift += count;
    118     }
    119 #else
    120 # if GCDEXT_1_BINARY_METHOD == 2
    121   u >>= 1;
    122   v >>= 1;
    123 
    124   det_sign = 0;
    125 
    126   while (u != v)
    127     {
    128       unsigned count;
    129       mp_limb_t d =  u - v;
    130       mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (d);
    131       mp_limb_t sx;
    132       mp_limb_t tx;
    133 
    134       /* When v <= u (vgtu == 0), the updates are:
    135 
    136 	   (u; v)   <-- ( (u - v) >> count; v)    (det = +(1<<count) for corr. M factor)
    137 	   (t1, t0) <-- (t1 << count, t0 + t1)
    138 
    139 	 and when v > 0, the updates are
    140 
    141 	   (u; v)   <-- ( (v - u) >> count; u)    (det = -(1<<count))
    142 	   (t1, t0) <-- (t0 << count, t0 + t1)
    143 
    144 	 and similarly for s1, s0
    145       */
    146 
    147       /* v <-- min (u, v) */
    148       v += (vgtu & d);
    149 
    150       /* u <-- |u - v| */
    151       u = (d ^ vgtu) - vgtu;
    152 
    153       /* Number of trailing zeros is the same no matter if we look at
    154        * d or u, but using d gives more parallelism. */
    155       count_trailing_zeros (count, d);
    156 
    157       det_sign ^= vgtu;
    158 
    159       tx = vgtu & (t0 - t1);
    160       sx = vgtu & (s0 - s1);
    161       t0 += t1;
    162       s0 += s1;
    163       t1 += tx;
    164       s1 += sx;
    165 
    166       count++;
    167       u >>= count;
    168       t1 <<= count;
    169       s1 <<= count;
    170       shift += count;
    171     }
    172   u = (u << 1) + 1;
    173 # else /* GCDEXT_1_BINARY_METHOD == 2 */
    174 #  error Unknown GCDEXT_1_BINARY_METHOD
    175 # endif
    176 #endif
    177 
    178   /* Now u = v = g = gcd (u,v). Compute U/g and V/g */
    179   ug = t0 + t1;
    180   vg = s0 + s1;
    181 
    182   ugh = ug/2 + (ug & 1);
    183   vgh = vg/2 + (vg & 1);
    184 
    185   /* Now 2^{shift} g = s0 U - t0 V. Get rid of the power of two, using
    186      s0 U - t0 V = (s0 + V/g) U - (t0 + U/g) V. */
    187   for (i = 0; i < shift; i++)
    188     {
    189       mp_limb_t mask = - ( (s0 | t0) & 1);
    190 
    191       s0 /= 2;
    192       t0 /= 2;
    193       s0 += mask & vgh;
    194       t0 += mask & ugh;
    195     }
    196 
    197   ASSERT_ALWAYS (s0 <= vg);
    198   ASSERT_ALWAYS (t0 <= ug);
    199 
    200   if (s0 > vg - s0)
    201     {
    202       s0 -= vg;
    203       t0 -= ug;
    204     }
    205 #if GCDEXT_1_BINARY_METHOD == 2
    206   /* Conditional negation. */
    207   s0 = (s0 ^ det_sign) - det_sign;
    208   t0 = (t0 ^ det_sign) - det_sign;
    209 #endif
    210   *sp = s0;
    211   *tp = -t0;
    212 
    213   return u << zero_bits;
    214 }
    215 
    216 #else /* !GCDEXT_1_USE_BINARY */
    217 
    218 
    219 /* FIXME: Takes two single-word limbs. It could be extended to a
    220  * function that accepts a bignum for the first input, and only
    221  * returns the first co-factor. */
    222 
    223 mp_limb_t
    224 mpn_gcdext_1 (mp_limb_signed_t *up, mp_limb_signed_t *vp,
    225 	      mp_limb_t a, mp_limb_t b)
    226 {
    227   /* Maintain
    228 
    229      a =  u0 A + v0 B
    230      b =  u1 A + v1 B
    231 
    232      where A, B are the original inputs.
    233   */
    234   mp_limb_signed_t u0 = 1;
    235   mp_limb_signed_t v0 = 0;
    236   mp_limb_signed_t u1 = 0;
    237   mp_limb_signed_t v1 = 1;
    238 
    239   ASSERT (a > 0);
    240   ASSERT (b > 0);
    241 
    242   if (a < b)
    243     goto divide_by_b;
    244 
    245   for (;;)
    246     {
    247       mp_limb_t q;
    248 
    249       q = a / b;
    250       a -= q * b;
    251 
    252       if (a == 0)
    253 	{
    254 	  *up = u1;
    255 	  *vp = v1;
    256 	  return b;
    257 	}
    258       u0 -= q * u1;
    259       v0 -= q * v1;
    260 
    261     divide_by_b:
    262       q = b / a;
    263       b -= q * a;
    264 
    265       if (b == 0)
    266 	{
    267 	  *up = u0;
    268 	  *vp = v0;
    269 	  return a;
    270 	}
    271       u1 -= q * u0;
    272       v1 -= q * v0;
    273     }
    274 }
    275 #endif /* !GCDEXT_1_USE_BINARY */
    276