Home | History | Annotate | Line # | Download | only in generic
gcdext_lehmer.c revision 1.1.1.3
      1 /* mpn_gcdext -- Extended Greatest Common Divisor.
      2 
      3 Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
      4 Inc.
      5 
      6 This file is part of the GNU MP Library.
      7 
      8 The GNU MP Library is free software; you can redistribute it and/or modify
      9 it under the terms of either:
     10 
     11   * the GNU Lesser General Public License as published by the Free
     12     Software Foundation; either version 3 of the License, or (at your
     13     option) any later version.
     14 
     15 or
     16 
     17   * the GNU General Public License as published by the Free Software
     18     Foundation; either version 2 of the License, or (at your option) any
     19     later version.
     20 
     21 or both in parallel, as here.
     22 
     23 The GNU MP Library is distributed in the hope that it will be useful, but
     24 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     25 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     26 for more details.
     27 
     28 You should have received copies of the GNU General Public License and the
     29 GNU Lesser General Public License along with the GNU MP Library.  If not,
     30 see https://www.gnu.org/licenses/.  */
     31 
     32 #include "gmp.h"
     33 #include "gmp-impl.h"
     34 #include "longlong.h"
     35 
     36 /* Here, d is the index of the cofactor to update. FIXME: Could use qn
     37    = 0 for the common case q = 1. */
     38 void
     39 mpn_gcdext_hook (void *p, mp_srcptr gp, mp_size_t gn,
     40 		 mp_srcptr qp, mp_size_t qn, int d)
     41 {
     42   struct gcdext_ctx *ctx = (struct gcdext_ctx *) p;
     43   mp_size_t un = ctx->un;
     44 
     45   if (gp)
     46     {
     47       mp_srcptr up;
     48 
     49       ASSERT (gn > 0);
     50       ASSERT (gp[gn-1] > 0);
     51 
     52       MPN_COPY (ctx->gp, gp, gn);
     53       ctx->gn = gn;
     54 
     55       if (d < 0)
     56 	{
     57 	  int c;
     58 
     59 	  /* Must return the smallest cofactor, +u1 or -u0 */
     60 	  MPN_CMP (c, ctx->u0, ctx->u1, un);
     61 	  ASSERT (c != 0 || (un == 1 && ctx->u0[0] == 1 && ctx->u1[0] == 1));
     62 
     63 	  d = c < 0;
     64 	}
     65 
     66       up = d ? ctx->u0 : ctx->u1;
     67 
     68       MPN_NORMALIZE (up, un);
     69       MPN_COPY (ctx->up, up, un);
     70 
     71       *ctx->usize = d ? -un : un;
     72     }
     73   else
     74     {
     75       mp_limb_t cy;
     76       mp_ptr u0 = ctx->u0;
     77       mp_ptr u1 = ctx->u1;
     78 
     79       ASSERT (d >= 0);
     80 
     81       if (d)
     82 	MP_PTR_SWAP (u0, u1);
     83 
     84       qn -= (qp[qn-1] == 0);
     85 
     86       /* Update u0 += q  * u1 */
     87       if (qn == 1)
     88 	{
     89 	  mp_limb_t q = qp[0];
     90 
     91 	  if (q == 1)
     92 	    /* A common case. */
     93 	    cy = mpn_add_n (u0, u0, u1, un);
     94 	  else
     95 	    cy = mpn_addmul_1 (u0, u1, un, q);
     96 	}
     97       else
     98 	{
     99 	  mp_size_t u1n;
    100 	  mp_ptr tp;
    101 
    102 	  u1n = un;
    103 	  MPN_NORMALIZE (u1, u1n);
    104 
    105 	  if (u1n == 0)
    106 	    return;
    107 
    108 	  /* Should always have u1n == un here, and u1 >= u0. The
    109 	     reason is that we alternate adding u0 to u1 and u1 to u0
    110 	     (corresponding to subtractions a - b and b - a), and we
    111 	     can get a large quotient only just after a switch, which
    112 	     means that we'll add (a multiple of) the larger u to the
    113 	     smaller. */
    114 
    115 	  tp = ctx->tp;
    116 
    117 	  if (qn > u1n)
    118 	    mpn_mul (tp, qp, qn, u1, u1n);
    119 	  else
    120 	    mpn_mul (tp, u1, u1n, qp, qn);
    121 
    122 	  u1n += qn;
    123 	  u1n -= tp[u1n-1] == 0;
    124 
    125 	  if (u1n >= un)
    126 	    {
    127 	      cy = mpn_add (u0, tp, u1n, u0, un);
    128 	      un = u1n;
    129 	    }
    130 	  else
    131 	    /* Note: Unlikely case, maybe never happens? */
    132 	    cy = mpn_add (u0, u0, un, tp, u1n);
    133 
    134 	}
    135       u0[un] = cy;
    136       ctx->un = un + (cy > 0);
    137     }
    138 }
    139 
    140 /* Temporary storage: 3*(n+1) for u. If hgcd2 succeeds, we need n for
    141    the matrix-vector multiplication adjusting a, b. If hgcd fails, we
    142    need at most n for the quotient and n+1 for the u update (reusing
    143    the extra u). In all, 4n + 3. */
    144 
    145 mp_size_t
    146 mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
    147 		     mp_ptr ap, mp_ptr bp, mp_size_t n,
    148 		     mp_ptr tp)
    149 {
    150   mp_size_t ualloc = n + 1;
    151 
    152   /* Keeps track of the second row of the reduction matrix
    153    *
    154    *   M = (v0, v1 ; u0, u1)
    155    *
    156    * which correspond to the first column of the inverse
    157    *
    158    *   M^{-1} = (u1, -v1; -u0, v0)
    159    *
    160    * This implies that
    161    *
    162    *   a =  u1 A (mod B)
    163    *   b = -u0 A (mod B)
    164    *
    165    * where A, B denotes the input values.
    166    */
    167 
    168   struct gcdext_ctx ctx;
    169   mp_size_t un;
    170   mp_ptr u0;
    171   mp_ptr u1;
    172   mp_ptr u2;
    173 
    174   MPN_ZERO (tp, 3*ualloc);
    175   u0 = tp; tp += ualloc;
    176   u1 = tp; tp += ualloc;
    177   u2 = tp; tp += ualloc;
    178 
    179   u1[0] = 1; un = 1;
    180 
    181   ctx.gp = gp;
    182   ctx.up = up;
    183   ctx.usize = usize;
    184 
    185   /* FIXME: Handle n == 2 differently, after the loop? */
    186   while (n >= 2)
    187     {
    188       struct hgcd_matrix1 M;
    189       mp_limb_t ah, al, bh, bl;
    190       mp_limb_t mask;
    191 
    192       mask = ap[n-1] | bp[n-1];
    193       ASSERT (mask > 0);
    194 
    195       if (mask & GMP_NUMB_HIGHBIT)
    196 	{
    197 	  ah = ap[n-1]; al = ap[n-2];
    198 	  bh = bp[n-1]; bl = bp[n-2];
    199 	}
    200       else if (n == 2)
    201 	{
    202 	  /* We use the full inputs without truncation, so we can
    203 	     safely shift left. */
    204 	  int shift;
    205 
    206 	  count_leading_zeros (shift, mask);
    207 	  ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
    208 	  al = ap[0] << shift;
    209 	  bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
    210 	  bl = bp[0] << shift;
    211 	}
    212       else
    213 	{
    214 	  int shift;
    215 
    216 	  count_leading_zeros (shift, mask);
    217 	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
    218 	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
    219 	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
    220 	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
    221 	}
    222 
    223       /* Try an mpn_nhgcd2 step */
    224       if (mpn_hgcd2 (ah, al, bh, bl, &M))
    225 	{
    226 	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
    227 	  MP_PTR_SWAP (ap, tp);
    228 	  un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
    229 	  MP_PTR_SWAP (u0, u2);
    230 	}
    231       else
    232 	{
    233 	  /* mpn_hgcd2 has failed. Then either one of a or b is very
    234 	     small, or the difference is very small. Perform one
    235 	     subtraction followed by one division. */
    236 	  ctx.u0 = u0;
    237 	  ctx.u1 = u1;
    238 	  ctx.tp = u2;
    239 	  ctx.un = un;
    240 
    241 	  /* Temporary storage n for the quotient and ualloc for the
    242 	     new cofactor. */
    243 	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
    244 	  if (n == 0)
    245 	    return ctx.gn;
    246 
    247 	  un = ctx.un;
    248 	}
    249     }
    250   ASSERT_ALWAYS (ap[0] > 0);
    251   ASSERT_ALWAYS (bp[0] > 0);
    252 
    253   if (ap[0] == bp[0])
    254     {
    255       int c;
    256 
    257       /* Which cofactor to return now? Candidates are +u1 and -u0,
    258 	 depending on which of a and b was most recently reduced,
    259 	 which we don't keep track of. So compare and get the smallest
    260 	 one. */
    261 
    262       gp[0] = ap[0];
    263 
    264       MPN_CMP (c, u0, u1, un);
    265       ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
    266       if (c < 0)
    267 	{
    268 	  MPN_NORMALIZE (u0, un);
    269 	  MPN_COPY (up, u0, un);
    270 	  *usize = -un;
    271 	}
    272       else
    273 	{
    274 	  MPN_NORMALIZE_NOT_ZERO (u1, un);
    275 	  MPN_COPY (up, u1, un);
    276 	  *usize = un;
    277 	}
    278       return 1;
    279     }
    280   else
    281     {
    282       mp_limb_t uh, vh;
    283       mp_limb_signed_t u;
    284       mp_limb_signed_t v;
    285       int negate;
    286 
    287       gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);
    288 
    289       /* Set up = u u1 - v u0. Keep track of size, un grows by one or
    290 	 two limbs. */
    291 
    292       if (u == 0)
    293 	{
    294 	  ASSERT (v == 1);
    295 	  MPN_NORMALIZE (u0, un);
    296 	  MPN_COPY (up, u0, un);
    297 	  *usize = -un;
    298 	  return 1;
    299 	}
    300       else if (v == 0)
    301 	{
    302 	  ASSERT (u == 1);
    303 	  MPN_NORMALIZE (u1, un);
    304 	  MPN_COPY (up, u1, un);
    305 	  *usize = un;
    306 	  return 1;
    307 	}
    308       else if (u > 0)
    309 	{
    310 	  negate = 0;
    311 	  ASSERT (v < 0);
    312 	  v = -v;
    313 	}
    314       else
    315 	{
    316 	  negate = 1;
    317 	  ASSERT (v > 0);
    318 	  u = -u;
    319 	}
    320 
    321       uh = mpn_mul_1 (up, u1, un, u);
    322       vh = mpn_addmul_1 (up, u0, un, v);
    323 
    324       if ( (uh | vh) > 0)
    325 	{
    326 	  uh += vh;
    327 	  up[un++] = uh;
    328 	  if (uh < vh)
    329 	    up[un++] = 1;
    330 	}
    331 
    332       MPN_NORMALIZE_NOT_ZERO (up, un);
    333 
    334       *usize = negate ? -un : un;
    335       return 1;
    336     }
    337 }
    338