Home | History | Annotate | Line # | Download | only in generic
gcdext_lehmer.c revision 1.1.1.4
      1 /* mpn_gcdext -- Extended Greatest Common Divisor.
      2 
      3 Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
      4 Inc.
      5 
      6 This file is part of the GNU MP Library.
      7 
      8 The GNU MP Library is free software; you can redistribute it and/or modify
      9 it under the terms of either:
     10 
     11   * the GNU Lesser General Public License as published by the Free
     12     Software Foundation; either version 3 of the License, or (at your
     13     option) any later version.
     14 
     15 or
     16 
     17   * the GNU General Public License as published by the Free Software
     18     Foundation; either version 2 of the License, or (at your option) any
     19     later version.
     20 
     21 or both in parallel, as here.
     22 
     23 The GNU MP Library is distributed in the hope that it will be useful, but
     24 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     25 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     26 for more details.
     27 
     28 You should have received copies of the GNU General Public License and the
     29 GNU Lesser General Public License along with the GNU MP Library.  If not,
     30 see https://www.gnu.org/licenses/.  */
     31 
     32 #include "gmp-impl.h"
     33 #include "longlong.h"
     34 
     35 /* Here, d is the index of the cofactor to update. FIXME: Could use qn
     36    = 0 for the common case q = 1. */
     37 void
     38 mpn_gcdext_hook (void *p, mp_srcptr gp, mp_size_t gn,
     39 		 mp_srcptr qp, mp_size_t qn, int d)
     40 {
     41   struct gcdext_ctx *ctx = (struct gcdext_ctx *) p;
     42   mp_size_t un = ctx->un;
     43 
     44   if (gp)
     45     {
     46       mp_srcptr up;
     47 
     48       ASSERT (gn > 0);
     49       ASSERT (gp[gn-1] > 0);
     50 
     51       MPN_COPY (ctx->gp, gp, gn);
     52       ctx->gn = gn;
     53 
     54       if (d < 0)
     55 	{
     56 	  int c;
     57 
     58 	  /* Must return the smallest cofactor, +u1 or -u0 */
     59 	  MPN_CMP (c, ctx->u0, ctx->u1, un);
     60 	  ASSERT (c != 0 || (un == 1 && ctx->u0[0] == 1 && ctx->u1[0] == 1));
     61 
     62 	  d = c < 0;
     63 	}
     64 
     65       up = d ? ctx->u0 : ctx->u1;
     66 
     67       MPN_NORMALIZE (up, un);
     68       MPN_COPY (ctx->up, up, un);
     69 
     70       *ctx->usize = d ? -un : un;
     71     }
     72   else
     73     {
     74       mp_limb_t cy;
     75       mp_ptr u0 = ctx->u0;
     76       mp_ptr u1 = ctx->u1;
     77 
     78       ASSERT (d >= 0);
     79 
     80       if (d)
     81 	MP_PTR_SWAP (u0, u1);
     82 
     83       qn -= (qp[qn-1] == 0);
     84 
     85       /* Update u0 += q  * u1 */
     86       if (qn == 1)
     87 	{
     88 	  mp_limb_t q = qp[0];
     89 
     90 	  if (q == 1)
     91 	    /* A common case. */
     92 	    cy = mpn_add_n (u0, u0, u1, un);
     93 	  else
     94 	    cy = mpn_addmul_1 (u0, u1, un, q);
     95 	}
     96       else
     97 	{
     98 	  mp_size_t u1n;
     99 	  mp_ptr tp;
    100 
    101 	  u1n = un;
    102 	  MPN_NORMALIZE (u1, u1n);
    103 
    104 	  if (u1n == 0)
    105 	    return;
    106 
    107 	  /* Should always have u1n == un here, and u1 >= u0. The
    108 	     reason is that we alternate adding u0 to u1 and u1 to u0
    109 	     (corresponding to subtractions a - b and b - a), and we
    110 	     can get a large quotient only just after a switch, which
    111 	     means that we'll add (a multiple of) the larger u to the
    112 	     smaller. */
    113 
    114 	  tp = ctx->tp;
    115 
    116 	  if (qn > u1n)
    117 	    mpn_mul (tp, qp, qn, u1, u1n);
    118 	  else
    119 	    mpn_mul (tp, u1, u1n, qp, qn);
    120 
    121 	  u1n += qn;
    122 	  u1n -= tp[u1n-1] == 0;
    123 
    124 	  if (u1n >= un)
    125 	    {
    126 	      cy = mpn_add (u0, tp, u1n, u0, un);
    127 	      un = u1n;
    128 	    }
    129 	  else
    130 	    /* Note: Unlikely case, maybe never happens? */
    131 	    cy = mpn_add (u0, u0, un, tp, u1n);
    132 
    133 	}
    134       u0[un] = cy;
    135       ctx->un = un + (cy > 0);
    136     }
    137 }
    138 
    139 /* Temporary storage: 3*(n+1) for u. If hgcd2 succeeds, we need n for
    140    the matrix-vector multiplication adjusting a, b. If hgcd fails, we
    141    need at most n for the quotient and n+1 for the u update (reusing
    142    the extra u). In all, 4n + 3. */
    143 
    144 mp_size_t
    145 mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
    146 		     mp_ptr ap, mp_ptr bp, mp_size_t n,
    147 		     mp_ptr tp)
    148 {
    149   mp_size_t ualloc = n + 1;
    150 
    151   /* Keeps track of the second row of the reduction matrix
    152    *
    153    *   M = (v0, v1 ; u0, u1)
    154    *
    155    * which correspond to the first column of the inverse
    156    *
    157    *   M^{-1} = (u1, -v1; -u0, v0)
    158    *
    159    * This implies that
    160    *
    161    *   a =  u1 A (mod B)
    162    *   b = -u0 A (mod B)
    163    *
    164    * where A, B denotes the input values.
    165    */
    166 
    167   struct gcdext_ctx ctx;
    168   mp_size_t un;
    169   mp_ptr u0;
    170   mp_ptr u1;
    171   mp_ptr u2;
    172 
    173   MPN_ZERO (tp, 3*ualloc);
    174   u0 = tp; tp += ualloc;
    175   u1 = tp; tp += ualloc;
    176   u2 = tp; tp += ualloc;
    177 
    178   u1[0] = 1; un = 1;
    179 
    180   ctx.gp = gp;
    181   ctx.up = up;
    182   ctx.usize = usize;
    183 
    184   /* FIXME: Handle n == 2 differently, after the loop? */
    185   while (n >= 2)
    186     {
    187       struct hgcd_matrix1 M;
    188       mp_limb_t ah, al, bh, bl;
    189       mp_limb_t mask;
    190 
    191       mask = ap[n-1] | bp[n-1];
    192       ASSERT (mask > 0);
    193 
    194       if (mask & GMP_NUMB_HIGHBIT)
    195 	{
    196 	  ah = ap[n-1]; al = ap[n-2];
    197 	  bh = bp[n-1]; bl = bp[n-2];
    198 	}
    199       else if (n == 2)
    200 	{
    201 	  /* We use the full inputs without truncation, so we can
    202 	     safely shift left. */
    203 	  int shift;
    204 
    205 	  count_leading_zeros (shift, mask);
    206 	  ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
    207 	  al = ap[0] << shift;
    208 	  bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
    209 	  bl = bp[0] << shift;
    210 	}
    211       else
    212 	{
    213 	  int shift;
    214 
    215 	  count_leading_zeros (shift, mask);
    216 	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
    217 	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
    218 	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
    219 	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
    220 	}
    221 
    222       /* Try an mpn_nhgcd2 step */
    223       if (mpn_hgcd2 (ah, al, bh, bl, &M))
    224 	{
    225 	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
    226 	  MP_PTR_SWAP (ap, tp);
    227 	  un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
    228 	  MP_PTR_SWAP (u0, u2);
    229 	}
    230       else
    231 	{
    232 	  /* mpn_hgcd2 has failed. Then either one of a or b is very
    233 	     small, or the difference is very small. Perform one
    234 	     subtraction followed by one division. */
    235 	  ctx.u0 = u0;
    236 	  ctx.u1 = u1;
    237 	  ctx.tp = u2;
    238 	  ctx.un = un;
    239 
    240 	  /* Temporary storage n for the quotient and ualloc for the
    241 	     new cofactor. */
    242 	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
    243 	  if (n == 0)
    244 	    return ctx.gn;
    245 
    246 	  un = ctx.un;
    247 	}
    248     }
    249   ASSERT_ALWAYS (ap[0] > 0);
    250   ASSERT_ALWAYS (bp[0] > 0);
    251 
    252   if (ap[0] == bp[0])
    253     {
    254       int c;
    255 
    256       /* Which cofactor to return now? Candidates are +u1 and -u0,
    257 	 depending on which of a and b was most recently reduced,
    258 	 which we don't keep track of. So compare and get the smallest
    259 	 one. */
    260 
    261       gp[0] = ap[0];
    262 
    263       MPN_CMP (c, u0, u1, un);
    264       ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
    265       if (c < 0)
    266 	{
    267 	  MPN_NORMALIZE (u0, un);
    268 	  MPN_COPY (up, u0, un);
    269 	  *usize = -un;
    270 	}
    271       else
    272 	{
    273 	  MPN_NORMALIZE_NOT_ZERO (u1, un);
    274 	  MPN_COPY (up, u1, un);
    275 	  *usize = un;
    276 	}
    277       return 1;
    278     }
    279   else
    280     {
    281       mp_limb_t uh, vh;
    282       mp_limb_signed_t u;
    283       mp_limb_signed_t v;
    284       int negate;
    285 
    286       gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);
    287 
    288       /* Set up = u u1 - v u0. Keep track of size, un grows by one or
    289 	 two limbs. */
    290 
    291       if (u == 0)
    292 	{
    293 	  ASSERT (v == 1);
    294 	  MPN_NORMALIZE (u0, un);
    295 	  MPN_COPY (up, u0, un);
    296 	  *usize = -un;
    297 	  return 1;
    298 	}
    299       else if (v == 0)
    300 	{
    301 	  ASSERT (u == 1);
    302 	  MPN_NORMALIZE (u1, un);
    303 	  MPN_COPY (up, u1, un);
    304 	  *usize = un;
    305 	  return 1;
    306 	}
    307       else if (u > 0)
    308 	{
    309 	  negate = 0;
    310 	  ASSERT (v < 0);
    311 	  v = -v;
    312 	}
    313       else
    314 	{
    315 	  negate = 1;
    316 	  ASSERT (v > 0);
    317 	  u = -u;
    318 	}
    319 
    320       uh = mpn_mul_1 (up, u1, un, u);
    321       vh = mpn_addmul_1 (up, u0, un, v);
    322 
    323       if ( (uh | vh) > 0)
    324 	{
    325 	  uh += vh;
    326 	  up[un++] = uh;
    327 	  if (uh < vh)
    328 	    up[un++] = 1;
    329 	}
    330 
    331       MPN_NORMALIZE_NOT_ZERO (up, un);
    332 
    333       *usize = negate ? -un : un;
    334       return 1;
    335     }
    336 }
    337