Home | History | Annotate | Line # | Download | only in generic
gcdext_lehmer.c revision 1.1.1.1.8.1
      1          1.1  mrg /* mpn_gcdext -- Extended Greatest Common Divisor.
      2          1.1  mrg 
      3  1.1.1.1.8.1  tls Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009, 2012 Free
      4  1.1.1.1.8.1  tls Software Foundation, Inc.
      5          1.1  mrg 
      6          1.1  mrg This file is part of the GNU MP Library.
      7          1.1  mrg 
      8          1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
      9          1.1  mrg it under the terms of the GNU Lesser General Public License as published by
     10          1.1  mrg the Free Software Foundation; either version 3 of the License, or (at your
     11          1.1  mrg option) any later version.
     12          1.1  mrg 
     13          1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
     14          1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     15          1.1  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     16          1.1  mrg License for more details.
     17          1.1  mrg 
     18          1.1  mrg You should have received a copy of the GNU Lesser General Public License
     19          1.1  mrg along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
     20          1.1  mrg 
     21          1.1  mrg #include "gmp.h"
     22          1.1  mrg #include "gmp-impl.h"
     23          1.1  mrg #include "longlong.h"
     24          1.1  mrg 
     25  1.1.1.1.8.1  tls /* Here, d is the index of the cofactor to update. FIXME: Could use qn
     26  1.1.1.1.8.1  tls    = 0 for the common case q = 1. */
     27  1.1.1.1.8.1  tls void
     28  1.1.1.1.8.1  tls mpn_gcdext_hook (void *p, mp_srcptr gp, mp_size_t gn,
     29  1.1.1.1.8.1  tls 		 mp_srcptr qp, mp_size_t qn, int d)
     30  1.1.1.1.8.1  tls {
     31  1.1.1.1.8.1  tls   struct gcdext_ctx *ctx = (struct gcdext_ctx *) p;
     32  1.1.1.1.8.1  tls   mp_size_t un = ctx->un;
     33  1.1.1.1.8.1  tls 
     34  1.1.1.1.8.1  tls   if (gp)
     35  1.1.1.1.8.1  tls     {
     36  1.1.1.1.8.1  tls       mp_srcptr up;
     37  1.1.1.1.8.1  tls 
     38  1.1.1.1.8.1  tls       ASSERT (gn > 0);
     39  1.1.1.1.8.1  tls       ASSERT (gp[gn-1] > 0);
     40  1.1.1.1.8.1  tls 
     41  1.1.1.1.8.1  tls       MPN_COPY (ctx->gp, gp, gn);
     42  1.1.1.1.8.1  tls       ctx->gn = gn;
     43  1.1.1.1.8.1  tls 
     44  1.1.1.1.8.1  tls       if (d < 0)
     45  1.1.1.1.8.1  tls 	{
     46  1.1.1.1.8.1  tls 	  int c;
     47  1.1.1.1.8.1  tls 
     48  1.1.1.1.8.1  tls 	  /* Must return the smallest cofactor, +u1 or -u0 */
     49  1.1.1.1.8.1  tls 	  MPN_CMP (c, ctx->u0, ctx->u1, un);
     50  1.1.1.1.8.1  tls 	  ASSERT (c != 0 || (un == 1 && ctx->u0[0] == 1 && ctx->u1[0] == 1));
     51  1.1.1.1.8.1  tls 
     52  1.1.1.1.8.1  tls 	  d = c < 0;
     53  1.1.1.1.8.1  tls 	}
     54  1.1.1.1.8.1  tls 
     55  1.1.1.1.8.1  tls       up = d ? ctx->u0 : ctx->u1;
     56  1.1.1.1.8.1  tls 
     57  1.1.1.1.8.1  tls       MPN_NORMALIZE (up, un);
     58  1.1.1.1.8.1  tls       MPN_COPY (ctx->up, up, un);
     59  1.1.1.1.8.1  tls 
     60  1.1.1.1.8.1  tls       *ctx->usize = d ? -un : un;
     61  1.1.1.1.8.1  tls     }
     62  1.1.1.1.8.1  tls   else
     63  1.1.1.1.8.1  tls     {
     64  1.1.1.1.8.1  tls       mp_limb_t cy;
     65  1.1.1.1.8.1  tls       mp_ptr u0 = ctx->u0;
     66  1.1.1.1.8.1  tls       mp_ptr u1 = ctx->u1;
     67  1.1.1.1.8.1  tls 
     68  1.1.1.1.8.1  tls       ASSERT (d >= 0);
     69  1.1.1.1.8.1  tls 
     70  1.1.1.1.8.1  tls       if (d)
     71  1.1.1.1.8.1  tls 	MP_PTR_SWAP (u0, u1);
     72  1.1.1.1.8.1  tls 
     73  1.1.1.1.8.1  tls       qn -= (qp[qn-1] == 0);
     74  1.1.1.1.8.1  tls 
     75  1.1.1.1.8.1  tls       /* Update u0 += q  * u1 */
     76  1.1.1.1.8.1  tls       if (qn == 1)
     77  1.1.1.1.8.1  tls 	{
     78  1.1.1.1.8.1  tls 	  mp_limb_t q = qp[0];
     79  1.1.1.1.8.1  tls 
     80  1.1.1.1.8.1  tls 	  if (q == 1)
     81  1.1.1.1.8.1  tls 	    /* A common case. */
     82  1.1.1.1.8.1  tls 	    cy = mpn_add_n (u0, u0, u1, un);
     83  1.1.1.1.8.1  tls 	  else
     84  1.1.1.1.8.1  tls 	    cy = mpn_addmul_1 (u0, u1, un, q);
     85  1.1.1.1.8.1  tls 	}
     86  1.1.1.1.8.1  tls       else
     87  1.1.1.1.8.1  tls 	{
     88  1.1.1.1.8.1  tls 	  mp_size_t u1n;
     89  1.1.1.1.8.1  tls 	  mp_ptr tp;
     90  1.1.1.1.8.1  tls 
     91  1.1.1.1.8.1  tls 	  u1n = un;
     92  1.1.1.1.8.1  tls 	  MPN_NORMALIZE (u1, u1n);
     93  1.1.1.1.8.1  tls 
     94  1.1.1.1.8.1  tls 	  if (u1n == 0)
     95  1.1.1.1.8.1  tls 	    return;
     96  1.1.1.1.8.1  tls 
     97  1.1.1.1.8.1  tls 	  /* Should always have u1n == un here, and u1 >= u0. The
     98  1.1.1.1.8.1  tls 	     reason is that we alternate adding u0 to u1 and u1 to u0
     99  1.1.1.1.8.1  tls 	     (corresponding to subtractions a - b and b - a), and we
    100  1.1.1.1.8.1  tls 	     can get a large quotient only just after a switch, which
    101  1.1.1.1.8.1  tls 	     means that we'll add (a multiple of) the larger u to the
    102  1.1.1.1.8.1  tls 	     smaller. */
    103  1.1.1.1.8.1  tls 
    104  1.1.1.1.8.1  tls 	  tp = ctx->tp;
    105  1.1.1.1.8.1  tls 
    106  1.1.1.1.8.1  tls 	  if (qn > u1n)
    107  1.1.1.1.8.1  tls 	    mpn_mul (tp, qp, qn, u1, u1n);
    108  1.1.1.1.8.1  tls 	  else
    109  1.1.1.1.8.1  tls 	    mpn_mul (tp, u1, u1n, qp, qn);
    110  1.1.1.1.8.1  tls 
    111  1.1.1.1.8.1  tls 	  u1n += qn;
    112  1.1.1.1.8.1  tls 	  u1n -= tp[u1n-1] == 0;
    113  1.1.1.1.8.1  tls 
    114  1.1.1.1.8.1  tls 	  if (u1n >= un)
    115  1.1.1.1.8.1  tls 	    {
    116  1.1.1.1.8.1  tls 	      cy = mpn_add (u0, tp, u1n, u0, un);
    117  1.1.1.1.8.1  tls 	      un = u1n;
    118  1.1.1.1.8.1  tls 	    }
    119  1.1.1.1.8.1  tls 	  else
    120  1.1.1.1.8.1  tls 	    /* Note: Unlikely case, maybe never happens? */
    121  1.1.1.1.8.1  tls 	    cy = mpn_add (u0, u0, un, tp, u1n);
    122  1.1.1.1.8.1  tls 
    123  1.1.1.1.8.1  tls 	}
    124  1.1.1.1.8.1  tls       u0[un] = cy;
    125  1.1.1.1.8.1  tls       ctx->un = un + (cy > 0);
    126  1.1.1.1.8.1  tls     }
    127  1.1.1.1.8.1  tls }
    128  1.1.1.1.8.1  tls 
    129  1.1.1.1.8.1  tls /* Temporary storage: 3*(n+1) for u. If hgcd2 succeeds, we need n for
    130  1.1.1.1.8.1  tls    the matrix-vector multiplication adjusting a, b. If hgcd fails, we
    131  1.1.1.1.8.1  tls    need at most n for the quotient and n+1 for the u update (reusing
    132  1.1.1.1.8.1  tls    the extra u). In all, 4n + 3. */
    133          1.1  mrg 
    134          1.1  mrg mp_size_t
    135          1.1  mrg mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
    136          1.1  mrg 		     mp_ptr ap, mp_ptr bp, mp_size_t n,
    137          1.1  mrg 		     mp_ptr tp)
    138          1.1  mrg {
    139          1.1  mrg   mp_size_t ualloc = n + 1;
    140          1.1  mrg 
    141          1.1  mrg   /* Keeps track of the second row of the reduction matrix
    142          1.1  mrg    *
    143          1.1  mrg    *   M = (v0, v1 ; u0, u1)
    144          1.1  mrg    *
    145          1.1  mrg    * which correspond to the first column of the inverse
    146          1.1  mrg    *
    147          1.1  mrg    *   M^{-1} = (u1, -v1; -u0, v0)
    148  1.1.1.1.8.1  tls    *
    149  1.1.1.1.8.1  tls    * This implies that
    150  1.1.1.1.8.1  tls    *
    151  1.1.1.1.8.1  tls    *   a =  u1 A (mod B)
    152  1.1.1.1.8.1  tls    *   b = -u0 A (mod B)
    153  1.1.1.1.8.1  tls    *
    154  1.1.1.1.8.1  tls    * where A, B denotes the input values.
    155          1.1  mrg    */
    156          1.1  mrg 
    157  1.1.1.1.8.1  tls   struct gcdext_ctx ctx;
    158          1.1  mrg   mp_size_t un;
    159          1.1  mrg   mp_ptr u0;
    160          1.1  mrg   mp_ptr u1;
    161          1.1  mrg   mp_ptr u2;
    162          1.1  mrg 
    163          1.1  mrg   MPN_ZERO (tp, 3*ualloc);
    164          1.1  mrg   u0 = tp; tp += ualloc;
    165          1.1  mrg   u1 = tp; tp += ualloc;
    166          1.1  mrg   u2 = tp; tp += ualloc;
    167          1.1  mrg 
    168          1.1  mrg   u1[0] = 1; un = 1;
    169          1.1  mrg 
    170  1.1.1.1.8.1  tls   ctx.gp = gp;
    171  1.1.1.1.8.1  tls   ctx.up = up;
    172  1.1.1.1.8.1  tls   ctx.usize = usize;
    173  1.1.1.1.8.1  tls 
    174          1.1  mrg   /* FIXME: Handle n == 2 differently, after the loop? */
    175          1.1  mrg   while (n >= 2)
    176          1.1  mrg     {
    177          1.1  mrg       struct hgcd_matrix1 M;
    178          1.1  mrg       mp_limb_t ah, al, bh, bl;
    179          1.1  mrg       mp_limb_t mask;
    180          1.1  mrg 
    181          1.1  mrg       mask = ap[n-1] | bp[n-1];
    182          1.1  mrg       ASSERT (mask > 0);
    183          1.1  mrg 
    184          1.1  mrg       if (mask & GMP_NUMB_HIGHBIT)
    185          1.1  mrg 	{
    186          1.1  mrg 	  ah = ap[n-1]; al = ap[n-2];
    187          1.1  mrg 	  bh = bp[n-1]; bl = bp[n-2];
    188          1.1  mrg 	}
    189          1.1  mrg       else if (n == 2)
    190          1.1  mrg 	{
    191          1.1  mrg 	  /* We use the full inputs without truncation, so we can
    192          1.1  mrg 	     safely shift left. */
    193          1.1  mrg 	  int shift;
    194          1.1  mrg 
    195          1.1  mrg 	  count_leading_zeros (shift, mask);
    196          1.1  mrg 	  ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
    197          1.1  mrg 	  al = ap[0] << shift;
    198          1.1  mrg 	  bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
    199          1.1  mrg 	  bl = bp[0] << shift;
    200          1.1  mrg 	}
    201          1.1  mrg       else
    202          1.1  mrg 	{
    203          1.1  mrg 	  int shift;
    204          1.1  mrg 
    205          1.1  mrg 	  count_leading_zeros (shift, mask);
    206          1.1  mrg 	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
    207          1.1  mrg 	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
    208          1.1  mrg 	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
    209          1.1  mrg 	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
    210          1.1  mrg 	}
    211          1.1  mrg 
    212          1.1  mrg       /* Try an mpn_nhgcd2 step */
    213          1.1  mrg       if (mpn_hgcd2 (ah, al, bh, bl, &M))
    214          1.1  mrg 	{
    215  1.1.1.1.8.1  tls 	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
    216          1.1  mrg 	  MP_PTR_SWAP (ap, tp);
    217          1.1  mrg 	  un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
    218          1.1  mrg 	  MP_PTR_SWAP (u0, u2);
    219          1.1  mrg 	}
    220          1.1  mrg       else
    221          1.1  mrg 	{
    222          1.1  mrg 	  /* mpn_hgcd2 has failed. Then either one of a or b is very
    223          1.1  mrg 	     small, or the difference is very small. Perform one
    224          1.1  mrg 	     subtraction followed by one division. */
    225  1.1.1.1.8.1  tls 	  ctx.u0 = u0;
    226  1.1.1.1.8.1  tls 	  ctx.u1 = u1;
    227  1.1.1.1.8.1  tls 	  ctx.tp = u2;
    228  1.1.1.1.8.1  tls 	  ctx.un = un;
    229          1.1  mrg 
    230          1.1  mrg 	  /* Temporary storage n for the quotient and ualloc for the
    231          1.1  mrg 	     new cofactor. */
    232  1.1.1.1.8.1  tls 	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
    233          1.1  mrg 	  if (n == 0)
    234  1.1.1.1.8.1  tls 	    return ctx.gn;
    235          1.1  mrg 
    236  1.1.1.1.8.1  tls 	  un = ctx.un;
    237          1.1  mrg 	}
    238          1.1  mrg     }
    239          1.1  mrg   ASSERT_ALWAYS (ap[0] > 0);
    240          1.1  mrg   ASSERT_ALWAYS (bp[0] > 0);
    241          1.1  mrg 
    242          1.1  mrg   if (ap[0] == bp[0])
    243          1.1  mrg     {
    244          1.1  mrg       int c;
    245          1.1  mrg 
    246          1.1  mrg       /* Which cofactor to return now? Candidates are +u1 and -u0,
    247          1.1  mrg 	 depending on which of a and b was most recently reduced,
    248          1.1  mrg 	 which we don't keep track of. So compare and get the smallest
    249          1.1  mrg 	 one. */
    250          1.1  mrg 
    251          1.1  mrg       gp[0] = ap[0];
    252          1.1  mrg 
    253          1.1  mrg       MPN_CMP (c, u0, u1, un);
    254          1.1  mrg       ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
    255          1.1  mrg       if (c < 0)
    256          1.1  mrg 	{
    257          1.1  mrg 	  MPN_NORMALIZE (u0, un);
    258          1.1  mrg 	  MPN_COPY (up, u0, un);
    259          1.1  mrg 	  *usize = -un;
    260          1.1  mrg 	}
    261          1.1  mrg       else
    262          1.1  mrg 	{
    263          1.1  mrg 	  MPN_NORMALIZE_NOT_ZERO (u1, un);
    264          1.1  mrg 	  MPN_COPY (up, u1, un);
    265          1.1  mrg 	  *usize = un;
    266          1.1  mrg 	}
    267          1.1  mrg       return 1;
    268          1.1  mrg     }
    269          1.1  mrg   else
    270          1.1  mrg     {
    271          1.1  mrg       mp_limb_t uh, vh;
    272          1.1  mrg       mp_limb_signed_t u;
    273          1.1  mrg       mp_limb_signed_t v;
    274          1.1  mrg       int negate;
    275          1.1  mrg 
    276          1.1  mrg       gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);
    277          1.1  mrg 
    278          1.1  mrg       /* Set up = u u1 - v u0. Keep track of size, un grows by one or
    279          1.1  mrg 	 two limbs. */
    280          1.1  mrg 
    281          1.1  mrg       if (u == 0)
    282          1.1  mrg 	{
    283          1.1  mrg 	  ASSERT (v == 1);
    284          1.1  mrg 	  MPN_NORMALIZE (u0, un);
    285          1.1  mrg 	  MPN_COPY (up, u0, un);
    286          1.1  mrg 	  *usize = -un;
    287          1.1  mrg 	  return 1;
    288          1.1  mrg 	}
    289          1.1  mrg       else if (v == 0)
    290          1.1  mrg 	{
    291          1.1  mrg 	  ASSERT (u == 1);
    292          1.1  mrg 	  MPN_NORMALIZE (u1, un);
    293          1.1  mrg 	  MPN_COPY (up, u1, un);
    294          1.1  mrg 	  *usize = un;
    295          1.1  mrg 	  return 1;
    296          1.1  mrg 	}
    297          1.1  mrg       else if (u > 0)
    298          1.1  mrg 	{
    299          1.1  mrg 	  negate = 0;
    300          1.1  mrg 	  ASSERT (v < 0);
    301          1.1  mrg 	  v = -v;
    302          1.1  mrg 	}
    303          1.1  mrg       else
    304          1.1  mrg 	{
    305          1.1  mrg 	  negate = 1;
    306          1.1  mrg 	  ASSERT (v > 0);
    307          1.1  mrg 	  u = -u;
    308          1.1  mrg 	}
    309          1.1  mrg 
    310          1.1  mrg       uh = mpn_mul_1 (up, u1, un, u);
    311          1.1  mrg       vh = mpn_addmul_1 (up, u0, un, v);
    312          1.1  mrg 
    313          1.1  mrg       if ( (uh | vh) > 0)
    314          1.1  mrg 	{
    315          1.1  mrg 	  uh += vh;
    316          1.1  mrg 	  up[un++] = uh;
    317          1.1  mrg 	  if (uh < vh)
    318          1.1  mrg 	    up[un++] = 1;
    319          1.1  mrg 	}
    320          1.1  mrg 
    321          1.1  mrg       MPN_NORMALIZE_NOT_ZERO (up, un);
    322          1.1  mrg 
    323          1.1  mrg       *usize = negate ? -un : un;
    324          1.1  mrg       return 1;
    325          1.1  mrg     }
    326          1.1  mrg }
    327