Home | History | Annotate | Line # | Download | only in generic
gcdext_lehmer.c revision 1.1.1.4
      1      1.1  mrg /* mpn_gcdext -- Extended Greatest Common Divisor.
      2      1.1  mrg 
      3  1.1.1.3  mrg Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
      4  1.1.1.3  mrg Inc.
      5      1.1  mrg 
      6      1.1  mrg This file is part of the GNU MP Library.
      7      1.1  mrg 
      8      1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
      9  1.1.1.3  mrg it under the terms of either:
     10  1.1.1.3  mrg 
     11  1.1.1.3  mrg   * the GNU Lesser General Public License as published by the Free
     12  1.1.1.3  mrg     Software Foundation; either version 3 of the License, or (at your
     13  1.1.1.3  mrg     option) any later version.
     14  1.1.1.3  mrg 
     15  1.1.1.3  mrg or
     16  1.1.1.3  mrg 
     17  1.1.1.3  mrg   * the GNU General Public License as published by the Free Software
     18  1.1.1.3  mrg     Foundation; either version 2 of the License, or (at your option) any
     19  1.1.1.3  mrg     later version.
     20  1.1.1.3  mrg 
     21  1.1.1.3  mrg or both in parallel, as here.
     22      1.1  mrg 
     23      1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
     24      1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     25  1.1.1.3  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     26  1.1.1.3  mrg for more details.
     27      1.1  mrg 
     28  1.1.1.3  mrg You should have received copies of the GNU General Public License and the
     29  1.1.1.3  mrg GNU Lesser General Public License along with the GNU MP Library.  If not,
     30  1.1.1.3  mrg see https://www.gnu.org/licenses/.  */
     31      1.1  mrg 
     32      1.1  mrg #include "gmp-impl.h"
     33      1.1  mrg #include "longlong.h"
     34      1.1  mrg 
     35  1.1.1.2  mrg /* Here, d is the index of the cofactor to update. FIXME: Could use qn
     36  1.1.1.2  mrg    = 0 for the common case q = 1. */
     37  1.1.1.2  mrg void
     38  1.1.1.2  mrg mpn_gcdext_hook (void *p, mp_srcptr gp, mp_size_t gn,
     39  1.1.1.2  mrg 		 mp_srcptr qp, mp_size_t qn, int d)
     40  1.1.1.2  mrg {
     41  1.1.1.2  mrg   struct gcdext_ctx *ctx = (struct gcdext_ctx *) p;
     42  1.1.1.2  mrg   mp_size_t un = ctx->un;
     43  1.1.1.2  mrg 
     44  1.1.1.2  mrg   if (gp)
     45  1.1.1.2  mrg     {
     46  1.1.1.2  mrg       mp_srcptr up;
     47  1.1.1.2  mrg 
     48  1.1.1.2  mrg       ASSERT (gn > 0);
     49  1.1.1.2  mrg       ASSERT (gp[gn-1] > 0);
     50  1.1.1.2  mrg 
     51  1.1.1.2  mrg       MPN_COPY (ctx->gp, gp, gn);
     52  1.1.1.2  mrg       ctx->gn = gn;
     53  1.1.1.2  mrg 
     54  1.1.1.2  mrg       if (d < 0)
     55  1.1.1.2  mrg 	{
     56  1.1.1.2  mrg 	  int c;
     57  1.1.1.2  mrg 
     58  1.1.1.2  mrg 	  /* Must return the smallest cofactor, +u1 or -u0 */
     59  1.1.1.2  mrg 	  MPN_CMP (c, ctx->u0, ctx->u1, un);
     60  1.1.1.2  mrg 	  ASSERT (c != 0 || (un == 1 && ctx->u0[0] == 1 && ctx->u1[0] == 1));
     61  1.1.1.2  mrg 
     62  1.1.1.2  mrg 	  d = c < 0;
     63  1.1.1.2  mrg 	}
     64  1.1.1.2  mrg 
     65  1.1.1.2  mrg       up = d ? ctx->u0 : ctx->u1;
     66  1.1.1.2  mrg 
     67  1.1.1.2  mrg       MPN_NORMALIZE (up, un);
     68  1.1.1.2  mrg       MPN_COPY (ctx->up, up, un);
     69  1.1.1.2  mrg 
     70  1.1.1.2  mrg       *ctx->usize = d ? -un : un;
     71  1.1.1.2  mrg     }
     72  1.1.1.2  mrg   else
     73  1.1.1.2  mrg     {
     74  1.1.1.2  mrg       mp_limb_t cy;
     75  1.1.1.2  mrg       mp_ptr u0 = ctx->u0;
     76  1.1.1.2  mrg       mp_ptr u1 = ctx->u1;
     77  1.1.1.2  mrg 
     78  1.1.1.2  mrg       ASSERT (d >= 0);
     79  1.1.1.2  mrg 
     80  1.1.1.2  mrg       if (d)
     81  1.1.1.2  mrg 	MP_PTR_SWAP (u0, u1);
     82  1.1.1.2  mrg 
     83  1.1.1.2  mrg       qn -= (qp[qn-1] == 0);
     84  1.1.1.2  mrg 
     85  1.1.1.2  mrg       /* Update u0 += q  * u1 */
     86  1.1.1.2  mrg       if (qn == 1)
     87  1.1.1.2  mrg 	{
     88  1.1.1.2  mrg 	  mp_limb_t q = qp[0];
     89  1.1.1.2  mrg 
     90  1.1.1.2  mrg 	  if (q == 1)
     91  1.1.1.2  mrg 	    /* A common case. */
     92  1.1.1.2  mrg 	    cy = mpn_add_n (u0, u0, u1, un);
     93  1.1.1.2  mrg 	  else
     94  1.1.1.2  mrg 	    cy = mpn_addmul_1 (u0, u1, un, q);
     95  1.1.1.2  mrg 	}
     96  1.1.1.2  mrg       else
     97  1.1.1.2  mrg 	{
     98  1.1.1.2  mrg 	  mp_size_t u1n;
     99  1.1.1.2  mrg 	  mp_ptr tp;
    100  1.1.1.2  mrg 
    101  1.1.1.2  mrg 	  u1n = un;
    102  1.1.1.2  mrg 	  MPN_NORMALIZE (u1, u1n);
    103  1.1.1.2  mrg 
    104  1.1.1.2  mrg 	  if (u1n == 0)
    105  1.1.1.2  mrg 	    return;
    106  1.1.1.2  mrg 
    107  1.1.1.2  mrg 	  /* Should always have u1n == un here, and u1 >= u0. The
    108  1.1.1.2  mrg 	     reason is that we alternate adding u0 to u1 and u1 to u0
    109  1.1.1.2  mrg 	     (corresponding to subtractions a - b and b - a), and we
    110  1.1.1.2  mrg 	     can get a large quotient only just after a switch, which
    111  1.1.1.2  mrg 	     means that we'll add (a multiple of) the larger u to the
    112  1.1.1.2  mrg 	     smaller. */
    113  1.1.1.2  mrg 
    114  1.1.1.2  mrg 	  tp = ctx->tp;
    115  1.1.1.2  mrg 
    116  1.1.1.2  mrg 	  if (qn > u1n)
    117  1.1.1.2  mrg 	    mpn_mul (tp, qp, qn, u1, u1n);
    118  1.1.1.2  mrg 	  else
    119  1.1.1.2  mrg 	    mpn_mul (tp, u1, u1n, qp, qn);
    120  1.1.1.2  mrg 
    121  1.1.1.2  mrg 	  u1n += qn;
    122  1.1.1.2  mrg 	  u1n -= tp[u1n-1] == 0;
    123  1.1.1.2  mrg 
    124  1.1.1.2  mrg 	  if (u1n >= un)
    125  1.1.1.2  mrg 	    {
    126  1.1.1.2  mrg 	      cy = mpn_add (u0, tp, u1n, u0, un);
    127  1.1.1.2  mrg 	      un = u1n;
    128  1.1.1.2  mrg 	    }
    129  1.1.1.2  mrg 	  else
    130  1.1.1.2  mrg 	    /* Note: Unlikely case, maybe never happens? */
    131  1.1.1.2  mrg 	    cy = mpn_add (u0, u0, un, tp, u1n);
    132  1.1.1.2  mrg 
    133  1.1.1.2  mrg 	}
    134  1.1.1.2  mrg       u0[un] = cy;
    135  1.1.1.2  mrg       ctx->un = un + (cy > 0);
    136  1.1.1.2  mrg     }
    137  1.1.1.2  mrg }
    138  1.1.1.2  mrg 
    139  1.1.1.2  mrg /* Temporary storage: 3*(n+1) for u. If hgcd2 succeeds, we need n for
    140  1.1.1.2  mrg    the matrix-vector multiplication adjusting a, b. If hgcd fails, we
    141  1.1.1.2  mrg    need at most n for the quotient and n+1 for the u update (reusing
    142  1.1.1.2  mrg    the extra u). In all, 4n + 3. */
    143      1.1  mrg 
    144      1.1  mrg mp_size_t
    145      1.1  mrg mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
    146      1.1  mrg 		     mp_ptr ap, mp_ptr bp, mp_size_t n,
    147      1.1  mrg 		     mp_ptr tp)
    148      1.1  mrg {
    149      1.1  mrg   mp_size_t ualloc = n + 1;
    150      1.1  mrg 
    151      1.1  mrg   /* Keeps track of the second row of the reduction matrix
    152      1.1  mrg    *
    153      1.1  mrg    *   M = (v0, v1 ; u0, u1)
    154      1.1  mrg    *
    155      1.1  mrg    * which correspond to the first column of the inverse
    156      1.1  mrg    *
    157      1.1  mrg    *   M^{-1} = (u1, -v1; -u0, v0)
    158  1.1.1.2  mrg    *
    159  1.1.1.2  mrg    * This implies that
    160  1.1.1.2  mrg    *
    161  1.1.1.2  mrg    *   a =  u1 A (mod B)
    162  1.1.1.2  mrg    *   b = -u0 A (mod B)
    163  1.1.1.2  mrg    *
    164  1.1.1.2  mrg    * where A, B denotes the input values.
    165      1.1  mrg    */
    166      1.1  mrg 
    167  1.1.1.2  mrg   struct gcdext_ctx ctx;
    168      1.1  mrg   mp_size_t un;
    169      1.1  mrg   mp_ptr u0;
    170      1.1  mrg   mp_ptr u1;
    171      1.1  mrg   mp_ptr u2;
    172      1.1  mrg 
    173      1.1  mrg   MPN_ZERO (tp, 3*ualloc);
    174      1.1  mrg   u0 = tp; tp += ualloc;
    175      1.1  mrg   u1 = tp; tp += ualloc;
    176      1.1  mrg   u2 = tp; tp += ualloc;
    177      1.1  mrg 
    178      1.1  mrg   u1[0] = 1; un = 1;
    179      1.1  mrg 
    180  1.1.1.2  mrg   ctx.gp = gp;
    181  1.1.1.2  mrg   ctx.up = up;
    182  1.1.1.2  mrg   ctx.usize = usize;
    183  1.1.1.2  mrg 
    184      1.1  mrg   /* FIXME: Handle n == 2 differently, after the loop? */
    185      1.1  mrg   while (n >= 2)
    186      1.1  mrg     {
    187      1.1  mrg       struct hgcd_matrix1 M;
    188      1.1  mrg       mp_limb_t ah, al, bh, bl;
    189      1.1  mrg       mp_limb_t mask;
    190      1.1  mrg 
    191      1.1  mrg       mask = ap[n-1] | bp[n-1];
    192      1.1  mrg       ASSERT (mask > 0);
    193      1.1  mrg 
    194      1.1  mrg       if (mask & GMP_NUMB_HIGHBIT)
    195      1.1  mrg 	{
    196      1.1  mrg 	  ah = ap[n-1]; al = ap[n-2];
    197      1.1  mrg 	  bh = bp[n-1]; bl = bp[n-2];
    198      1.1  mrg 	}
    199      1.1  mrg       else if (n == 2)
    200      1.1  mrg 	{
    201      1.1  mrg 	  /* We use the full inputs without truncation, so we can
    202      1.1  mrg 	     safely shift left. */
    203      1.1  mrg 	  int shift;
    204      1.1  mrg 
    205      1.1  mrg 	  count_leading_zeros (shift, mask);
    206      1.1  mrg 	  ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
    207      1.1  mrg 	  al = ap[0] << shift;
    208      1.1  mrg 	  bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
    209      1.1  mrg 	  bl = bp[0] << shift;
    210      1.1  mrg 	}
    211      1.1  mrg       else
    212      1.1  mrg 	{
    213      1.1  mrg 	  int shift;
    214      1.1  mrg 
    215      1.1  mrg 	  count_leading_zeros (shift, mask);
    216      1.1  mrg 	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
    217      1.1  mrg 	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
    218      1.1  mrg 	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
    219      1.1  mrg 	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
    220      1.1  mrg 	}
    221      1.1  mrg 
    222      1.1  mrg       /* Try an mpn_nhgcd2 step */
    223      1.1  mrg       if (mpn_hgcd2 (ah, al, bh, bl, &M))
    224      1.1  mrg 	{
    225  1.1.1.2  mrg 	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
    226      1.1  mrg 	  MP_PTR_SWAP (ap, tp);
    227      1.1  mrg 	  un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
    228      1.1  mrg 	  MP_PTR_SWAP (u0, u2);
    229      1.1  mrg 	}
    230      1.1  mrg       else
    231      1.1  mrg 	{
    232      1.1  mrg 	  /* mpn_hgcd2 has failed. Then either one of a or b is very
    233      1.1  mrg 	     small, or the difference is very small. Perform one
    234      1.1  mrg 	     subtraction followed by one division. */
    235  1.1.1.2  mrg 	  ctx.u0 = u0;
    236  1.1.1.2  mrg 	  ctx.u1 = u1;
    237  1.1.1.2  mrg 	  ctx.tp = u2;
    238  1.1.1.2  mrg 	  ctx.un = un;
    239      1.1  mrg 
    240      1.1  mrg 	  /* Temporary storage n for the quotient and ualloc for the
    241      1.1  mrg 	     new cofactor. */
    242  1.1.1.2  mrg 	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
    243      1.1  mrg 	  if (n == 0)
    244  1.1.1.2  mrg 	    return ctx.gn;
    245      1.1  mrg 
    246  1.1.1.2  mrg 	  un = ctx.un;
    247      1.1  mrg 	}
    248      1.1  mrg     }
    249      1.1  mrg   ASSERT_ALWAYS (ap[0] > 0);
    250      1.1  mrg   ASSERT_ALWAYS (bp[0] > 0);
    251      1.1  mrg 
    252      1.1  mrg   if (ap[0] == bp[0])
    253      1.1  mrg     {
    254      1.1  mrg       int c;
    255      1.1  mrg 
    256      1.1  mrg       /* Which cofactor to return now? Candidates are +u1 and -u0,
    257      1.1  mrg 	 depending on which of a and b was most recently reduced,
    258      1.1  mrg 	 which we don't keep track of. So compare and get the smallest
    259      1.1  mrg 	 one. */
    260      1.1  mrg 
    261      1.1  mrg       gp[0] = ap[0];
    262      1.1  mrg 
    263      1.1  mrg       MPN_CMP (c, u0, u1, un);
    264      1.1  mrg       ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
    265      1.1  mrg       if (c < 0)
    266      1.1  mrg 	{
    267      1.1  mrg 	  MPN_NORMALIZE (u0, un);
    268      1.1  mrg 	  MPN_COPY (up, u0, un);
    269      1.1  mrg 	  *usize = -un;
    270      1.1  mrg 	}
    271      1.1  mrg       else
    272      1.1  mrg 	{
    273      1.1  mrg 	  MPN_NORMALIZE_NOT_ZERO (u1, un);
    274      1.1  mrg 	  MPN_COPY (up, u1, un);
    275      1.1  mrg 	  *usize = un;
    276      1.1  mrg 	}
    277      1.1  mrg       return 1;
    278      1.1  mrg     }
    279      1.1  mrg   else
    280      1.1  mrg     {
    281      1.1  mrg       mp_limb_t uh, vh;
    282      1.1  mrg       mp_limb_signed_t u;
    283      1.1  mrg       mp_limb_signed_t v;
    284      1.1  mrg       int negate;
    285      1.1  mrg 
    286      1.1  mrg       gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);
    287      1.1  mrg 
    288      1.1  mrg       /* Set up = u u1 - v u0. Keep track of size, un grows by one or
    289      1.1  mrg 	 two limbs. */
    290      1.1  mrg 
    291      1.1  mrg       if (u == 0)
    292      1.1  mrg 	{
    293      1.1  mrg 	  ASSERT (v == 1);
    294      1.1  mrg 	  MPN_NORMALIZE (u0, un);
    295      1.1  mrg 	  MPN_COPY (up, u0, un);
    296      1.1  mrg 	  *usize = -un;
    297      1.1  mrg 	  return 1;
    298      1.1  mrg 	}
    299      1.1  mrg       else if (v == 0)
    300      1.1  mrg 	{
    301      1.1  mrg 	  ASSERT (u == 1);
    302      1.1  mrg 	  MPN_NORMALIZE (u1, un);
    303      1.1  mrg 	  MPN_COPY (up, u1, un);
    304      1.1  mrg 	  *usize = un;
    305      1.1  mrg 	  return 1;
    306      1.1  mrg 	}
    307      1.1  mrg       else if (u > 0)
    308      1.1  mrg 	{
    309      1.1  mrg 	  negate = 0;
    310      1.1  mrg 	  ASSERT (v < 0);
    311      1.1  mrg 	  v = -v;
    312      1.1  mrg 	}
    313      1.1  mrg       else
    314      1.1  mrg 	{
    315      1.1  mrg 	  negate = 1;
    316      1.1  mrg 	  ASSERT (v > 0);
    317      1.1  mrg 	  u = -u;
    318      1.1  mrg 	}
    319      1.1  mrg 
    320      1.1  mrg       uh = mpn_mul_1 (up, u1, un, u);
    321      1.1  mrg       vh = mpn_addmul_1 (up, u0, un, v);
    322      1.1  mrg 
    323      1.1  mrg       if ( (uh | vh) > 0)
    324      1.1  mrg 	{
    325      1.1  mrg 	  uh += vh;
    326      1.1  mrg 	  up[un++] = uh;
    327      1.1  mrg 	  if (uh < vh)
    328      1.1  mrg 	    up[un++] = 1;
    329      1.1  mrg 	}
    330      1.1  mrg 
    331      1.1  mrg       MPN_NORMALIZE_NOT_ZERO (up, un);
    332      1.1  mrg 
    333      1.1  mrg       *usize = negate ? -un : un;
    334      1.1  mrg       return 1;
    335      1.1  mrg     }
    336      1.1  mrg }
    337