Home | History | Annotate | Line # | Download | only in generic
gcdext_lehmer.c revision 1.1.1.1
      1  1.1  mrg /* mpn_gcdext -- Extended Greatest Common Divisor.
      2  1.1  mrg 
      3  1.1  mrg Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software
      4  1.1  mrg Foundation, Inc.
      5  1.1  mrg 
      6  1.1  mrg This file is part of the GNU MP Library.
      7  1.1  mrg 
      8  1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
      9  1.1  mrg it under the terms of the GNU Lesser General Public License as published by
     10  1.1  mrg the Free Software Foundation; either version 3 of the License, or (at your
     11  1.1  mrg option) any later version.
     12  1.1  mrg 
     13  1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
     14  1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     15  1.1  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     16  1.1  mrg License for more details.
     17  1.1  mrg 
     18  1.1  mrg You should have received a copy of the GNU Lesser General Public License
     19  1.1  mrg along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
     20  1.1  mrg 
     21  1.1  mrg #include "gmp.h"
     22  1.1  mrg #include "gmp-impl.h"
     23  1.1  mrg #include "longlong.h"
     24  1.1  mrg 
/* Temporary storage: 3*(n+1) for u. n+1 for the matrix-vector
   multiplications (if hgcd2 succeeds). If hgcd fails, n+1 limbs are
   needed for the division, with most n for the quotient, and n+1 for
   the product q u0. In all, 4n + 3. */

/* Lehmer-style extended gcd of two n-limb operands {ap,n} and {bp,n}.
 *
 * The gcd is stored at gp and its limb count is the return value
 * (either 1 from the single-limb endgame below, or gn as produced by
 * mpn_gcdext_subdiv_step).  A cofactor is stored at {up,.}; *usize
 * receives its limb count, negated when the cofactor is negative
 * (magnitude-plus-sign-in-size convention, visible in the
 * "*usize = -un" assignments below).
 *
 * ap and bp are used as working storage and destroyed.  tp must
 * provide the scratch space described in the storage note above this
 * function (4n + 3 limbs).
 *
 * NOTE(review): the precise cofactor identity (u*A = g mod B vs. a
 * full Bezout pair) is defined by the mpn_gcdext contract elsewhere in
 * GMP — confirm against gmp-impl.h / the GMP manual.
 */
mp_size_t
mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
		     mp_ptr ap, mp_ptr bp, mp_size_t n,
		     mp_ptr tp)
{
  mp_size_t ualloc = n + 1;

  /* Keeps track of the second row of the reduction matrix
   *
   *   M = (v0, v1 ; u0, u1)
   *
   * which correspond to the first column of the inverse
   *
   *   M^{-1} = (u1, -v1; -u0, v0)
   */

  mp_size_t un;   /* current limb count of the cofactors u0/u1 */
  mp_ptr u0;
  mp_ptr u1;
  mp_ptr u2;      /* spare area, swapped with u0 after each matrix step */

  /* Carve three (n+1)-limb cofactor areas out of the scratch space;
     the rest of tp is reused inside the loop below. */
  MPN_ZERO (tp, 3*ualloc);
  u0 = tp; tp += ualloc;
  u1 = tp; tp += ualloc;
  u2 = tp; tp += ualloc;

  /* Identity start: u0 = 0 (from MPN_ZERO above), u1 = 1. */
  u1[0] = 1; un = 1;

  /* FIXME: Handle n == 2 differently, after the loop? */
  while (n >= 2)
    {
      struct hgcd_matrix1 M;
      mp_limb_t ah, al, bh, bl;
      mp_limb_t mask;

      /* mask's high bits come from whichever of a, b is larger;
	 at least one top limb must be nonzero. */
      mask = ap[n-1] | bp[n-1];
      ASSERT (mask > 0);

      if (mask & GMP_NUMB_HIGHBIT)
	{
	  /* Already normalized: take the two high limbs directly. */
	  ah = ap[n-1]; al = ap[n-2];
	  bh = bp[n-1]; bl = bp[n-2];
	}
      else if (n == 2)
	{
	  /* We use the full inputs without truncation, so we can
	     safely shift left. */
	  int shift;

	  count_leading_zeros (shift, mask);
	  ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
	  al = ap[0] << shift;
	  bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
	  bl = bp[0] << shift;
	}
      else
	{
	  /* General case: build normalized two-limb approximations of
	     a and b from their three high limbs. */
	  int shift;

	  count_leading_zeros (shift, mask);
	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
	}

      /* Try an mpn_nhgcd2 step */
      if (mpn_hgcd2 (ah, al, bh, bl, &M))
	{
	  /* Success: reduce (a,b) by M^{-1} and update the cofactor
	     row by M, swapping the result buffers into place. */
	  n = mpn_hgcd_mul_matrix1_inverse_vector (&M, tp, ap, bp, n);
	  MP_PTR_SWAP (ap, tp);
	  un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
	  MP_PTR_SWAP (u0, u2);
	}
      else
	{
	  /* mpn_hgcd2 has failed. Then either one of a or b is very
	     small, or the difference is very small. Perform one
	     subtraction followed by one division. */
	  mp_size_t gn;
	  mp_size_t updated_un = un;

	  /* Temporary storage n for the quotient and ualloc for the
	     new cofactor. */
	  n = mpn_gcdext_subdiv_step (gp, &gn, up, usize, ap, bp, n,
				      u0, u1, &updated_un, tp, u2);
	  if (n == 0)
	    /* The subdiv step found the gcd and already stored the
	       cofactor; gn is its limb count. */
	    return gn;

	  un = updated_un;
	}
    }
  /* Loop exit: both operands are down to a single nonzero limb. */
  ASSERT_ALWAYS (ap[0] > 0);
  ASSERT_ALWAYS (bp[0] > 0);

  if (ap[0] == bp[0])
    {
      int c;

      /* Which cofactor to return now? Candidates are +u1 and -u0,
	 depending on which of a and b was most recently reduced,
	 which we don't keep track of. So compare and get the smallest
	 one. */

      gp[0] = ap[0];

      MPN_CMP (c, u0, u1, un);
      ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
      if (c < 0)
	{
	  /* u0 is smaller: return it with negative sign (encoded in
	     *usize). */
	  MPN_NORMALIZE (u0, un);
	  MPN_COPY (up, u0, un);
	  *usize = -un;
	}
      else
	{
	  MPN_NORMALIZE_NOT_ZERO (u1, un);
	  MPN_COPY (up, u1, un);
	  *usize = un;
	}
      return 1;
    }
  else
    {
      mp_limb_t uh, vh;
      mp_limb_signed_t u;
      mp_limb_signed_t v;
      int negate;

      /* Finish with a single-limb extended gcd on the remaining
	 limbs; u, v are the signed single-limb cofactors. */
      gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);

      /* Set up = u u1 - v u0. Keep track of size, un grows by one or
	 two limbs. */

      if (u == 0)
	{
	  ASSERT (v == 1);
	  MPN_NORMALIZE (u0, un);
	  MPN_COPY (up, u0, un);
	  *usize = -un;
	  return 1;
	}
      else if (v == 0)
	{
	  ASSERT (u == 1);
	  MPN_NORMALIZE (u1, un);
	  MPN_COPY (up, u1, un);
	  *usize = un;
	  return 1;
	}
      else if (u > 0)
	{
	  /* u and v have opposite signs; normalize to positive
	     magnitudes and remember the overall sign in negate. */
	  negate = 0;
	  ASSERT (v < 0);
	  v = -v;
	}
      else
	{
	  negate = 1;
	  ASSERT (v > 0);
	  u = -u;
	}

      /* up = u * u1 + v * u0 on magnitudes (the subtraction's sign is
	 carried by negate); uh, vh are the out-of-range carry limbs. */
      uh = mpn_mul_1 (up, u1, un, u);
      vh = mpn_addmul_1 (up, u0, un, v);

      if ( (uh | vh) > 0)
	{
	  /* Append one carry limb; uh + vh may itself carry, in which
	     case a second limb (value 1) is appended. */
	  uh += vh;
	  up[un++] = uh;
	  if (uh < vh)
	    up[un++] = 1;
	}

      MPN_NORMALIZE_NOT_ZERO (up, un);

      *usize = negate ? -un : un;
      return 1;
    }
}
    210