/* mpn_gcdext -- Extended Greatest Common Divisor.

Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

/* Temporary storage: 3*(n+1) for u. n+1 for the matrix-vector
   multiplications (if hgcd2 succeeds). If hgcd fails, n+1 limbs are
   needed for the division, with most n for the quotient, and n+1 for
   the product q u0. In all, 4n + 3. */

30 1.1 mrg mp_size_t
31 1.1 mrg mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
32 1.1 mrg mp_ptr ap, mp_ptr bp, mp_size_t n,
33 1.1 mrg mp_ptr tp)
34 1.1 mrg {
35 1.1 mrg mp_size_t ualloc = n + 1;
36 1.1 mrg
37 1.1 mrg /* Keeps track of the second row of the reduction matrix
38 1.1 mrg *
39 1.1 mrg * M = (v0, v1 ; u0, u1)
40 1.1 mrg *
41 1.1 mrg * which correspond to the first column of the inverse
42 1.1 mrg *
43 1.1 mrg * M^{-1} = (u1, -v1; -u0, v0)
44 1.1 mrg */
45 1.1 mrg
46 1.1 mrg mp_size_t un;
47 1.1 mrg mp_ptr u0;
48 1.1 mrg mp_ptr u1;
49 1.1 mrg mp_ptr u2;
50 1.1 mrg
51 1.1 mrg MPN_ZERO (tp, 3*ualloc);
52 1.1 mrg u0 = tp; tp += ualloc;
53 1.1 mrg u1 = tp; tp += ualloc;
54 1.1 mrg u2 = tp; tp += ualloc;
55 1.1 mrg
56 1.1 mrg u1[0] = 1; un = 1;
57 1.1 mrg
58 1.1 mrg /* FIXME: Handle n == 2 differently, after the loop? */
59 1.1 mrg while (n >= 2)
60 1.1 mrg {
61 1.1 mrg struct hgcd_matrix1 M;
62 1.1 mrg mp_limb_t ah, al, bh, bl;
63 1.1 mrg mp_limb_t mask;
64 1.1 mrg
65 1.1 mrg mask = ap[n-1] | bp[n-1];
66 1.1 mrg ASSERT (mask > 0);
67 1.1 mrg
68 1.1 mrg if (mask & GMP_NUMB_HIGHBIT)
69 1.1 mrg {
70 1.1 mrg ah = ap[n-1]; al = ap[n-2];
71 1.1 mrg bh = bp[n-1]; bl = bp[n-2];
72 1.1 mrg }
73 1.1 mrg else if (n == 2)
74 1.1 mrg {
75 1.1 mrg /* We use the full inputs without truncation, so we can
76 1.1 mrg safely shift left. */
77 1.1 mrg int shift;
78 1.1 mrg
79 1.1 mrg count_leading_zeros (shift, mask);
80 1.1 mrg ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
81 1.1 mrg al = ap[0] << shift;
82 1.1 mrg bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
83 1.1 mrg bl = bp[0] << shift;
84 1.1 mrg }
85 1.1 mrg else
86 1.1 mrg {
87 1.1 mrg int shift;
88 1.1 mrg
89 1.1 mrg count_leading_zeros (shift, mask);
90 1.1 mrg ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
91 1.1 mrg al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
92 1.1 mrg bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
93 1.1 mrg bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
94 1.1 mrg }
95 1.1 mrg
96 1.1 mrg /* Try an mpn_nhgcd2 step */
97 1.1 mrg if (mpn_hgcd2 (ah, al, bh, bl, &M))
98 1.1 mrg {
99 1.1 mrg n = mpn_hgcd_mul_matrix1_inverse_vector (&M, tp, ap, bp, n);
100 1.1 mrg MP_PTR_SWAP (ap, tp);
101 1.1 mrg un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
102 1.1 mrg MP_PTR_SWAP (u0, u2);
103 1.1 mrg }
104 1.1 mrg else
105 1.1 mrg {
106 1.1 mrg /* mpn_hgcd2 has failed. Then either one of a or b is very
107 1.1 mrg small, or the difference is very small. Perform one
108 1.1 mrg subtraction followed by one division. */
109 1.1 mrg mp_size_t gn;
110 1.1 mrg mp_size_t updated_un = un;
111 1.1 mrg
112 1.1 mrg /* Temporary storage n for the quotient and ualloc for the
113 1.1 mrg new cofactor. */
114 1.1 mrg n = mpn_gcdext_subdiv_step (gp, &gn, up, usize, ap, bp, n,
115 1.1 mrg u0, u1, &updated_un, tp, u2);
116 1.1 mrg if (n == 0)
117 1.1 mrg return gn;
118 1.1 mrg
119 1.1 mrg un = updated_un;
120 1.1 mrg }
121 1.1 mrg }
122 1.1 mrg ASSERT_ALWAYS (ap[0] > 0);
123 1.1 mrg ASSERT_ALWAYS (bp[0] > 0);
124 1.1 mrg
125 1.1 mrg if (ap[0] == bp[0])
126 1.1 mrg {
127 1.1 mrg int c;
128 1.1 mrg
129 1.1 mrg /* Which cofactor to return now? Candidates are +u1 and -u0,
130 1.1 mrg depending on which of a and b was most recently reduced,
131 1.1 mrg which we don't keep track of. So compare and get the smallest
132 1.1 mrg one. */
133 1.1 mrg
134 1.1 mrg gp[0] = ap[0];
135 1.1 mrg
136 1.1 mrg MPN_CMP (c, u0, u1, un);
137 1.1 mrg ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
138 1.1 mrg if (c < 0)
139 1.1 mrg {
140 1.1 mrg MPN_NORMALIZE (u0, un);
141 1.1 mrg MPN_COPY (up, u0, un);
142 1.1 mrg *usize = -un;
143 1.1 mrg }
144 1.1 mrg else
145 1.1 mrg {
146 1.1 mrg MPN_NORMALIZE_NOT_ZERO (u1, un);
147 1.1 mrg MPN_COPY (up, u1, un);
148 1.1 mrg *usize = un;
149 1.1 mrg }
150 1.1 mrg return 1;
151 1.1 mrg }
152 1.1 mrg else
153 1.1 mrg {
154 1.1 mrg mp_limb_t uh, vh;
155 1.1 mrg mp_limb_signed_t u;
156 1.1 mrg mp_limb_signed_t v;
157 1.1 mrg int negate;
158 1.1 mrg
159 1.1 mrg gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);
160 1.1 mrg
161 1.1 mrg /* Set up = u u1 - v u0. Keep track of size, un grows by one or
162 1.1 mrg two limbs. */
163 1.1 mrg
164 1.1 mrg if (u == 0)
165 1.1 mrg {
166 1.1 mrg ASSERT (v == 1);
167 1.1 mrg MPN_NORMALIZE (u0, un);
168 1.1 mrg MPN_COPY (up, u0, un);
169 1.1 mrg *usize = -un;
170 1.1 mrg return 1;
171 1.1 mrg }
172 1.1 mrg else if (v == 0)
173 1.1 mrg {
174 1.1 mrg ASSERT (u == 1);
175 1.1 mrg MPN_NORMALIZE (u1, un);
176 1.1 mrg MPN_COPY (up, u1, un);
177 1.1 mrg *usize = un;
178 1.1 mrg return 1;
179 1.1 mrg }
180 1.1 mrg else if (u > 0)
181 1.1 mrg {
182 1.1 mrg negate = 0;
183 1.1 mrg ASSERT (v < 0);
184 1.1 mrg v = -v;
185 1.1 mrg }
186 1.1 mrg else
187 1.1 mrg {
188 1.1 mrg negate = 1;
189 1.1 mrg ASSERT (v > 0);
190 1.1 mrg u = -u;
191 1.1 mrg }
192 1.1 mrg
193 1.1 mrg uh = mpn_mul_1 (up, u1, un, u);
194 1.1 mrg vh = mpn_addmul_1 (up, u0, un, v);
195 1.1 mrg
196 1.1 mrg if ( (uh | vh) > 0)
197 1.1 mrg {
198 1.1 mrg uh += vh;
199 1.1 mrg up[un++] = uh;
200 1.1 mrg if (uh < vh)
201 1.1 mrg up[un++] = 1;
202 1.1 mrg }
203 1.1 mrg
204 1.1 mrg MPN_NORMALIZE_NOT_ZERO (up, un);
205 1.1 mrg
206 1.1 mrg *usize = negate ? -un : un;
207 1.1 mrg return 1;
208 1.1 mrg }
209 1.1 mrg }
210