/* mpn_gcdext -- Extended Greatest Common Divisor.

Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009, 2012 Free
Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
25 1.1.1.2 mrg /* Here, d is the index of the cofactor to update. FIXME: Could use qn
26 1.1.1.2 mrg = 0 for the common case q = 1. */
27 1.1.1.2 mrg void
28 1.1.1.2 mrg mpn_gcdext_hook (void *p, mp_srcptr gp, mp_size_t gn,
29 1.1.1.2 mrg mp_srcptr qp, mp_size_t qn, int d)
30 1.1.1.2 mrg {
31 1.1.1.2 mrg struct gcdext_ctx *ctx = (struct gcdext_ctx *) p;
32 1.1.1.2 mrg mp_size_t un = ctx->un;
33 1.1.1.2 mrg
34 1.1.1.2 mrg if (gp)
35 1.1.1.2 mrg {
36 1.1.1.2 mrg mp_srcptr up;
37 1.1.1.2 mrg
38 1.1.1.2 mrg ASSERT (gn > 0);
39 1.1.1.2 mrg ASSERT (gp[gn-1] > 0);
40 1.1.1.2 mrg
41 1.1.1.2 mrg MPN_COPY (ctx->gp, gp, gn);
42 1.1.1.2 mrg ctx->gn = gn;
43 1.1.1.2 mrg
44 1.1.1.2 mrg if (d < 0)
45 1.1.1.2 mrg {
46 1.1.1.2 mrg int c;
47 1.1.1.2 mrg
48 1.1.1.2 mrg /* Must return the smallest cofactor, +u1 or -u0 */
49 1.1.1.2 mrg MPN_CMP (c, ctx->u0, ctx->u1, un);
50 1.1.1.2 mrg ASSERT (c != 0 || (un == 1 && ctx->u0[0] == 1 && ctx->u1[0] == 1));
51 1.1.1.2 mrg
52 1.1.1.2 mrg d = c < 0;
53 1.1.1.2 mrg }
54 1.1.1.2 mrg
55 1.1.1.2 mrg up = d ? ctx->u0 : ctx->u1;
56 1.1.1.2 mrg
57 1.1.1.2 mrg MPN_NORMALIZE (up, un);
58 1.1.1.2 mrg MPN_COPY (ctx->up, up, un);
59 1.1.1.2 mrg
60 1.1.1.2 mrg *ctx->usize = d ? -un : un;
61 1.1.1.2 mrg }
62 1.1.1.2 mrg else
63 1.1.1.2 mrg {
64 1.1.1.2 mrg mp_limb_t cy;
65 1.1.1.2 mrg mp_ptr u0 = ctx->u0;
66 1.1.1.2 mrg mp_ptr u1 = ctx->u1;
67 1.1.1.2 mrg
68 1.1.1.2 mrg ASSERT (d >= 0);
69 1.1.1.2 mrg
70 1.1.1.2 mrg if (d)
71 1.1.1.2 mrg MP_PTR_SWAP (u0, u1);
72 1.1.1.2 mrg
73 1.1.1.2 mrg qn -= (qp[qn-1] == 0);
74 1.1.1.2 mrg
75 1.1.1.2 mrg /* Update u0 += q * u1 */
76 1.1.1.2 mrg if (qn == 1)
77 1.1.1.2 mrg {
78 1.1.1.2 mrg mp_limb_t q = qp[0];
79 1.1.1.2 mrg
80 1.1.1.2 mrg if (q == 1)
81 1.1.1.2 mrg /* A common case. */
82 1.1.1.2 mrg cy = mpn_add_n (u0, u0, u1, un);
83 1.1.1.2 mrg else
84 1.1.1.2 mrg cy = mpn_addmul_1 (u0, u1, un, q);
85 1.1.1.2 mrg }
86 1.1.1.2 mrg else
87 1.1.1.2 mrg {
88 1.1.1.2 mrg mp_size_t u1n;
89 1.1.1.2 mrg mp_ptr tp;
90 1.1.1.2 mrg
91 1.1.1.2 mrg u1n = un;
92 1.1.1.2 mrg MPN_NORMALIZE (u1, u1n);
93 1.1.1.2 mrg
94 1.1.1.2 mrg if (u1n == 0)
95 1.1.1.2 mrg return;
96 1.1.1.2 mrg
97 1.1.1.2 mrg /* Should always have u1n == un here, and u1 >= u0. The
98 1.1.1.2 mrg reason is that we alternate adding u0 to u1 and u1 to u0
99 1.1.1.2 mrg (corresponding to subtractions a - b and b - a), and we
100 1.1.1.2 mrg can get a large quotient only just after a switch, which
101 1.1.1.2 mrg means that we'll add (a multiple of) the larger u to the
102 1.1.1.2 mrg smaller. */
103 1.1.1.2 mrg
104 1.1.1.2 mrg tp = ctx->tp;
105 1.1.1.2 mrg
106 1.1.1.2 mrg if (qn > u1n)
107 1.1.1.2 mrg mpn_mul (tp, qp, qn, u1, u1n);
108 1.1.1.2 mrg else
109 1.1.1.2 mrg mpn_mul (tp, u1, u1n, qp, qn);
110 1.1.1.2 mrg
111 1.1.1.2 mrg u1n += qn;
112 1.1.1.2 mrg u1n -= tp[u1n-1] == 0;
113 1.1.1.2 mrg
114 1.1.1.2 mrg if (u1n >= un)
115 1.1.1.2 mrg {
116 1.1.1.2 mrg cy = mpn_add (u0, tp, u1n, u0, un);
117 1.1.1.2 mrg un = u1n;
118 1.1.1.2 mrg }
119 1.1.1.2 mrg else
120 1.1.1.2 mrg /* Note: Unlikely case, maybe never happens? */
121 1.1.1.2 mrg cy = mpn_add (u0, u0, un, tp, u1n);
122 1.1.1.2 mrg
123 1.1.1.2 mrg }
124 1.1.1.2 mrg u0[un] = cy;
125 1.1.1.2 mrg ctx->un = un + (cy > 0);
126 1.1.1.2 mrg }
127 1.1.1.2 mrg }
128 1.1.1.2 mrg
129 1.1.1.2 mrg /* Temporary storage: 3*(n+1) for u. If hgcd2 succeeds, we need n for
130 1.1.1.2 mrg the matrix-vector multiplication adjusting a, b. If hgcd fails, we
131 1.1.1.2 mrg need at most n for the quotient and n+1 for the u update (reusing
132 1.1.1.2 mrg the extra u). In all, 4n + 3. */
133 1.1 mrg
134 1.1 mrg mp_size_t
135 1.1 mrg mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
136 1.1 mrg mp_ptr ap, mp_ptr bp, mp_size_t n,
137 1.1 mrg mp_ptr tp)
138 1.1 mrg {
139 1.1 mrg mp_size_t ualloc = n + 1;
140 1.1 mrg
141 1.1 mrg /* Keeps track of the second row of the reduction matrix
142 1.1 mrg *
143 1.1 mrg * M = (v0, v1 ; u0, u1)
144 1.1 mrg *
145 1.1 mrg * which correspond to the first column of the inverse
146 1.1 mrg *
147 1.1 mrg * M^{-1} = (u1, -v1; -u0, v0)
148 1.1.1.2 mrg *
149 1.1.1.2 mrg * This implies that
150 1.1.1.2 mrg *
151 1.1.1.2 mrg * a = u1 A (mod B)
152 1.1.1.2 mrg * b = -u0 A (mod B)
153 1.1.1.2 mrg *
154 1.1.1.2 mrg * where A, B denotes the input values.
155 1.1 mrg */
156 1.1 mrg
157 1.1.1.2 mrg struct gcdext_ctx ctx;
158 1.1 mrg mp_size_t un;
159 1.1 mrg mp_ptr u0;
160 1.1 mrg mp_ptr u1;
161 1.1 mrg mp_ptr u2;
162 1.1 mrg
163 1.1 mrg MPN_ZERO (tp, 3*ualloc);
164 1.1 mrg u0 = tp; tp += ualloc;
165 1.1 mrg u1 = tp; tp += ualloc;
166 1.1 mrg u2 = tp; tp += ualloc;
167 1.1 mrg
168 1.1 mrg u1[0] = 1; un = 1;
169 1.1 mrg
170 1.1.1.2 mrg ctx.gp = gp;
171 1.1.1.2 mrg ctx.up = up;
172 1.1.1.2 mrg ctx.usize = usize;
173 1.1.1.2 mrg
174 1.1 mrg /* FIXME: Handle n == 2 differently, after the loop? */
175 1.1 mrg while (n >= 2)
176 1.1 mrg {
177 1.1 mrg struct hgcd_matrix1 M;
178 1.1 mrg mp_limb_t ah, al, bh, bl;
179 1.1 mrg mp_limb_t mask;
180 1.1 mrg
181 1.1 mrg mask = ap[n-1] | bp[n-1];
182 1.1 mrg ASSERT (mask > 0);
183 1.1 mrg
184 1.1 mrg if (mask & GMP_NUMB_HIGHBIT)
185 1.1 mrg {
186 1.1 mrg ah = ap[n-1]; al = ap[n-2];
187 1.1 mrg bh = bp[n-1]; bl = bp[n-2];
188 1.1 mrg }
189 1.1 mrg else if (n == 2)
190 1.1 mrg {
191 1.1 mrg /* We use the full inputs without truncation, so we can
192 1.1 mrg safely shift left. */
193 1.1 mrg int shift;
194 1.1 mrg
195 1.1 mrg count_leading_zeros (shift, mask);
196 1.1 mrg ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
197 1.1 mrg al = ap[0] << shift;
198 1.1 mrg bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
199 1.1 mrg bl = bp[0] << shift;
200 1.1 mrg }
201 1.1 mrg else
202 1.1 mrg {
203 1.1 mrg int shift;
204 1.1 mrg
205 1.1 mrg count_leading_zeros (shift, mask);
206 1.1 mrg ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
207 1.1 mrg al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
208 1.1 mrg bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
209 1.1 mrg bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
210 1.1 mrg }
211 1.1 mrg
212 1.1 mrg /* Try an mpn_nhgcd2 step */
213 1.1 mrg if (mpn_hgcd2 (ah, al, bh, bl, &M))
214 1.1 mrg {
215 1.1.1.2 mrg n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
216 1.1 mrg MP_PTR_SWAP (ap, tp);
217 1.1 mrg un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
218 1.1 mrg MP_PTR_SWAP (u0, u2);
219 1.1 mrg }
220 1.1 mrg else
221 1.1 mrg {
222 1.1 mrg /* mpn_hgcd2 has failed. Then either one of a or b is very
223 1.1 mrg small, or the difference is very small. Perform one
224 1.1 mrg subtraction followed by one division. */
225 1.1.1.2 mrg ctx.u0 = u0;
226 1.1.1.2 mrg ctx.u1 = u1;
227 1.1.1.2 mrg ctx.tp = u2;
228 1.1.1.2 mrg ctx.un = un;
229 1.1 mrg
230 1.1 mrg /* Temporary storage n for the quotient and ualloc for the
231 1.1 mrg new cofactor. */
232 1.1.1.2 mrg n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
233 1.1 mrg if (n == 0)
234 1.1.1.2 mrg return ctx.gn;
235 1.1 mrg
236 1.1.1.2 mrg un = ctx.un;
237 1.1 mrg }
238 1.1 mrg }
239 1.1 mrg ASSERT_ALWAYS (ap[0] > 0);
240 1.1 mrg ASSERT_ALWAYS (bp[0] > 0);
241 1.1 mrg
242 1.1 mrg if (ap[0] == bp[0])
243 1.1 mrg {
244 1.1 mrg int c;
245 1.1 mrg
246 1.1 mrg /* Which cofactor to return now? Candidates are +u1 and -u0,
247 1.1 mrg depending on which of a and b was most recently reduced,
248 1.1 mrg which we don't keep track of. So compare and get the smallest
249 1.1 mrg one. */
250 1.1 mrg
251 1.1 mrg gp[0] = ap[0];
252 1.1 mrg
253 1.1 mrg MPN_CMP (c, u0, u1, un);
254 1.1 mrg ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
255 1.1 mrg if (c < 0)
256 1.1 mrg {
257 1.1 mrg MPN_NORMALIZE (u0, un);
258 1.1 mrg MPN_COPY (up, u0, un);
259 1.1 mrg *usize = -un;
260 1.1 mrg }
261 1.1 mrg else
262 1.1 mrg {
263 1.1 mrg MPN_NORMALIZE_NOT_ZERO (u1, un);
264 1.1 mrg MPN_COPY (up, u1, un);
265 1.1 mrg *usize = un;
266 1.1 mrg }
267 1.1 mrg return 1;
268 1.1 mrg }
269 1.1 mrg else
270 1.1 mrg {
271 1.1 mrg mp_limb_t uh, vh;
272 1.1 mrg mp_limb_signed_t u;
273 1.1 mrg mp_limb_signed_t v;
274 1.1 mrg int negate;
275 1.1 mrg
276 1.1 mrg gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);
277 1.1 mrg
278 1.1 mrg /* Set up = u u1 - v u0. Keep track of size, un grows by one or
279 1.1 mrg two limbs. */
280 1.1 mrg
281 1.1 mrg if (u == 0)
282 1.1 mrg {
283 1.1 mrg ASSERT (v == 1);
284 1.1 mrg MPN_NORMALIZE (u0, un);
285 1.1 mrg MPN_COPY (up, u0, un);
286 1.1 mrg *usize = -un;
287 1.1 mrg return 1;
288 1.1 mrg }
289 1.1 mrg else if (v == 0)
290 1.1 mrg {
291 1.1 mrg ASSERT (u == 1);
292 1.1 mrg MPN_NORMALIZE (u1, un);
293 1.1 mrg MPN_COPY (up, u1, un);
294 1.1 mrg *usize = un;
295 1.1 mrg return 1;
296 1.1 mrg }
297 1.1 mrg else if (u > 0)
298 1.1 mrg {
299 1.1 mrg negate = 0;
300 1.1 mrg ASSERT (v < 0);
301 1.1 mrg v = -v;
302 1.1 mrg }
303 1.1 mrg else
304 1.1 mrg {
305 1.1 mrg negate = 1;
306 1.1 mrg ASSERT (v > 0);
307 1.1 mrg u = -u;
308 1.1 mrg }
309 1.1 mrg
310 1.1 mrg uh = mpn_mul_1 (up, u1, un, u);
311 1.1 mrg vh = mpn_addmul_1 (up, u0, un, v);
312 1.1 mrg
313 1.1 mrg if ( (uh | vh) > 0)
314 1.1 mrg {
315 1.1 mrg uh += vh;
316 1.1 mrg up[un++] = uh;
317 1.1 mrg if (uh < vh)
318 1.1 mrg up[un++] = 1;
319 1.1 mrg }
320 1.1 mrg
321 1.1 mrg MPN_NORMALIZE_NOT_ZERO (up, un);
322 1.1 mrg
323 1.1 mrg *usize = negate ? -un : un;
324 1.1 mrg return 1;
325 1.1 mrg }
326 1.1 mrg }
327