/* mpn_gcdext -- Extended Greatest Common Divisor.

Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009, 2012 Free
Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

/* Callback for mpn_gcd_subdiv_step. On the final call, gp is non-NULL
   and {gp, gn} holds the gcd. Otherwise, {qp, qn} is the most recent
   quotient and d is the index of the cofactor to update. FIXME: Could
   use qn = 0 for the common case q = 1. */
void
mpn_gcdext_hook (void *p, mp_srcptr gp, mp_size_t gn,
                 mp_srcptr qp, mp_size_t qn, int d)
{
  struct gcdext_ctx *ctx = (struct gcdext_ctx *) p;
  mp_size_t un = ctx->un;

  if (gp)
    {
      mp_srcptr up;

      ASSERT (gn > 0);
      ASSERT (gp[gn-1] > 0);

      MPN_COPY (ctx->gp, gp, gn);
      ctx->gn = gn;

      if (d < 0)
        {
          int c;

          /* Must return the smallest cofactor, +u1 or -u0 */
          MPN_CMP (c, ctx->u0, ctx->u1, un);
          ASSERT (c != 0 || (un == 1 && ctx->u0[0] == 1 && ctx->u1[0] == 1));

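          /* c < 0 means u0 is the smaller cofactor; select it (d = 1),
             otherwise select u1 (d = 0). */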
          d = c < 0;
        }

      up = d ? ctx->u0 : ctx->u1;

      MPN_NORMALIZE (up, un);
      MPN_COPY (ctx->up, up, un);

      *ctx->usize = d ? -un : un;
    }
  else
    {
      mp_limb_t cy;
      mp_ptr u0 = ctx->u0;
      mp_ptr u1 = ctx->u1;

      ASSERT (d >= 0);

      if (d)
        MP_PTR_SWAP (u0, u1);

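      /* The quotient may come with an unnormalized high limb; strip a
         top zero limb if present. */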
      qn -= (qp[qn-1] == 0);

      /* Update u0 += q * u1 */
      if (qn == 1)
        {
          mp_limb_t q = qp[0];

          if (q == 1)
            /* A common case. */
            cy = mpn_add_n (u0, u0, u1, un);
          else
            cy = mpn_addmul_1 (u0, u1, un, q);
        }
      else
        {
          mp_size_t u1n;
          mp_ptr tp;

          u1n = un;
          MPN_NORMALIZE (u1, u1n);

          if (u1n == 0)
            return;

          /* Should always have u1n == un here, and u1 >= u0. The
             reason is that we alternate adding u0 to u1 and u1 to u0
             (corresponding to subtractions a - b and b - a), and we
             can get a large quotient only just after a switch, which
             means that we'll add (a multiple of) the larger u to the
             smaller. */

          tp = ctx->tp;

          if (qn > u1n)
            mpn_mul (tp, qp, qn, u1, u1n);
          else
            mpn_mul (tp, u1, u1n, qp, qn);

          u1n += qn;
          u1n -= tp[u1n-1] == 0;

          if (u1n >= un)
            {
              cy = mpn_add (u0, tp, u1n, u0, un);
              un = u1n;
            }
          else
            /* Note: Unlikely case, maybe never happens? */
            cy = mpn_add (u0, u0, un, tp, u1n);

        }
      u0[un] = cy;
      ctx->un = un + (cy > 0);
    }
}

/* Temporary storage: 3*(n+1) for u. If hgcd2 succeeds, we need n for
   the matrix-vector multiplication adjusting a, b. If hgcd fails, we
   need at most n for the quotient and n+1 for the u update (reusing
   the extra u). In all, 4n + 3. */
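/* For instance, n = 4 gives 3*(4+1) = 15 limbs for the cofactors plus
   at most 4 more, 4*4 + 3 = 19 limbs in all. */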

mp_size_t
mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
                     mp_ptr ap, mp_ptr bp, mp_size_t n,
                     mp_ptr tp)
{
  mp_size_t ualloc = n + 1;

  /* Keeps track of the second row of the reduction matrix
   *
   * M = (v0, v1 ; u0, u1)
   *
   * which corresponds to the first column of the inverse. Since
   * det M = 1, we have
   *
   * M^{-1} = (u1, -v1; -u0, v0)
   *
   * so that a = u1 A - v1 B and b = -u0 A + v0 B. This implies that
   *
   * a =  u1 A (mod B)
   * b = -u0 A (mod B)
   *
   * where A, B denote the input values.
   */

  struct gcdext_ctx ctx;
  mp_size_t un;
  mp_ptr u0;
  mp_ptr u1;
  mp_ptr u2;

  MPN_ZERO (tp, 3*ualloc);
  u0 = tp; tp += ualloc;
  u1 = tp; tp += ualloc;
  u2 = tp; tp += ualloc;

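  /* Initial cofactors: u1 = 1, u0 = 0 (cleared by MPN_ZERO above),
     corresponding to A = 1*A + 0*B. */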
  u1[0] = 1; un = 1;

  ctx.gp = gp;
  ctx.up = up;
  ctx.usize = usize;

  /* FIXME: Handle n == 2 differently, after the loop? */
  while (n >= 2)
    {
      struct hgcd_matrix1 M;
      mp_limb_t ah, al, bh, bl;
      mp_limb_t mask;

      mask = ap[n-1] | bp[n-1];
      ASSERT (mask > 0);

      if (mask & GMP_NUMB_HIGHBIT)
        {
          ah = ap[n-1]; al = ap[n-2];
          bh = bp[n-1]; bl = bp[n-2];
        }
      else if (n == 2)
        {
          /* We use the full inputs without truncation, so we can
             safely shift left. */
          int shift;

          count_leading_zeros (shift, mask);
          ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
          al = ap[0] << shift;
          bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
          bl = bp[0] << shift;
        }
      else
        {
          int shift;

          count_leading_zeros (shift, mask);
          ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
          al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
          bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
          bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
        }
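      /* Whichever branch was taken, ah,al and bh,bl now hold the
         leading bits of a and b, normalized so that at least one of
         ah and bh has its most significant bit set. */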

      /* Try an mpn_hgcd2 step */
      if (mpn_hgcd2 (ah, al, bh, bl, &M))
        {
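          /* The hgcd2 step succeeded: reduce (a;b) using M^{-1}, and
             update the cofactor pair (u0,u1) accordingly. */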
          n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
          MP_PTR_SWAP (ap, tp);
          un = mpn_hgcd_mul_matrix1_vector (&M, u2, u0, u1, un);
          MP_PTR_SWAP (u0, u2);
        }
      else
        {
          /* mpn_hgcd2 has failed. Then either one of a or b is very
             small, or the difference is very small. Perform one
             subtraction followed by one division. */
          ctx.u0 = u0;
          ctx.u1 = u1;
          ctx.tp = u2;
          ctx.un = un;

          /* Temporary storage n for the quotient and ualloc for the
             new cofactor. */
          n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
          if (n == 0)
            return ctx.gn;

          un = ctx.un;
        }
    }
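  /* The loop ends with n == 1; both a and b fit in a single limb. */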
  ASSERT_ALWAYS (ap[0] > 0);
  ASSERT_ALWAYS (bp[0] > 0);

  if (ap[0] == bp[0])
    {
      int c;

      /* Which cofactor to return now? Candidates are +u1 and -u0,
         depending on which of a and b was most recently reduced,
         which we don't keep track of. So compare and get the smallest
         one. */

      gp[0] = ap[0];

      MPN_CMP (c, u0, u1, un);
      ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
      if (c < 0)
        {
          MPN_NORMALIZE (u0, un);
          MPN_COPY (up, u0, un);
          *usize = -un;
        }
      else
        {
          MPN_NORMALIZE_NOT_ZERO (u1, un);
          MPN_COPY (up, u1, un);
          *usize = un;
        }
      return 1;
    }
  else
    {
      mp_limb_t uh, vh;
      mp_limb_signed_t u;
      mp_limb_signed_t v;
      int negate;

      gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);

      /* Set up = u u1 - v u0. Keep track of size, un grows by one or
         two limbs. */

      if (u == 0)
        {
          ASSERT (v == 1);
          MPN_NORMALIZE (u0, un);
          MPN_COPY (up, u0, un);
          *usize = -un;
          return 1;
        }
      else if (v == 0)
        {
          ASSERT (u == 1);
          MPN_NORMALIZE (u1, un);
          MPN_COPY (up, u1, un);
          *usize = un;
          return 1;
        }
      else if (u > 0)
        {
          negate = 0;
          ASSERT (v < 0);
          v = -v;
        }
      else
        {
          negate = 1;
          ASSERT (v > 0);
          u = -u;
        }

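      /* Now u, v > 0, and the cofactor magnitude is u*u1 + v*u0; the
         sign is recorded in negate. */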
      uh = mpn_mul_1 (up, u1, un, u);
      vh = mpn_addmul_1 (up, u0, un, v);

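      /* The sum of the two high limbs may itself carry out, in which
         case the cofactor grows by two limbs. */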
      if ((uh | vh) > 0)
        {
          uh += vh;
          up[un++] = uh;
          if (uh < vh)
            up[un++] = 1;
        }

      MPN_NORMALIZE_NOT_ZERO (up, un);

      *usize = negate ? -un : un;
      return 1;
    }
}
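
#if 0
/* A minimal usage sketch (illustrative only, kept out of the build;
   the operand values and the name example_gcdext_lehmer are
   hypothetical). Both inputs are n limbs, at least one with a nonzero
   high limb, and both are destroyed. The scratch bound of 4*n + 3
   limbs follows the comment above mpn_gcdext_lehmer_n. */
static void
example_gcdext_lehmer (void)
{
  mp_limb_t ap[4] = { 12, 34, 56, 78 };  /* a, least significant limb first */
  mp_limb_t bp[4] = { 87, 65, 43, 21 };  /* b, least significant limb first */
  mp_limb_t gp[4];                       /* gcd, at most n limbs */
  mp_limb_t up[5];                       /* cofactor, at most n+1 limbs */
  mp_limb_t tp[4*4 + 3];                 /* scratch, 4*n + 3 limbs */
  mp_size_t gn, usize;

  gn = mpn_gcdext_lehmer_n (gp, up, &usize, ap, bp, 4, tp);

  /* {gp, gn} is now gcd (a, b), and the cofactor u = {up, |usize|},
     negative when usize < 0, satisfies g = u*a (mod b). */
}
#endif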