/* mpn_gcdext -- Extended Greatest Common Divisor.

Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
31 1.1 mrg
32 1.1 mrg #include "gmp.h"
33 1.1 mrg #include "gmp-impl.h"
34 1.1 mrg #include "longlong.h"
35 1.1 mrg
/* Callback passed to mpn_gcd_subdiv_step by mpn_gcdext_lehmer_n below.
   It is invoked in two distinct ways:

   * gp != NULL: a gcd {gp, gn} has been found.  Copy it into the
     context, and store the corresponding cofactor (normalized, with
     the sign encoded in *ctx->usize) as well.

   * gp == NULL: a division step produced the quotient {qp, qn}.
     Update the cofactor pair by u0 += q * u1 (with the roles of u0
     and u1 swapped when d == 1).

   Here, d is the index of the cofactor to update.  FIXME: Could use qn
   = 0 for the common case q = 1. */
void
mpn_gcdext_hook (void *p, mp_srcptr gp, mp_size_t gn,
		 mp_srcptr qp, mp_size_t qn, int d)
{
  struct gcdext_ctx *ctx = (struct gcdext_ctx *) p;
  mp_size_t un = ctx->un;	/* current limb size of the cofactors */

  if (gp)
    {
      mp_srcptr up;

      /* The gcd is non-empty and normalized (top limb nonzero). */
      ASSERT (gn > 0);
      ASSERT (gp[gn-1] > 0);

      MPN_COPY (ctx->gp, gp, gn);
      ctx->gn = gn;

      if (d < 0)
	{
	  int c;

	  /* Must return the smallest cofactor, +u1 or -u0 */
	  MPN_CMP (c, ctx->u0, ctx->u1, un);
	  /* Equal cofactors can only happen in the trivial 1/1 case. */
	  ASSERT (c != 0 || (un == 1 && ctx->u0[0] == 1 && ctx->u1[0] == 1));

	  d = c < 0;
	}

      /* d == 1 selects u0 (reported negative), d == 0 selects u1. */
      up = d ? ctx->u0 : ctx->u1;

      MPN_NORMALIZE (up, un);
      MPN_COPY (ctx->up, up, un);

      /* Sign convention: a negative *usize means the cofactor is -u0. */
      *ctx->usize = d ? -un : un;
    }
  else
    {
      mp_limb_t cy;
      mp_ptr u0 = ctx->u0;
      mp_ptr u1 = ctx->u1;

      ASSERT (d >= 0);

      if (d)
	MP_PTR_SWAP (u0, u1);

      /* The quotient may carry a zero high limb; drop it. */
      qn -= (qp[qn-1] == 0);

      /* Update u0 += q * u1 */
      if (qn == 1)
	{
	  mp_limb_t q = qp[0];

	  if (q == 1)
	    /* A common case. */
	    cy = mpn_add_n (u0, u0, u1, un);
	  else
	    cy = mpn_addmul_1 (u0, u1, un, q);
	}
      else
	{
	  mp_size_t u1n;
	  mp_ptr tp;

	  u1n = un;
	  MPN_NORMALIZE (u1, u1n);

	  /* u1 == 0: nothing to add. */
	  if (u1n == 0)
	    return;

	  /* Should always have u1n == un here, and u1 >= u0. The
	     reason is that we alternate adding u0 to u1 and u1 to u0
	     (corresponding to subtractions a - b and b - a), and we
	     can get a large quotient only just after a switch, which
	     means that we'll add (a multiple of) the larger u to the
	     smaller. */

	  tp = ctx->tp;

	  /* mpn_mul wants its longer operand first — TODO confirm
	     against gmp-impl.h contract. */
	  if (qn > u1n)
	    mpn_mul (tp, qp, qn, u1, u1n);
	  else
	    mpn_mul (tp, u1, u1n, qp, qn);

	  /* Product size, less a possible zero high limb. */
	  u1n += qn;
	  u1n -= tp[u1n-1] == 0;

	  if (u1n >= un)
	    {
	      cy = mpn_add (u0, tp, u1n, u0, un);
	      un = u1n;
	    }
	  else
	    /* Note: Unlikely case, maybe never happens? */
	    cy = mpn_add (u0, u0, un, tp, u1n);

	}
      /* Record a possible carry limb and the (possibly grown) size. */
      u0[un] = cy;
      ctx->un = un + (cy > 0);
    }
}
139 1.1.1.2 mrg
/* Temporary storage: 3*(n+1) for u. If hgcd2 succeeds, we need n for
   the matrix-vector multiplication adjusting a, b. If hgcd fails, we
   need at most n for the quotient and n+1 for the u update (reusing
   the extra u). In all, 4n + 3. */

/* Lehmer-style extended gcd of {ap, n} and {bp, n}; the inputs are
   clobbered.  On return the gcd is stored at gp, a cofactor u at up
   with its signed limb count in *usize (negative size means the
   cofactor is -u0 in the sign convention documented below), and the
   limb count of the gcd is returned. */
mp_size_t
mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
		     mp_ptr ap, mp_ptr bp, mp_size_t n,
		     mp_ptr tp)
{
  mp_size_t ualloc = n + 1;

  /* Keeps track of the second row of the reduction matrix
   *
   *   M = (v0, v1 ; u0, u1)
   *
   * which correspond to the first column of the inverse
   *
   *   M^{-1} = (u1, -v1; -u0, v0)
   *
   * This implies that
   *
   *   a =  u1 A (mod B)
   *   b = -u0 A (mod B)
   *
   * where A, B denotes the input values.
   */

  struct gcdext_ctx ctx;
  mp_size_t un;
  mp_ptr u0;
  mp_ptr u1;
  mp_ptr u2;

  /* Carve three (n+1)-limb cofactor areas out of tp. */
  MPN_ZERO (tp, 3*ualloc);
  u0 = tp; tp += ualloc;
  u1 = tp; tp += ualloc;
  u2 = tp; tp += ualloc;

  /* Initial matrix is the identity: u0 = 0, u1 = 1. */
  u1[0] = 1; un = 1;

  ctx.gp = gp;
  ctx.up = up;
  ctx.usize = usize;

  /* FIXME: Handle n == 2 differently, after the loop? */
  while (n >= 2)
    {
      struct hgcd_matrix1 M;
      mp_limb_t ah, al, bh, bl;
      mp_limb_t mask;

      /* At least one of a, b has a nonzero top limb. */
      mask = ap[n-1] | bp[n-1];
      ASSERT (mask > 0);

      /* Extract two high limbs of a and b, left-shifted so the top
	 bit of the larger one is set, for the single-precision
	 hgcd2 step. */
      if (mask & GMP_NUMB_HIGHBIT)
	{
	  ah = ap[n-1]; al = ap[n-2];
	  bh = bp[n-1]; bl = bp[n-2];
	}
      else if (n == 2)
	{
	  /* We use the full inputs without truncation, so we can
	     safely shift left. */
	  int shift;

	  count_leading_zeros (shift, mask);
	  ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
	  al = ap[0] << shift;
	  bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
	  bl = bp[0] << shift;
	}
      else
	{
	  int shift;

	  count_leading_zeros (shift, mask);
	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
	}

      /* Try an mpn_nhgcd2 step */
      if (mpn_hgcd2 (ah, al, bh, bl, &M))
	{
	  /* Apply M^{-1} to (a; b), and M to the cofactor pair. */
	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
	  MP_PTR_SWAP (ap, tp);
	  un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
	  MP_PTR_SWAP (u0, u2);
	}
      else
	{
	  /* mpn_hgcd2 has failed. Then either one of a or b is very
	     small, or the difference is very small. Perform one
	     subtraction followed by one division. */
	  ctx.u0 = u0;
	  ctx.u1 = u1;
	  ctx.tp = u2;
	  ctx.un = un;

	  /* Temporary storage n for the quotient and ualloc for the
	     new cofactor. */
	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
	  /* n == 0 means the hook already stored gcd and cofactor. */
	  if (n == 0)
	    return ctx.gn;

	  un = ctx.un;
	}
    }
  /* Now n == 1; finish with a single-limb extended gcd. */
  ASSERT_ALWAYS (ap[0] > 0);
  ASSERT_ALWAYS (bp[0] > 0);

  if (ap[0] == bp[0])
    {
      int c;

      /* Which cofactor to return now? Candidates are +u1 and -u0,
	 depending on which of a and b was most recently reduced,
	 which we don't keep track of. So compare and get the smallest
	 one. */

      gp[0] = ap[0];

      MPN_CMP (c, u0, u1, un);
      ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
      if (c < 0)
	{
	  /* u0 is smaller; report it with negative size (it enters
	     the identity with a minus sign). */
	  MPN_NORMALIZE (u0, un);
	  MPN_COPY (up, u0, un);
	  *usize = -un;
	}
      else
	{
	  MPN_NORMALIZE_NOT_ZERO (u1, un);
	  MPN_COPY (up, u1, un);
	  *usize = un;
	}
      return 1;
    }
  else
    {
      mp_limb_t uh, vh;
      mp_limb_signed_t u;
      mp_limb_signed_t v;
      int negate;

      gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);

      /* Set up = u u1 - v u0. Keep track of size, un grows by one or
	 two limbs. */

      if (u == 0)
	{
	  ASSERT (v == 1);
	  MPN_NORMALIZE (u0, un);
	  MPN_COPY (up, u0, un);
	  *usize = -un;
	  return 1;
	}
      else if (v == 0)
	{
	  ASSERT (u == 1);
	  MPN_NORMALIZE (u1, un);
	  MPN_COPY (up, u1, un);
	  *usize = un;
	  return 1;
	}
      else if (u > 0)
	{
	  /* u and v have opposite signs; make both multipliers
	     nonnegative and remember the overall sign in `negate'. */
	  negate = 0;
	  ASSERT (v < 0);
	  v = -v;
	}
      else
	{
	  negate = 1;
	  ASSERT (v > 0);
	  u = -u;
	}

      /* up = u*u1 + v*u0; the sum may carry into one or two extra
	 limbs, handled below. */
      uh = mpn_mul_1 (up, u1, un, u);
      vh = mpn_addmul_1 (up, u0, un, v);

      if ( (uh | vh) > 0)
	{
	  uh += vh;
	  up[un++] = uh;
	  /* uh + vh itself wrapped: propagate the second carry. */
	  if (uh < vh)
	    up[un++] = 1;
	}

      MPN_NORMALIZE_NOT_ZERO (up, un);

      *usize = negate ? -un : un;
      return 1;
    }
}
338