/* mpn_gcdext -- Extended Greatest Common Divisor.

Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
32 1.1 mrg #include "gmp-impl.h"
33 1.1 mrg #include "longlong.h"
34 1.1 mrg
35 1.1.1.2 mrg /* Here, d is the index of the cofactor to update. FIXME: Could use qn
36 1.1.1.2 mrg = 0 for the common case q = 1. */
37 1.1.1.2 mrg void
38 1.1.1.2 mrg mpn_gcdext_hook (void *p, mp_srcptr gp, mp_size_t gn,
39 1.1.1.2 mrg mp_srcptr qp, mp_size_t qn, int d)
40 1.1.1.2 mrg {
41 1.1.1.2 mrg struct gcdext_ctx *ctx = (struct gcdext_ctx *) p;
42 1.1.1.2 mrg mp_size_t un = ctx->un;
43 1.1.1.2 mrg
44 1.1.1.2 mrg if (gp)
45 1.1.1.2 mrg {
46 1.1.1.2 mrg mp_srcptr up;
47 1.1.1.2 mrg
48 1.1.1.2 mrg ASSERT (gn > 0);
49 1.1.1.2 mrg ASSERT (gp[gn-1] > 0);
50 1.1.1.2 mrg
51 1.1.1.2 mrg MPN_COPY (ctx->gp, gp, gn);
52 1.1.1.2 mrg ctx->gn = gn;
53 1.1.1.2 mrg
54 1.1.1.2 mrg if (d < 0)
55 1.1.1.2 mrg {
56 1.1.1.2 mrg int c;
57 1.1.1.2 mrg
58 1.1.1.2 mrg /* Must return the smallest cofactor, +u1 or -u0 */
59 1.1.1.2 mrg MPN_CMP (c, ctx->u0, ctx->u1, un);
60 1.1.1.2 mrg ASSERT (c != 0 || (un == 1 && ctx->u0[0] == 1 && ctx->u1[0] == 1));
61 1.1.1.2 mrg
62 1.1.1.2 mrg d = c < 0;
63 1.1.1.2 mrg }
64 1.1.1.2 mrg
65 1.1.1.2 mrg up = d ? ctx->u0 : ctx->u1;
66 1.1.1.2 mrg
67 1.1.1.2 mrg MPN_NORMALIZE (up, un);
68 1.1.1.2 mrg MPN_COPY (ctx->up, up, un);
69 1.1.1.2 mrg
70 1.1.1.2 mrg *ctx->usize = d ? -un : un;
71 1.1.1.2 mrg }
72 1.1.1.2 mrg else
73 1.1.1.2 mrg {
74 1.1.1.2 mrg mp_limb_t cy;
75 1.1.1.2 mrg mp_ptr u0 = ctx->u0;
76 1.1.1.2 mrg mp_ptr u1 = ctx->u1;
77 1.1.1.2 mrg
78 1.1.1.2 mrg ASSERT (d >= 0);
79 1.1.1.2 mrg
80 1.1.1.2 mrg if (d)
81 1.1.1.2 mrg MP_PTR_SWAP (u0, u1);
82 1.1.1.2 mrg
83 1.1.1.2 mrg qn -= (qp[qn-1] == 0);
84 1.1.1.2 mrg
85 1.1.1.2 mrg /* Update u0 += q * u1 */
86 1.1.1.2 mrg if (qn == 1)
87 1.1.1.2 mrg {
88 1.1.1.2 mrg mp_limb_t q = qp[0];
89 1.1.1.2 mrg
90 1.1.1.2 mrg if (q == 1)
91 1.1.1.2 mrg /* A common case. */
92 1.1.1.2 mrg cy = mpn_add_n (u0, u0, u1, un);
93 1.1.1.2 mrg else
94 1.1.1.2 mrg cy = mpn_addmul_1 (u0, u1, un, q);
95 1.1.1.2 mrg }
96 1.1.1.2 mrg else
97 1.1.1.2 mrg {
98 1.1.1.2 mrg mp_size_t u1n;
99 1.1.1.2 mrg mp_ptr tp;
100 1.1.1.2 mrg
101 1.1.1.2 mrg u1n = un;
102 1.1.1.2 mrg MPN_NORMALIZE (u1, u1n);
103 1.1.1.2 mrg
104 1.1.1.2 mrg if (u1n == 0)
105 1.1.1.2 mrg return;
106 1.1.1.2 mrg
107 1.1.1.2 mrg /* Should always have u1n == un here, and u1 >= u0. The
108 1.1.1.2 mrg reason is that we alternate adding u0 to u1 and u1 to u0
109 1.1.1.2 mrg (corresponding to subtractions a - b and b - a), and we
110 1.1.1.2 mrg can get a large quotient only just after a switch, which
111 1.1.1.2 mrg means that we'll add (a multiple of) the larger u to the
112 1.1.1.2 mrg smaller. */
113 1.1.1.2 mrg
114 1.1.1.2 mrg tp = ctx->tp;
115 1.1.1.2 mrg
116 1.1.1.2 mrg if (qn > u1n)
117 1.1.1.2 mrg mpn_mul (tp, qp, qn, u1, u1n);
118 1.1.1.2 mrg else
119 1.1.1.2 mrg mpn_mul (tp, u1, u1n, qp, qn);
120 1.1.1.2 mrg
121 1.1.1.2 mrg u1n += qn;
122 1.1.1.2 mrg u1n -= tp[u1n-1] == 0;
123 1.1.1.2 mrg
124 1.1.1.2 mrg if (u1n >= un)
125 1.1.1.2 mrg {
126 1.1.1.2 mrg cy = mpn_add (u0, tp, u1n, u0, un);
127 1.1.1.2 mrg un = u1n;
128 1.1.1.2 mrg }
129 1.1.1.2 mrg else
130 1.1.1.2 mrg /* Note: Unlikely case, maybe never happens? */
131 1.1.1.2 mrg cy = mpn_add (u0, u0, un, tp, u1n);
132 1.1.1.2 mrg
133 1.1.1.2 mrg }
134 1.1.1.2 mrg u0[un] = cy;
135 1.1.1.2 mrg ctx->un = un + (cy > 0);
136 1.1.1.2 mrg }
137 1.1.1.2 mrg }
138 1.1.1.2 mrg
139 1.1.1.2 mrg /* Temporary storage: 3*(n+1) for u. If hgcd2 succeeds, we need n for
140 1.1.1.2 mrg the matrix-vector multiplication adjusting a, b. If hgcd fails, we
141 1.1.1.2 mrg need at most n for the quotient and n+1 for the u update (reusing
142 1.1.1.2 mrg the extra u). In all, 4n + 3. */
143 1.1 mrg
144 1.1 mrg mp_size_t
145 1.1 mrg mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
146 1.1 mrg mp_ptr ap, mp_ptr bp, mp_size_t n,
147 1.1 mrg mp_ptr tp)
148 1.1 mrg {
149 1.1 mrg mp_size_t ualloc = n + 1;
150 1.1 mrg
151 1.1 mrg /* Keeps track of the second row of the reduction matrix
152 1.1 mrg *
153 1.1 mrg * M = (v0, v1 ; u0, u1)
154 1.1 mrg *
155 1.1 mrg * which correspond to the first column of the inverse
156 1.1 mrg *
157 1.1 mrg * M^{-1} = (u1, -v1; -u0, v0)
158 1.1.1.2 mrg *
159 1.1.1.2 mrg * This implies that
160 1.1.1.2 mrg *
161 1.1.1.2 mrg * a = u1 A (mod B)
162 1.1.1.2 mrg * b = -u0 A (mod B)
163 1.1.1.2 mrg *
164 1.1.1.2 mrg * where A, B denotes the input values.
165 1.1 mrg */
166 1.1 mrg
167 1.1.1.2 mrg struct gcdext_ctx ctx;
168 1.1 mrg mp_size_t un;
169 1.1 mrg mp_ptr u0;
170 1.1 mrg mp_ptr u1;
171 1.1 mrg mp_ptr u2;
172 1.1 mrg
173 1.1 mrg MPN_ZERO (tp, 3*ualloc);
174 1.1 mrg u0 = tp; tp += ualloc;
175 1.1 mrg u1 = tp; tp += ualloc;
176 1.1 mrg u2 = tp; tp += ualloc;
177 1.1 mrg
178 1.1 mrg u1[0] = 1; un = 1;
179 1.1 mrg
180 1.1.1.2 mrg ctx.gp = gp;
181 1.1.1.2 mrg ctx.up = up;
182 1.1.1.2 mrg ctx.usize = usize;
183 1.1.1.2 mrg
184 1.1 mrg /* FIXME: Handle n == 2 differently, after the loop? */
185 1.1 mrg while (n >= 2)
186 1.1 mrg {
187 1.1 mrg struct hgcd_matrix1 M;
188 1.1 mrg mp_limb_t ah, al, bh, bl;
189 1.1 mrg mp_limb_t mask;
190 1.1 mrg
191 1.1 mrg mask = ap[n-1] | bp[n-1];
192 1.1 mrg ASSERT (mask > 0);
193 1.1 mrg
194 1.1 mrg if (mask & GMP_NUMB_HIGHBIT)
195 1.1 mrg {
196 1.1 mrg ah = ap[n-1]; al = ap[n-2];
197 1.1 mrg bh = bp[n-1]; bl = bp[n-2];
198 1.1 mrg }
199 1.1 mrg else if (n == 2)
200 1.1 mrg {
201 1.1 mrg /* We use the full inputs without truncation, so we can
202 1.1 mrg safely shift left. */
203 1.1 mrg int shift;
204 1.1 mrg
205 1.1 mrg count_leading_zeros (shift, mask);
206 1.1 mrg ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
207 1.1 mrg al = ap[0] << shift;
208 1.1 mrg bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
209 1.1 mrg bl = bp[0] << shift;
210 1.1 mrg }
211 1.1 mrg else
212 1.1 mrg {
213 1.1 mrg int shift;
214 1.1 mrg
215 1.1 mrg count_leading_zeros (shift, mask);
216 1.1 mrg ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
217 1.1 mrg al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
218 1.1 mrg bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
219 1.1 mrg bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
220 1.1 mrg }
221 1.1 mrg
222 1.1 mrg /* Try an mpn_nhgcd2 step */
223 1.1 mrg if (mpn_hgcd2 (ah, al, bh, bl, &M))
224 1.1 mrg {
225 1.1.1.2 mrg n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
226 1.1 mrg MP_PTR_SWAP (ap, tp);
227 1.1 mrg un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
228 1.1 mrg MP_PTR_SWAP (u0, u2);
229 1.1 mrg }
230 1.1 mrg else
231 1.1 mrg {
232 1.1 mrg /* mpn_hgcd2 has failed. Then either one of a or b is very
233 1.1 mrg small, or the difference is very small. Perform one
234 1.1 mrg subtraction followed by one division. */
235 1.1.1.2 mrg ctx.u0 = u0;
236 1.1.1.2 mrg ctx.u1 = u1;
237 1.1.1.2 mrg ctx.tp = u2;
238 1.1.1.2 mrg ctx.un = un;
239 1.1 mrg
240 1.1 mrg /* Temporary storage n for the quotient and ualloc for the
241 1.1 mrg new cofactor. */
242 1.1.1.2 mrg n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
243 1.1 mrg if (n == 0)
244 1.1.1.2 mrg return ctx.gn;
245 1.1 mrg
246 1.1.1.2 mrg un = ctx.un;
247 1.1 mrg }
248 1.1 mrg }
249 1.1 mrg ASSERT_ALWAYS (ap[0] > 0);
250 1.1 mrg ASSERT_ALWAYS (bp[0] > 0);
251 1.1 mrg
252 1.1 mrg if (ap[0] == bp[0])
253 1.1 mrg {
254 1.1 mrg int c;
255 1.1 mrg
256 1.1 mrg /* Which cofactor to return now? Candidates are +u1 and -u0,
257 1.1 mrg depending on which of a and b was most recently reduced,
258 1.1 mrg which we don't keep track of. So compare and get the smallest
259 1.1 mrg one. */
260 1.1 mrg
261 1.1 mrg gp[0] = ap[0];
262 1.1 mrg
263 1.1 mrg MPN_CMP (c, u0, u1, un);
264 1.1 mrg ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
265 1.1 mrg if (c < 0)
266 1.1 mrg {
267 1.1 mrg MPN_NORMALIZE (u0, un);
268 1.1 mrg MPN_COPY (up, u0, un);
269 1.1 mrg *usize = -un;
270 1.1 mrg }
271 1.1 mrg else
272 1.1 mrg {
273 1.1 mrg MPN_NORMALIZE_NOT_ZERO (u1, un);
274 1.1 mrg MPN_COPY (up, u1, un);
275 1.1 mrg *usize = un;
276 1.1 mrg }
277 1.1 mrg return 1;
278 1.1 mrg }
279 1.1 mrg else
280 1.1 mrg {
281 1.1 mrg mp_limb_t uh, vh;
282 1.1 mrg mp_limb_signed_t u;
283 1.1 mrg mp_limb_signed_t v;
284 1.1 mrg int negate;
285 1.1 mrg
286 1.1 mrg gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);
287 1.1 mrg
288 1.1 mrg /* Set up = u u1 - v u0. Keep track of size, un grows by one or
289 1.1 mrg two limbs. */
290 1.1 mrg
291 1.1 mrg if (u == 0)
292 1.1 mrg {
293 1.1 mrg ASSERT (v == 1);
294 1.1 mrg MPN_NORMALIZE (u0, un);
295 1.1 mrg MPN_COPY (up, u0, un);
296 1.1 mrg *usize = -un;
297 1.1 mrg return 1;
298 1.1 mrg }
299 1.1 mrg else if (v == 0)
300 1.1 mrg {
301 1.1 mrg ASSERT (u == 1);
302 1.1 mrg MPN_NORMALIZE (u1, un);
303 1.1 mrg MPN_COPY (up, u1, un);
304 1.1 mrg *usize = un;
305 1.1 mrg return 1;
306 1.1 mrg }
307 1.1 mrg else if (u > 0)
308 1.1 mrg {
309 1.1 mrg negate = 0;
310 1.1 mrg ASSERT (v < 0);
311 1.1 mrg v = -v;
312 1.1 mrg }
313 1.1 mrg else
314 1.1 mrg {
315 1.1 mrg negate = 1;
316 1.1 mrg ASSERT (v > 0);
317 1.1 mrg u = -u;
318 1.1 mrg }
319 1.1 mrg
320 1.1 mrg uh = mpn_mul_1 (up, u1, un, u);
321 1.1 mrg vh = mpn_addmul_1 (up, u0, un, v);
322 1.1 mrg
323 1.1 mrg if ( (uh | vh) > 0)
324 1.1 mrg {
325 1.1 mrg uh += vh;
326 1.1 mrg up[un++] = uh;
327 1.1 mrg if (uh < vh)
328 1.1 mrg up[un++] = 1;
329 1.1 mrg }
330 1.1 mrg
331 1.1 mrg MPN_NORMALIZE_NOT_ZERO (up, un);
332 1.1 mrg
333 1.1 mrg *usize = negate ? -un : un;
334 1.1 mrg return 1;
335 1.1 mrg }
336 1.1 mrg }
337