/* mpn_gcdext -- Extended Greatest Common Divisor.

Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009, 2012 Free
Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
25 1.1.1.1.8.1 tls /* Here, d is the index of the cofactor to update. FIXME: Could use qn
26 1.1.1.1.8.1 tls = 0 for the common case q = 1. */
27 1.1.1.1.8.1 tls void
28 1.1.1.1.8.1 tls mpn_gcdext_hook (void *p, mp_srcptr gp, mp_size_t gn,
29 1.1.1.1.8.1 tls mp_srcptr qp, mp_size_t qn, int d)
30 1.1.1.1.8.1 tls {
31 1.1.1.1.8.1 tls struct gcdext_ctx *ctx = (struct gcdext_ctx *) p;
32 1.1.1.1.8.1 tls mp_size_t un = ctx->un;
33 1.1.1.1.8.1 tls
34 1.1.1.1.8.1 tls if (gp)
35 1.1.1.1.8.1 tls {
36 1.1.1.1.8.1 tls mp_srcptr up;
37 1.1.1.1.8.1 tls
38 1.1.1.1.8.1 tls ASSERT (gn > 0);
39 1.1.1.1.8.1 tls ASSERT (gp[gn-1] > 0);
40 1.1.1.1.8.1 tls
41 1.1.1.1.8.1 tls MPN_COPY (ctx->gp, gp, gn);
42 1.1.1.1.8.1 tls ctx->gn = gn;
43 1.1.1.1.8.1 tls
44 1.1.1.1.8.1 tls if (d < 0)
45 1.1.1.1.8.1 tls {
46 1.1.1.1.8.1 tls int c;
47 1.1.1.1.8.1 tls
48 1.1.1.1.8.1 tls /* Must return the smallest cofactor, +u1 or -u0 */
49 1.1.1.1.8.1 tls MPN_CMP (c, ctx->u0, ctx->u1, un);
50 1.1.1.1.8.1 tls ASSERT (c != 0 || (un == 1 && ctx->u0[0] == 1 && ctx->u1[0] == 1));
51 1.1.1.1.8.1 tls
52 1.1.1.1.8.1 tls d = c < 0;
53 1.1.1.1.8.1 tls }
54 1.1.1.1.8.1 tls
55 1.1.1.1.8.1 tls up = d ? ctx->u0 : ctx->u1;
56 1.1.1.1.8.1 tls
57 1.1.1.1.8.1 tls MPN_NORMALIZE (up, un);
58 1.1.1.1.8.1 tls MPN_COPY (ctx->up, up, un);
59 1.1.1.1.8.1 tls
60 1.1.1.1.8.1 tls *ctx->usize = d ? -un : un;
61 1.1.1.1.8.1 tls }
62 1.1.1.1.8.1 tls else
63 1.1.1.1.8.1 tls {
64 1.1.1.1.8.1 tls mp_limb_t cy;
65 1.1.1.1.8.1 tls mp_ptr u0 = ctx->u0;
66 1.1.1.1.8.1 tls mp_ptr u1 = ctx->u1;
67 1.1.1.1.8.1 tls
68 1.1.1.1.8.1 tls ASSERT (d >= 0);
69 1.1.1.1.8.1 tls
70 1.1.1.1.8.1 tls if (d)
71 1.1.1.1.8.1 tls MP_PTR_SWAP (u0, u1);
72 1.1.1.1.8.1 tls
73 1.1.1.1.8.1 tls qn -= (qp[qn-1] == 0);
74 1.1.1.1.8.1 tls
75 1.1.1.1.8.1 tls /* Update u0 += q * u1 */
76 1.1.1.1.8.1 tls if (qn == 1)
77 1.1.1.1.8.1 tls {
78 1.1.1.1.8.1 tls mp_limb_t q = qp[0];
79 1.1.1.1.8.1 tls
80 1.1.1.1.8.1 tls if (q == 1)
81 1.1.1.1.8.1 tls /* A common case. */
82 1.1.1.1.8.1 tls cy = mpn_add_n (u0, u0, u1, un);
83 1.1.1.1.8.1 tls else
84 1.1.1.1.8.1 tls cy = mpn_addmul_1 (u0, u1, un, q);
85 1.1.1.1.8.1 tls }
86 1.1.1.1.8.1 tls else
87 1.1.1.1.8.1 tls {
88 1.1.1.1.8.1 tls mp_size_t u1n;
89 1.1.1.1.8.1 tls mp_ptr tp;
90 1.1.1.1.8.1 tls
91 1.1.1.1.8.1 tls u1n = un;
92 1.1.1.1.8.1 tls MPN_NORMALIZE (u1, u1n);
93 1.1.1.1.8.1 tls
94 1.1.1.1.8.1 tls if (u1n == 0)
95 1.1.1.1.8.1 tls return;
96 1.1.1.1.8.1 tls
97 1.1.1.1.8.1 tls /* Should always have u1n == un here, and u1 >= u0. The
98 1.1.1.1.8.1 tls reason is that we alternate adding u0 to u1 and u1 to u0
99 1.1.1.1.8.1 tls (corresponding to subtractions a - b and b - a), and we
100 1.1.1.1.8.1 tls can get a large quotient only just after a switch, which
101 1.1.1.1.8.1 tls means that we'll add (a multiple of) the larger u to the
102 1.1.1.1.8.1 tls smaller. */
103 1.1.1.1.8.1 tls
104 1.1.1.1.8.1 tls tp = ctx->tp;
105 1.1.1.1.8.1 tls
106 1.1.1.1.8.1 tls if (qn > u1n)
107 1.1.1.1.8.1 tls mpn_mul (tp, qp, qn, u1, u1n);
108 1.1.1.1.8.1 tls else
109 1.1.1.1.8.1 tls mpn_mul (tp, u1, u1n, qp, qn);
110 1.1.1.1.8.1 tls
111 1.1.1.1.8.1 tls u1n += qn;
112 1.1.1.1.8.1 tls u1n -= tp[u1n-1] == 0;
113 1.1.1.1.8.1 tls
114 1.1.1.1.8.1 tls if (u1n >= un)
115 1.1.1.1.8.1 tls {
116 1.1.1.1.8.1 tls cy = mpn_add (u0, tp, u1n, u0, un);
117 1.1.1.1.8.1 tls un = u1n;
118 1.1.1.1.8.1 tls }
119 1.1.1.1.8.1 tls else
120 1.1.1.1.8.1 tls /* Note: Unlikely case, maybe never happens? */
121 1.1.1.1.8.1 tls cy = mpn_add (u0, u0, un, tp, u1n);
122 1.1.1.1.8.1 tls
123 1.1.1.1.8.1 tls }
124 1.1.1.1.8.1 tls u0[un] = cy;
125 1.1.1.1.8.1 tls ctx->un = un + (cy > 0);
126 1.1.1.1.8.1 tls }
127 1.1.1.1.8.1 tls }
128 1.1.1.1.8.1 tls
129 1.1.1.1.8.1 tls /* Temporary storage: 3*(n+1) for u. If hgcd2 succeeds, we need n for
130 1.1.1.1.8.1 tls the matrix-vector multiplication adjusting a, b. If hgcd fails, we
131 1.1.1.1.8.1 tls need at most n for the quotient and n+1 for the u update (reusing
132 1.1.1.1.8.1 tls the extra u). In all, 4n + 3. */
133 1.1 mrg
134 1.1 mrg mp_size_t
135 1.1 mrg mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
136 1.1 mrg mp_ptr ap, mp_ptr bp, mp_size_t n,
137 1.1 mrg mp_ptr tp)
138 1.1 mrg {
139 1.1 mrg mp_size_t ualloc = n + 1;
140 1.1 mrg
141 1.1 mrg /* Keeps track of the second row of the reduction matrix
142 1.1 mrg *
143 1.1 mrg * M = (v0, v1 ; u0, u1)
144 1.1 mrg *
145 1.1 mrg * which correspond to the first column of the inverse
146 1.1 mrg *
147 1.1 mrg * M^{-1} = (u1, -v1; -u0, v0)
148 1.1.1.1.8.1 tls *
149 1.1.1.1.8.1 tls * This implies that
150 1.1.1.1.8.1 tls *
151 1.1.1.1.8.1 tls * a = u1 A (mod B)
152 1.1.1.1.8.1 tls * b = -u0 A (mod B)
153 1.1.1.1.8.1 tls *
154 1.1.1.1.8.1 tls * where A, B denotes the input values.
155 1.1 mrg */
156 1.1 mrg
157 1.1.1.1.8.1 tls struct gcdext_ctx ctx;
158 1.1 mrg mp_size_t un;
159 1.1 mrg mp_ptr u0;
160 1.1 mrg mp_ptr u1;
161 1.1 mrg mp_ptr u2;
162 1.1 mrg
163 1.1 mrg MPN_ZERO (tp, 3*ualloc);
164 1.1 mrg u0 = tp; tp += ualloc;
165 1.1 mrg u1 = tp; tp += ualloc;
166 1.1 mrg u2 = tp; tp += ualloc;
167 1.1 mrg
168 1.1 mrg u1[0] = 1; un = 1;
169 1.1 mrg
170 1.1.1.1.8.1 tls ctx.gp = gp;
171 1.1.1.1.8.1 tls ctx.up = up;
172 1.1.1.1.8.1 tls ctx.usize = usize;
173 1.1.1.1.8.1 tls
174 1.1 mrg /* FIXME: Handle n == 2 differently, after the loop? */
175 1.1 mrg while (n >= 2)
176 1.1 mrg {
177 1.1 mrg struct hgcd_matrix1 M;
178 1.1 mrg mp_limb_t ah, al, bh, bl;
179 1.1 mrg mp_limb_t mask;
180 1.1 mrg
181 1.1 mrg mask = ap[n-1] | bp[n-1];
182 1.1 mrg ASSERT (mask > 0);
183 1.1 mrg
184 1.1 mrg if (mask & GMP_NUMB_HIGHBIT)
185 1.1 mrg {
186 1.1 mrg ah = ap[n-1]; al = ap[n-2];
187 1.1 mrg bh = bp[n-1]; bl = bp[n-2];
188 1.1 mrg }
189 1.1 mrg else if (n == 2)
190 1.1 mrg {
191 1.1 mrg /* We use the full inputs without truncation, so we can
192 1.1 mrg safely shift left. */
193 1.1 mrg int shift;
194 1.1 mrg
195 1.1 mrg count_leading_zeros (shift, mask);
196 1.1 mrg ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
197 1.1 mrg al = ap[0] << shift;
198 1.1 mrg bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
199 1.1 mrg bl = bp[0] << shift;
200 1.1 mrg }
201 1.1 mrg else
202 1.1 mrg {
203 1.1 mrg int shift;
204 1.1 mrg
205 1.1 mrg count_leading_zeros (shift, mask);
206 1.1 mrg ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
207 1.1 mrg al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
208 1.1 mrg bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
209 1.1 mrg bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
210 1.1 mrg }
211 1.1 mrg
212 1.1 mrg /* Try an mpn_nhgcd2 step */
213 1.1 mrg if (mpn_hgcd2 (ah, al, bh, bl, &M))
214 1.1 mrg {
215 1.1.1.1.8.1 tls n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
216 1.1 mrg MP_PTR_SWAP (ap, tp);
217 1.1 mrg un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
218 1.1 mrg MP_PTR_SWAP (u0, u2);
219 1.1 mrg }
220 1.1 mrg else
221 1.1 mrg {
222 1.1 mrg /* mpn_hgcd2 has failed. Then either one of a or b is very
223 1.1 mrg small, or the difference is very small. Perform one
224 1.1 mrg subtraction followed by one division. */
225 1.1.1.1.8.1 tls ctx.u0 = u0;
226 1.1.1.1.8.1 tls ctx.u1 = u1;
227 1.1.1.1.8.1 tls ctx.tp = u2;
228 1.1.1.1.8.1 tls ctx.un = un;
229 1.1 mrg
230 1.1 mrg /* Temporary storage n for the quotient and ualloc for the
231 1.1 mrg new cofactor. */
232 1.1.1.1.8.1 tls n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
233 1.1 mrg if (n == 0)
234 1.1.1.1.8.1 tls return ctx.gn;
235 1.1 mrg
236 1.1.1.1.8.1 tls un = ctx.un;
237 1.1 mrg }
238 1.1 mrg }
239 1.1 mrg ASSERT_ALWAYS (ap[0] > 0);
240 1.1 mrg ASSERT_ALWAYS (bp[0] > 0);
241 1.1 mrg
242 1.1 mrg if (ap[0] == bp[0])
243 1.1 mrg {
244 1.1 mrg int c;
245 1.1 mrg
246 1.1 mrg /* Which cofactor to return now? Candidates are +u1 and -u0,
247 1.1 mrg depending on which of a and b was most recently reduced,
248 1.1 mrg which we don't keep track of. So compare and get the smallest
249 1.1 mrg one. */
250 1.1 mrg
251 1.1 mrg gp[0] = ap[0];
252 1.1 mrg
253 1.1 mrg MPN_CMP (c, u0, u1, un);
254 1.1 mrg ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
255 1.1 mrg if (c < 0)
256 1.1 mrg {
257 1.1 mrg MPN_NORMALIZE (u0, un);
258 1.1 mrg MPN_COPY (up, u0, un);
259 1.1 mrg *usize = -un;
260 1.1 mrg }
261 1.1 mrg else
262 1.1 mrg {
263 1.1 mrg MPN_NORMALIZE_NOT_ZERO (u1, un);
264 1.1 mrg MPN_COPY (up, u1, un);
265 1.1 mrg *usize = un;
266 1.1 mrg }
267 1.1 mrg return 1;
268 1.1 mrg }
269 1.1 mrg else
270 1.1 mrg {
271 1.1 mrg mp_limb_t uh, vh;
272 1.1 mrg mp_limb_signed_t u;
273 1.1 mrg mp_limb_signed_t v;
274 1.1 mrg int negate;
275 1.1 mrg
276 1.1 mrg gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);
277 1.1 mrg
278 1.1 mrg /* Set up = u u1 - v u0. Keep track of size, un grows by one or
279 1.1 mrg two limbs. */
280 1.1 mrg
281 1.1 mrg if (u == 0)
282 1.1 mrg {
283 1.1 mrg ASSERT (v == 1);
284 1.1 mrg MPN_NORMALIZE (u0, un);
285 1.1 mrg MPN_COPY (up, u0, un);
286 1.1 mrg *usize = -un;
287 1.1 mrg return 1;
288 1.1 mrg }
289 1.1 mrg else if (v == 0)
290 1.1 mrg {
291 1.1 mrg ASSERT (u == 1);
292 1.1 mrg MPN_NORMALIZE (u1, un);
293 1.1 mrg MPN_COPY (up, u1, un);
294 1.1 mrg *usize = un;
295 1.1 mrg return 1;
296 1.1 mrg }
297 1.1 mrg else if (u > 0)
298 1.1 mrg {
299 1.1 mrg negate = 0;
300 1.1 mrg ASSERT (v < 0);
301 1.1 mrg v = -v;
302 1.1 mrg }
303 1.1 mrg else
304 1.1 mrg {
305 1.1 mrg negate = 1;
306 1.1 mrg ASSERT (v > 0);
307 1.1 mrg u = -u;
308 1.1 mrg }
309 1.1 mrg
310 1.1 mrg uh = mpn_mul_1 (up, u1, un, u);
311 1.1 mrg vh = mpn_addmul_1 (up, u0, un, v);
312 1.1 mrg
313 1.1 mrg if ( (uh | vh) > 0)
314 1.1 mrg {
315 1.1 mrg uh += vh;
316 1.1 mrg up[un++] = uh;
317 1.1 mrg if (uh < vh)
318 1.1 mrg up[un++] = 1;
319 1.1 mrg }
320 1.1 mrg
321 1.1 mrg MPN_NORMALIZE_NOT_ZERO (up, un);
322 1.1 mrg
323 1.1 mrg *usize = negate ? -un : un;
324 1.1 mrg return 1;
325 1.1 mrg }
326 1.1 mrg }
327