hgcd2_jacobi.c revision 1.1.1.3 1 1.1 mrg /* hgcd2_jacobi.c
2 1.1 mrg
3 1.1 mrg THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
4 1.1 mrg SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
5 1.1 mrg GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
6 1.1 mrg
7 1.1.1.2 mrg Copyright 1996, 1998, 2000-2004, 2008, 2011 Free Software Foundation, Inc.
8 1.1 mrg
9 1.1 mrg This file is part of the GNU MP Library.
10 1.1 mrg
11 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify
12 1.1.1.2 mrg it under the terms of either:
13 1.1.1.2 mrg
14 1.1.1.2 mrg * the GNU Lesser General Public License as published by the Free
15 1.1.1.2 mrg Software Foundation; either version 3 of the License, or (at your
16 1.1.1.2 mrg option) any later version.
17 1.1.1.2 mrg
18 1.1.1.2 mrg or
19 1.1.1.2 mrg
20 1.1.1.2 mrg * the GNU General Public License as published by the Free Software
21 1.1.1.2 mrg Foundation; either version 2 of the License, or (at your option) any
22 1.1.1.2 mrg later version.
23 1.1.1.2 mrg
24 1.1.1.2 mrg or both in parallel, as here.
25 1.1 mrg
26 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but
27 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
28 1.1.1.2 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
29 1.1.1.2 mrg for more details.
30 1.1 mrg
31 1.1.1.2 mrg You should have received copies of the GNU General Public License and the
32 1.1.1.2 mrg GNU Lesser General Public License along with the GNU MP Library. If not,
33 1.1.1.2 mrg see https://www.gnu.org/licenses/. */
34 1.1 mrg
35 1.1 mrg #include "gmp-impl.h"
36 1.1 mrg #include "longlong.h"
37 1.1 mrg
38 1.1 mrg #if GMP_NAIL_BITS > 0
39 1.1 mrg #error Nails not supported.
40 1.1 mrg #endif
41 1.1 mrg
42 1.1 mrg /* FIXME: Duplicated in hgcd2.c. Should move to gmp-impl.h, and
43 1.1 mrg possibly be renamed. */
44 1.1 mrg static inline mp_limb_t
45 1.1 mrg div1 (mp_ptr rp,
46 1.1 mrg mp_limb_t n0,
47 1.1 mrg mp_limb_t d0)
48 1.1 mrg {
49 1.1 mrg mp_limb_t q = 0;
50 1.1 mrg
51 1.1 mrg if ((mp_limb_signed_t) n0 < 0)
52 1.1 mrg {
53 1.1 mrg int cnt;
54 1.1 mrg for (cnt = 1; (mp_limb_signed_t) d0 >= 0; cnt++)
55 1.1 mrg {
56 1.1 mrg d0 = d0 << 1;
57 1.1 mrg }
58 1.1 mrg
59 1.1 mrg q = 0;
60 1.1 mrg while (cnt)
61 1.1 mrg {
62 1.1 mrg q <<= 1;
63 1.1 mrg if (n0 >= d0)
64 1.1 mrg {
65 1.1 mrg n0 = n0 - d0;
66 1.1 mrg q |= 1;
67 1.1 mrg }
68 1.1 mrg d0 = d0 >> 1;
69 1.1 mrg cnt--;
70 1.1 mrg }
71 1.1 mrg }
72 1.1 mrg else
73 1.1 mrg {
74 1.1 mrg int cnt;
75 1.1 mrg for (cnt = 0; n0 >= d0; cnt++)
76 1.1 mrg {
77 1.1 mrg d0 = d0 << 1;
78 1.1 mrg }
79 1.1 mrg
80 1.1 mrg q = 0;
81 1.1 mrg while (cnt)
82 1.1 mrg {
83 1.1 mrg d0 = d0 >> 1;
84 1.1 mrg q <<= 1;
85 1.1 mrg if (n0 >= d0)
86 1.1 mrg {
87 1.1 mrg n0 = n0 - d0;
88 1.1 mrg q |= 1;
89 1.1 mrg }
90 1.1 mrg cnt--;
91 1.1 mrg }
92 1.1 mrg }
93 1.1 mrg *rp = n0;
94 1.1 mrg return q;
95 1.1 mrg }
96 1.1 mrg
97 1.1 mrg /* Two-limb division optimized for small quotients. */
98 1.1 mrg static inline mp_limb_t
99 1.1 mrg div2 (mp_ptr rp,
100 1.1 mrg mp_limb_t nh, mp_limb_t nl,
101 1.1 mrg mp_limb_t dh, mp_limb_t dl)
102 1.1 mrg {
103 1.1 mrg mp_limb_t q = 0;
104 1.1 mrg
105 1.1 mrg if ((mp_limb_signed_t) nh < 0)
106 1.1 mrg {
107 1.1 mrg int cnt;
108 1.1 mrg for (cnt = 1; (mp_limb_signed_t) dh >= 0; cnt++)
109 1.1 mrg {
110 1.1 mrg dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
111 1.1 mrg dl = dl << 1;
112 1.1 mrg }
113 1.1 mrg
114 1.1 mrg while (cnt)
115 1.1 mrg {
116 1.1 mrg q <<= 1;
117 1.1 mrg if (nh > dh || (nh == dh && nl >= dl))
118 1.1 mrg {
119 1.1 mrg sub_ddmmss (nh, nl, nh, nl, dh, dl);
120 1.1 mrg q |= 1;
121 1.1 mrg }
122 1.1 mrg dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
123 1.1 mrg dh = dh >> 1;
124 1.1 mrg cnt--;
125 1.1 mrg }
126 1.1 mrg }
127 1.1 mrg else
128 1.1 mrg {
129 1.1 mrg int cnt;
130 1.1 mrg for (cnt = 0; nh > dh || (nh == dh && nl >= dl); cnt++)
131 1.1 mrg {
132 1.1 mrg dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
133 1.1 mrg dl = dl << 1;
134 1.1 mrg }
135 1.1 mrg
136 1.1 mrg while (cnt)
137 1.1 mrg {
138 1.1 mrg dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
139 1.1 mrg dh = dh >> 1;
140 1.1 mrg q <<= 1;
141 1.1 mrg if (nh > dh || (nh == dh && nl >= dl))
142 1.1 mrg {
143 1.1 mrg sub_ddmmss (nh, nl, nh, nl, dh, dl);
144 1.1 mrg q |= 1;
145 1.1 mrg }
146 1.1 mrg cnt--;
147 1.1 mrg }
148 1.1 mrg }
149 1.1 mrg
150 1.1 mrg rp[0] = nl;
151 1.1 mrg rp[1] = nh;
152 1.1 mrg
153 1.1 mrg return q;
154 1.1 mrg }
155 1.1 mrg
156 1.1 mrg int
157 1.1 mrg mpn_hgcd2_jacobi (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
158 1.1 mrg struct hgcd_matrix1 *M, unsigned *bitsp)
159 1.1 mrg {
160 1.1 mrg mp_limb_t u00, u01, u10, u11;
161 1.1 mrg unsigned bits = *bitsp;
162 1.1 mrg
163 1.1 mrg if (ah < 2 || bh < 2)
164 1.1 mrg return 0;
165 1.1 mrg
166 1.1 mrg if (ah > bh || (ah == bh && al > bl))
167 1.1 mrg {
168 1.1 mrg sub_ddmmss (ah, al, ah, al, bh, bl);
169 1.1 mrg if (ah < 2)
170 1.1 mrg return 0;
171 1.1 mrg
172 1.1 mrg u00 = u01 = u11 = 1;
173 1.1 mrg u10 = 0;
174 1.1 mrg bits = mpn_jacobi_update (bits, 1, 1);
175 1.1 mrg }
176 1.1 mrg else
177 1.1 mrg {
178 1.1 mrg sub_ddmmss (bh, bl, bh, bl, ah, al);
179 1.1 mrg if (bh < 2)
180 1.1 mrg return 0;
181 1.1 mrg
182 1.1 mrg u00 = u10 = u11 = 1;
183 1.1 mrg u01 = 0;
184 1.1 mrg bits = mpn_jacobi_update (bits, 0, 1);
185 1.1 mrg }
186 1.1 mrg
187 1.1 mrg if (ah < bh)
188 1.1 mrg goto subtract_a;
189 1.1 mrg
190 1.1 mrg for (;;)
191 1.1 mrg {
192 1.1 mrg ASSERT (ah >= bh);
193 1.1 mrg if (ah == bh)
194 1.1 mrg goto done;
195 1.1 mrg
196 1.1 mrg if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
197 1.1 mrg {
198 1.1 mrg ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
199 1.1 mrg bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
200 1.1 mrg
201 1.1 mrg break;
202 1.1 mrg }
203 1.1 mrg
204 1.1 mrg /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
205 1.1 mrg 1), affecting the second column of M. */
206 1.1 mrg ASSERT (ah > bh);
207 1.1 mrg sub_ddmmss (ah, al, ah, al, bh, bl);
208 1.1 mrg
209 1.1 mrg if (ah < 2)
210 1.1 mrg goto done;
211 1.1 mrg
212 1.1 mrg if (ah <= bh)
213 1.1 mrg {
214 1.1 mrg /* Use q = 1 */
215 1.1 mrg u01 += u00;
216 1.1 mrg u11 += u10;
217 1.1 mrg bits = mpn_jacobi_update (bits, 1, 1);
218 1.1 mrg }
219 1.1 mrg else
220 1.1 mrg {
221 1.1 mrg mp_limb_t r[2];
222 1.1 mrg mp_limb_t q = div2 (r, ah, al, bh, bl);
223 1.1 mrg al = r[0]; ah = r[1];
224 1.1 mrg if (ah < 2)
225 1.1 mrg {
226 1.1 mrg /* A is too small, but q is correct. */
227 1.1 mrg u01 += q * u00;
228 1.1 mrg u11 += q * u10;
229 1.1 mrg bits = mpn_jacobi_update (bits, 1, q & 3);
230 1.1 mrg goto done;
231 1.1 mrg }
232 1.1 mrg q++;
233 1.1 mrg u01 += q * u00;
234 1.1 mrg u11 += q * u10;
235 1.1 mrg bits = mpn_jacobi_update (bits, 1, q & 3);
236 1.1 mrg }
237 1.1 mrg subtract_a:
238 1.1 mrg ASSERT (bh >= ah);
239 1.1 mrg if (ah == bh)
240 1.1 mrg goto done;
241 1.1 mrg
242 1.1 mrg if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
243 1.1 mrg {
244 1.1 mrg ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
245 1.1 mrg bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
246 1.1 mrg
247 1.1 mrg goto subtract_a1;
248 1.1 mrg }
249 1.1 mrg
250 1.1 mrg /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
251 1.1 mrg 1), affecting the first column of M. */
252 1.1 mrg sub_ddmmss (bh, bl, bh, bl, ah, al);
253 1.1 mrg
254 1.1 mrg if (bh < 2)
255 1.1 mrg goto done;
256 1.1 mrg
257 1.1 mrg if (bh <= ah)
258 1.1 mrg {
259 1.1 mrg /* Use q = 1 */
260 1.1 mrg u00 += u01;
261 1.1 mrg u10 += u11;
262 1.1 mrg bits = mpn_jacobi_update (bits, 0, 1);
263 1.1 mrg }
264 1.1 mrg else
265 1.1 mrg {
266 1.1 mrg mp_limb_t r[2];
267 1.1 mrg mp_limb_t q = div2 (r, bh, bl, ah, al);
268 1.1 mrg bl = r[0]; bh = r[1];
269 1.1 mrg if (bh < 2)
270 1.1 mrg {
271 1.1 mrg /* B is too small, but q is correct. */
272 1.1 mrg u00 += q * u01;
273 1.1 mrg u10 += q * u11;
274 1.1 mrg bits = mpn_jacobi_update (bits, 0, q & 3);
275 1.1 mrg goto done;
276 1.1 mrg }
277 1.1 mrg q++;
278 1.1 mrg u00 += q * u01;
279 1.1 mrg u10 += q * u11;
280 1.1 mrg bits = mpn_jacobi_update (bits, 0, q & 3);
281 1.1 mrg }
282 1.1 mrg }
283 1.1 mrg
284 1.1 mrg /* NOTE: Since we discard the least significant half limb, we don't
285 1.1 mrg get a truly maximal M (corresponding to |a - b| <
286 1.1 mrg 2^{GMP_LIMB_BITS +1}). */
287 1.1 mrg /* Single precision loop */
288 1.1 mrg for (;;)
289 1.1 mrg {
290 1.1 mrg ASSERT (ah >= bh);
291 1.1 mrg if (ah == bh)
292 1.1 mrg break;
293 1.1 mrg
294 1.1 mrg ah -= bh;
295 1.1 mrg if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
296 1.1 mrg break;
297 1.1 mrg
298 1.1 mrg if (ah <= bh)
299 1.1 mrg {
300 1.1 mrg /* Use q = 1 */
301 1.1 mrg u01 += u00;
302 1.1 mrg u11 += u10;
303 1.1 mrg bits = mpn_jacobi_update (bits, 1, 1);
304 1.1 mrg }
305 1.1 mrg else
306 1.1 mrg {
307 1.1 mrg mp_limb_t r;
308 1.1 mrg mp_limb_t q = div1 (&r, ah, bh);
309 1.1 mrg ah = r;
310 1.1 mrg if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
311 1.1 mrg {
312 1.1 mrg /* A is too small, but q is correct. */
313 1.1 mrg u01 += q * u00;
314 1.1 mrg u11 += q * u10;
315 1.1 mrg bits = mpn_jacobi_update (bits, 1, q & 3);
316 1.1 mrg break;
317 1.1 mrg }
318 1.1 mrg q++;
319 1.1 mrg u01 += q * u00;
320 1.1 mrg u11 += q * u10;
321 1.1 mrg bits = mpn_jacobi_update (bits, 1, q & 3);
322 1.1 mrg }
323 1.1 mrg subtract_a1:
324 1.1 mrg ASSERT (bh >= ah);
325 1.1 mrg if (ah == bh)
326 1.1 mrg break;
327 1.1 mrg
328 1.1 mrg bh -= ah;
329 1.1 mrg if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
330 1.1 mrg break;
331 1.1 mrg
332 1.1 mrg if (bh <= ah)
333 1.1 mrg {
334 1.1 mrg /* Use q = 1 */
335 1.1 mrg u00 += u01;
336 1.1 mrg u10 += u11;
337 1.1 mrg bits = mpn_jacobi_update (bits, 0, 1);
338 1.1 mrg }
339 1.1 mrg else
340 1.1 mrg {
341 1.1 mrg mp_limb_t r;
342 1.1 mrg mp_limb_t q = div1 (&r, bh, ah);
343 1.1 mrg bh = r;
344 1.1 mrg if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
345 1.1 mrg {
346 1.1 mrg /* B is too small, but q is correct. */
347 1.1 mrg u00 += q * u01;
348 1.1 mrg u10 += q * u11;
349 1.1 mrg bits = mpn_jacobi_update (bits, 0, q & 3);
350 1.1 mrg break;
351 1.1 mrg }
352 1.1 mrg q++;
353 1.1 mrg u00 += q * u01;
354 1.1 mrg u10 += q * u11;
355 1.1 mrg bits = mpn_jacobi_update (bits, 0, q & 3);
356 1.1 mrg }
357 1.1 mrg }
358 1.1 mrg
359 1.1 mrg done:
360 1.1 mrg M->u[0][0] = u00; M->u[0][1] = u01;
361 1.1 mrg M->u[1][0] = u10; M->u[1][1] = u11;
362 1.1 mrg *bitsp = bits;
363 1.1 mrg
364 1.1 mrg return 1;
365 1.1 mrg }
366