/* hgcd_reduce.c.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2011, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
38 1.1 mrg
39 1.1 mrg /* Computes R -= A * B. Result must be non-negative. Normalized down
40 1.1 mrg to size an, and resulting size is returned. */
41 1.1 mrg static mp_size_t
42 1.1 mrg submul (mp_ptr rp, mp_size_t rn,
43 1.1 mrg mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
44 1.1 mrg {
45 1.1 mrg mp_ptr tp;
46 1.1 mrg TMP_DECL;
47 1.1 mrg
48 1.1 mrg ASSERT (bn > 0);
49 1.1 mrg ASSERT (an >= bn);
50 1.1 mrg ASSERT (rn >= an);
51 1.1 mrg ASSERT (an + bn <= rn + 1);
52 1.1 mrg
53 1.1 mrg TMP_MARK;
54 1.1 mrg tp = TMP_ALLOC_LIMBS (an + bn);
55 1.1 mrg
56 1.1 mrg mpn_mul (tp, ap, an, bp, bn);
57 1.1.1.2 mrg ASSERT ((an + bn <= rn) || (tp[rn] == 0));
58 1.1.1.2 mrg ASSERT_NOCARRY (mpn_sub (rp, rp, rn, tp, an + bn - (an + bn > rn)));
59 1.1 mrg TMP_FREE;
60 1.1 mrg
61 1.1 mrg while (rn > an && (rp[rn-1] == 0))
62 1.1 mrg rn--;
63 1.1 mrg
64 1.1 mrg return rn;
65 1.1 mrg }
66 1.1 mrg
67 1.1 mrg /* Computes (a, b) <-- M^{-1} (a; b) */
68 1.1 mrg /* FIXME:
69 1.1 mrg x Take scratch parameter, and figure out scratch need.
70 1.1 mrg
71 1.1 mrg x Use some fallback for small M->n?
72 1.1 mrg */
73 1.1 mrg static mp_size_t
74 1.1 mrg hgcd_matrix_apply (const struct hgcd_matrix *M,
75 1.1 mrg mp_ptr ap, mp_ptr bp,
76 1.1 mrg mp_size_t n)
77 1.1 mrg {
78 1.1 mrg mp_size_t an, bn, un, vn, nn;
79 1.1 mrg mp_size_t mn[2][2];
80 1.1 mrg mp_size_t modn;
81 1.1 mrg mp_ptr tp, sp, scratch;
82 1.1 mrg mp_limb_t cy;
83 1.1 mrg unsigned i, j;
84 1.1 mrg
85 1.1 mrg TMP_DECL;
86 1.1 mrg
87 1.1 mrg ASSERT ( (ap[n-1] | bp[n-1]) > 0);
88 1.1 mrg
89 1.1 mrg an = n;
90 1.1 mrg MPN_NORMALIZE (ap, an);
91 1.1 mrg bn = n;
92 1.1 mrg MPN_NORMALIZE (bp, bn);
93 1.1 mrg
94 1.1 mrg for (i = 0; i < 2; i++)
95 1.1 mrg for (j = 0; j < 2; j++)
96 1.1 mrg {
97 1.1 mrg mp_size_t k;
98 1.1 mrg k = M->n;
99 1.1 mrg MPN_NORMALIZE (M->p[i][j], k);
100 1.1 mrg mn[i][j] = k;
101 1.1 mrg }
102 1.1 mrg
103 1.1 mrg ASSERT (mn[0][0] > 0);
104 1.1 mrg ASSERT (mn[1][1] > 0);
105 1.1 mrg ASSERT ( (mn[0][1] | mn[1][0]) > 0);
106 1.1 mrg
107 1.1 mrg TMP_MARK;
108 1.1 mrg
109 1.1 mrg if (mn[0][1] == 0)
110 1.1 mrg {
111 1.1 mrg /* A unchanged, M = (1, 0; q, 1) */
112 1.1 mrg ASSERT (mn[0][0] == 1);
113 1.1 mrg ASSERT (M->p[0][0][0] == 1);
114 1.1 mrg ASSERT (mn[1][1] == 1);
115 1.1 mrg ASSERT (M->p[1][1][0] == 1);
116 1.1 mrg
117 1.1 mrg /* Put B <-- B - q A */
118 1.1 mrg nn = submul (bp, bn, ap, an, M->p[1][0], mn[1][0]);
119 1.1 mrg }
120 1.1 mrg else if (mn[1][0] == 0)
121 1.1 mrg {
122 1.1 mrg /* B unchanged, M = (1, q; 0, 1) */
123 1.1 mrg ASSERT (mn[0][0] == 1);
124 1.1 mrg ASSERT (M->p[0][0][0] == 1);
125 1.1 mrg ASSERT (mn[1][1] == 1);
126 1.1 mrg ASSERT (M->p[1][1][0] == 1);
127 1.1 mrg
128 1.1 mrg /* Put A <-- A - q * B */
129 1.1 mrg nn = submul (ap, an, bp, bn, M->p[0][1], mn[0][1]);
130 1.1 mrg }
131 1.1 mrg else
132 1.1 mrg {
133 1.1 mrg /* A = m00 a + m01 b ==> a <= A / m00, b <= A / m01.
134 1.1 mrg B = m10 a + m11 b ==> a <= B / m10, b <= B / m11. */
135 1.1 mrg un = MIN (an - mn[0][0], bn - mn[1][0]) + 1;
136 1.1 mrg vn = MIN (an - mn[0][1], bn - mn[1][1]) + 1;
137 1.1 mrg
138 1.1 mrg nn = MAX (un, vn);
139 1.1 mrg /* In the range of interest, mulmod_bnm1 should always beat mullo. */
140 1.1 mrg modn = mpn_mulmod_bnm1_next_size (nn + 1);
141 1.1 mrg
142 1.1.1.2 mrg TMP_ALLOC_LIMBS_3 (tp, modn,
143 1.1.1.2 mrg sp, modn,
144 1.1.1.2 mrg scratch, mpn_mulmod_bnm1_itch (modn, modn, M->n));
145 1.1 mrg
146 1.1 mrg ASSERT (n <= 2*modn);
147 1.1 mrg
148 1.1 mrg if (n > modn)
149 1.1 mrg {
150 1.1 mrg cy = mpn_add (ap, ap, modn, ap + modn, n - modn);
151 1.1 mrg MPN_INCR_U (ap, modn, cy);
152 1.1 mrg
153 1.1 mrg cy = mpn_add (bp, bp, modn, bp + modn, n - modn);
154 1.1 mrg MPN_INCR_U (bp, modn, cy);
155 1.1 mrg
156 1.1 mrg n = modn;
157 1.1 mrg }
158 1.1 mrg
159 1.1 mrg mpn_mulmod_bnm1 (tp, modn, ap, n, M->p[1][1], mn[1][1], scratch);
160 1.1 mrg mpn_mulmod_bnm1 (sp, modn, bp, n, M->p[0][1], mn[0][1], scratch);
161 1.1 mrg
162 1.1 mrg /* FIXME: Handle the small n case in some better way. */
163 1.1 mrg if (n + mn[1][1] < modn)
164 1.1 mrg MPN_ZERO (tp + n + mn[1][1], modn - n - mn[1][1]);
165 1.1 mrg if (n + mn[0][1] < modn)
166 1.1 mrg MPN_ZERO (sp + n + mn[0][1], modn - n - mn[0][1]);
167 1.1 mrg
168 1.1 mrg cy = mpn_sub_n (tp, tp, sp, modn);
169 1.1 mrg MPN_DECR_U (tp, modn, cy);
170 1.1 mrg
171 1.1 mrg ASSERT (mpn_zero_p (tp + nn, modn - nn));
172 1.1 mrg
173 1.1 mrg mpn_mulmod_bnm1 (sp, modn, ap, n, M->p[1][0], mn[1][0], scratch);
174 1.1 mrg MPN_COPY (ap, tp, nn);
175 1.1 mrg mpn_mulmod_bnm1 (tp, modn, bp, n, M->p[0][0], mn[0][0], scratch);
176 1.1 mrg
177 1.1 mrg if (n + mn[1][0] < modn)
178 1.1 mrg MPN_ZERO (sp + n + mn[1][0], modn - n - mn[1][0]);
179 1.1 mrg if (n + mn[0][0] < modn)
180 1.1 mrg MPN_ZERO (tp + n + mn[0][0], modn - n - mn[0][0]);
181 1.1 mrg
182 1.1 mrg cy = mpn_sub_n (tp, tp, sp, modn);
183 1.1 mrg MPN_DECR_U (tp, modn, cy);
184 1.1 mrg
185 1.1 mrg ASSERT (mpn_zero_p (tp + nn, modn - nn));
186 1.1 mrg MPN_COPY (bp, tp, nn);
187 1.1 mrg
188 1.1 mrg while ( (ap[nn-1] | bp[nn-1]) == 0)
189 1.1 mrg {
190 1.1 mrg nn--;
191 1.1 mrg ASSERT (nn > 0);
192 1.1 mrg }
193 1.1 mrg }
194 1.1 mrg TMP_FREE;
195 1.1 mrg
196 1.1 mrg return nn;
197 1.1 mrg }
198 1.1 mrg
199 1.1 mrg mp_size_t
200 1.1 mrg mpn_hgcd_reduce_itch (mp_size_t n, mp_size_t p)
201 1.1 mrg {
202 1.1 mrg mp_size_t itch;
203 1.1 mrg if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))
204 1.1 mrg {
205 1.1 mrg itch = mpn_hgcd_itch (n-p);
206 1.1 mrg
207 1.1 mrg /* For arbitrary p, the storage for _adjust is 2*(p + M->n) = 2 *
208 1.1 mrg (p + ceil((n-p)/2) - 1 <= n + p - 1 */
209 1.1 mrg if (itch < n + p - 1)
210 1.1 mrg itch = n + p - 1;
211 1.1 mrg }
212 1.1 mrg else
213 1.1 mrg {
214 1.1 mrg itch = 2*(n-p) + mpn_hgcd_itch (n-p);
215 1.1 mrg /* Currently, hgcd_matrix_apply allocates its own storage. */
216 1.1 mrg }
217 1.1 mrg return itch;
218 1.1 mrg }
219 1.1 mrg
220 1.1 mrg /* FIXME: Document storage need. */
221 1.1 mrg mp_size_t
222 1.1 mrg mpn_hgcd_reduce (struct hgcd_matrix *M,
223 1.1 mrg mp_ptr ap, mp_ptr bp, mp_size_t n, mp_size_t p,
224 1.1 mrg mp_ptr tp)
225 1.1 mrg {
226 1.1 mrg mp_size_t nn;
227 1.1 mrg if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))
228 1.1 mrg {
229 1.1 mrg nn = mpn_hgcd (ap + p, bp + p, n - p, M, tp);
230 1.1 mrg if (nn > 0)
231 1.1 mrg /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
232 1.1 mrg = 2 (n - 1) */
233 1.1 mrg return mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
234 1.1 mrg }
235 1.1 mrg else
236 1.1 mrg {
237 1.1 mrg MPN_COPY (tp, ap + p, n - p);
238 1.1 mrg MPN_COPY (tp + n - p, bp + p, n - p);
239 1.1 mrg if (mpn_hgcd_appr (tp, tp + n - p, n - p, M, tp + 2*(n-p)))
240 1.1 mrg return hgcd_matrix_apply (M, ap, bp, n);
241 1.1 mrg }
242 1.1 mrg return 0;
243 1.1 mrg }
244