/* hgcd_reduce.c.

THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2011, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

/* Computes R -= A * B. The result must be non-negative. R is normalized,
   but not below size an; the resulting size is returned. */
static mp_size_t
submul (mp_ptr rp, mp_size_t rn,
        mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
{
  mp_ptr tp;
  TMP_DECL;

  ASSERT (bn > 0);
  ASSERT (an >= bn);
  ASSERT (rn >= an);
  ASSERT (an + bn <= rn + 1);

  TMP_MARK;
  tp = TMP_ALLOC_LIMBS (an + bn);

  mpn_mul (tp, ap, an, bp, bn);
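  /* The product A * B has an + bn limbs, at most one more than rn.  Since
     R - A * B is non-negative, any such extra high limb must be zero, so
     it can be dropped before the subtraction below. */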
  if (an + bn > rn)
    {
      ASSERT (tp[rn] == 0);
      bn--;
    }
  ASSERT_NOCARRY (mpn_sub (rp, rp, rn, tp, an + bn));
  TMP_FREE;

  while (rn > an && (rp[rn-1] == 0))
    rn--;

  return rn;
}

/* Computes (a, b) <-- M^{-1} (a; b) */
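/* M has non-negative entries and determinant 1, so
     M^{-1} = (m11, -m01; -m10, m00),
   and the code below computes
     a' = m11 * a - m01 * b
     b' = m00 * b - m10 * a,
   both of which are non-negative. */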
/* FIXME:
   x Take scratch parameter, and figure out scratch need.

   x Use some fallback for small M->n?
*/
static mp_size_t
hgcd_matrix_apply (const struct hgcd_matrix *M,
                   mp_ptr ap, mp_ptr bp,
                   mp_size_t n)
{
  mp_size_t an, bn, un, vn, nn;
  mp_size_t mn[2][2];
  mp_size_t modn;
  mp_ptr tp, sp, scratch;
  mp_limb_t cy;
  unsigned i, j;

  TMP_DECL;

  ASSERT ( (ap[n-1] | bp[n-1]) > 0);

  an = n;
  MPN_NORMALIZE (ap, an);
  bn = n;
  MPN_NORMALIZE (bp, bn);

  for (i = 0; i < 2; i++)
    for (j = 0; j < 2; j++)
      {
        mp_size_t k;
        k = M->n;
        MPN_NORMALIZE (M->p[i][j], k);
        mn[i][j] = k;
      }

  ASSERT (mn[0][0] > 0);
  ASSERT (mn[1][1] > 0);
  ASSERT ( (mn[0][1] | mn[1][0]) > 0);

  TMP_MARK;

  if (mn[0][1] == 0)
    {
      /* A unchanged, M = (1, 0; q, 1) */
      ASSERT (mn[0][0] == 1);
      ASSERT (M->p[0][0][0] == 1);
      ASSERT (mn[1][1] == 1);
      ASSERT (M->p[1][1][0] == 1);

      /* Put B <-- B - q A */
      nn = submul (bp, bn, ap, an, M->p[1][0], mn[1][0]);
    }
  else if (mn[1][0] == 0)
    {
      /* B unchanged, M = (1, q; 0, 1) */
      ASSERT (mn[0][0] == 1);
      ASSERT (M->p[0][0][0] == 1);
      ASSERT (mn[1][1] == 1);
      ASSERT (M->p[1][1][0] == 1);

      /* Put A <-- A - q * B */
      nn = submul (ap, an, bp, bn, M->p[0][1], mn[0][1]);
    }
  else
    {
      /* A = m00 a + m01 b ==> a <= A / m00, b <= A / m01.
         B = m10 a + m11 b ==> a <= B / m10, b <= B / m11. */
      un = MIN (an - mn[0][0], bn - mn[1][0]) + 1;
      vn = MIN (an - mn[0][1], bn - mn[1][1]) + 1;
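      /* un bounds the limb count of the reduced a, vn that of the reduced b:
         e.g., a <= A / m00 implies a has at most an - mn[0][0] + 1 limbs. */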

      nn = MAX (un, vn);
      /* In the range of interest, mulmod_bnm1 should always beat mullo. */
      modn = mpn_mulmod_bnm1_next_size (nn + 1);
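      /* The linear combinations computed below fit in nn < modn limbs, so
         they are recovered exactly from their residues mod B^modn - 1
         (B = 2^GMP_NUMB_BITS).  Operands of more than modn limbs are first
         folded into the same residue class. */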

      scratch = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (modn, modn, M->n));
      tp = TMP_ALLOC_LIMBS (modn);
      sp = TMP_ALLOC_LIMBS (modn);

      ASSERT (n <= 2*modn);

      if (n > modn)
        {
          cy = mpn_add (ap, ap, modn, ap + modn, n - modn);
          MPN_INCR_U (ap, modn, cy);

          cy = mpn_add (bp, bp, modn, bp + modn, n - modn);
          MPN_INCR_U (bp, modn, cy);

          n = modn;
        }

      mpn_mulmod_bnm1 (tp, modn, ap, n, M->p[1][1], mn[1][1], scratch);
      mpn_mulmod_bnm1 (sp, modn, bp, n, M->p[0][1], mn[0][1], scratch);

      /* FIXME: Handle the small n case in some better way. */
      if (n + mn[1][1] < modn)
        MPN_ZERO (tp + n + mn[1][1], modn - n - mn[1][1]);
      if (n + mn[0][1] < modn)
        MPN_ZERO (sp + n + mn[0][1], modn - n - mn[0][1]);

      cy = mpn_sub_n (tp, tp, sp, modn);
      MPN_DECR_U (tp, modn, cy);

      ASSERT (mpn_zero_p (tp + nn, modn - nn));

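      /* tp now holds the new a = m11 * a - m01 * b.  The old a is still
         needed to form the new b = m00 * b - m10 * a, so compute the
         a * m10 product before overwriting ap. */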
      mpn_mulmod_bnm1 (sp, modn, ap, n, M->p[1][0], mn[1][0], scratch);
      MPN_COPY (ap, tp, nn);
      mpn_mulmod_bnm1 (tp, modn, bp, n, M->p[0][0], mn[0][0], scratch);

      if (n + mn[1][0] < modn)
        MPN_ZERO (sp + n + mn[1][0], modn - n - mn[1][0]);
      if (n + mn[0][0] < modn)
        MPN_ZERO (tp + n + mn[0][0], modn - n - mn[0][0]);

      cy = mpn_sub_n (tp, tp, sp, modn);
      MPN_DECR_U (tp, modn, cy);

      ASSERT (mpn_zero_p (tp + nn, modn - nn));
      MPN_COPY (bp, tp, nn);

      while ( (ap[nn-1] | bp[nn-1]) == 0)
        {
          nn--;
          ASSERT (nn > 0);
        }
    }
  TMP_FREE;

  return nn;
}

mp_size_t
mpn_hgcd_reduce_itch (mp_size_t n, mp_size_t p)
{
  mp_size_t itch;
  if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))
    {
      itch = mpn_hgcd_itch (n-p);

      /* For arbitrary p, the storage for _adjust is
         2*(p + M->n) <= 2*(p + ceil((n-p)/2) - 1) <= n + p - 1 */
      if (itch < n + p - 1)
        itch = n + p - 1;
    }
  else
    {
      itch = 2*(n-p) + mpn_hgcd_itch (n-p);
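      /* The 2*(n-p) limbs are for the copies of the high parts of A and B
         that mpn_hgcd_reduce passes to mpn_hgcd_appr. */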
      /* Currently, hgcd_matrix_apply allocates its own storage. */
    }
  return itch;
}

/* FIXME: Document storage need. */
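/* Reduces a, b by an HGCD-style transformation computed from their high
   n - p limbs, accumulating the transformation in M.  Returns the new size
   of a, b, or 0 if no reduction was possible.  Below HGCD_REDUCE_THRESHOLD,
   mpn_hgcd works directly on the high limbs and mpn_hgcd_matrix_adjust then
   propagates the transformation to the low p limbs; above the threshold,
   mpn_hgcd_appr works on copies of the high limbs and M^{-1} is applied to
   the full operands by hgcd_matrix_apply. */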
mp_size_t
mpn_hgcd_reduce (struct hgcd_matrix *M,
                 mp_ptr ap, mp_ptr bp, mp_size_t n, mp_size_t p,
                 mp_ptr tp)
{
  mp_size_t nn;
  if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))
    {
      nn = mpn_hgcd (ap + p, bp + p, n - p, M, tp);
      if (nn > 0)
        /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
           = 2 (n - 1) */
        return mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
    }
  else
    {
      MPN_COPY (tp, ap + p, n - p);
      MPN_COPY (tp + n - p, bp + p, n - p);
      if (mpn_hgcd_appr (tp, tp + n - p, n - p, M, tp + 2*(n-p)))
        return hgcd_matrix_apply (M, ap, bp, n);
    }
  return 0;
}