hgcd.c revision 1.1.1.2 1 1.1 mrg /* hgcd.c.
2 1.1 mrg
3 1.1 mrg THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
4 1.1 mrg SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
5 1.1 mrg GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
6 1.1 mrg
7 1.1.1.2 mrg Copyright 2003, 2004, 2005, 2008, 2011, 2012 Free Software Foundation, Inc.
8 1.1 mrg
9 1.1 mrg This file is part of the GNU MP Library.
10 1.1 mrg
11 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify
12 1.1 mrg it under the terms of the GNU Lesser General Public License as published by
13 1.1 mrg the Free Software Foundation; either version 3 of the License, or (at your
14 1.1 mrg option) any later version.
15 1.1 mrg
16 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but
17 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
19 1.1 mrg License for more details.
20 1.1 mrg
21 1.1 mrg You should have received a copy of the GNU Lesser General Public License
22 1.1 mrg along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
23 1.1 mrg
24 1.1 mrg #include "gmp.h"
25 1.1 mrg #include "gmp-impl.h"
26 1.1 mrg #include "longlong.h"
27 1.1 mrg
28 1.1 mrg
29 1.1 mrg /* Size analysis for hgcd:
30 1.1 mrg
31 1.1 mrg For the recursive calls, we have n1 <= ceil(n / 2). Then the
32 1.1 mrg storage need is determined by the storage for the recursive call
33 1.1 mrg computing M1, and hgcd_matrix_adjust and hgcd_matrix_mul calls that use M1
34 1.1 mrg (after this, the storage needed for M1 can be recycled).
35 1.1 mrg
36 1.1 mrg Let S(r) denote the required storage. For M1 we need 4 * (ceil(n1/2) + 1)
37 1.1 mrg = 4 * (ceil(n/4) + 1), for the hgcd_matrix_adjust call, we need n + 2,
38 1.1 mrg and for the hgcd_matrix_mul, we may need 3 ceil(n/2) + 8. In total,
39 1.1 mrg 4 * ceil(n/4) + 3 ceil(n/2) + 12 <= 10 ceil(n/4) + 12.
40 1.1 mrg
41 1.1 mrg For the recursive call, we need S(n1) = S(ceil(n/2)).
42 1.1 mrg
43 1.1 mrg S(n) <= 10*ceil(n/4) + 12 + S(ceil(n/2))
44 1.1 mrg <= 10*(ceil(n/4) + ... + ceil(n/2^(1+k))) + 12k + S(ceil(n/2^k))
45 1.1 mrg <= 10*(2 ceil(n/4) + k) + 12k + S(ceil(n/2^k))
46 1.1 mrg <= 20 ceil(n/4) + 22k + S(ceil(n/2^k))
47 1.1 mrg */
48 1.1 mrg
49 1.1 mrg mp_size_t
50 1.1 mrg mpn_hgcd_itch (mp_size_t n)
51 1.1 mrg {
52 1.1 mrg unsigned k;
53 1.1 mrg int count;
54 1.1 mrg mp_size_t nscaled;
55 1.1 mrg
56 1.1 mrg if (BELOW_THRESHOLD (n, HGCD_THRESHOLD))
57 1.1.1.2 mrg return n;
58 1.1 mrg
59 1.1 mrg /* Get the recursion depth. */
60 1.1 mrg nscaled = (n - 1) / (HGCD_THRESHOLD - 1);
61 1.1 mrg count_leading_zeros (count, nscaled);
62 1.1 mrg k = GMP_LIMB_BITS - count;
63 1.1 mrg
64 1.1.1.2 mrg return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
65 1.1 mrg }
66 1.1 mrg
67 1.1 mrg /* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
68 1.1 mrg with elements of size at most (n+1)/2 - 1. Returns new size of a,
69 1.1 mrg b, or zero if no reduction is possible. */
70 1.1 mrg
71 1.1 mrg mp_size_t
72 1.1 mrg mpn_hgcd (mp_ptr ap, mp_ptr bp, mp_size_t n,
73 1.1 mrg struct hgcd_matrix *M, mp_ptr tp)
74 1.1 mrg {
75 1.1 mrg mp_size_t s = n/2 + 1;
76 1.1 mrg
77 1.1.1.2 mrg mp_size_t nn;
78 1.1 mrg int success = 0;
79 1.1 mrg
80 1.1 mrg if (n <= s)
81 1.1 mrg /* Happens when n <= 2, a fairly uninteresting case but exercised
82 1.1 mrg by the random inputs of the testsuite. */
83 1.1 mrg return 0;
84 1.1 mrg
85 1.1 mrg ASSERT ((ap[n-1] | bp[n-1]) > 0);
86 1.1 mrg
87 1.1 mrg ASSERT ((n+1)/2 - 1 < M->alloc);
88 1.1 mrg
89 1.1.1.2 mrg if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD))
90 1.1 mrg {
91 1.1.1.2 mrg mp_size_t n2 = (3*n)/4 + 1;
92 1.1.1.2 mrg mp_size_t p = n/2;
93 1.1 mrg
94 1.1.1.2 mrg nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
95 1.1.1.2 mrg if (nn)
96 1.1.1.2 mrg {
97 1.1.1.2 mrg n = nn;
98 1.1.1.2 mrg success = 1;
99 1.1.1.2 mrg }
100 1.1.1.2 mrg
101 1.1.1.2 mrg /* NOTE: It apppears this loop never runs more than once (at
102 1.1.1.2 mrg least when not recursing to hgcd_appr). */
103 1.1.1.2 mrg while (n > n2)
104 1.1.1.2 mrg {
105 1.1.1.2 mrg /* Needs n + 1 storage */
106 1.1.1.2 mrg nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
107 1.1.1.2 mrg if (!nn)
108 1.1.1.2 mrg return success ? n : 0;
109 1.1 mrg
110 1.1.1.2 mrg n = nn;
111 1.1.1.2 mrg success = 1;
112 1.1.1.2 mrg }
113 1.1 mrg
114 1.1.1.2 mrg if (n > s + 2)
115 1.1 mrg {
116 1.1.1.2 mrg struct hgcd_matrix M1;
117 1.1.1.2 mrg mp_size_t scratch;
118 1.1 mrg
119 1.1.1.2 mrg p = 2*s - n + 1;
120 1.1.1.2 mrg scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
121 1.1 mrg
122 1.1.1.2 mrg mpn_hgcd_matrix_init(&M1, n - p, tp);
123 1.1 mrg
124 1.1.1.2 mrg /* FIXME: Should use hgcd_reduce, but that may require more
125 1.1.1.2 mrg scratch space, which requires review. */
126 1.1 mrg
127 1.1.1.2 mrg nn = mpn_hgcd (ap + p, bp + p, n - p, &M1, tp + scratch);
128 1.1.1.2 mrg if (nn > 0)
129 1.1.1.2 mrg {
130 1.1.1.2 mrg /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
131 1.1.1.2 mrg ASSERT (M->n + 2 >= M1.n);
132 1.1 mrg
133 1.1.1.2 mrg /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
134 1.1.1.2 mrg then either q or q + 1 is a correct quotient, and M1 will
135 1.1.1.2 mrg start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
136 1.1.1.2 mrg rules out the case that the size of M * M1 is much
137 1.1.1.2 mrg smaller than the expected M->n + M1->n. */
138 1.1 mrg
139 1.1.1.2 mrg ASSERT (M->n + M1.n < M->alloc);
140 1.1 mrg
141 1.1.1.2 mrg /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
142 1.1.1.2 mrg = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
143 1.1.1.2 mrg n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
144 1.1 mrg
145 1.1.1.2 mrg /* We need a bound for of M->n + M1.n. Let n be the original
146 1.1.1.2 mrg input size. Then
147 1.1.1.2 mrg
148 1.1.1.2 mrg ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
149 1.1.1.2 mrg
150 1.1.1.2 mrg and it follows that
151 1.1.1.2 mrg
152 1.1.1.2 mrg M.n + M1.n <= ceil(n/2) + 1
153 1.1.1.2 mrg
154 1.1.1.2 mrg Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
155 1.1.1.2 mrg amount of needed scratch space. */
156 1.1.1.2 mrg mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
157 1.1.1.2 mrg success = 1;
158 1.1.1.2 mrg }
159 1.1 mrg }
160 1.1 mrg }
161 1.1 mrg
162 1.1 mrg for (;;)
163 1.1 mrg {
164 1.1 mrg /* Needs s+3 < n */
165 1.1.1.2 mrg nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
166 1.1 mrg if (!nn)
167 1.1 mrg return success ? n : 0;
168 1.1 mrg
169 1.1 mrg n = nn;
170 1.1 mrg success = 1;
171 1.1 mrg }
172 1.1 mrg }
173