broot.c revision 1.1.1.2 1 1.1 mrg /* mpn_broot -- Compute hensel sqrt
2 1.1 mrg
3 1.1.1.2 mrg Contributed to the GNU project by Niels Mller
4 1.1 mrg
5 1.1 mrg THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
6 1.1 mrg SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 1.1 mrg GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
8 1.1 mrg
9 1.1 mrg Copyright 2012 Free Software Foundation, Inc.
10 1.1 mrg
11 1.1 mrg This file is part of the GNU MP Library.
12 1.1 mrg
13 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify
14 1.1.1.2 mrg it under the terms of either:
15 1.1.1.2 mrg
16 1.1.1.2 mrg * the GNU Lesser General Public License as published by the Free
17 1.1.1.2 mrg Software Foundation; either version 3 of the License, or (at your
18 1.1.1.2 mrg option) any later version.
19 1.1.1.2 mrg
20 1.1.1.2 mrg or
21 1.1.1.2 mrg
22 1.1.1.2 mrg * the GNU General Public License as published by the Free Software
23 1.1.1.2 mrg Foundation; either version 2 of the License, or (at your option) any
24 1.1.1.2 mrg later version.
25 1.1.1.2 mrg
26 1.1.1.2 mrg or both in parallel, as here.
27 1.1 mrg
28 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but
29 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 1.1.1.2 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31 1.1.1.2 mrg for more details.
32 1.1 mrg
33 1.1.1.2 mrg You should have received copies of the GNU General Public License and the
34 1.1.1.2 mrg GNU Lesser General Public License along with the GNU MP Library. If not,
35 1.1.1.2 mrg see https://www.gnu.org/licenses/. */
36 1.1 mrg
37 1.1 mrg #include "gmp.h"
38 1.1 mrg #include "gmp-impl.h"
39 1.1 mrg
40 1.1 mrg /* Computes a^e (mod B). Uses right-to-left binary algorithm, since
41 1.1 mrg typical use will have e small. */
42 1.1 mrg static mp_limb_t
43 1.1 mrg powlimb (mp_limb_t a, mp_limb_t e)
44 1.1 mrg {
45 1.1 mrg mp_limb_t r = 1;
46 1.1 mrg mp_limb_t s = a;
47 1.1 mrg
48 1.1 mrg for (r = 1, s = a; e > 0; e >>= 1, s *= s)
49 1.1 mrg if (e & 1)
50 1.1 mrg r *= s;
51 1.1 mrg
52 1.1 mrg return r;
53 1.1 mrg }
54 1.1 mrg
55 1.1 mrg /* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.
56 1.1 mrg
57 1.1 mrg Iterates
58 1.1 mrg
59 1.1 mrg r' <-- r - r * (a^{k-1} r^k - 1) / n
60 1.1 mrg
61 1.1 mrg If
62 1.1 mrg
63 1.1 mrg a^{k-1} r^k = 1 (mod 2^m),
64 1.1 mrg
65 1.1 mrg then
66 1.1 mrg
67 1.1 mrg a^{k-1} r'^k = 1 (mod 2^{2m}),
68 1.1 mrg
69 1.1 mrg Compute the update term as
70 1.1 mrg
71 1.1 mrg r' = r - (a^{k-1} r^{k+1} - r) / k
72 1.1 mrg
73 1.1.1.2 mrg where we still have cancellation of low limbs.
74 1.1 mrg
75 1.1 mrg */
76 1.1 mrg void
77 1.1 mrg mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
78 1.1 mrg {
79 1.1 mrg mp_size_t sizes[GMP_LIMB_BITS * 2];
80 1.1.1.2 mrg mp_ptr akm1, tp, rnp, ep;
81 1.1 mrg mp_limb_t a0, r0, km1, kp1h, kinv;
82 1.1 mrg mp_size_t rn;
83 1.1 mrg unsigned i;
84 1.1 mrg
85 1.1 mrg TMP_DECL;
86 1.1 mrg
87 1.1 mrg ASSERT (n > 0);
88 1.1 mrg ASSERT (ap[0] & 1);
89 1.1 mrg ASSERT (k & 1);
90 1.1 mrg ASSERT (k >= 3);
91 1.1 mrg
92 1.1 mrg TMP_MARK;
93 1.1 mrg
94 1.1 mrg akm1 = TMP_ALLOC_LIMBS (4*n);
95 1.1 mrg tp = akm1 + n;
96 1.1 mrg
97 1.1 mrg km1 = k-1;
98 1.1 mrg /* FIXME: Could arrange the iteration so we don't need to compute
99 1.1 mrg this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
100 1.1 mrg that we can use wraparound also for a*r, since the low half is
101 1.1 mrg unchanged from the previous iteration. Or possibly mulmid. Also,
102 1.1 mrg a r = a^{1/k}, so we get that value too, for free? */
103 1.1 mrg mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */
104 1.1 mrg
105 1.1 mrg a0 = ap[0];
106 1.1 mrg binvert_limb (kinv, k);
107 1.1 mrg
108 1.1 mrg /* 4 bits: a^{1/k - 1} (mod 16):
109 1.1 mrg
110 1.1 mrg a % 8
111 1.1 mrg 1 3 5 7
112 1.1 mrg k%4 +-------
113 1.1 mrg 1 |1 1 1 1
114 1.1 mrg 3 |1 9 9 1
115 1.1 mrg */
116 1.1 mrg r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
117 1.1 mrg r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
118 1.1 mrg r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
119 1.1 mrg r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
120 1.1 mrg #if GMP_NUMB_BITS > 32
121 1.1 mrg {
122 1.1 mrg unsigned prec = 32;
123 1.1 mrg do
124 1.1 mrg {
125 1.1 mrg r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
126 1.1 mrg prec *= 2;
127 1.1 mrg }
128 1.1 mrg while (prec < GMP_NUMB_BITS);
129 1.1 mrg }
130 1.1 mrg #endif
131 1.1 mrg
132 1.1 mrg rp[0] = r0;
133 1.1 mrg if (n == 1)
134 1.1 mrg {
135 1.1 mrg TMP_FREE;
136 1.1 mrg return;
137 1.1 mrg }
138 1.1 mrg
139 1.1 mrg /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
140 1.1 mrg kp1h = k/2 + 1;
141 1.1 mrg
142 1.1 mrg /* FIXME: Special case for two limb iteration. */
143 1.1 mrg rnp = TMP_ALLOC_LIMBS (2*n + 1);
144 1.1 mrg ep = rnp + n;
145 1.1 mrg
146 1.1 mrg /* FIXME: Possible to this on the fly with some bit fiddling. */
147 1.1 mrg for (i = 0; n > 1; n = (n + 1)/2)
148 1.1 mrg sizes[i++] = n;
149 1.1 mrg
150 1.1 mrg rn = 1;
151 1.1 mrg
152 1.1 mrg while (i-- > 0)
153 1.1 mrg {
154 1.1 mrg /* Compute x^{k+1}. */
155 1.1 mrg mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
156 1.1.1.2 mrg final iteration. */
157 1.1 mrg mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);
158 1.1 mrg
159 1.1.1.2 mrg /* Multiply by a^{k-1}. Can use wraparound; low part equals r. */
160 1.1 mrg
161 1.1 mrg mpn_mullo_n (ep, rnp, akm1, sizes[i]);
162 1.1 mrg ASSERT (mpn_cmp (ep, rp, rn) == 0);
163 1.1 mrg
164 1.1 mrg ASSERT (sizes[i] <= 2*rn);
165 1.1 mrg mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
166 1.1 mrg mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
167 1.1 mrg rn = sizes[i];
168 1.1 mrg }
169 1.1 mrg TMP_FREE;
170 1.1 mrg }
171 1.1 mrg
172 1.1 mrg /* Computes a^{1/k} (mod B^n). Both a and k must be odd. */
173 1.1 mrg void
174 1.1 mrg mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
175 1.1 mrg {
176 1.1 mrg mp_ptr tp;
177 1.1 mrg TMP_DECL;
178 1.1 mrg
179 1.1 mrg ASSERT (n > 0);
180 1.1 mrg ASSERT (ap[0] & 1);
181 1.1 mrg ASSERT (k & 1);
182 1.1 mrg
183 1.1 mrg if (k == 1)
184 1.1 mrg {
185 1.1 mrg MPN_COPY (rp, ap, n);
186 1.1 mrg return;
187 1.1 mrg }
188 1.1 mrg
189 1.1 mrg TMP_MARK;
190 1.1 mrg tp = TMP_ALLOC_LIMBS (n);
191 1.1 mrg
192 1.1 mrg mpn_broot_invm1 (tp, ap, n, k);
193 1.1 mrg mpn_mullo_n (rp, tp, ap, n);
194 1.1 mrg
195 1.1 mrg TMP_FREE;
196 1.1 mrg }
197