broot.c revision 1.1 1 1.1 mrg /* mpn_broot -- Compute hensel sqrt
2 1.1 mrg
3 1.1 mrg Contributed to the GNU project by Niels Mller
4 1.1 mrg
5 1.1 mrg THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
6 1.1 mrg SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 1.1 mrg GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
8 1.1 mrg
9 1.1 mrg Copyright 2012 Free Software Foundation, Inc.
10 1.1 mrg
11 1.1 mrg This file is part of the GNU MP Library.
12 1.1 mrg
13 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify
14 1.1 mrg it under the terms of the GNU Lesser General Public License as published by
15 1.1 mrg the Free Software Foundation; either version 3 of the License, or (at your
16 1.1 mrg option) any later version.
17 1.1 mrg
18 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but
19 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
21 1.1 mrg License for more details.
22 1.1 mrg
23 1.1 mrg You should have received a copy of the GNU Lesser General Public License
24 1.1 mrg along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
25 1.1 mrg
26 1.1 mrg #include "gmp.h"
27 1.1 mrg #include "gmp-impl.h"
28 1.1 mrg
29 1.1 mrg /* Computes a^e (mod B). Uses right-to-left binary algorithm, since
30 1.1 mrg typical use will have e small. */
31 1.1 mrg static mp_limb_t
32 1.1 mrg powlimb (mp_limb_t a, mp_limb_t e)
33 1.1 mrg {
34 1.1 mrg mp_limb_t r = 1;
35 1.1 mrg mp_limb_t s = a;
36 1.1 mrg
37 1.1 mrg for (r = 1, s = a; e > 0; e >>= 1, s *= s)
38 1.1 mrg if (e & 1)
39 1.1 mrg r *= s;
40 1.1 mrg
41 1.1 mrg return r;
42 1.1 mrg }
43 1.1 mrg
44 1.1 mrg /* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.
45 1.1 mrg
46 1.1 mrg Iterates
47 1.1 mrg
48 1.1 mrg r' <-- r - r * (a^{k-1} r^k - 1) / n
49 1.1 mrg
50 1.1 mrg If
51 1.1 mrg
52 1.1 mrg a^{k-1} r^k = 1 (mod 2^m),
53 1.1 mrg
54 1.1 mrg then
55 1.1 mrg
56 1.1 mrg a^{k-1} r'^k = 1 (mod 2^{2m}),
57 1.1 mrg
58 1.1 mrg Compute the update term as
59 1.1 mrg
60 1.1 mrg r' = r - (a^{k-1} r^{k+1} - r) / k
61 1.1 mrg
62 1.1 mrg where we still have cancelation of low limbs.
63 1.1 mrg
64 1.1 mrg */
65 1.1 mrg void
66 1.1 mrg mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
67 1.1 mrg {
68 1.1 mrg mp_size_t sizes[GMP_LIMB_BITS * 2];
69 1.1 mrg mp_ptr akm1, tp, rnp, ep, scratch;
70 1.1 mrg mp_limb_t a0, r0, km1, kp1h, kinv;
71 1.1 mrg mp_size_t rn;
72 1.1 mrg unsigned i;
73 1.1 mrg
74 1.1 mrg TMP_DECL;
75 1.1 mrg
76 1.1 mrg ASSERT (n > 0);
77 1.1 mrg ASSERT (ap[0] & 1);
78 1.1 mrg ASSERT (k & 1);
79 1.1 mrg ASSERT (k >= 3);
80 1.1 mrg
81 1.1 mrg TMP_MARK;
82 1.1 mrg
83 1.1 mrg akm1 = TMP_ALLOC_LIMBS (4*n);
84 1.1 mrg tp = akm1 + n;
85 1.1 mrg
86 1.1 mrg km1 = k-1;
87 1.1 mrg /* FIXME: Could arrange the iteration so we don't need to compute
88 1.1 mrg this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
89 1.1 mrg that we can use wraparound also for a*r, since the low half is
90 1.1 mrg unchanged from the previous iteration. Or possibly mulmid. Also,
91 1.1 mrg a r = a^{1/k}, so we get that value too, for free? */
92 1.1 mrg mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */
93 1.1 mrg
94 1.1 mrg a0 = ap[0];
95 1.1 mrg binvert_limb (kinv, k);
96 1.1 mrg
97 1.1 mrg /* 4 bits: a^{1/k - 1} (mod 16):
98 1.1 mrg
99 1.1 mrg a % 8
100 1.1 mrg 1 3 5 7
101 1.1 mrg k%4 +-------
102 1.1 mrg 1 |1 1 1 1
103 1.1 mrg 3 |1 9 9 1
104 1.1 mrg */
105 1.1 mrg r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
106 1.1 mrg r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
107 1.1 mrg r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
108 1.1 mrg r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
109 1.1 mrg #if GMP_NUMB_BITS > 32
110 1.1 mrg {
111 1.1 mrg unsigned prec = 32;
112 1.1 mrg do
113 1.1 mrg {
114 1.1 mrg r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
115 1.1 mrg prec *= 2;
116 1.1 mrg }
117 1.1 mrg while (prec < GMP_NUMB_BITS);
118 1.1 mrg }
119 1.1 mrg #endif
120 1.1 mrg
121 1.1 mrg rp[0] = r0;
122 1.1 mrg if (n == 1)
123 1.1 mrg {
124 1.1 mrg TMP_FREE;
125 1.1 mrg return;
126 1.1 mrg }
127 1.1 mrg
128 1.1 mrg /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
129 1.1 mrg kp1h = k/2 + 1;
130 1.1 mrg
131 1.1 mrg /* FIXME: Special case for two limb iteration. */
132 1.1 mrg rnp = TMP_ALLOC_LIMBS (2*n + 1);
133 1.1 mrg ep = rnp + n;
134 1.1 mrg
135 1.1 mrg /* FIXME: Possible to this on the fly with some bit fiddling. */
136 1.1 mrg for (i = 0; n > 1; n = (n + 1)/2)
137 1.1 mrg sizes[i++] = n;
138 1.1 mrg
139 1.1 mrg rn = 1;
140 1.1 mrg
141 1.1 mrg while (i-- > 0)
142 1.1 mrg {
143 1.1 mrg /* Compute x^{k+1}. */
144 1.1 mrg mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
145 1.1 mrg final iteration.*/
146 1.1 mrg mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);
147 1.1 mrg
148 1.1 mrg /* Multiply by a^{k-1}. Can use wraparound; low part equals
149 1.1 mrg r. */
150 1.1 mrg
151 1.1 mrg mpn_mullo_n (ep, rnp, akm1, sizes[i]);
152 1.1 mrg ASSERT (mpn_cmp (ep, rp, rn) == 0);
153 1.1 mrg
154 1.1 mrg ASSERT (sizes[i] <= 2*rn);
155 1.1 mrg mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
156 1.1 mrg mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
157 1.1 mrg rn = sizes[i];
158 1.1 mrg }
159 1.1 mrg TMP_FREE;
160 1.1 mrg }
161 1.1 mrg
162 1.1 mrg /* Computes a^{1/k} (mod B^n). Both a and k must be odd. */
163 1.1 mrg void
164 1.1 mrg mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
165 1.1 mrg {
166 1.1 mrg mp_ptr tp;
167 1.1 mrg TMP_DECL;
168 1.1 mrg
169 1.1 mrg ASSERT (n > 0);
170 1.1 mrg ASSERT (ap[0] & 1);
171 1.1 mrg ASSERT (k & 1);
172 1.1 mrg
173 1.1 mrg if (k == 1)
174 1.1 mrg {
175 1.1 mrg MPN_COPY (rp, ap, n);
176 1.1 mrg return;
177 1.1 mrg }
178 1.1 mrg
179 1.1 mrg TMP_MARK;
180 1.1 mrg tp = TMP_ALLOC_LIMBS (n);
181 1.1 mrg
182 1.1 mrg mpn_broot_invm1 (tp, ap, n, k);
183 1.1 mrg mpn_mullo_n (rp, tp, ap, n);
184 1.1 mrg
185 1.1 mrg TMP_FREE;
186 1.1 mrg }
187