broot.c revision 1.1 1 /* mpn_broot -- Compute hensel sqrt
2
3 Contributed to the GNU project by Niels Mller
4
5 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
6 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
8
9 Copyright 2012 Free Software Foundation, Inc.
10
11 This file is part of the GNU MP Library.
12
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of the GNU Lesser General Public License as published by
15 the Free Software Foundation; either version 3 of the License, or (at your
16 option) any later version.
17
18 The GNU MP Library is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
21 License for more details.
22
23 You should have received a copy of the GNU Lesser General Public License
24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
25
26 #include "gmp.h"
27 #include "gmp-impl.h"
28
29 /* Computes a^e (mod B). Uses right-to-left binary algorithm, since
30 typical use will have e small. */
31 static mp_limb_t
32 powlimb (mp_limb_t a, mp_limb_t e)
33 {
34 mp_limb_t r = 1;
35 mp_limb_t s = a;
36
37 for (r = 1, s = a; e > 0; e >>= 1, s *= s)
38 if (e & 1)
39 r *= s;
40
41 return r;
42 }
43
44 /* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.
45
46 Iterates
47
48 r' <-- r - r * (a^{k-1} r^k - 1) / n
49
50 If
51
52 a^{k-1} r^k = 1 (mod 2^m),
53
54 then
55
56 a^{k-1} r'^k = 1 (mod 2^{2m}),
57
58 Compute the update term as
59
60 r' = r - (a^{k-1} r^{k+1} - r) / k
61
62 where we still have cancelation of low limbs.
63
64 */
65 void
66 mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
67 {
68 mp_size_t sizes[GMP_LIMB_BITS * 2];
69 mp_ptr akm1, tp, rnp, ep, scratch;
70 mp_limb_t a0, r0, km1, kp1h, kinv;
71 mp_size_t rn;
72 unsigned i;
73
74 TMP_DECL;
75
76 ASSERT (n > 0);
77 ASSERT (ap[0] & 1);
78 ASSERT (k & 1);
79 ASSERT (k >= 3);
80
81 TMP_MARK;
82
83 akm1 = TMP_ALLOC_LIMBS (4*n);
84 tp = akm1 + n;
85
86 km1 = k-1;
87 /* FIXME: Could arrange the iteration so we don't need to compute
88 this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
89 that we can use wraparound also for a*r, since the low half is
90 unchanged from the previous iteration. Or possibly mulmid. Also,
91 a r = a^{1/k}, so we get that value too, for free? */
92 mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */
93
94 a0 = ap[0];
95 binvert_limb (kinv, k);
96
97 /* 4 bits: a^{1/k - 1} (mod 16):
98
99 a % 8
100 1 3 5 7
101 k%4 +-------
102 1 |1 1 1 1
103 3 |1 9 9 1
104 */
105 r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
106 r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
107 r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
108 r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
109 #if GMP_NUMB_BITS > 32
110 {
111 unsigned prec = 32;
112 do
113 {
114 r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
115 prec *= 2;
116 }
117 while (prec < GMP_NUMB_BITS);
118 }
119 #endif
120
121 rp[0] = r0;
122 if (n == 1)
123 {
124 TMP_FREE;
125 return;
126 }
127
128 /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
129 kp1h = k/2 + 1;
130
131 /* FIXME: Special case for two limb iteration. */
132 rnp = TMP_ALLOC_LIMBS (2*n + 1);
133 ep = rnp + n;
134
135 /* FIXME: Possible to this on the fly with some bit fiddling. */
136 for (i = 0; n > 1; n = (n + 1)/2)
137 sizes[i++] = n;
138
139 rn = 1;
140
141 while (i-- > 0)
142 {
143 /* Compute x^{k+1}. */
144 mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
145 final iteration.*/
146 mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);
147
148 /* Multiply by a^{k-1}. Can use wraparound; low part equals
149 r. */
150
151 mpn_mullo_n (ep, rnp, akm1, sizes[i]);
152 ASSERT (mpn_cmp (ep, rp, rn) == 0);
153
154 ASSERT (sizes[i] <= 2*rn);
155 mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
156 mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
157 rn = sizes[i];
158 }
159 TMP_FREE;
160 }
161
162 /* Computes a^{1/k} (mod B^n). Both a and k must be odd. */
163 void
164 mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
165 {
166 mp_ptr tp;
167 TMP_DECL;
168
169 ASSERT (n > 0);
170 ASSERT (ap[0] & 1);
171 ASSERT (k & 1);
172
173 if (k == 1)
174 {
175 MPN_COPY (rp, ap, n);
176 return;
177 }
178
179 TMP_MARK;
180 tp = TMP_ALLOC_LIMBS (n);
181
182 mpn_broot_invm1 (tp, ap, n, k);
183 mpn_mullo_n (rp, tp, ap, n);
184
185 TMP_FREE;
186 }
187