1 1.1 mrg /* mpn_broot -- Compute hensel sqrt 2 1.1 mrg 3 1.1.1.2 mrg Contributed to the GNU project by Niels Mller 4 1.1 mrg 5 1.1 mrg THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY 6 1.1 mrg SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 1.1 mrg GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE. 8 1.1 mrg 9 1.1 mrg Copyright 2012 Free Software Foundation, Inc. 10 1.1 mrg 11 1.1 mrg This file is part of the GNU MP Library. 12 1.1 mrg 13 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify 14 1.1.1.2 mrg it under the terms of either: 15 1.1.1.2 mrg 16 1.1.1.2 mrg * the GNU Lesser General Public License as published by the Free 17 1.1.1.2 mrg Software Foundation; either version 3 of the License, or (at your 18 1.1.1.2 mrg option) any later version. 19 1.1.1.2 mrg 20 1.1.1.2 mrg or 21 1.1.1.2 mrg 22 1.1.1.2 mrg * the GNU General Public License as published by the Free Software 23 1.1.1.2 mrg Foundation; either version 2 of the License, or (at your option) any 24 1.1.1.2 mrg later version. 25 1.1.1.2 mrg 26 1.1.1.2 mrg or both in parallel, as here. 27 1.1 mrg 28 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but 29 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 30 1.1.1.2 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 31 1.1.1.2 mrg for more details. 32 1.1 mrg 33 1.1.1.2 mrg You should have received copies of the GNU General Public License and the 34 1.1.1.2 mrg GNU Lesser General Public License along with the GNU MP Library. If not, 35 1.1.1.2 mrg see https://www.gnu.org/licenses/. */ 36 1.1 mrg 37 1.1 mrg #include "gmp-impl.h" 38 1.1 mrg 39 1.1 mrg /* Computes a^e (mod B). Uses right-to-left binary algorithm, since 40 1.1 mrg typical use will have e small. */ 41 1.1 mrg static mp_limb_t 42 1.1 mrg powlimb (mp_limb_t a, mp_limb_t e) 43 1.1 mrg { 44 1.1 mrg mp_limb_t r = 1; 45 1.1 mrg mp_limb_t s = a; 46 1.1 mrg 47 1.1 mrg for (r = 1, s = a; e > 0; e >>= 1, s *= s) 48 1.1 mrg if (e & 1) 49 1.1 mrg r *= s; 50 1.1 mrg 51 1.1 mrg return r; 52 1.1 mrg } 53 1.1 mrg 54 1.1 mrg /* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd. 55 1.1 mrg 56 1.1 mrg Iterates 57 1.1 mrg 58 1.1 mrg r' <-- r - r * (a^{k-1} r^k - 1) / n 59 1.1 mrg 60 1.1 mrg If 61 1.1 mrg 62 1.1 mrg a^{k-1} r^k = 1 (mod 2^m), 63 1.1 mrg 64 1.1 mrg then 65 1.1 mrg 66 1.1 mrg a^{k-1} r'^k = 1 (mod 2^{2m}), 67 1.1 mrg 68 1.1 mrg Compute the update term as 69 1.1 mrg 70 1.1 mrg r' = r - (a^{k-1} r^{k+1} - r) / k 71 1.1 mrg 72 1.1.1.2 mrg where we still have cancellation of low limbs. 73 1.1 mrg 74 1.1 mrg */ 75 1.1 mrg void 76 1.1 mrg mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k) 77 1.1 mrg { 78 1.1 mrg mp_size_t sizes[GMP_LIMB_BITS * 2]; 79 1.1.1.2 mrg mp_ptr akm1, tp, rnp, ep; 80 1.1 mrg mp_limb_t a0, r0, km1, kp1h, kinv; 81 1.1 mrg mp_size_t rn; 82 1.1 mrg unsigned i; 83 1.1 mrg 84 1.1 mrg TMP_DECL; 85 1.1 mrg 86 1.1 mrg ASSERT (n > 0); 87 1.1 mrg ASSERT (ap[0] & 1); 88 1.1 mrg ASSERT (k & 1); 89 1.1 mrg ASSERT (k >= 3); 90 1.1 mrg 91 1.1 mrg TMP_MARK; 92 1.1 mrg 93 1.1 mrg akm1 = TMP_ALLOC_LIMBS (4*n); 94 1.1 mrg tp = akm1 + n; 95 1.1 mrg 96 1.1 mrg km1 = k-1; 97 1.1 mrg /* FIXME: Could arrange the iteration so we don't need to compute 98 1.1 mrg this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note 99 1.1 mrg that we can use wraparound also for a*r, since the low half is 100 1.1 mrg unchanged from the previous iteration. Or possibly mulmid. Also, 101 1.1 mrg a r = a^{1/k}, so we get that value too, for free? */ 102 1.1 mrg mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */ 103 1.1 mrg 104 1.1 mrg a0 = ap[0]; 105 1.1 mrg binvert_limb (kinv, k); 106 1.1 mrg 107 1.1 mrg /* 4 bits: a^{1/k - 1} (mod 16): 108 1.1 mrg 109 1.1 mrg a % 8 110 1.1 mrg 1 3 5 7 111 1.1 mrg k%4 +------- 112 1.1 mrg 1 |1 1 1 1 113 1.1 mrg 3 |1 9 9 1 114 1.1 mrg */ 115 1.1 mrg r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8); 116 1.1 mrg r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */ 117 1.1 mrg r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */ 118 1.1 mrg r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */ 119 1.1 mrg #if GMP_NUMB_BITS > 32 120 1.1 mrg { 121 1.1 mrg unsigned prec = 32; 122 1.1 mrg do 123 1.1 mrg { 124 1.1 mrg r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); 125 1.1 mrg prec *= 2; 126 1.1 mrg } 127 1.1 mrg while (prec < GMP_NUMB_BITS); 128 1.1 mrg } 129 1.1 mrg #endif 130 1.1 mrg 131 1.1 mrg rp[0] = r0; 132 1.1 mrg if (n == 1) 133 1.1 mrg { 134 1.1 mrg TMP_FREE; 135 1.1 mrg return; 136 1.1 mrg } 137 1.1 mrg 138 1.1 mrg /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */ 139 1.1 mrg kp1h = k/2 + 1; 140 1.1 mrg 141 1.1 mrg /* FIXME: Special case for two limb iteration. */ 142 1.1 mrg rnp = TMP_ALLOC_LIMBS (2*n + 1); 143 1.1 mrg ep = rnp + n; 144 1.1 mrg 145 1.1 mrg /* FIXME: Possible to this on the fly with some bit fiddling. */ 146 1.1 mrg for (i = 0; n > 1; n = (n + 1)/2) 147 1.1 mrg sizes[i++] = n; 148 1.1 mrg 149 1.1 mrg rn = 1; 150 1.1 mrg 151 1.1 mrg while (i-- > 0) 152 1.1 mrg { 153 1.1 mrg /* Compute x^{k+1}. */ 154 1.1 mrg mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the 155 1.1.1.2 mrg final iteration. */ 156 1.1 mrg mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp); 157 1.1 mrg 158 1.1.1.2 mrg /* Multiply by a^{k-1}. Can use wraparound; low part equals r. */ 159 1.1 mrg 160 1.1 mrg mpn_mullo_n (ep, rnp, akm1, sizes[i]); 161 1.1 mrg ASSERT (mpn_cmp (ep, rp, rn) == 0); 162 1.1 mrg 163 1.1 mrg ASSERT (sizes[i] <= 2*rn); 164 1.1 mrg mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0); 165 1.1 mrg mpn_neg (rp + rn, rp + rn, sizes[i] - rn); 166 1.1 mrg rn = sizes[i]; 167 1.1 mrg } 168 1.1 mrg TMP_FREE; 169 1.1 mrg } 170 1.1 mrg 171 1.1 mrg /* Computes a^{1/k} (mod B^n). Both a and k must be odd. */ 172 1.1 mrg void 173 1.1 mrg mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k) 174 1.1 mrg { 175 1.1 mrg mp_ptr tp; 176 1.1 mrg TMP_DECL; 177 1.1 mrg 178 1.1 mrg ASSERT (n > 0); 179 1.1 mrg ASSERT (ap[0] & 1); 180 1.1 mrg ASSERT (k & 1); 181 1.1 mrg 182 1.1 mrg if (k == 1) 183 1.1 mrg { 184 1.1 mrg MPN_COPY (rp, ap, n); 185 1.1 mrg return; 186 1.1 mrg } 187 1.1 mrg 188 1.1 mrg TMP_MARK; 189 1.1 mrg tp = TMP_ALLOC_LIMBS (n); 190 1.1 mrg 191 1.1 mrg mpn_broot_invm1 (tp, ap, n, k); 192 1.1 mrg mpn_mullo_n (rp, tp, ap, n); 193 1.1 mrg 194 1.1 mrg TMP_FREE; 195 1.1 mrg } 196