Home | History | Annotate | Line # | Download | only in generic
broot.c revision 1.1
      1  1.1  mrg /* mpn_broot -- Compute hensel sqrt
      2  1.1  mrg 
      3  1.1  mrg    Contributed to the GNU project by Niels Mller
      4  1.1  mrg 
      5  1.1  mrg    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
      6  1.1  mrg    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
      7  1.1  mrg    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
      8  1.1  mrg 
      9  1.1  mrg Copyright 2012 Free Software Foundation, Inc.
     10  1.1  mrg 
     11  1.1  mrg This file is part of the GNU MP Library.
     12  1.1  mrg 
     13  1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
     14  1.1  mrg it under the terms of the GNU Lesser General Public License as published by
     15  1.1  mrg the Free Software Foundation; either version 3 of the License, or (at your
     16  1.1  mrg option) any later version.
     17  1.1  mrg 
     18  1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
     19  1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     20  1.1  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     21  1.1  mrg License for more details.
     22  1.1  mrg 
     23  1.1  mrg You should have received a copy of the GNU Lesser General Public License
     24  1.1  mrg along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
     25  1.1  mrg 
     26  1.1  mrg #include "gmp.h"
     27  1.1  mrg #include "gmp-impl.h"
     28  1.1  mrg 
     29  1.1  mrg /* Computes a^e (mod B). Uses right-to-left binary algorithm, since
     30  1.1  mrg    typical use will have e small. */
     31  1.1  mrg static mp_limb_t
     32  1.1  mrg powlimb (mp_limb_t a, mp_limb_t e)
     33  1.1  mrg {
     34  1.1  mrg   mp_limb_t r = 1;
     35  1.1  mrg   mp_limb_t s = a;
     36  1.1  mrg 
     37  1.1  mrg   for (r = 1, s = a; e > 0; e >>= 1, s *= s)
     38  1.1  mrg     if (e & 1)
     39  1.1  mrg       r *= s;
     40  1.1  mrg 
     41  1.1  mrg   return r;
     42  1.1  mrg }
     43  1.1  mrg 
     44  1.1  mrg /* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.
     45  1.1  mrg 
     46  1.1  mrg    Iterates
     47  1.1  mrg 
     48  1.1  mrg      r' <-- r - r * (a^{k-1} r^k - 1) / n
     49  1.1  mrg 
     50  1.1  mrg    If
     51  1.1  mrg 
     52  1.1  mrg      a^{k-1} r^k = 1 (mod 2^m),
     53  1.1  mrg 
     54  1.1  mrg    then
     55  1.1  mrg 
     56  1.1  mrg      a^{k-1} r'^k = 1 (mod 2^{2m}),
     57  1.1  mrg 
     58  1.1  mrg    Compute the update term as
     59  1.1  mrg 
     60  1.1  mrg      r' = r - (a^{k-1} r^{k+1} - r) / k
     61  1.1  mrg 
     62  1.1  mrg    where we still have cancelation of low limbs.
     63  1.1  mrg 
     64  1.1  mrg  */
     65  1.1  mrg void
     66  1.1  mrg mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
     67  1.1  mrg {
     68  1.1  mrg   mp_size_t sizes[GMP_LIMB_BITS * 2];
     69  1.1  mrg   mp_ptr akm1, tp, rnp, ep, scratch;
     70  1.1  mrg   mp_limb_t a0, r0, km1, kp1h, kinv;
     71  1.1  mrg   mp_size_t rn;
     72  1.1  mrg   unsigned i;
     73  1.1  mrg 
     74  1.1  mrg   TMP_DECL;
     75  1.1  mrg 
     76  1.1  mrg   ASSERT (n > 0);
     77  1.1  mrg   ASSERT (ap[0] & 1);
     78  1.1  mrg   ASSERT (k & 1);
     79  1.1  mrg   ASSERT (k >= 3);
     80  1.1  mrg 
     81  1.1  mrg   TMP_MARK;
     82  1.1  mrg 
     83  1.1  mrg   akm1 = TMP_ALLOC_LIMBS (4*n);
     84  1.1  mrg   tp = akm1 + n;
     85  1.1  mrg 
     86  1.1  mrg   km1 = k-1;
     87  1.1  mrg   /* FIXME: Could arrange the iteration so we don't need to compute
     88  1.1  mrg      this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
     89  1.1  mrg      that we can use wraparound also for a*r, since the low half is
     90  1.1  mrg      unchanged from the previous iteration. Or possibly mulmid. Also,
     91  1.1  mrg      a r = a^{1/k}, so we get that value too, for free? */
     92  1.1  mrg   mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */
     93  1.1  mrg 
     94  1.1  mrg   a0 = ap[0];
     95  1.1  mrg   binvert_limb (kinv, k);
     96  1.1  mrg 
     97  1.1  mrg   /* 4 bits: a^{1/k - 1} (mod 16):
     98  1.1  mrg 
     99  1.1  mrg 	a % 8
    100  1.1  mrg 	1 3 5 7
    101  1.1  mrg    k%4 +-------
    102  1.1  mrg      1 |1 1 1 1
    103  1.1  mrg      3 |1 9 9 1
    104  1.1  mrg   */
    105  1.1  mrg   r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
    106  1.1  mrg   r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
    107  1.1  mrg   r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
    108  1.1  mrg   r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
    109  1.1  mrg #if GMP_NUMB_BITS > 32
    110  1.1  mrg   {
    111  1.1  mrg     unsigned prec = 32;
    112  1.1  mrg     do
    113  1.1  mrg       {
    114  1.1  mrg 	r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
    115  1.1  mrg 	prec *= 2;
    116  1.1  mrg       }
    117  1.1  mrg     while (prec < GMP_NUMB_BITS);
    118  1.1  mrg   }
    119  1.1  mrg #endif
    120  1.1  mrg 
    121  1.1  mrg   rp[0] = r0;
    122  1.1  mrg   if (n == 1)
    123  1.1  mrg     {
    124  1.1  mrg       TMP_FREE;
    125  1.1  mrg       return;
    126  1.1  mrg     }
    127  1.1  mrg 
    128  1.1  mrg   /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
    129  1.1  mrg   kp1h = k/2 + 1;
    130  1.1  mrg 
    131  1.1  mrg   /* FIXME: Special case for two limb iteration. */
    132  1.1  mrg   rnp = TMP_ALLOC_LIMBS (2*n + 1);
    133  1.1  mrg   ep = rnp + n;
    134  1.1  mrg 
    135  1.1  mrg   /* FIXME: Possible to this on the fly with some bit fiddling. */
    136  1.1  mrg   for (i = 0; n > 1; n = (n + 1)/2)
    137  1.1  mrg     sizes[i++] = n;
    138  1.1  mrg 
    139  1.1  mrg   rn = 1;
    140  1.1  mrg 
    141  1.1  mrg   while (i-- > 0)
    142  1.1  mrg     {
    143  1.1  mrg       /* Compute x^{k+1}. */
    144  1.1  mrg       mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
    145  1.1  mrg 			       final iteration.*/
    146  1.1  mrg       mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);
    147  1.1  mrg 
    148  1.1  mrg       /* Multiply by a^{k-1}. Can use wraparound; low part equals
    149  1.1  mrg 	 r. */
    150  1.1  mrg 
    151  1.1  mrg       mpn_mullo_n (ep, rnp, akm1, sizes[i]);
    152  1.1  mrg       ASSERT (mpn_cmp (ep, rp, rn) == 0);
    153  1.1  mrg 
    154  1.1  mrg       ASSERT (sizes[i] <= 2*rn);
    155  1.1  mrg       mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
    156  1.1  mrg       mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
    157  1.1  mrg       rn = sizes[i];
    158  1.1  mrg     }
    159  1.1  mrg   TMP_FREE;
    160  1.1  mrg }
    161  1.1  mrg 
    162  1.1  mrg /* Computes a^{1/k} (mod B^n). Both a and k must be odd. */
    163  1.1  mrg void
    164  1.1  mrg mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
    165  1.1  mrg {
    166  1.1  mrg   mp_ptr tp;
    167  1.1  mrg   TMP_DECL;
    168  1.1  mrg 
    169  1.1  mrg   ASSERT (n > 0);
    170  1.1  mrg   ASSERT (ap[0] & 1);
    171  1.1  mrg   ASSERT (k & 1);
    172  1.1  mrg 
    173  1.1  mrg   if (k == 1)
    174  1.1  mrg     {
    175  1.1  mrg       MPN_COPY (rp, ap, n);
    176  1.1  mrg       return;
    177  1.1  mrg     }
    178  1.1  mrg 
    179  1.1  mrg   TMP_MARK;
    180  1.1  mrg   tp = TMP_ALLOC_LIMBS (n);
    181  1.1  mrg 
    182  1.1  mrg   mpn_broot_invm1 (tp, ap, n, k);
    183  1.1  mrg   mpn_mullo_n (rp, tp, ap, n);
    184  1.1  mrg 
    185  1.1  mrg   TMP_FREE;
    186  1.1  mrg }
    187