Home | History | Annotate | Line # | Download | only in generic
broot.c revision 1.1.1.2
      1      1.1  mrg /* mpn_broot -- Compute hensel sqrt
      2      1.1  mrg 
      3  1.1.1.2  mrg    Contributed to the GNU project by Niels Mller
      4      1.1  mrg 
      5      1.1  mrg    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
      6      1.1  mrg    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
      7      1.1  mrg    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
      8      1.1  mrg 
      9      1.1  mrg Copyright 2012 Free Software Foundation, Inc.
     10      1.1  mrg 
     11      1.1  mrg This file is part of the GNU MP Library.
     12      1.1  mrg 
     13      1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
     14  1.1.1.2  mrg it under the terms of either:
     15  1.1.1.2  mrg 
     16  1.1.1.2  mrg   * the GNU Lesser General Public License as published by the Free
     17  1.1.1.2  mrg     Software Foundation; either version 3 of the License, or (at your
     18  1.1.1.2  mrg     option) any later version.
     19  1.1.1.2  mrg 
     20  1.1.1.2  mrg or
     21  1.1.1.2  mrg 
     22  1.1.1.2  mrg   * the GNU General Public License as published by the Free Software
     23  1.1.1.2  mrg     Foundation; either version 2 of the License, or (at your option) any
     24  1.1.1.2  mrg     later version.
     25  1.1.1.2  mrg 
     26  1.1.1.2  mrg or both in parallel, as here.
     27      1.1  mrg 
     28      1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
     29      1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     30  1.1.1.2  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     31  1.1.1.2  mrg for more details.
     32      1.1  mrg 
     33  1.1.1.2  mrg You should have received copies of the GNU General Public License and the
     34  1.1.1.2  mrg GNU Lesser General Public License along with the GNU MP Library.  If not,
     35  1.1.1.2  mrg see https://www.gnu.org/licenses/.  */
     36      1.1  mrg 
     37      1.1  mrg #include "gmp.h"
     38      1.1  mrg #include "gmp-impl.h"
     39      1.1  mrg 
     40      1.1  mrg /* Computes a^e (mod B). Uses right-to-left binary algorithm, since
     41      1.1  mrg    typical use will have e small. */
     42      1.1  mrg static mp_limb_t
     43      1.1  mrg powlimb (mp_limb_t a, mp_limb_t e)
     44      1.1  mrg {
     45      1.1  mrg   mp_limb_t r = 1;
     46      1.1  mrg   mp_limb_t s = a;
     47      1.1  mrg 
     48      1.1  mrg   for (r = 1, s = a; e > 0; e >>= 1, s *= s)
     49      1.1  mrg     if (e & 1)
     50      1.1  mrg       r *= s;
     51      1.1  mrg 
     52      1.1  mrg   return r;
     53      1.1  mrg }
     54      1.1  mrg 
     55      1.1  mrg /* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.
     56      1.1  mrg 
     57      1.1  mrg    Iterates
     58      1.1  mrg 
     59      1.1  mrg      r' <-- r - r * (a^{k-1} r^k - 1) / n
     60      1.1  mrg 
     61      1.1  mrg    If
     62      1.1  mrg 
     63      1.1  mrg      a^{k-1} r^k = 1 (mod 2^m),
     64      1.1  mrg 
     65      1.1  mrg    then
     66      1.1  mrg 
     67      1.1  mrg      a^{k-1} r'^k = 1 (mod 2^{2m}),
     68      1.1  mrg 
     69      1.1  mrg    Compute the update term as
     70      1.1  mrg 
     71      1.1  mrg      r' = r - (a^{k-1} r^{k+1} - r) / k
     72      1.1  mrg 
     73  1.1.1.2  mrg    where we still have cancellation of low limbs.
     74      1.1  mrg 
     75      1.1  mrg  */
     76      1.1  mrg void
     77      1.1  mrg mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
     78      1.1  mrg {
     79      1.1  mrg   mp_size_t sizes[GMP_LIMB_BITS * 2];
     80  1.1.1.2  mrg   mp_ptr akm1, tp, rnp, ep;
     81      1.1  mrg   mp_limb_t a0, r0, km1, kp1h, kinv;
     82      1.1  mrg   mp_size_t rn;
     83      1.1  mrg   unsigned i;
     84      1.1  mrg 
     85      1.1  mrg   TMP_DECL;
     86      1.1  mrg 
     87      1.1  mrg   ASSERT (n > 0);
     88      1.1  mrg   ASSERT (ap[0] & 1);
     89      1.1  mrg   ASSERT (k & 1);
     90      1.1  mrg   ASSERT (k >= 3);
     91      1.1  mrg 
     92      1.1  mrg   TMP_MARK;
     93      1.1  mrg 
     94      1.1  mrg   akm1 = TMP_ALLOC_LIMBS (4*n);
     95      1.1  mrg   tp = akm1 + n;
     96      1.1  mrg 
     97      1.1  mrg   km1 = k-1;
     98      1.1  mrg   /* FIXME: Could arrange the iteration so we don't need to compute
     99      1.1  mrg      this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
    100      1.1  mrg      that we can use wraparound also for a*r, since the low half is
    101      1.1  mrg      unchanged from the previous iteration. Or possibly mulmid. Also,
    102      1.1  mrg      a r = a^{1/k}, so we get that value too, for free? */
    103      1.1  mrg   mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */
    104      1.1  mrg 
    105      1.1  mrg   a0 = ap[0];
    106      1.1  mrg   binvert_limb (kinv, k);
    107      1.1  mrg 
    108      1.1  mrg   /* 4 bits: a^{1/k - 1} (mod 16):
    109      1.1  mrg 
    110      1.1  mrg 	a % 8
    111      1.1  mrg 	1 3 5 7
    112      1.1  mrg    k%4 +-------
    113      1.1  mrg      1 |1 1 1 1
    114      1.1  mrg      3 |1 9 9 1
    115      1.1  mrg   */
    116      1.1  mrg   r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
    117      1.1  mrg   r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
    118      1.1  mrg   r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
    119      1.1  mrg   r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
    120      1.1  mrg #if GMP_NUMB_BITS > 32
    121      1.1  mrg   {
    122      1.1  mrg     unsigned prec = 32;
    123      1.1  mrg     do
    124      1.1  mrg       {
    125      1.1  mrg 	r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
    126      1.1  mrg 	prec *= 2;
    127      1.1  mrg       }
    128      1.1  mrg     while (prec < GMP_NUMB_BITS);
    129      1.1  mrg   }
    130      1.1  mrg #endif
    131      1.1  mrg 
    132      1.1  mrg   rp[0] = r0;
    133      1.1  mrg   if (n == 1)
    134      1.1  mrg     {
    135      1.1  mrg       TMP_FREE;
    136      1.1  mrg       return;
    137      1.1  mrg     }
    138      1.1  mrg 
    139      1.1  mrg   /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
    140      1.1  mrg   kp1h = k/2 + 1;
    141      1.1  mrg 
    142      1.1  mrg   /* FIXME: Special case for two limb iteration. */
    143      1.1  mrg   rnp = TMP_ALLOC_LIMBS (2*n + 1);
    144      1.1  mrg   ep = rnp + n;
    145      1.1  mrg 
    146      1.1  mrg   /* FIXME: Possible to this on the fly with some bit fiddling. */
    147      1.1  mrg   for (i = 0; n > 1; n = (n + 1)/2)
    148      1.1  mrg     sizes[i++] = n;
    149      1.1  mrg 
    150      1.1  mrg   rn = 1;
    151      1.1  mrg 
    152      1.1  mrg   while (i-- > 0)
    153      1.1  mrg     {
    154      1.1  mrg       /* Compute x^{k+1}. */
    155      1.1  mrg       mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
    156  1.1.1.2  mrg 			       final iteration. */
    157      1.1  mrg       mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);
    158      1.1  mrg 
    159  1.1.1.2  mrg       /* Multiply by a^{k-1}. Can use wraparound; low part equals r. */
    160      1.1  mrg 
    161      1.1  mrg       mpn_mullo_n (ep, rnp, akm1, sizes[i]);
    162      1.1  mrg       ASSERT (mpn_cmp (ep, rp, rn) == 0);
    163      1.1  mrg 
    164      1.1  mrg       ASSERT (sizes[i] <= 2*rn);
    165      1.1  mrg       mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
    166      1.1  mrg       mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
    167      1.1  mrg       rn = sizes[i];
    168      1.1  mrg     }
    169      1.1  mrg   TMP_FREE;
    170      1.1  mrg }
    171      1.1  mrg 
    172      1.1  mrg /* Computes a^{1/k} (mod B^n). Both a and k must be odd. */
    173      1.1  mrg void
    174      1.1  mrg mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
    175      1.1  mrg {
    176      1.1  mrg   mp_ptr tp;
    177      1.1  mrg   TMP_DECL;
    178      1.1  mrg 
    179      1.1  mrg   ASSERT (n > 0);
    180      1.1  mrg   ASSERT (ap[0] & 1);
    181      1.1  mrg   ASSERT (k & 1);
    182      1.1  mrg 
    183      1.1  mrg   if (k == 1)
    184      1.1  mrg     {
    185      1.1  mrg       MPN_COPY (rp, ap, n);
    186      1.1  mrg       return;
    187      1.1  mrg     }
    188      1.1  mrg 
    189      1.1  mrg   TMP_MARK;
    190      1.1  mrg   tp = TMP_ALLOC_LIMBS (n);
    191      1.1  mrg 
    192      1.1  mrg   mpn_broot_invm1 (tp, ap, n, k);
    193      1.1  mrg   mpn_mullo_n (rp, tp, ap, n);
    194      1.1  mrg 
    195      1.1  mrg   TMP_FREE;
    196      1.1  mrg }
    197