Home | History | Annotate | Line # | Download | only in generic
      1      1.1  mrg /* mpn_broot -- Compute hensel sqrt
      2      1.1  mrg 
      3  1.1.1.2  mrg    Contributed to the GNU project by Niels Mller
      4      1.1  mrg 
      5      1.1  mrg    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
      6      1.1  mrg    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
      7      1.1  mrg    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
      8      1.1  mrg 
      9      1.1  mrg Copyright 2012 Free Software Foundation, Inc.
     10      1.1  mrg 
     11      1.1  mrg This file is part of the GNU MP Library.
     12      1.1  mrg 
     13      1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
     14  1.1.1.2  mrg it under the terms of either:
     15  1.1.1.2  mrg 
     16  1.1.1.2  mrg   * the GNU Lesser General Public License as published by the Free
     17  1.1.1.2  mrg     Software Foundation; either version 3 of the License, or (at your
     18  1.1.1.2  mrg     option) any later version.
     19  1.1.1.2  mrg 
     20  1.1.1.2  mrg or
     21  1.1.1.2  mrg 
     22  1.1.1.2  mrg   * the GNU General Public License as published by the Free Software
     23  1.1.1.2  mrg     Foundation; either version 2 of the License, or (at your option) any
     24  1.1.1.2  mrg     later version.
     25  1.1.1.2  mrg 
     26  1.1.1.2  mrg or both in parallel, as here.
     27      1.1  mrg 
     28      1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
     29      1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     30  1.1.1.2  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     31  1.1.1.2  mrg for more details.
     32      1.1  mrg 
     33  1.1.1.2  mrg You should have received copies of the GNU General Public License and the
     34  1.1.1.2  mrg GNU Lesser General Public License along with the GNU MP Library.  If not,
     35  1.1.1.2  mrg see https://www.gnu.org/licenses/.  */
     36      1.1  mrg 
     37      1.1  mrg #include "gmp-impl.h"
     38      1.1  mrg 
     39      1.1  mrg /* Computes a^e (mod B). Uses right-to-left binary algorithm, since
     40      1.1  mrg    typical use will have e small. */
     41      1.1  mrg static mp_limb_t
     42      1.1  mrg powlimb (mp_limb_t a, mp_limb_t e)
     43      1.1  mrg {
     44      1.1  mrg   mp_limb_t r = 1;
     45      1.1  mrg   mp_limb_t s = a;
     46      1.1  mrg 
     47      1.1  mrg   for (r = 1, s = a; e > 0; e >>= 1, s *= s)
     48      1.1  mrg     if (e & 1)
     49      1.1  mrg       r *= s;
     50      1.1  mrg 
     51      1.1  mrg   return r;
     52      1.1  mrg }
     53      1.1  mrg 
     54      1.1  mrg /* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.
     55      1.1  mrg 
     56      1.1  mrg    Iterates
     57      1.1  mrg 
     58      1.1  mrg      r' <-- r - r * (a^{k-1} r^k - 1) / n
     59      1.1  mrg 
     60      1.1  mrg    If
     61      1.1  mrg 
     62      1.1  mrg      a^{k-1} r^k = 1 (mod 2^m),
     63      1.1  mrg 
     64      1.1  mrg    then
     65      1.1  mrg 
     66      1.1  mrg      a^{k-1} r'^k = 1 (mod 2^{2m}),
     67      1.1  mrg 
     68      1.1  mrg    Compute the update term as
     69      1.1  mrg 
     70      1.1  mrg      r' = r - (a^{k-1} r^{k+1} - r) / k
     71      1.1  mrg 
     72  1.1.1.2  mrg    where we still have cancellation of low limbs.
     73      1.1  mrg 
     74      1.1  mrg  */
     75      1.1  mrg void
     76      1.1  mrg mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
     77      1.1  mrg {
     78      1.1  mrg   mp_size_t sizes[GMP_LIMB_BITS * 2];
     79  1.1.1.2  mrg   mp_ptr akm1, tp, rnp, ep;
     80      1.1  mrg   mp_limb_t a0, r0, km1, kp1h, kinv;
     81      1.1  mrg   mp_size_t rn;
     82      1.1  mrg   unsigned i;
     83      1.1  mrg 
     84      1.1  mrg   TMP_DECL;
     85      1.1  mrg 
     86      1.1  mrg   ASSERT (n > 0);
     87      1.1  mrg   ASSERT (ap[0] & 1);
     88      1.1  mrg   ASSERT (k & 1);
     89      1.1  mrg   ASSERT (k >= 3);
     90      1.1  mrg 
     91      1.1  mrg   TMP_MARK;
     92      1.1  mrg 
     93      1.1  mrg   akm1 = TMP_ALLOC_LIMBS (4*n);
     94      1.1  mrg   tp = akm1 + n;
     95      1.1  mrg 
     96      1.1  mrg   km1 = k-1;
     97      1.1  mrg   /* FIXME: Could arrange the iteration so we don't need to compute
     98      1.1  mrg      this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
     99      1.1  mrg      that we can use wraparound also for a*r, since the low half is
    100      1.1  mrg      unchanged from the previous iteration. Or possibly mulmid. Also,
    101      1.1  mrg      a r = a^{1/k}, so we get that value too, for free? */
    102      1.1  mrg   mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */
    103      1.1  mrg 
    104      1.1  mrg   a0 = ap[0];
    105      1.1  mrg   binvert_limb (kinv, k);
    106      1.1  mrg 
    107      1.1  mrg   /* 4 bits: a^{1/k - 1} (mod 16):
    108      1.1  mrg 
    109      1.1  mrg 	a % 8
    110      1.1  mrg 	1 3 5 7
    111      1.1  mrg    k%4 +-------
    112      1.1  mrg      1 |1 1 1 1
    113      1.1  mrg      3 |1 9 9 1
    114      1.1  mrg   */
    115      1.1  mrg   r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
    116      1.1  mrg   r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
    117      1.1  mrg   r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
    118      1.1  mrg   r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
    119      1.1  mrg #if GMP_NUMB_BITS > 32
    120      1.1  mrg   {
    121      1.1  mrg     unsigned prec = 32;
    122      1.1  mrg     do
    123      1.1  mrg       {
    124      1.1  mrg 	r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
    125      1.1  mrg 	prec *= 2;
    126      1.1  mrg       }
    127      1.1  mrg     while (prec < GMP_NUMB_BITS);
    128      1.1  mrg   }
    129      1.1  mrg #endif
    130      1.1  mrg 
    131      1.1  mrg   rp[0] = r0;
    132      1.1  mrg   if (n == 1)
    133      1.1  mrg     {
    134      1.1  mrg       TMP_FREE;
    135      1.1  mrg       return;
    136      1.1  mrg     }
    137      1.1  mrg 
    138      1.1  mrg   /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
    139      1.1  mrg   kp1h = k/2 + 1;
    140      1.1  mrg 
    141      1.1  mrg   /* FIXME: Special case for two limb iteration. */
    142      1.1  mrg   rnp = TMP_ALLOC_LIMBS (2*n + 1);
    143      1.1  mrg   ep = rnp + n;
    144      1.1  mrg 
    145      1.1  mrg   /* FIXME: Possible to this on the fly with some bit fiddling. */
    146      1.1  mrg   for (i = 0; n > 1; n = (n + 1)/2)
    147      1.1  mrg     sizes[i++] = n;
    148      1.1  mrg 
    149      1.1  mrg   rn = 1;
    150      1.1  mrg 
    151      1.1  mrg   while (i-- > 0)
    152      1.1  mrg     {
    153      1.1  mrg       /* Compute x^{k+1}. */
    154      1.1  mrg       mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
    155  1.1.1.2  mrg 			       final iteration. */
    156      1.1  mrg       mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);
    157      1.1  mrg 
    158  1.1.1.2  mrg       /* Multiply by a^{k-1}. Can use wraparound; low part equals r. */
    159      1.1  mrg 
    160      1.1  mrg       mpn_mullo_n (ep, rnp, akm1, sizes[i]);
    161      1.1  mrg       ASSERT (mpn_cmp (ep, rp, rn) == 0);
    162      1.1  mrg 
    163      1.1  mrg       ASSERT (sizes[i] <= 2*rn);
    164      1.1  mrg       mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
    165      1.1  mrg       mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
    166      1.1  mrg       rn = sizes[i];
    167      1.1  mrg     }
    168      1.1  mrg   TMP_FREE;
    169      1.1  mrg }
    170      1.1  mrg 
    171      1.1  mrg /* Computes a^{1/k} (mod B^n). Both a and k must be odd. */
    172      1.1  mrg void
    173      1.1  mrg mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
    174      1.1  mrg {
    175      1.1  mrg   mp_ptr tp;
    176      1.1  mrg   TMP_DECL;
    177      1.1  mrg 
    178      1.1  mrg   ASSERT (n > 0);
    179      1.1  mrg   ASSERT (ap[0] & 1);
    180      1.1  mrg   ASSERT (k & 1);
    181      1.1  mrg 
    182      1.1  mrg   if (k == 1)
    183      1.1  mrg     {
    184      1.1  mrg       MPN_COPY (rp, ap, n);
    185      1.1  mrg       return;
    186      1.1  mrg     }
    187      1.1  mrg 
    188      1.1  mrg   TMP_MARK;
    189      1.1  mrg   tp = TMP_ALLOC_LIMBS (n);
    190      1.1  mrg 
    191      1.1  mrg   mpn_broot_invm1 (tp, ap, n, k);
    192      1.1  mrg   mpn_mullo_n (rp, tp, ap, n);
    193      1.1  mrg 
    194      1.1  mrg   TMP_FREE;
    195      1.1  mrg }
    196