Home | History | Annotate | Line # | Download | only in generic
      1 /* mpn_broot -- Compute hensel sqrt
      2 
      3    Contributed to the GNU project by Niels Mller
      4 
      5    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
      6    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
      7    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
      8 
      9 Copyright 2012 Free Software Foundation, Inc.
     10 
     11 This file is part of the GNU MP Library.
     12 
     13 The GNU MP Library is free software; you can redistribute it and/or modify
     14 it under the terms of either:
     15 
     16   * the GNU Lesser General Public License as published by the Free
     17     Software Foundation; either version 3 of the License, or (at your
     18     option) any later version.
     19 
     20 or
     21 
     22   * the GNU General Public License as published by the Free Software
     23     Foundation; either version 2 of the License, or (at your option) any
     24     later version.
     25 
     26 or both in parallel, as here.
     27 
     28 The GNU MP Library is distributed in the hope that it will be useful, but
     29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     30 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     31 for more details.
     32 
     33 You should have received copies of the GNU General Public License and the
     34 GNU Lesser General Public License along with the GNU MP Library.  If not,
     35 see https://www.gnu.org/licenses/.  */
     36 
     37 #include "gmp-impl.h"
     38 
     39 /* Computes a^e (mod B). Uses right-to-left binary algorithm, since
     40    typical use will have e small. */
     41 static mp_limb_t
     42 powlimb (mp_limb_t a, mp_limb_t e)
     43 {
     44   mp_limb_t r = 1;
     45   mp_limb_t s = a;
     46 
     47   for (r = 1, s = a; e > 0; e >>= 1, s *= s)
     48     if (e & 1)
     49       r *= s;
     50 
     51   return r;
     52 }
     53 
     54 /* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.
     55 
     56    Iterates
     57 
     58      r' <-- r - r * (a^{k-1} r^k - 1) / n
     59 
     60    If
     61 
     62      a^{k-1} r^k = 1 (mod 2^m),
     63 
     64    then
     65 
     66      a^{k-1} r'^k = 1 (mod 2^{2m}),
     67 
     68    Compute the update term as
     69 
     70      r' = r - (a^{k-1} r^{k+1} - r) / k
     71 
     72    where we still have cancellation of low limbs.
     73 
     74  */
     75 void
     76 mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
     77 {
     78   mp_size_t sizes[GMP_LIMB_BITS * 2];
     79   mp_ptr akm1, tp, rnp, ep;
     80   mp_limb_t a0, r0, km1, kp1h, kinv;
     81   mp_size_t rn;
     82   unsigned i;
     83 
     84   TMP_DECL;
     85 
     86   ASSERT (n > 0);
     87   ASSERT (ap[0] & 1);
     88   ASSERT (k & 1);
     89   ASSERT (k >= 3);
     90 
     91   TMP_MARK;
     92 
     93   akm1 = TMP_ALLOC_LIMBS (4*n);
     94   tp = akm1 + n;
     95 
     96   km1 = k-1;
     97   /* FIXME: Could arrange the iteration so we don't need to compute
     98      this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
     99      that we can use wraparound also for a*r, since the low half is
    100      unchanged from the previous iteration. Or possibly mulmid. Also,
    101      a r = a^{1/k}, so we get that value too, for free? */
    102   mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */
    103 
    104   a0 = ap[0];
    105   binvert_limb (kinv, k);
    106 
    107   /* 4 bits: a^{1/k - 1} (mod 16):
    108 
    109 	a % 8
    110 	1 3 5 7
    111    k%4 +-------
    112      1 |1 1 1 1
    113      3 |1 9 9 1
    114   */
    115   r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
    116   r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
    117   r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
    118   r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
    119 #if GMP_NUMB_BITS > 32
    120   {
    121     unsigned prec = 32;
    122     do
    123       {
    124 	r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
    125 	prec *= 2;
    126       }
    127     while (prec < GMP_NUMB_BITS);
    128   }
    129 #endif
    130 
    131   rp[0] = r0;
    132   if (n == 1)
    133     {
    134       TMP_FREE;
    135       return;
    136     }
    137 
    138   /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
    139   kp1h = k/2 + 1;
    140 
    141   /* FIXME: Special case for two limb iteration. */
    142   rnp = TMP_ALLOC_LIMBS (2*n + 1);
    143   ep = rnp + n;
    144 
    145   /* FIXME: Possible to this on the fly with some bit fiddling. */
    146   for (i = 0; n > 1; n = (n + 1)/2)
    147     sizes[i++] = n;
    148 
    149   rn = 1;
    150 
    151   while (i-- > 0)
    152     {
    153       /* Compute x^{k+1}. */
    154       mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
    155 			       final iteration. */
    156       mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);
    157 
    158       /* Multiply by a^{k-1}. Can use wraparound; low part equals r. */
    159 
    160       mpn_mullo_n (ep, rnp, akm1, sizes[i]);
    161       ASSERT (mpn_cmp (ep, rp, rn) == 0);
    162 
    163       ASSERT (sizes[i] <= 2*rn);
    164       mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
    165       mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
    166       rn = sizes[i];
    167     }
    168   TMP_FREE;
    169 }
    170 
    171 /* Computes a^{1/k} (mod B^n). Both a and k must be odd. */
    172 void
    173 mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
    174 {
    175   mp_ptr tp;
    176   TMP_DECL;
    177 
    178   ASSERT (n > 0);
    179   ASSERT (ap[0] & 1);
    180   ASSERT (k & 1);
    181 
    182   if (k == 1)
    183     {
    184       MPN_COPY (rp, ap, n);
    185       return;
    186     }
    187 
    188   TMP_MARK;
    189   tp = TMP_ALLOC_LIMBS (n);
    190 
    191   mpn_broot_invm1 (tp, ap, n, k);
    192   mpn_mullo_n (rp, tp, ap, n);
    193 
    194   TMP_FREE;
    195 }
    196