mpn/generic/bsqrtinv.c

    1.1  mrg /* mpn_bsqrtinv, compute r such that r^2 * y = 1 (mod 2^{b+1}).
    1.1  mrg
    1.1  mrg    Contributed to the GNU project by Martin Boij (as part of perfpow.c).
    1.1  mrg
1.1.1.2  mrg Copyright 2009, 2010, 2012, 2015 Free Software Foundation, Inc.
    1.1  mrg
    1.1  mrg This file is part of the GNU MP Library.
    1.1  mrg
    1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
1.1.1.2  mrg it under the terms of either:
1.1.1.2  mrg
1.1.1.2  mrg   * the GNU Lesser General Public License as published by the Free
1.1.1.2  mrg     Software Foundation; either version 3 of the License, or (at your
1.1.1.2  mrg     option) any later version.
1.1.1.2  mrg
1.1.1.2  mrg or
1.1.1.2  mrg
1.1.1.2  mrg   * the GNU General Public License as published by the Free Software
1.1.1.2  mrg     Foundation; either version 2 of the License, or (at your option) any
1.1.1.2  mrg     later version.
1.1.1.2  mrg
1.1.1.2  mrg or both in parallel, as here.
    1.1  mrg
    1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
    1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1.1.2  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1.1.1.2  mrg for more details.
    1.1  mrg
1.1.1.2  mrg You should have received copies of the GNU General Public License and the
1.1.1.2  mrg GNU Lesser General Public License along with the GNU MP Library.  If not,
1.1.1.2  mrg see https://www.gnu.org/licenses/.  */
    1.1  mrg
    1.1  mrg #include "gmp.h"
    1.1  mrg #include "gmp-impl.h"
    1.1  mrg
    1.1  mrg /* Compute r such that r^2 * y = 1 (mod 2^{b+1}).
    1.1  mrg    Return non-zero if such an integer r exists.
    1.1  mrg
    1.1  mrg    Iterates
    1.1  mrg      r' <-- (3r - r^3 y) / 2
    1.1  mrg    using Hensel lifting.  Since we divide by two, the Hensel lifting is
    1.1  mrg    somewhat degenerate.  Therefore, we lift from 2^b to 2^{b+1}-1.
    1.1  mrg
    1.1  mrg    FIXME:
    1.1  mrg      (1) Simplify to do precision book-keeping in limbs rather than bits.
    1.1  mrg
    1.1  mrg      (2) Rewrite iteration as
    1.1  mrg 	   r' <-- r - r (r^2 y - 1) / 2
    1.1  mrg 	 and take advantage of zero low part of r^2 y - 1.
    1.1  mrg
    1.1  mrg      (3) Use wrap-around trick.
    1.1  mrg
    1.1  mrg      (4) Use a small table to get starting value.
    1.1  mrg */
    1.1  mrg int
    1.1  mrg mpn_bsqrtinv (mp_ptr rp, mp_srcptr yp, mp_bitcnt_t bnb, mp_ptr tp)
    1.1  mrg {
               /* rp:  result area; rp[0] is seeded with 1 and the loop below
                       rewrites bn limbs of it on every pass.
                  yp:  the operand y (read only).
                  bnb: requested precision in bits; must be non-zero.
                  tp:  scratch space.  It is split into two halves, tp and
                       tp2 = tp + bn, and each pass writes bn limbs to both, so
                       it presumably has to hold at least 2 * bn limbs for the
                       initial bn -- confirm against the callers.
                  Returns 0 when the low bits of y rule out a solution, and 1
                  otherwise.  */
1.1.1.2  mrg   mp_ptr tp2;
    1.1  mrg   mp_size_t bn, order[GMP_LIMB_BITS + 1];
    1.1  mrg   int i, d;
    1.1  mrg
    1.1  mrg   ASSERT (bnb > 0);
    1.1  mrg
               /* Limb count large enough to hold bnb + 1 bits.  */
    1.1  mrg   bn = 1 + bnb / GMP_LIMB_BITS;
    1.1  mrg
               /* Second scratch area starts right after the first bn limbs.  */
    1.1  mrg   tp2 = tp + bn;
    1.1  mrg
               /* Starting approximation r = 1.  */
    1.1  mrg   rp[0] = 1;
    1.1  mrg   if (bnb == 1)
    1.1  mrg     {
                   /* No lifting needed: r = 1 is accepted iff y = 1 (mod 4).  */
    1.1  mrg       if ((yp[0] & 3) != 1)
    1.1  mrg 	return 0;
    1.1  mrg     }
    1.1  mrg   else
    1.1  mrg     {
                   /* For two or more bits, y must be 1 (mod 8).  */
    1.1  mrg       if ((yp[0] & 7) != 1)
    1.1  mrg 	return 0;
    1.1  mrg
                   /* Record the precision schedule from the target downwards,
                      stepping bnb -> (bnb + 2) >> 1 until it reaches 2.  */
    1.1  mrg       d = 0;
    1.1  mrg       for (; bnb != 2; bnb = (bnb + 2) >> 1)
    1.1  mrg 	order[d++] = bnb;
    1.1  mrg
                   /* Replay the schedule in reverse, i.e. lowest precision first,
                      applying r' <-- (3r - r^3 y) / 2 at each precision.

                      NOTE(review): only rp[0] is written before this loop, and
                      each pass hands bn limbs of rp to the multiply routines.
                      When bn grows between passes the new high limbs hold
                      whatever the caller left there.  Presumably harmless since
                      only the already-correct low bits matter for the lifting,
                      but confirm.  */
    1.1  mrg       for (i = d - 1; i >= 0; i--)
    1.1  mrg 	{
    1.1  mrg 	  bnb = order[i];
    1.1  mrg 	  bn = 1 + bnb / GMP_LIMB_BITS;
    1.1  mrg
                       /* tp <- r^2, then tp2 <- r * r^2 (low bn limbs).  */
1.1.1.2  mrg 	  mpn_sqrlo (tp, rp, bn);
1.1.1.2  mrg 	  mpn_mullo_n (tp2, rp, tp, bn); /* tp2 <- rp ^ 3 */
1.1.1.2  mrg
                       /* tp is reused: tp <- 3 r.  The value returned by
                          mpn_mul_1 is discarded.  */
1.1.1.2  mrg 	  mpn_mul_1 (tp, rp, bn, 3);
    1.1  mrg
                       /* rp is overwritten here: rp <- y * r^3 (low bn limbs).
                          The old r is no longer needed; 3r is already in tp.  */
    1.1  mrg 	  mpn_mullo_n (rp, yp, tp2, bn);
    1.1  mrg
                       /* rp <- (tp - rp) / 2 = (3r - r^3 y) / 2, either with one
                          fused native call or as subtract-then-shift through
                          tp2.  Return values are ignored.  */
    1.1  mrg #if HAVE_NATIVE_mpn_rsh1sub_n
    1.1  mrg 	  mpn_rsh1sub_n (rp, tp, rp, bn);
    1.1  mrg #else
    1.1  mrg 	  mpn_sub_n (tp2, tp, rp, bn);
    1.1  mrg 	  mpn_rshift (rp, tp2, bn, 1);
    1.1  mrg #endif
    1.1  mrg 	}
    1.1  mrg     }
    1.1  mrg   return 1;
    1.1  mrg }