mpn/generic/toom_eval_pm2.c

    1.1  mrg /* mpn_toom_eval_pm2 -- Evaluate a polynomial in +2 and -2
    1.1  mrg
1.1.1.3  mrg    Contributed to the GNU project by Niels Möller and Marco Bodrato
    1.1  mrg
    1.1  mrg    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
    1.1  mrg    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    1.1  mrg    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
    1.1  mrg
    1.1  mrg Copyright 2009 Free Software Foundation, Inc.
    1.1  mrg
    1.1  mrg This file is part of the GNU MP Library.
    1.1  mrg
    1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
1.1.1.3  mrg it under the terms of either:
1.1.1.3  mrg
1.1.1.3  mrg   * the GNU Lesser General Public License as published by the Free
1.1.1.3  mrg     Software Foundation; either version 3 of the License, or (at your
1.1.1.3  mrg     option) any later version.
1.1.1.3  mrg
1.1.1.3  mrg or
1.1.1.3  mrg
1.1.1.3  mrg   * the GNU General Public License as published by the Free Software
1.1.1.3  mrg     Foundation; either version 2 of the License, or (at your option) any
1.1.1.3  mrg     later version.
1.1.1.3  mrg
1.1.1.3  mrg or both in parallel, as here.
    1.1  mrg
    1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
    1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1.1.3  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1.1.1.3  mrg for more details.
    1.1  mrg
1.1.1.3  mrg You should have received copies of the GNU General Public License and the
1.1.1.3  mrg GNU Lesser General Public License along with the GNU MP Library.  If not,
1.1.1.3  mrg see https://www.gnu.org/licenses/.  */
    1.1  mrg
    1.1  mrg #include "gmp-impl.h"
    1.1  mrg
    1.1  mrg /* DO_addlsh2(d,a,b,n,cy) computes cy,{d,n} <- {a,n} + 4*(cy,{b,n}), it
    1.1  mrg    can be used as DO_addlsh2(d,a,d,n,d[n]), for accumulation on {d,n+1}. */
             /* cy is read and written, so it must be a modifiable lvalue: every
                variant below first multiplies the incoming cy by 4 and then adds
                whatever the mpn call(s) return.  Which variant gets compiled
                depends on HAVE_NATIVE_mpn_addlsh2_n and HAVE_NATIVE_mpn_addlsh_n;
                when neither is set, a shift followed by an addition is used.  */
    1.1  mrg #if HAVE_NATIVE_mpn_addlsh2_n
    1.1  mrg #define DO_addlsh2(d, a, b, n, cy)	\
    1.1  mrg do {					\
    1.1  mrg   (cy) <<= 2;				\
    1.1  mrg   (cy) += mpn_addlsh2_n(d, a, b, n);	\
    1.1  mrg } while (0)
    1.1  mrg #else
    1.1  mrg #if HAVE_NATIVE_mpn_addlsh_n
    1.1  mrg #define DO_addlsh2(d, a, b, n, cy)	\
    1.1  mrg do {					\
    1.1  mrg   (cy) <<= 2;				\
    1.1  mrg   (cy) += mpn_addlsh_n(d, a, b, n, 2);	\
    1.1  mrg } while (0)
    1.1  mrg #else
    1.1  mrg /* The following is not a general substitute for addlsh2.
1.1.1.2  mrg    It is correct if d == b, but it is not if d == a.  */
             /* Reason: mpn_lshift stores the shifted b into d before mpn_add_n
                reads a, so with d == a the addend would already be overwritten.
                Both uses in this file pass either d == b or a d distinct from a.  */
    1.1  mrg #define DO_addlsh2(d, a, b, n, cy)	\
    1.1  mrg do {					\
    1.1  mrg   (cy) <<= 2;				\
    1.1  mrg   (cy) += mpn_lshift(d, b, n, 2);	\
    1.1  mrg   (cy) += mpn_add_n(d, d, a, n);	\
    1.1  mrg } while (0)
    1.1  mrg #endif
    1.1  mrg #endif
    1.1  mrg
    1.1  mrg /* Evaluates a polynomial of degree 2 < k < GMP_NUMB_BITS, in the
    1.1  mrg    points +2 and -2. */
             /* The input at xp is k coefficients of n limbs each, followed by a top
                coefficient of hn limbs (0 < hn <= n) at xp + k*n.

                On return {xp2,n+1} holds the sum of the even-index part and the
                odd-index part taken at 2, i.e. the value at +2, and {xm2,n+1} holds
                the absolute value of their difference, i.e. the magnitude of the
                value at -2.  {tp,n+1} is overwritten as scratch.

                The return value is 0 or ~0; ~0 tells the caller that the value at
                -2 is the negation of {xm2,n+1}.  */
    1.1  mrg int
    1.1  mrg mpn_toom_eval_pm2 (mp_ptr xp2, mp_ptr xm2, unsigned k,
    1.1  mrg 		   mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
    1.1  mrg {
    1.1  mrg   int i;
    1.1  mrg   int neg;
    1.1  mrg   mp_limb_t cy;
    1.1  mrg
    1.1  mrg   ASSERT (k >= 3);
    1.1  mrg   ASSERT (k < GMP_NUMB_BITS);
    1.1  mrg
    1.1  mrg   ASSERT (hn > 0);
    1.1  mrg   ASSERT (hn <= n);
    1.1  mrg
    1.1  mrg   /* The degree k is also the number of full-size coefficients, so
    1.1  mrg    * that last coefficient, of size hn, starts at xp + k*n. */
    1.1  mrg
               /* First pass, accumulated in xp2: coefficients k, k-2, k-4, ... are
                  folded together from the top down, multiplying the running value
                  by 4 = 2^2 at each step.  The short top coefficient is combined
                  with coefficient k-2 over hn limbs only; mpn_add_1 then carries
                  into the remaining n - hn limbs of coefficient k-2.  cy tracks the
                  limb above the n-limb accumulator and ends up in xp2[n].  */
    1.1  mrg   cy = 0;
    1.1  mrg   DO_addlsh2 (xp2, xp + (k-2) * n, xp + k * n, hn, cy);
    1.1  mrg   if (hn != n)
    1.1  mrg     cy = mpn_add_1 (xp2 + hn, xp + (k-2) * n + hn, n - hn, cy);
    1.1  mrg   for (i = k - 4; i >= 0; i -= 2)
    1.1  mrg     DO_addlsh2 (xp2, xp + i * n, xp2, n, cy);
    1.1  mrg   xp2[n] = cy;
    1.1  mrg
               /* From here on k is the original degree minus one.  */
    1.1  mrg   k--;
    1.1  mrg
               /* Second pass, accumulated in tp: the same folding for the
                  coefficients of the other parity (k, k-2, ... with the new k), all
                  of which are full n-limb coefficients.  */
    1.1  mrg   cy = 0;
    1.1  mrg   DO_addlsh2 (tp, xp + (k-2) * n, xp + k * n, n, cy);
    1.1  mrg   for (i = k - 4; i >= 0; i -= 2)
    1.1  mrg     DO_addlsh2 (tp, xp + i * n, tp, n, cy);
    1.1  mrg   tp[n] = cy;
    1.1  mrg
               /* The accumulator that gathered the odd-index coefficients stopped at
                  index 1 and is therefore still missing one factor of 2.  With k
                  already decremented, k odd means that accumulator is tp, k even
                  means it is xp2.  */
    1.1  mrg   if (k & 1)
    1.1  mrg     ASSERT_NOCARRY(mpn_lshift (tp , tp , n + 1, 1));
    1.1  mrg   else
    1.1  mrg     ASSERT_NOCARRY(mpn_lshift (xp2, xp2, n + 1, 1));
    1.1  mrg
               /* neg is an all-ones mask when xp2 < tp; it selects the operand order
                  below so that the subtraction is always larger minus smaller.  */
    1.1  mrg   neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0;
    1.1  mrg
               /* xm2 receives the difference and xp2 the sum.  NOTE(review): the
                  mpn_add_n_sub_n branch is presumably the same computation done in
                  a single call -- confirm against its definition.  */
    1.1  mrg #if HAVE_NATIVE_mpn_add_n_sub_n
    1.1  mrg   if (neg)
    1.1  mrg     mpn_add_n_sub_n (xp2, xm2, tp, xp2, n + 1);
    1.1  mrg   else
    1.1  mrg     mpn_add_n_sub_n (xp2, xm2, xp2, tp, n + 1);
    1.1  mrg #else /* !HAVE_NATIVE_mpn_add_n_sub_n */
    1.1  mrg   if (neg)
    1.1  mrg     mpn_sub_n (xm2, tp, xp2, n + 1);
    1.1  mrg   else
    1.1  mrg     mpn_sub_n (xm2, xp2, tp, n + 1);
    1.1  mrg
    1.1  mrg   mpn_add_n (xp2, xp2, tp, n + 1);
    1.1  mrg #endif /* !HAVE_NATIVE_mpn_add_n_sub_n */
    1.1  mrg
               /* Sanity bounds on the top limbs of both results; note that the
                  shifts here are done in type int.  */
    1.1  mrg   ASSERT (xp2[n] < (1<<(k+2))-1);
    1.1  mrg   ASSERT (xm2[n] < ((1<<(k+3))-1 - (1^k&1))/3);
    1.1  mrg
               /* k is unsigned, so (k & 1) - 1 is all ones when k is even and zero
                  when k is odd.  For even k the operand that was in xp2 before the
                  sum was the odd-index part, so the comparison above had the
                  opposite sense and the mask is flipped.  */
    1.1  mrg   neg ^= ((k & 1) - 1);
    1.1  mrg
    1.1  mrg   return neg;
    1.1  mrg }
    1.1  mrg
    1.1  mrg #undef DO_addlsh2	/* done with the helper macro defined above */