mpn/generic/toom2_sqr.c

    1.1  mrg /* mpn_toom2_sqr -- Square {ap,an}.
    1.1  mrg
    1.1  mrg    Contributed to the GNU project by Torbjorn Granlund.
    1.1  mrg
    1.1  mrg    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
    1.1  mrg    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    1.1  mrg    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
    1.1  mrg
1.1.1.4  mrg Copyright 2006-2010, 2012, 2014, 2018 Free Software Foundation, Inc.
    1.1  mrg
    1.1  mrg This file is part of the GNU MP Library.
    1.1  mrg
    1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
1.1.1.3  mrg it under the terms of either:
1.1.1.3  mrg
1.1.1.3  mrg   * the GNU Lesser General Public License as published by the Free
1.1.1.3  mrg     Software Foundation; either version 3 of the License, or (at your
1.1.1.3  mrg     option) any later version.
1.1.1.3  mrg
1.1.1.3  mrg or
1.1.1.3  mrg
1.1.1.3  mrg   * the GNU General Public License as published by the Free Software
1.1.1.3  mrg     Foundation; either version 2 of the License, or (at your option) any
1.1.1.3  mrg     later version.
1.1.1.3  mrg
1.1.1.3  mrg or both in parallel, as here.
    1.1  mrg
    1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
    1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1.1.3  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1.1.1.3  mrg for more details.
    1.1  mrg
1.1.1.3  mrg You should have received copies of the GNU General Public License and the
1.1.1.3  mrg GNU Lesser General Public License along with the GNU MP Library.  If not,
1.1.1.3  mrg see https://www.gnu.org/licenses/.  */
    1.1  mrg
    1.1  mrg
    1.1  mrg #include "gmp-impl.h"
    1.1  mrg
    1.1  mrg /* Evaluate in: -1, 0, +inf
    1.1  mrg
    1.1  mrg   <-s--><--n-->
    1.1  mrg    ____ ______
    1.1  mrg   |_a1_|___a0_|
    1.1  mrg
    1.1  mrg   v0  =  a0     ^2  #   A(0)^2
    1.1  mrg   vm1 = (a0- a1)^2  #  A(-1)^2
    1.1  mrg   vinf=      a1 ^2  # A(inf)^2
    1.1  mrg */
    1.1  mrg
/* MAYBE_sqr_toom2 is consulted by TOOM2_SQR_REC to decide whether a recursive
   call to mpn_toom2_sqr can ever be needed for the half-sized sub-products.
   It is forced to 1 for tune-program and fat-binary builds; otherwise it is
   the expression SQR_TOOM3_THRESHOLD >= 2 * SQR_TOOM2_THRESHOLD.
   NOTE(review): presumably it is forced to 1 because the thresholds are not
   compile-time constants in those builds -- confirm against gmp-impl.h.  */
1.1.1.2  mrg #if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
    1.1  mrg #define MAYBE_sqr_toom2   1
    1.1  mrg #else
    1.1  mrg #define MAYBE_sqr_toom2							\
    1.1  mrg   (SQR_TOOM3_THRESHOLD >= 2 * SQR_TOOM2_THRESHOLD)
    1.1  mrg #endif
    1.1  mrg
    /* TOOM2_SQR_REC (p, a, n, ws) squares {a,n} into p for one sub-product.
       It calls mpn_sqr_basecase when MAYBE_sqr_toom2 is zero or when n is
       below SQR_TOOM2_THRESHOLD, and otherwise recurses into mpn_toom2_sqr
       with ws as its scratch area (ws is not used on the basecase path).
       The body is wrapped in do { } while (0) so the macro can be used as a
       single statement.  n is expanded more than once, so the arguments
       should be free of side effects.  */
    1.1  mrg #define TOOM2_SQR_REC(p, a, n, ws)					\
    1.1  mrg   do {									\
    1.1  mrg     if (! MAYBE_sqr_toom2						\
    1.1  mrg 	|| BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))			\
    1.1  mrg       mpn_sqr_basecase (p, a, n);					\
    1.1  mrg     else								\
    1.1  mrg       mpn_toom2_sqr (p, a, n, ws);					\
    1.1  mrg   } while (0)
    1.1  mrg
    /* mpn_toom2_sqr -- square {ap,an} into pp by splitting the operand in two
       parts and evaluating at 0, -1 and +inf.

       pp       Result area.  v0 occupies pp[0..2n) and vinf occupies
                pp[2n..2n+2s), so 2*an limbs are written.  The low n limbs
                are also used temporarily to hold |a0 - a1|.
       ap, an   Operand.  It is split as a0 = low n limbs and a1 = high s
                limbs, with s = an >> 1 and n = an - s.  The ASSERT below
                requires s > 0, i.e. an >= 2.
       scratch  Work area.  Its first 2n limbs hold vm1; scratch + 2n is
                handed to the recursive squarings.
                NOTE(review): the total scratch size required is not visible
                in this file -- confirm against the caller.

       The three squarings go through TOOM2_SQR_REC, so each one is either a
       basecase squaring or a recursive call to this function.  The result is
       then assembled in place as
         v0 + (v0 + vinf - vm1) * B^n + vinf * B^(2n),   B = limb base.  */
    1.1  mrg void
    1.1  mrg mpn_toom2_sqr (mp_ptr pp,
    1.1  mrg 	       mp_srcptr ap, mp_size_t an,
    1.1  mrg 	       mp_ptr scratch)
    1.1  mrg {
               /* NOTE(review): not referenced again in the visible code; presumably
                  it shadows a global of the same name that fat-binary dispatch
                  macros consult -- confirm against gmp-impl.h.  */
1.1.1.2  mrg   const int __gmpn_cpuvec_initialized = 1;
    1.1  mrg   mp_size_t n, s;
    1.1  mrg   mp_limb_t cy, cy2;
    1.1  mrg   mp_ptr asm1;
    1.1  mrg
    1.1  mrg #define a0  ap
    1.1  mrg #define a1  (ap + n)
    1.1  mrg
               /* s limbs in the high part a1, n limbs in the low part a0.  */
    1.1  mrg   s = an >> 1;
    1.1  mrg   n = an - s;
    1.1  mrg
               /* s is either n or n - 1, and is never zero.  */
1.1.1.3  mrg   ASSERT (0 < s && s <= n && s >= n - 1);
    1.1  mrg
               /* asm1 is built in the low n limbs of the result area.  It is squared
                  into vm1 before v0 is written over it.  */
    1.1  mrg   asm1 = pp;
    1.1  mrg
    1.1  mrg   /* Compute asm1 = |a0 - a1|.  Only its square is used, so the sign
                  of the difference is not recorded.  */
    1.1  mrg   if (s == n)
    1.1  mrg     {
    1.1  mrg       if (mpn_cmp (a0, a1, n) < 0)
    1.1  mrg 	{
    1.1  mrg 	  mpn_sub_n (asm1, a1, a0, n);
    1.1  mrg 	}
    1.1  mrg       else
    1.1  mrg 	{
    1.1  mrg 	  mpn_sub_n (asm1, a0, a1, n);
    1.1  mrg 	}
    1.1  mrg     }
1.1.1.3  mrg   else /* n - s == 1 */
    1.1  mrg     {
                   /* a0 has one limb more than a1, so a1 can exceed a0 only when the
                      top limb of a0 is zero.  */
1.1.1.3  mrg       if (a0[s] == 0 && mpn_cmp (a0, a1, s) < 0)
    1.1  mrg 	{
    1.1  mrg 	  mpn_sub_n (asm1, a1, a0, s);
1.1.1.3  mrg 	  asm1[s] = 0;
    1.1  mrg 	}
    1.1  mrg       else
    1.1  mrg 	{
                       /* Propagate the borrow from the low s limbs into the top limb.  */
1.1.1.3  mrg 	  asm1[s] = a0[s] - mpn_sub_n (asm1, a0, a1, s);
    1.1  mrg 	}
    1.1  mrg     }
    1.1  mrg
               /* Note: scratch_out expands without parentheses; below it is only ever
                  passed as a complete macro argument.  */
    1.1  mrg #define v0	pp				/* 2n */
    1.1  mrg #define vinf	(pp + 2 * n)			/* s+s */
    1.1  mrg #define vm1	scratch				/* 2n */
    1.1  mrg #define scratch_out	scratch + 2 * n
    1.1  mrg
    1.1  mrg   /* vm1 = asm1^2, 2n limbs.  Done first because asm1 lives in pp.  */
    1.1  mrg   TOOM2_SQR_REC (vm1, asm1, n, scratch_out);
    1.1  mrg
    1.1  mrg   /* vinf = a1^2, s+s limbs */
    1.1  mrg   TOOM2_SQR_REC (vinf, a1, s, scratch_out);
    1.1  mrg
    1.1  mrg   /* v0 = a0^2, 2n limbs.  This overwrites asm1.  */
    1.1  mrg   TOOM2_SQR_REC (v0, ap, n, scratch_out);
    1.1  mrg
    1.1  mrg   /* pp[2n..3n) = H(v0) + L(vinf); cy is the carry out.  */
    1.1  mrg   cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);
    1.1  mrg
    1.1  mrg   /* pp[n..2n) = L(v0) + (H(v0) + L(vinf)); cy2 is the carry that
                  still has to be applied at pp + 2n.  */
    1.1  mrg   cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n);
    1.1  mrg
    1.1  mrg   /* pp[2n..3n) += H(vinf), which is s+s-n limbs long; cy becomes the
                  carry that still has to be applied at pp + 3n.  */
    1.1  mrg   cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + s - n);
    1.1  mrg
               /* pp[n..3n) -= vm1.  A borrow out is deducted from cy, which can
                  therefore wrap below zero.  */
    1.1  mrg   cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n);
    1.1  mrg
               /* cy, taken modulo the limb range, is one of -1, 0, 1, 2; cy2 is at
                  most 2.  */
1.1.1.4  mrg   ASSERT (cy + 1 <= 3);
    1.1  mrg   ASSERT (cy2 <= 2);
    1.1  mrg
1.1.1.4  mrg   if (LIKELY (cy <= 2)) {
                 /* Usual case: propagate both pending carries into the high limbs.  */
1.1.1.4  mrg     MPN_INCR_U (pp + 2 * n, s + s, cy2);
1.1.1.3  mrg     MPN_INCR_U (pp + 3 * n, s + s - n, cy);
1.1.1.4  mrg   } else { /* cy is negative */
1.1.1.4  mrg     /* The total contribution of v0+vinf-vm1 can not be negative. */
1.1.1.4  mrg #if WANT_ASSERT
1.1.1.4  mrg     /* The borrow in cy stops the propagation of the carry cy2, */
1.1.1.4  mrg     ASSERT (cy2 == 1);
1.1.1.4  mrg     cy += mpn_add_1 (pp + 2 * n, pp + 2 * n, n, cy2);
1.1.1.4  mrg     ASSERT (cy == 0);
1.1.1.4  mrg #else
1.1.1.4  mrg     /* we simply fill the area with zeros. */
                 /* Per the checked variant above, adding cy2 == 1 carries out of all n
                    limbs (cancelling the borrow in cy), so those limbs end up zero.  */
1.1.1.4  mrg     MPN_FILL (pp + 2 * n, n, 0);
1.1.1.4  mrg #endif
1.1.1.4  mrg   }
    1.1  mrg }