Home | History | Annotate | Line # | Download | only in generic
toom2_sqr.c revision 1.1.1.2
      1      1.1  mrg /* mpn_toom2_sqr -- Square {ap,an}.
      2      1.1  mrg 
      3      1.1  mrg    Contributed to the GNU project by Torbjorn Granlund.
      4      1.1  mrg 
      5      1.1  mrg    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
      6      1.1  mrg    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
      7      1.1  mrg    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
      8      1.1  mrg 
      9  1.1.1.2  mrg Copyright 2006, 2007, 2008, 2009, 2010, 2012 Free Software Foundation, Inc.
     10      1.1  mrg 
     11      1.1  mrg This file is part of the GNU MP Library.
     12      1.1  mrg 
     13      1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
     14      1.1  mrg it under the terms of the GNU Lesser General Public License as published by
     15      1.1  mrg the Free Software Foundation; either version 3 of the License, or (at your
     16      1.1  mrg option) any later version.
     17      1.1  mrg 
     18      1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
     19      1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     20      1.1  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     21      1.1  mrg License for more details.
     22      1.1  mrg 
     23      1.1  mrg You should have received a copy of the GNU Lesser General Public License
     24      1.1  mrg along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
     25      1.1  mrg 
     26      1.1  mrg 
     27      1.1  mrg #include "gmp.h"
     28      1.1  mrg #include "gmp-impl.h"
     29      1.1  mrg 
     30      1.1  mrg /* Evaluate in: -1, 0, +inf
     31      1.1  mrg 
     32      1.1  mrg   <-s--><--n-->
     33      1.1  mrg    ____ ______
     34      1.1  mrg   |_a1_|___a0_|
     35      1.1  mrg 
     36      1.1  mrg   v0  =  a0     ^2  #   A(0)^2
     37      1.1  mrg   vm1 = (a0- a1)^2  #  A(-1)^2
     38      1.1  mrg   vinf=      a1 ^2  # A(inf)^2
     39      1.1  mrg */
     40      1.1  mrg 
     41  1.1.1.2  mrg #if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
     42      1.1  mrg #define MAYBE_sqr_toom2   1
     43      1.1  mrg #else
     44      1.1  mrg #define MAYBE_sqr_toom2							\
     45      1.1  mrg   (SQR_TOOM3_THRESHOLD >= 2 * SQR_TOOM2_THRESHOLD)
     46      1.1  mrg #endif
     47      1.1  mrg 
     48      1.1  mrg #define TOOM2_SQR_REC(p, a, n, ws)					\
     49      1.1  mrg   do {									\
     50      1.1  mrg     if (! MAYBE_sqr_toom2						\
     51      1.1  mrg 	|| BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))			\
     52      1.1  mrg       mpn_sqr_basecase (p, a, n);					\
     53      1.1  mrg     else								\
     54      1.1  mrg       mpn_toom2_sqr (p, a, n, ws);					\
     55      1.1  mrg   } while (0)
     56      1.1  mrg 
     57      1.1  mrg void
     58      1.1  mrg mpn_toom2_sqr (mp_ptr pp,
     59      1.1  mrg 	       mp_srcptr ap, mp_size_t an,
     60      1.1  mrg 	       mp_ptr scratch)
     61      1.1  mrg {
     62  1.1.1.2  mrg   const int __gmpn_cpuvec_initialized = 1;
     63      1.1  mrg   mp_size_t n, s;
     64      1.1  mrg   mp_limb_t cy, cy2;
     65      1.1  mrg   mp_ptr asm1;
     66      1.1  mrg 
     67      1.1  mrg #define a0  ap
     68      1.1  mrg #define a1  (ap + n)
     69      1.1  mrg 
     70      1.1  mrg   s = an >> 1;
     71      1.1  mrg   n = an - s;
     72      1.1  mrg 
     73      1.1  mrg   ASSERT (0 < s && s <= n);
     74      1.1  mrg 
     75      1.1  mrg   asm1 = pp;
     76      1.1  mrg 
     77      1.1  mrg   /* Compute asm1.  */
     78      1.1  mrg   if (s == n)
     79      1.1  mrg     {
     80      1.1  mrg       if (mpn_cmp (a0, a1, n) < 0)
     81      1.1  mrg 	{
     82      1.1  mrg 	  mpn_sub_n (asm1, a1, a0, n);
     83      1.1  mrg 	}
     84      1.1  mrg       else
     85      1.1  mrg 	{
     86      1.1  mrg 	  mpn_sub_n (asm1, a0, a1, n);
     87      1.1  mrg 	}
     88      1.1  mrg     }
     89      1.1  mrg   else
     90      1.1  mrg     {
     91      1.1  mrg       if (mpn_zero_p (a0 + s, n - s) && mpn_cmp (a0, a1, s) < 0)
     92      1.1  mrg 	{
     93      1.1  mrg 	  mpn_sub_n (asm1, a1, a0, s);
     94      1.1  mrg 	  MPN_ZERO (asm1 + s, n - s);
     95      1.1  mrg 	}
     96      1.1  mrg       else
     97      1.1  mrg 	{
     98      1.1  mrg 	  mpn_sub (asm1, a0, n, a1, s);
     99      1.1  mrg 	}
    100      1.1  mrg     }
    101      1.1  mrg 
    102      1.1  mrg #define v0	pp				/* 2n */
    103      1.1  mrg #define vinf	(pp + 2 * n)			/* s+s */
    104      1.1  mrg #define vm1	scratch				/* 2n */
    105      1.1  mrg #define scratch_out	scratch + 2 * n
    106      1.1  mrg 
    107      1.1  mrg   /* vm1, 2n limbs */
    108      1.1  mrg   TOOM2_SQR_REC (vm1, asm1, n, scratch_out);
    109      1.1  mrg 
    110      1.1  mrg   /* vinf, s+s limbs */
    111      1.1  mrg   TOOM2_SQR_REC (vinf, a1, s, scratch_out);
    112      1.1  mrg 
    113      1.1  mrg   /* v0, 2n limbs */
    114      1.1  mrg   TOOM2_SQR_REC (v0, ap, n, scratch_out);
    115      1.1  mrg 
    116      1.1  mrg   /* H(v0) + L(vinf) */
    117      1.1  mrg   cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);
    118      1.1  mrg 
    119      1.1  mrg   /* L(v0) + H(v0) */
    120      1.1  mrg   cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n);
    121      1.1  mrg 
    122      1.1  mrg   /* L(vinf) + H(vinf) */
    123      1.1  mrg   cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + s - n);
    124      1.1  mrg 
    125      1.1  mrg   cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n);
    126      1.1  mrg 
    127      1.1  mrg   ASSERT (cy + 1  <= 3);
    128      1.1  mrg   ASSERT (cy2 <= 2);
    129      1.1  mrg 
    130      1.1  mrg   mpn_incr_u (pp + 2 * n, cy2);
    131      1.1  mrg   if (LIKELY (cy <= 2))
    132      1.1  mrg     mpn_incr_u (pp + 3 * n, cy);
    133      1.1  mrg   else
    134      1.1  mrg     mpn_decr_u (pp + 3 * n, 1);
    135      1.1  mrg }
    136