Home | History | Annotate | Line # | Download | only in generic
toom2_sqr.c revision 1.1.1.2
      1 /* mpn_toom2_sqr -- Square {ap,an}.
      2 
      3    Contributed to the GNU project by Torbjorn Granlund.
      4 
      5    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
      6    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
      7    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
      8 
      9 Copyright 2006, 2007, 2008, 2009, 2010, 2012 Free Software Foundation, Inc.
     10 
     11 This file is part of the GNU MP Library.
     12 
     13 The GNU MP Library is free software; you can redistribute it and/or modify
     14 it under the terms of the GNU Lesser General Public License as published by
     15 the Free Software Foundation; either version 3 of the License, or (at your
     16 option) any later version.
     17 
     18 The GNU MP Library is distributed in the hope that it will be useful, but
     19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     20 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     21 License for more details.
     22 
     23 You should have received a copy of the GNU Lesser General Public License
     24 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
     25 
     26 
     27 #include "gmp.h"
     28 #include "gmp-impl.h"
     29 
     30 /* Evaluate in: -1, 0, +inf
     31 
     32   <-s--><--n-->
     33    ____ ______
     34   |_a1_|___a0_|
     35 
     36   v0  =  a0     ^2  #   A(0)^2
     37   vm1 = (a0- a1)^2  #  A(-1)^2
     38   vinf=      a1 ^2  # A(inf)^2
     39 */
     40 
     41 #if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
     42 #define MAYBE_sqr_toom2   1
     43 #else
     44 #define MAYBE_sqr_toom2							\
     45   (SQR_TOOM3_THRESHOLD >= 2 * SQR_TOOM2_THRESHOLD)
     46 #endif
     47 
     48 #define TOOM2_SQR_REC(p, a, n, ws)					\
     49   do {									\
     50     if (! MAYBE_sqr_toom2						\
     51 	|| BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))			\
     52       mpn_sqr_basecase (p, a, n);					\
     53     else								\
     54       mpn_toom2_sqr (p, a, n, ws);					\
     55   } while (0)
     56 
     57 void
     58 mpn_toom2_sqr (mp_ptr pp,
     59 	       mp_srcptr ap, mp_size_t an,
     60 	       mp_ptr scratch)
     61 {
     62   const int __gmpn_cpuvec_initialized = 1;
     63   mp_size_t n, s;
     64   mp_limb_t cy, cy2;
     65   mp_ptr asm1;
     66 
     67 #define a0  ap
     68 #define a1  (ap + n)
     69 
     70   s = an >> 1;
     71   n = an - s;
     72 
     73   ASSERT (0 < s && s <= n);
     74 
     75   asm1 = pp;
     76 
     77   /* Compute asm1.  */
     78   if (s == n)
     79     {
     80       if (mpn_cmp (a0, a1, n) < 0)
     81 	{
     82 	  mpn_sub_n (asm1, a1, a0, n);
     83 	}
     84       else
     85 	{
     86 	  mpn_sub_n (asm1, a0, a1, n);
     87 	}
     88     }
     89   else
     90     {
     91       if (mpn_zero_p (a0 + s, n - s) && mpn_cmp (a0, a1, s) < 0)
     92 	{
     93 	  mpn_sub_n (asm1, a1, a0, s);
     94 	  MPN_ZERO (asm1 + s, n - s);
     95 	}
     96       else
     97 	{
     98 	  mpn_sub (asm1, a0, n, a1, s);
     99 	}
    100     }
    101 
    102 #define v0	pp				/* 2n */
    103 #define vinf	(pp + 2 * n)			/* s+s */
    104 #define vm1	scratch				/* 2n */
    105 #define scratch_out	scratch + 2 * n
    106 
    107   /* vm1, 2n limbs */
    108   TOOM2_SQR_REC (vm1, asm1, n, scratch_out);
    109 
    110   /* vinf, s+s limbs */
    111   TOOM2_SQR_REC (vinf, a1, s, scratch_out);
    112 
    113   /* v0, 2n limbs */
    114   TOOM2_SQR_REC (v0, ap, n, scratch_out);
    115 
    116   /* H(v0) + L(vinf) */
    117   cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);
    118 
    119   /* L(v0) + H(v0) */
    120   cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n);
    121 
    122   /* L(vinf) + H(vinf) */
    123   cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + s - n);
    124 
    125   cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n);
    126 
    127   ASSERT (cy + 1  <= 3);
    128   ASSERT (cy2 <= 2);
    129 
    130   mpn_incr_u (pp + 2 * n, cy2);
    131   if (LIKELY (cy <= 2))
    132     mpn_incr_u (pp + 3 * n, cy);
    133   else
    134     mpn_decr_u (pp + 3 * n, 1);
    135 }
    136