Home | History | Annotate | Line # | Download | only in generic
toom3_sqr.c revision 1.1.1.1
      1 /* mpn_toom3_sqr -- Square {ap,an}.
      2 
      3    Contributed to the GNU project by Torbjorn Granlund.
      4    Additional improvements by Marco Bodrato.
      5 
      6    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
      7    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
      8    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
      9 
     10 Copyright 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
     11 
     12 This file is part of the GNU MP Library.
     13 
     14 The GNU MP Library is free software; you can redistribute it and/or modify
     15 it under the terms of the GNU Lesser General Public License as published by
     16 the Free Software Foundation; either version 3 of the License, or (at your
     17 option) any later version.
     18 
     19 The GNU MP Library is distributed in the hope that it will be useful, but
     20 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     21 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     22 License for more details.
     23 
     24 You should have received a copy of the GNU Lesser General Public License
     25 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
     26 
     27 
     28 #include "gmp.h"
     29 #include "gmp-impl.h"
     30 
     31 /* Evaluate in: -1, 0, +1, +2, +inf
     32 
     33   <-s--><--n--><--n-->
     34    ____ ______ ______
     35   |_a2_|___a1_|___a0_|
     36 
     37   v0  =  a0         ^2 #   A(0)^2
     38   v1  = (a0+ a1+ a2)^2 #   A(1)^2    ah  <= 2
     39   vm1 = (a0- a1+ a2)^2 #  A(-1)^2   |ah| <= 1
     40   v2  = (a0+2a1+4a2)^2 #   A(2)^2    ah  <= 6
     41   vinf=          a2 ^2 # A(inf)^2
     42 */
     43 
     44 #if TUNE_PROGRAM_BUILD
     45 #define MAYBE_sqr_basecase 1
     46 #define MAYBE_sqr_toom3   1
     47 #else
     48 #define MAYBE_sqr_basecase						\
     49   (SQR_TOOM3_THRESHOLD < 3 * SQR_TOOM2_THRESHOLD)
     50 #define MAYBE_sqr_toom3							\
     51   (SQR_TOOM4_THRESHOLD >= 3 * SQR_TOOM3_THRESHOLD)
     52 #endif
     53 
     54 #define TOOM3_SQR_REC(p, a, n, ws)					\
     55   do {									\
     56     if (MAYBE_sqr_basecase						\
     57 	&& BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))			\
     58       mpn_sqr_basecase (p, a, n);					\
     59     else if (! MAYBE_sqr_toom3						\
     60 	     || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))		\
     61       mpn_toom2_sqr (p, a, n, ws);					\
     62     else								\
     63       mpn_toom3_sqr (p, a, n, ws);					\
     64   } while (0)
     65 
     66 void
     67 mpn_toom3_sqr (mp_ptr pp,
     68 	       mp_srcptr ap, mp_size_t an,
     69 	       mp_ptr scratch)
     70 {
     71   mp_size_t n, s;
     72   mp_limb_t cy, vinf0;
     73   mp_ptr gp;
     74   mp_ptr as1, asm1, as2;
     75 
     76 #define a0  ap
     77 #define a1  (ap + n)
     78 #define a2  (ap + 2*n)
     79 
     80   n = (an + 2) / (size_t) 3;
     81 
     82   s = an - 2 * n;
     83 
     84   ASSERT (0 < s && s <= n);
     85 
     86   as1 = scratch + 4 * n + 4;
     87   asm1 = scratch + 2 * n + 2;
     88   as2 = pp + n + 1;
     89 
     90   gp = scratch;
     91 
     92   /* Compute as1 and asm1.  */
     93   cy = mpn_add (gp, a0, n, a2, s);
     94 #if HAVE_NATIVE_mpn_add_n_sub_n
     95   if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
     96     {
     97       cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
     98       as1[n] = cy >> 1;
     99       asm1[n] = 0;
    100     }
    101   else
    102     {
    103       mp_limb_t cy2;
    104       cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
    105       as1[n] = cy + (cy2 >> 1);
    106       asm1[n] = cy - (cy2 & 1);
    107     }
    108 #else
    109   as1[n] = cy + mpn_add_n (as1, gp, a1, n);
    110   if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
    111     {
    112       mpn_sub_n (asm1, a1, gp, n);
    113       asm1[n] = 0;
    114     }
    115   else
    116     {
    117       cy -= mpn_sub_n (asm1, gp, a1, n);
    118       asm1[n] = cy;
    119     }
    120 #endif
    121 
    122   /* Compute as2.  */
    123 #if HAVE_NATIVE_mpn_rsblsh1_n
    124   cy = mpn_add_n (as2, a2, as1, s);
    125   if (s != n)
    126     cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
    127   cy += as1[n];
    128   cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);
    129 #else
    130 #if HAVE_NATIVE_mpn_addlsh1_n
    131   cy  = mpn_addlsh1_n (as2, a1, a2, s);
    132   if (s != n)
    133     cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
    134   cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
    135 #else
    136   cy = mpn_add_n (as2, a2, as1, s);
    137   if (s != n)
    138     cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
    139   cy += as1[n];
    140   cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
    141   cy -= mpn_sub_n (as2, as2, a0, n);
    142 #endif
    143 #endif
    144   as2[n] = cy;
    145 
    146   ASSERT (as1[n] <= 2);
    147   ASSERT (asm1[n] <= 1);
    148 
    149 #define v0    pp				/* 2n */
    150 #define v1    (pp + 2 * n)			/* 2n+1 */
    151 #define vinf  (pp + 4 * n)			/* s+s */
    152 #define vm1   scratch				/* 2n+1 */
    153 #define v2    (scratch + 2 * n + 1)		/* 2n+2 */
    154 #define scratch_out  (scratch + 5 * n + 5)
    155 
    156   /* vm1, 2n+1 limbs */
    157 #ifdef SMALLER_RECURSION
    158   TOOM3_SQR_REC (vm1, asm1, n, scratch_out);
    159   cy = 0;
    160   if (asm1[n] != 0)
    161     cy = asm1[n] + mpn_add_n (vm1 + n, vm1 + n, asm1, n);
    162   if (asm1[n] != 0)
    163     cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
    164   vm1[2 * n] = cy;
    165 #else
    166   TOOM3_SQR_REC (vm1, asm1, n + 1, scratch_out);
    167 #endif
    168 
    169   TOOM3_SQR_REC (v2, as2, n + 1, scratch_out);	/* v2, 2n+1 limbs */
    170 
    171   TOOM3_SQR_REC (vinf, a2, s, scratch_out);	/* vinf, s+s limbs */
    172 
    173   vinf0 = vinf[0];				/* v1 overlaps with this */
    174 
    175 #ifdef SMALLER_RECURSION
    176   /* v1, 2n+1 limbs */
    177   TOOM3_SQR_REC (v1, as1, n, scratch_out);
    178   if (as1[n] == 1)
    179     {
    180       cy = as1[n] + mpn_add_n (v1 + n, v1 + n, as1, n);
    181     }
    182   else if (as1[n] != 0)
    183     {
    184 #if HAVE_NATIVE_mpn_addlsh1_n
    185       cy = 2 * as1[n] + mpn_addlsh1_n (v1 + n, v1 + n, as1, n);
    186 #else
    187       cy = 2 * as1[n] + mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
    188 #endif
    189     }
    190   else
    191     cy = 0;
    192   if (as1[n] == 1)
    193     {
    194       cy += mpn_add_n (v1 + n, v1 + n, as1, n);
    195     }
    196   else if (as1[n] != 0)
    197     {
    198 #if HAVE_NATIVE_mpn_addlsh1_n
    199       cy += mpn_addlsh1_n (v1 + n, v1 + n, as1, n);
    200 #else
    201       cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
    202 #endif
    203     }
    204   v1[2 * n] = cy;
    205 #else
    206   cy = vinf[1];
    207   TOOM3_SQR_REC (v1, as1, n + 1, scratch_out);
    208   vinf[1] = cy;
    209 #endif
    210 
    211   TOOM3_SQR_REC (v0, ap, n, scratch_out);	/* v0, 2n limbs */
    212 
    213   mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + s, 0, vinf0);
    214 }
    215