Home | History | Annotate | Line # | Download | only in generic
toom2_sqr.c revision 1.1.1.4
      1      1.1  mrg /* mpn_toom2_sqr -- Square {ap,an}.
      2      1.1  mrg 
      3      1.1  mrg    Contributed to the GNU project by Torbjorn Granlund.
      4      1.1  mrg 
      5      1.1  mrg    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
      6      1.1  mrg    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
      7      1.1  mrg    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
      8      1.1  mrg 
      9  1.1.1.4  mrg Copyright 2006-2010, 2012, 2014, 2018 Free Software Foundation, Inc.
     10      1.1  mrg 
     11      1.1  mrg This file is part of the GNU MP Library.
     12      1.1  mrg 
     13      1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
     14  1.1.1.3  mrg it under the terms of either:
     15  1.1.1.3  mrg 
     16  1.1.1.3  mrg   * the GNU Lesser General Public License as published by the Free
     17  1.1.1.3  mrg     Software Foundation; either version 3 of the License, or (at your
     18  1.1.1.3  mrg     option) any later version.
     19  1.1.1.3  mrg 
     20  1.1.1.3  mrg or
     21  1.1.1.3  mrg 
     22  1.1.1.3  mrg   * the GNU General Public License as published by the Free Software
     23  1.1.1.3  mrg     Foundation; either version 2 of the License, or (at your option) any
     24  1.1.1.3  mrg     later version.
     25  1.1.1.3  mrg 
     26  1.1.1.3  mrg or both in parallel, as here.
     27      1.1  mrg 
     28      1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
     29      1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     30  1.1.1.3  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     31  1.1.1.3  mrg for more details.
     32      1.1  mrg 
     33  1.1.1.3  mrg You should have received copies of the GNU General Public License and the
     34  1.1.1.3  mrg GNU Lesser General Public License along with the GNU MP Library.  If not,
     35  1.1.1.3  mrg see https://www.gnu.org/licenses/.  */
     36      1.1  mrg 
     37      1.1  mrg 
     38      1.1  mrg #include "gmp-impl.h"
     39      1.1  mrg 
     40      1.1  mrg /* Evaluate in: -1, 0, +inf
     41      1.1  mrg 
     42      1.1  mrg   <-s--><--n-->
     43      1.1  mrg    ____ ______
     44      1.1  mrg   |_a1_|___a0_|
     45      1.1  mrg 
     46      1.1  mrg   v0  =  a0     ^2  #   A(0)^2
     47      1.1  mrg   vm1 = (a0- a1)^2  #  A(-1)^2
     48      1.1  mrg   vinf=      a1 ^2  # A(inf)^2
     49      1.1  mrg */
     50      1.1  mrg 
     51  1.1.1.2  mrg #if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
     52      1.1  mrg #define MAYBE_sqr_toom2   1
     53      1.1  mrg #else
     54      1.1  mrg #define MAYBE_sqr_toom2							\
     55      1.1  mrg   (SQR_TOOM3_THRESHOLD >= 2 * SQR_TOOM2_THRESHOLD)
     56      1.1  mrg #endif
     57      1.1  mrg 
     58      1.1  mrg #define TOOM2_SQR_REC(p, a, n, ws)					\
     59      1.1  mrg   do {									\
     60      1.1  mrg     if (! MAYBE_sqr_toom2						\
     61      1.1  mrg 	|| BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))			\
     62      1.1  mrg       mpn_sqr_basecase (p, a, n);					\
     63      1.1  mrg     else								\
     64      1.1  mrg       mpn_toom2_sqr (p, a, n, ws);					\
     65      1.1  mrg   } while (0)
     66      1.1  mrg 
     67      1.1  mrg void
     68      1.1  mrg mpn_toom2_sqr (mp_ptr pp,
     69      1.1  mrg 	       mp_srcptr ap, mp_size_t an,
     70      1.1  mrg 	       mp_ptr scratch)
     71      1.1  mrg {
     72  1.1.1.2  mrg   const int __gmpn_cpuvec_initialized = 1;
     73      1.1  mrg   mp_size_t n, s;
     74      1.1  mrg   mp_limb_t cy, cy2;
     75      1.1  mrg   mp_ptr asm1;
     76      1.1  mrg 
     77      1.1  mrg #define a0  ap
     78      1.1  mrg #define a1  (ap + n)
     79      1.1  mrg 
     80      1.1  mrg   s = an >> 1;
     81      1.1  mrg   n = an - s;
     82      1.1  mrg 
     83  1.1.1.3  mrg   ASSERT (0 < s && s <= n && s >= n - 1);
     84      1.1  mrg 
     85      1.1  mrg   asm1 = pp;
     86      1.1  mrg 
     87      1.1  mrg   /* Compute asm1.  */
     88      1.1  mrg   if (s == n)
     89      1.1  mrg     {
     90      1.1  mrg       if (mpn_cmp (a0, a1, n) < 0)
     91      1.1  mrg 	{
     92      1.1  mrg 	  mpn_sub_n (asm1, a1, a0, n);
     93      1.1  mrg 	}
     94      1.1  mrg       else
     95      1.1  mrg 	{
     96      1.1  mrg 	  mpn_sub_n (asm1, a0, a1, n);
     97      1.1  mrg 	}
     98      1.1  mrg     }
     99  1.1.1.3  mrg   else /* n - s == 1 */
    100      1.1  mrg     {
    101  1.1.1.3  mrg       if (a0[s] == 0 && mpn_cmp (a0, a1, s) < 0)
    102      1.1  mrg 	{
    103      1.1  mrg 	  mpn_sub_n (asm1, a1, a0, s);
    104  1.1.1.3  mrg 	  asm1[s] = 0;
    105      1.1  mrg 	}
    106      1.1  mrg       else
    107      1.1  mrg 	{
    108  1.1.1.3  mrg 	  asm1[s] = a0[s] - mpn_sub_n (asm1, a0, a1, s);
    109      1.1  mrg 	}
    110      1.1  mrg     }
    111      1.1  mrg 
    112      1.1  mrg #define v0	pp				/* 2n */
    113      1.1  mrg #define vinf	(pp + 2 * n)			/* s+s */
    114      1.1  mrg #define vm1	scratch				/* 2n */
    115      1.1  mrg #define scratch_out	scratch + 2 * n
    116      1.1  mrg 
    117      1.1  mrg   /* vm1, 2n limbs */
    118      1.1  mrg   TOOM2_SQR_REC (vm1, asm1, n, scratch_out);
    119      1.1  mrg 
    120      1.1  mrg   /* vinf, s+s limbs */
    121      1.1  mrg   TOOM2_SQR_REC (vinf, a1, s, scratch_out);
    122      1.1  mrg 
    123      1.1  mrg   /* v0, 2n limbs */
    124      1.1  mrg   TOOM2_SQR_REC (v0, ap, n, scratch_out);
    125      1.1  mrg 
    126      1.1  mrg   /* H(v0) + L(vinf) */
    127      1.1  mrg   cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);
    128      1.1  mrg 
    129      1.1  mrg   /* L(v0) + H(v0) */
    130      1.1  mrg   cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n);
    131      1.1  mrg 
    132      1.1  mrg   /* L(vinf) + H(vinf) */
    133      1.1  mrg   cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + s - n);
    134      1.1  mrg 
    135      1.1  mrg   cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n);
    136      1.1  mrg 
    137  1.1.1.4  mrg   ASSERT (cy + 1 <= 3);
    138      1.1  mrg   ASSERT (cy2 <= 2);
    139      1.1  mrg 
    140  1.1.1.4  mrg   if (LIKELY (cy <= 2)) {
    141  1.1.1.4  mrg     MPN_INCR_U (pp + 2 * n, s + s, cy2);
    142  1.1.1.3  mrg     MPN_INCR_U (pp + 3 * n, s + s - n, cy);
    143  1.1.1.4  mrg   } else { /* cy is negative */
    144  1.1.1.4  mrg     /* The total contribution of v0+vinf-vm1 can not be negative. */
    145  1.1.1.4  mrg #if WANT_ASSERT
    146  1.1.1.4  mrg     /* The borrow in cy stops the propagation of the carry cy2, */
    147  1.1.1.4  mrg     ASSERT (cy2 == 1);
    148  1.1.1.4  mrg     cy += mpn_add_1 (pp + 2 * n, pp + 2 * n, n, cy2);
    149  1.1.1.4  mrg     ASSERT (cy == 0);
    150  1.1.1.4  mrg #else
    151  1.1.1.4  mrg     /* we simply fill the area with zeros. */
    152  1.1.1.4  mrg     MPN_FILL (pp + 2 * n, n, 0);
    153  1.1.1.4  mrg #endif
    154  1.1.1.4  mrg   }
    155      1.1  mrg }
    156