Home | History | Annotate | Line # | Download | only in generic
      1 /* mpn_toom_interpolate_5pts -- Interpolate for toom3, 33, 42.
      2 
      3    Contributed to the GNU project by Robert Harley.
      4    Improvements by Paul Zimmermann and Marco Bodrato.
      5 
      6    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
      7    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
      8    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
      9 
     10 Copyright 2000-2003, 2005-2007, 2009 Free Software Foundation, Inc.
     11 
     12 This file is part of the GNU MP Library.
     13 
     14 The GNU MP Library is free software; you can redistribute it and/or modify
     15 it under the terms of either:
     16 
     17   * the GNU Lesser General Public License as published by the Free
     18     Software Foundation; either version 3 of the License, or (at your
     19     option) any later version.
     20 
     21 or
     22 
     23   * the GNU General Public License as published by the Free Software
     24     Foundation; either version 2 of the License, or (at your option) any
     25     later version.
     26 
     27 or both in parallel, as here.
     28 
     29 The GNU MP Library is distributed in the hope that it will be useful, but
     30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     31 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     32 for more details.
     33 
     34 You should have received copies of the GNU General Public License and the
     35 GNU Lesser General Public License along with the GNU MP Library.  If not,
     36 see https://www.gnu.org/licenses/.  */
     37 
     38 #include "gmp-impl.h"
     39 
     40 void
     41 mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
     42 			   mp_size_t k, mp_size_t twor, int sa,
     43 			   mp_limb_t vinf0)
     44 {
     45   mp_limb_t cy, saved;
     46   mp_size_t twok;
     47   mp_size_t kk1;
     48   mp_ptr c1, v1, c3, vinf;
     49 
     50   twok = k + k;
     51   kk1 = twok + 1;
     52 
     53   c1 = c  + k;
     54   v1 = c1 + k;
     55   c3 = v1 + k;
     56   vinf = c3 + k;
     57 
     58 #define v0 (c)
     59   /* (1) v2 <- v2-vm1 < v2+|vm1|,       (16 8 4 2 1) - (1 -1 1 -1  1) =
     60      thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k)             (15 9 3  3  0)
     61   */
     62   if (sa)
     63     ASSERT_NOCARRY (mpn_add_n (v2, v2, vm1, kk1));
     64   else
     65     ASSERT_NOCARRY (mpn_sub_n (v2, v2, vm1, kk1));
     66 
     67   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
     68        v0       v1       hi(vinf)       |vm1|     v2-vm1      EMPTY */
     69 
     70   ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1));    /* v2 <- v2 / 3 */
     71 						      /* (5 3 1 1 0)*/
     72 
     73   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
     74        v0       v1      hi(vinf)       |vm1|     (v2-vm1)/3    EMPTY */
     75 
     76   /* (2) vm1 <- tm1 := (v1 - vm1) / 2  [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 =
     77      tm1 >= 0                                         (0  1 0  1 0)
     78      No carry comes out from {v1, kk1} +/- {vm1, kk1},
     79      and the division by two is exact.
     80      If (sa!=0) the sign of vm1 is negative */
     81   if (sa)
     82     {
     83 #ifdef HAVE_NATIVE_mpn_rsh1add_n
     84       mpn_rsh1add_n (vm1, v1, vm1, kk1);
     85 #else
     86       ASSERT_NOCARRY (mpn_add_n (vm1, v1, vm1, kk1));
     87       ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
     88 #endif
     89     }
     90   else
     91     {
     92 #ifdef HAVE_NATIVE_mpn_rsh1sub_n
     93       mpn_rsh1sub_n (vm1, v1, vm1, kk1);
     94 #else
     95       ASSERT_NOCARRY (mpn_sub_n (vm1, v1, vm1, kk1));
     96       ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
     97 #endif
     98     }
     99 
    100   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
    101        v0       v1        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */
    102 
    103   /* (3) v1 <- t1 := v1 - v0    (1 1 1 1 1) - (0 0 0 0 1) = (1 1 1 1 0)
    104      t1 >= 0
    105   */
    106   vinf[0] -= mpn_sub_n (v1, v1, c, twok);
    107 
    108   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
    109        v0     v1-v0        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */
    110 
    111   /* (4) v2 <- t2 := ((v2-vm1)/3-t1)/2 = (v2-vm1-3*t1)/6
    112      t2 >= 0                  [(5 3 1 1 0) - (1 1 1 1 0)]/2 = (2 1 0 0 0)
    113   */
    114 #ifdef HAVE_NATIVE_mpn_rsh1sub_n
    115   mpn_rsh1sub_n (v2, v2, v1, kk1);
    116 #else
    117   ASSERT_NOCARRY (mpn_sub_n (v2, v2, v1, kk1));
    118   ASSERT_NOCARRY (mpn_rshift (v2, v2, kk1, 1));
    119 #endif
    120 
    121   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
    122        v0     v1-v0        hi(vinf)     tm1    (v2-vm1-3t1)/6    EMPTY */
    123 
    124   /* (5) v1 <- t1-tm1           (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0)
    125      result is v1 >= 0
    126   */
    127   ASSERT_NOCARRY (mpn_sub_n (v1, v1, vm1, kk1));
    128 
    129   /* We do not need to read the value in vm1, so we add it in {c+k, ...} */
    130   cy = mpn_add_n (c1, c1, vm1, kk1);
    131   MPN_INCR_U (c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */
    132   /* Memory allocated for vm1 is now free, it can be recycled ...*/
    133 
    134   /* (6) v2 <- v2 - 2*vinf,     (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0)
    135      result is v2 >= 0 */
    136   saved = vinf[0];       /* Remember v1's highest byte (will be overwritten). */
    137   vinf[0] = vinf0;       /* Set the right value for vinf0                     */
    138 #ifdef HAVE_NATIVE_mpn_sublsh1_n_ip1
    139   cy = mpn_sublsh1_n_ip1 (v2, vinf, twor);
    140 #else
    141   /* Overwrite unused vm1 */
    142   cy = mpn_lshift (vm1, vinf, twor, 1);
    143   cy += mpn_sub_n (v2, v2, vm1, twor);
    144 #endif
    145   MPN_DECR_U (v2 + twor, kk1 - twor, cy);
    146 
    147   /* Current matrix is
    148      [1 0 0 0 0; vinf
    149       0 1 0 0 0; v2
    150       1 0 1 0 0; v1
    151       0 1 0 1 0; vm1
    152       0 0 0 0 1] v0
    153      Some values already are in-place (we added vm1 in the correct position)
    154      | vinf|  v1 |  v0 |
    155 	      | vm1 |
    156      One still is in a separated area
    157 	| +v2 |
    158      We have to compute v1-=vinf; vm1 -= v2,
    159 	   |-vinf|
    160 	      | -v2 |
    161      Carefully reordering operations we can avoid to compute twice the sum
    162      of the high half of v2 plus the low half of vinf.
    163   */
    164 
    165   /* Add the high half of t2 in {vinf} */
    166   if ( LIKELY(twor > k + 1) ) { /* This is the expected flow  */
    167     cy = mpn_add_n (vinf, vinf, v2 + k, k + 1);
    168     MPN_INCR_U (c3 + kk1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */
    169   } else { /* triggered only by very unbalanced cases like
    170 	      (k+k+(k-2))x(k+k+1) , should be handled by toom32 */
    171     ASSERT_NOCARRY (mpn_add_n (vinf, vinf, v2 + k, twor));
    172   }
    173   /* (7) v1 <- v1 - vinf,       (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0)
    174      result is >= 0 */
    175   /* Side effect: we also subtracted (high half) vm1 -= v2 */
    176   cy = mpn_sub_n (v1, v1, vinf, twor);          /* vinf is at most twor long.  */
    177   vinf0 = vinf[0];                     /* Save again the right value for vinf0 */
    178   vinf[0] = saved;
    179   MPN_DECR_U (v1 + twor, kk1 - twor, cy);       /* Treat the last bytes.       */
    180 
    181   /* (8) vm1 <- vm1-v2          (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0)
    182      Operate only on the low half.
    183   */
    184   cy = mpn_sub_n (c1, c1, v2, k);
    185   MPN_DECR_U (v1, kk1, cy);
    186 
    187   /********************* Beginning the final phase **********************/
    188 
    189   /* Most of the recomposition was done */
    190 
    191   /* add t2 in {c+3k, ...}, but only the low half */
    192   cy = mpn_add_n (c3, c3, v2, k);
    193   vinf[0] += cy;
    194   ASSERT(vinf[0] >= cy); /* No carry */
    195   MPN_INCR_U (vinf, twor, vinf0); /* Add vinf0, propagate carry. */
    196 
    197 #undef v0
    198 }
    199