Home | History | Annotate | Line # | Download | only in generic
      1      1.1  mrg /* mpn_sqr_basecase -- Internal routine to square a natural number
      2      1.1  mrg    of length n.
      3      1.1  mrg 
      4      1.1  mrg    THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
      5      1.1  mrg    SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
      6      1.1  mrg 
      7      1.1  mrg 
      8  1.1.1.4  mrg Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011, 2017 Free
      9  1.1.1.4  mrg Software Foundation, Inc.
     10      1.1  mrg 
     11      1.1  mrg This file is part of the GNU MP Library.
     12      1.1  mrg 
     13      1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
     14  1.1.1.3  mrg it under the terms of either:
     15  1.1.1.3  mrg 
     16  1.1.1.3  mrg   * the GNU Lesser General Public License as published by the Free
     17  1.1.1.3  mrg     Software Foundation; either version 3 of the License, or (at your
     18  1.1.1.3  mrg     option) any later version.
     19  1.1.1.3  mrg 
     20  1.1.1.3  mrg or
     21  1.1.1.3  mrg 
     22  1.1.1.3  mrg   * the GNU General Public License as published by the Free Software
     23  1.1.1.3  mrg     Foundation; either version 2 of the License, or (at your option) any
     24  1.1.1.3  mrg     later version.
     25  1.1.1.3  mrg 
     26  1.1.1.3  mrg or both in parallel, as here.
     27      1.1  mrg 
     28      1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
     29      1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     30  1.1.1.3  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     31  1.1.1.3  mrg for more details.
     32      1.1  mrg 
     33  1.1.1.3  mrg You should have received copies of the GNU General Public License and the
     34  1.1.1.3  mrg GNU Lesser General Public License along with the GNU MP Library.  If not,
     35  1.1.1.3  mrg see https://www.gnu.org/licenses/.  */
     36      1.1  mrg 
     37      1.1  mrg #include "gmp-impl.h"
     38      1.1  mrg #include "longlong.h"
     39      1.1  mrg 
     40      1.1  mrg 
     41      1.1  mrg #if HAVE_NATIVE_mpn_sqr_diagonal
     42      1.1  mrg #define MPN_SQR_DIAGONAL(rp, up, n)					\
     43      1.1  mrg   mpn_sqr_diagonal (rp, up, n)
     44      1.1  mrg #else
     45      1.1  mrg #define MPN_SQR_DIAGONAL(rp, up, n)					\
     46      1.1  mrg   do {									\
     47      1.1  mrg     mp_size_t _i;							\
     48      1.1  mrg     for (_i = 0; _i < (n); _i++)					\
     49      1.1  mrg       {									\
     50      1.1  mrg 	mp_limb_t ul, lpl;						\
     51      1.1  mrg 	ul = (up)[_i];							\
     52      1.1  mrg 	umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);	\
     53      1.1  mrg 	(rp)[2 * _i] = lpl >> GMP_NAIL_BITS;				\
     54      1.1  mrg       }									\
     55      1.1  mrg   } while (0)
     56      1.1  mrg #endif
     57      1.1  mrg 
     58  1.1.1.2  mrg #if HAVE_NATIVE_mpn_sqr_diag_addlsh1
     59  1.1.1.2  mrg #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
     60  1.1.1.2  mrg   mpn_sqr_diag_addlsh1 (rp, tp, up, n)
     61  1.1.1.2  mrg #else
     62  1.1.1.2  mrg #if HAVE_NATIVE_mpn_addlsh1_n
     63  1.1.1.2  mrg #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
     64  1.1.1.2  mrg   do {									\
     65  1.1.1.2  mrg     mp_limb_t cy;							\
     66  1.1.1.2  mrg     MPN_SQR_DIAGONAL (rp, up, n);					\
     67  1.1.1.2  mrg     cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);			\
     68  1.1.1.2  mrg     rp[2 * n - 1] += cy;						\
     69  1.1.1.2  mrg   } while (0)
     70  1.1.1.2  mrg #else
     71  1.1.1.2  mrg #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
     72  1.1.1.2  mrg   do {									\
     73  1.1.1.2  mrg     mp_limb_t cy;							\
     74  1.1.1.2  mrg     MPN_SQR_DIAGONAL (rp, up, n);					\
     75  1.1.1.2  mrg     cy = mpn_lshift (tp, tp, 2 * n - 2, 1);				\
     76  1.1.1.2  mrg     cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);			\
     77  1.1.1.2  mrg     rp[2 * n - 1] += cy;						\
     78  1.1.1.2  mrg   } while (0)
     79  1.1.1.2  mrg #endif
     80  1.1.1.2  mrg #endif
     81  1.1.1.2  mrg 
     82      1.1  mrg 
     83      1.1  mrg #undef READY_WITH_mpn_sqr_basecase
     84      1.1  mrg 
     85      1.1  mrg 
     86      1.1  mrg #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s
     87      1.1  mrg void
     88      1.1  mrg mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
     89      1.1  mrg {
     90      1.1  mrg   mp_size_t i;
     91      1.1  mrg   mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
     92      1.1  mrg   mp_ptr tp = tarr;
     93      1.1  mrg   mp_limb_t cy;
     94      1.1  mrg 
     95      1.1  mrg   /* must fit 2*n limbs in tarr */
     96      1.1  mrg   ASSERT (n <= SQR_TOOM2_THRESHOLD);
     97      1.1  mrg 
     98      1.1  mrg   if ((n & 1) != 0)
     99      1.1  mrg     {
    100      1.1  mrg       if (n == 1)
    101      1.1  mrg 	{
    102      1.1  mrg 	  mp_limb_t ul, lpl;
    103      1.1  mrg 	  ul = up[0];
    104      1.1  mrg 	  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
    105      1.1  mrg 	  rp[0] = lpl >> GMP_NAIL_BITS;
    106      1.1  mrg 	  return;
    107      1.1  mrg 	}
    108      1.1  mrg 
    109      1.1  mrg       MPN_ZERO (tp, n);
    110      1.1  mrg 
    111      1.1  mrg       for (i = 0; i <= n - 2; i += 2)
    112      1.1  mrg 	{
    113      1.1  mrg 	  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
    114      1.1  mrg 	  tp[n + i] = cy;
    115      1.1  mrg 	}
    116      1.1  mrg     }
    117      1.1  mrg   else
    118      1.1  mrg     {
    119      1.1  mrg       if (n == 2)
    120      1.1  mrg 	{
    121  1.1.1.2  mrg #if HAVE_NATIVE_mpn_mul_2
    122  1.1.1.2  mrg 	  rp[3] = mpn_mul_2 (rp, up, 2, up);
    123  1.1.1.2  mrg #else
    124      1.1  mrg 	  rp[0] = 0;
    125      1.1  mrg 	  rp[1] = 0;
    126      1.1  mrg 	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
    127  1.1.1.2  mrg #endif
    128      1.1  mrg 	  return;
    129      1.1  mrg 	}
    130      1.1  mrg 
    131      1.1  mrg       MPN_ZERO (tp, n);
    132      1.1  mrg 
    133      1.1  mrg       for (i = 0; i <= n - 4; i += 2)
    134      1.1  mrg 	{
    135      1.1  mrg 	  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
    136      1.1  mrg 	  tp[n + i] = cy;
    137      1.1  mrg 	}
    138      1.1  mrg       cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
    139      1.1  mrg       tp[2 * n - 3] = cy;
    140      1.1  mrg     }
    141      1.1  mrg 
    142  1.1.1.2  mrg   MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
    143      1.1  mrg }
    144      1.1  mrg #define READY_WITH_mpn_sqr_basecase
    145      1.1  mrg #endif
    146      1.1  mrg 
    147      1.1  mrg 
    148      1.1  mrg #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2
    149      1.1  mrg 
    150      1.1  mrg /* mpn_sqr_basecase using plain mpn_addmul_2.
    151      1.1  mrg 
    152      1.1  mrg    This is tricky, since we have to let mpn_addmul_2 make some undesirable
    153      1.1  mrg    multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle.
    154      1.1  mrg    This forces us to conditionally add or subtract the mpn_sqr_diagonal
    155      1.1  mrg    results.  Examples of the product we form:
    156      1.1  mrg 
    157      1.1  mrg    n = 4              n = 5		n = 6
    158      1.1  mrg    u1u0 * u3u2u1      u1u0 * u4u3u2u1	u1u0 * u5u4u3u2u1
    159      1.1  mrg    u2 * u3	      u3u2 * u4u3	u3u2 * u5u4u3
    160      1.1  mrg 					u4 * u5
    161      1.1  mrg    add: u0 u2 u3      add: u0 u2 u4	add: u0 u2 u4 u5
    162      1.1  mrg    sub: u1	      sub: u1 u3	sub: u1 u3
    163      1.1  mrg */
    164      1.1  mrg 
    165      1.1  mrg void
    166      1.1  mrg mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
    167      1.1  mrg {
    168      1.1  mrg   mp_size_t i;
    169      1.1  mrg   mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
    170      1.1  mrg   mp_ptr tp = tarr;
    171      1.1  mrg   mp_limb_t cy;
    172      1.1  mrg 
    173      1.1  mrg   /* must fit 2*n limbs in tarr */
    174      1.1  mrg   ASSERT (n <= SQR_TOOM2_THRESHOLD);
    175      1.1  mrg 
    176      1.1  mrg   if ((n & 1) != 0)
    177      1.1  mrg     {
    178      1.1  mrg       mp_limb_t x0, x1;
    179      1.1  mrg 
    180      1.1  mrg       if (n == 1)
    181      1.1  mrg 	{
    182      1.1  mrg 	  mp_limb_t ul, lpl;
    183      1.1  mrg 	  ul = up[0];
    184      1.1  mrg 	  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
    185      1.1  mrg 	  rp[0] = lpl >> GMP_NAIL_BITS;
    186      1.1  mrg 	  return;
    187      1.1  mrg 	}
    188      1.1  mrg 
    189      1.1  mrg       /* The code below doesn't like unnormalized operands.  Since such
    190      1.1  mrg 	 operands are unusual, handle them with a dumb recursion.  */
    191      1.1  mrg       if (up[n - 1] == 0)
    192      1.1  mrg 	{
    193      1.1  mrg 	  rp[2 * n - 2] = 0;
    194      1.1  mrg 	  rp[2 * n - 1] = 0;
    195      1.1  mrg 	  mpn_sqr_basecase (rp, up, n - 1);
    196      1.1  mrg 	  return;
    197      1.1  mrg 	}
    198      1.1  mrg 
    199      1.1  mrg       MPN_ZERO (tp, n);
    200      1.1  mrg 
    201      1.1  mrg       for (i = 0; i <= n - 2; i += 2)
    202      1.1  mrg 	{
    203      1.1  mrg 	  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
    204      1.1  mrg 	  tp[n + i] = cy;
    205      1.1  mrg 	}
    206      1.1  mrg 
    207      1.1  mrg       MPN_SQR_DIAGONAL (rp, up, n);
    208      1.1  mrg 
    209      1.1  mrg       for (i = 2;; i += 4)
    210      1.1  mrg 	{
    211      1.1  mrg 	  x0 = rp[i + 0];
    212      1.1  mrg 	  rp[i + 0] = (-x0) & GMP_NUMB_MASK;
    213      1.1  mrg 	  x1 = rp[i + 1];
    214      1.1  mrg 	  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
    215      1.1  mrg 	  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
    216      1.1  mrg 	  if (i + 4 >= 2 * n)
    217      1.1  mrg 	    break;
    218      1.1  mrg 	  mpn_incr_u (rp + i + 4, cy);
    219      1.1  mrg 	}
    220      1.1  mrg     }
    221      1.1  mrg   else
    222      1.1  mrg     {
    223      1.1  mrg       mp_limb_t x0, x1;
    224      1.1  mrg 
    225      1.1  mrg       if (n == 2)
    226      1.1  mrg 	{
    227  1.1.1.2  mrg #if HAVE_NATIVE_mpn_mul_2
    228  1.1.1.2  mrg 	  rp[3] = mpn_mul_2 (rp, up, 2, up);
    229  1.1.1.2  mrg #else
    230      1.1  mrg 	  rp[0] = 0;
    231      1.1  mrg 	  rp[1] = 0;
    232      1.1  mrg 	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
    233  1.1.1.2  mrg #endif
    234      1.1  mrg 	  return;
    235      1.1  mrg 	}
    236      1.1  mrg 
    237      1.1  mrg       /* The code below doesn't like unnormalized operands.  Since such
    238      1.1  mrg 	 operands are unusual, handle them with a dumb recursion.  */
    239      1.1  mrg       if (up[n - 1] == 0)
    240      1.1  mrg 	{
    241      1.1  mrg 	  rp[2 * n - 2] = 0;
    242      1.1  mrg 	  rp[2 * n - 1] = 0;
    243      1.1  mrg 	  mpn_sqr_basecase (rp, up, n - 1);
    244      1.1  mrg 	  return;
    245      1.1  mrg 	}
    246      1.1  mrg 
    247      1.1  mrg       MPN_ZERO (tp, n);
    248      1.1  mrg 
    249      1.1  mrg       for (i = 0; i <= n - 4; i += 2)
    250      1.1  mrg 	{
    251      1.1  mrg 	  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
    252      1.1  mrg 	  tp[n + i] = cy;
    253      1.1  mrg 	}
    254      1.1  mrg       cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
    255      1.1  mrg       tp[2 * n - 3] = cy;
    256      1.1  mrg 
    257      1.1  mrg       MPN_SQR_DIAGONAL (rp, up, n);
    258      1.1  mrg 
    259      1.1  mrg       for (i = 2;; i += 4)
    260      1.1  mrg 	{
    261      1.1  mrg 	  x0 = rp[i + 0];
    262      1.1  mrg 	  rp[i + 0] = (-x0) & GMP_NUMB_MASK;
    263      1.1  mrg 	  x1 = rp[i + 1];
    264      1.1  mrg 	  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
    265      1.1  mrg 	  if (i + 6 >= 2 * n)
    266      1.1  mrg 	    break;
    267      1.1  mrg 	  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
    268      1.1  mrg 	  mpn_incr_u (rp + i + 4, cy);
    269      1.1  mrg 	}
    270      1.1  mrg       mpn_decr_u (rp + i + 2, (x1 | x0) != 0);
    271      1.1  mrg     }
    272      1.1  mrg 
    273      1.1  mrg #if HAVE_NATIVE_mpn_addlsh1_n
    274      1.1  mrg   cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
    275      1.1  mrg #else
    276      1.1  mrg   cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
    277      1.1  mrg   cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
    278      1.1  mrg #endif
    279      1.1  mrg   rp[2 * n - 1] += cy;
    280      1.1  mrg }
    281      1.1  mrg #define READY_WITH_mpn_sqr_basecase
    282      1.1  mrg #endif
    283      1.1  mrg 
    284      1.1  mrg 
    285  1.1.1.4  mrg #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_sqr_diag_addlsh1
    286  1.1.1.4  mrg 
    287  1.1.1.4  mrg /* mpn_sqr_basecase using mpn_addmul_1 and mpn_sqr_diag_addlsh1, avoiding stack
    288  1.1.1.4  mrg    allocation.  */
    289  1.1.1.4  mrg void
    290  1.1.1.4  mrg mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
    291  1.1.1.4  mrg {
    292  1.1.1.4  mrg   if (n == 1)
    293  1.1.1.4  mrg     {
    294  1.1.1.4  mrg       mp_limb_t ul, lpl;
    295  1.1.1.4  mrg       ul = up[0];
    296  1.1.1.4  mrg       umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
    297  1.1.1.4  mrg       rp[0] = lpl >> GMP_NAIL_BITS;
    298  1.1.1.4  mrg     }
    299  1.1.1.4  mrg   else
    300  1.1.1.4  mrg     {
    301  1.1.1.4  mrg       mp_size_t i;
    302  1.1.1.4  mrg       mp_ptr xp;
    303  1.1.1.4  mrg 
    304  1.1.1.4  mrg       rp += 1;
    305  1.1.1.4  mrg       rp[n - 1] = mpn_mul_1 (rp, up + 1, n - 1, up[0]);
    306  1.1.1.4  mrg       for (i = n - 2; i != 0; i--)
    307  1.1.1.4  mrg 	{
    308  1.1.1.4  mrg 	  up += 1;
    309  1.1.1.4  mrg 	  rp += 2;
    310  1.1.1.4  mrg 	  rp[i] = mpn_addmul_1 (rp, up + 1, i, up[0]);
    311  1.1.1.4  mrg 	}
    312  1.1.1.4  mrg 
    313  1.1.1.4  mrg       xp = rp - 2 * n + 3;
    314  1.1.1.4  mrg       mpn_sqr_diag_addlsh1 (xp, xp + 1, up - n + 2, n);
    315  1.1.1.4  mrg     }
    316  1.1.1.4  mrg }
    317  1.1.1.4  mrg #define READY_WITH_mpn_sqr_basecase
    318  1.1.1.4  mrg #endif
    319  1.1.1.4  mrg 
    320  1.1.1.4  mrg 
    321      1.1  mrg #if ! defined (READY_WITH_mpn_sqr_basecase)
    322      1.1  mrg 
    323      1.1  mrg /* Default mpn_sqr_basecase using mpn_addmul_1.  */
    324      1.1  mrg void
    325      1.1  mrg mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
    326      1.1  mrg {
    327      1.1  mrg   mp_size_t i;
    328      1.1  mrg 
    329      1.1  mrg   ASSERT (n >= 1);
    330      1.1  mrg   ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));
    331      1.1  mrg 
    332  1.1.1.4  mrg   if (n == 1)
    333  1.1.1.4  mrg     {
    334  1.1.1.4  mrg       mp_limb_t ul, lpl;
    335  1.1.1.4  mrg       ul = up[0];
    336  1.1.1.4  mrg       umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
    337  1.1.1.4  mrg       rp[0] = lpl >> GMP_NAIL_BITS;
    338  1.1.1.4  mrg     }
    339  1.1.1.4  mrg   else
    340      1.1  mrg     {
    341      1.1  mrg       mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
    342      1.1  mrg       mp_ptr tp = tarr;
    343      1.1  mrg       mp_limb_t cy;
    344      1.1  mrg 
    345      1.1  mrg       /* must fit 2*n limbs in tarr */
    346      1.1  mrg       ASSERT (n <= SQR_TOOM2_THRESHOLD);
    347      1.1  mrg 
    348      1.1  mrg       cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
    349      1.1  mrg       tp[n - 1] = cy;
    350      1.1  mrg       for (i = 2; i < n; i++)
    351      1.1  mrg 	{
    352      1.1  mrg 	  mp_limb_t cy;
    353      1.1  mrg 	  cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
    354      1.1  mrg 	  tp[n + i - 2] = cy;
    355      1.1  mrg 	}
    356      1.1  mrg 
    357  1.1.1.2  mrg       MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
    358      1.1  mrg     }
    359      1.1  mrg }
    360  1.1.1.4  mrg #define READY_WITH_mpn_sqr_basecase
    361      1.1  mrg #endif
    362