Home | History | Annotate | Line # | Download | only in generic
      1      1.1  mrg /* mpn_sqrlo_basecase -- Internal routine to square a natural number
      2      1.1  mrg    of length n.
      3      1.1  mrg 
      4      1.1  mrg    THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
      5      1.1  mrg    SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
      6      1.1  mrg 
      7      1.1  mrg 
      8  1.1.1.2  mrg Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011, 2015,
      9  1.1.1.2  mrg 2016 Free Software Foundation, Inc.
     10      1.1  mrg 
     11      1.1  mrg This file is part of the GNU MP Library.
     12      1.1  mrg 
     13      1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
     14      1.1  mrg it under the terms of either:
     15      1.1  mrg 
     16      1.1  mrg   * the GNU Lesser General Public License as published by the Free
     17      1.1  mrg     Software Foundation; either version 3 of the License, or (at your
     18      1.1  mrg     option) any later version.
     19      1.1  mrg 
     20      1.1  mrg or
     21      1.1  mrg 
     22      1.1  mrg   * the GNU General Public License as published by the Free Software
     23      1.1  mrg     Foundation; either version 2 of the License, or (at your option) any
     24      1.1  mrg     later version.
     25      1.1  mrg 
     26      1.1  mrg or both in parallel, as here.
     27      1.1  mrg 
     28      1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
     29      1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     30      1.1  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     31      1.1  mrg for more details.
     32      1.1  mrg 
     33      1.1  mrg You should have received copies of the GNU General Public License and the
     34      1.1  mrg GNU Lesser General Public License along with the GNU MP Library.  If not,
     35      1.1  mrg see https://www.gnu.org/licenses/.  */
     36      1.1  mrg 
     37      1.1  mrg #include "gmp-impl.h"
     38      1.1  mrg #include "longlong.h"
     39      1.1  mrg 
     40      1.1  mrg #ifndef SQRLO_SHORTCUT_MULTIPLICATIONS
     41      1.1  mrg #if HAVE_NATIVE_mpn_addmul_1
     42      1.1  mrg #define SQRLO_SHORTCUT_MULTIPLICATIONS 0
     43      1.1  mrg #else
     44      1.1  mrg #define SQRLO_SHORTCUT_MULTIPLICATIONS 1
     45      1.1  mrg #endif
     46      1.1  mrg #endif
     47      1.1  mrg 
     48      1.1  mrg #if HAVE_NATIVE_mpn_sqr_diagonal
     49      1.1  mrg #define MPN_SQR_DIAGONAL(rp, up, n)					\
     50      1.1  mrg   mpn_sqr_diagonal (rp, up, n)
     51      1.1  mrg #else
     52      1.1  mrg #define MPN_SQR_DIAGONAL(rp, up, n)					\
     53      1.1  mrg   do {									\
     54      1.1  mrg     mp_size_t _i;							\
     55      1.1  mrg     for (_i = 0; _i < (n); _i++)					\
     56      1.1  mrg       {									\
     57      1.1  mrg 	mp_limb_t ul, lpl;						\
     58      1.1  mrg 	ul = (up)[_i];							\
     59      1.1  mrg 	umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);	\
     60      1.1  mrg 	(rp)[2 * _i] = lpl >> GMP_NAIL_BITS;				\
     61      1.1  mrg       }									\
     62      1.1  mrg   } while (0)
     63      1.1  mrg #endif
     64      1.1  mrg 
     65      1.1  mrg #define MPN_SQRLO_DIAGONAL(rp, up, n)					\
     66      1.1  mrg   do {									\
     67      1.1  mrg     mp_size_t nhalf;							\
     68      1.1  mrg     nhalf = (n) >> 1;							\
     69      1.1  mrg     MPN_SQR_DIAGONAL ((rp), (up), nhalf);				\
     70      1.1  mrg     if (((n) & 1) != 0)							\
     71      1.1  mrg       {									\
     72      1.1  mrg 	mp_limb_t op;							\
     73      1.1  mrg 	op = (up)[nhalf];						\
     74      1.1  mrg 	(rp)[(n) - 1] = (op * op) & GMP_NUMB_MASK;			\
     75      1.1  mrg       }									\
     76      1.1  mrg   } while (0)
     77      1.1  mrg 
     78      1.1  mrg #if HAVE_NATIVE_mpn_addlsh1_n_ip1
     79      1.1  mrg #define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n)				\
     80      1.1  mrg   do {									\
     81      1.1  mrg     MPN_SQRLO_DIAGONAL((rp), (up), (n));				\
     82      1.1  mrg     mpn_addlsh1_n_ip1 ((rp) + 1, (tp), (n) - 1);			\
     83      1.1  mrg   } while (0)
     84      1.1  mrg #else
     85      1.1  mrg #define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n)				\
     86      1.1  mrg   do {									\
     87      1.1  mrg     MPN_SQRLO_DIAGONAL((rp), (up), (n));				\
     88      1.1  mrg     mpn_lshift ((tp), (tp), (n) - 1, 1);				\
     89      1.1  mrg     mpn_add_n ((rp) + 1, (rp) + 1, (tp), (n) - 1);			\
     90      1.1  mrg   } while (0)
     91      1.1  mrg #endif
     92      1.1  mrg 
     93      1.1  mrg /* Avoid zero allocations when SQRLO_LO_THRESHOLD is 0 (this code not used). */
     94      1.1  mrg #define SQRLO_BASECASE_ALLOC						\
     95      1.1  mrg   (SQRLO_DC_THRESHOLD_LIMIT < 2 ? 1 : SQRLO_DC_THRESHOLD_LIMIT - 1)
     96      1.1  mrg 
     97      1.1  mrg /* Default mpn_sqrlo_basecase using mpn_addmul_1.  */
     98      1.1  mrg #ifndef SQRLO_SPECIAL_CASES
     99      1.1  mrg #define SQRLO_SPECIAL_CASES 2
    100      1.1  mrg #endif
    101  1.1.1.2  mrg 
    102  1.1.1.2  mrg #if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
    103  1.1.1.2  mrg #define MAYBE_special_cases 1
    104  1.1.1.2  mrg #else
    105  1.1.1.2  mrg #define MAYBE_special_cases \
    106  1.1.1.2  mrg   ((SQRLO_BASECASE_THRESHOLD <= SQRLO_SPECIAL_CASES) && (SQRLO_DC_THRESHOLD != 0))
    107  1.1.1.2  mrg #endif
    108  1.1.1.2  mrg 
    109      1.1  mrg void
    110      1.1  mrg mpn_sqrlo_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
    111      1.1  mrg {
    112      1.1  mrg   mp_limb_t ul;
    113      1.1  mrg 
    114      1.1  mrg   ASSERT (n >= 1);
    115      1.1  mrg   ASSERT (! MPN_OVERLAP_P (rp, n, up, n));
    116      1.1  mrg 
    117      1.1  mrg   ul = up[0];
    118      1.1  mrg 
    119  1.1.1.2  mrg   if (MAYBE_special_cases && n <= SQRLO_SPECIAL_CASES)
    120      1.1  mrg     {
    121      1.1  mrg #if SQRLO_SPECIAL_CASES == 1
    122      1.1  mrg       rp[0] = (ul * ul) & GMP_NUMB_MASK;
    123      1.1  mrg #else
    124      1.1  mrg       if (n == 1)
    125      1.1  mrg 	rp[0] = (ul * ul) & GMP_NUMB_MASK;
    126      1.1  mrg       else
    127      1.1  mrg 	{
    128      1.1  mrg 	  mp_limb_t hi, lo, ul1;
    129      1.1  mrg 	  umul_ppmm (hi, lo, ul, ul << GMP_NAIL_BITS);
    130      1.1  mrg 	  rp[0] = lo >> GMP_NAIL_BITS;
    131      1.1  mrg 	  ul1 = up[1];
    132      1.1  mrg #if SQRLO_SPECIAL_CASES == 2
    133      1.1  mrg 	  rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK;
    134      1.1  mrg #else
    135      1.1  mrg 	  if (n == 2)
    136      1.1  mrg 	    rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK;
    137      1.1  mrg 	  else
    138      1.1  mrg 	    {
    139      1.1  mrg 	      mp_limb_t hi1;
    140      1.1  mrg #if GMP_NAIL_BITS != 0
    141      1.1  mrg 	      ul <<= 1;
    142      1.1  mrg #endif
    143      1.1  mrg 	      umul_ppmm (hi1, lo, ul1 << GMP_NAIL_BITS, ul);
    144      1.1  mrg 	      hi1 += ul * up[2];
    145      1.1  mrg #if GMP_NAIL_BITS == 0
    146      1.1  mrg 	      hi1 = (hi1 << 1) | (lo >> (GMP_LIMB_BITS - 1));
    147      1.1  mrg 	      add_ssaaaa(rp[2], rp[1], hi1, lo << 1, ul1 * ul1, hi);
    148      1.1  mrg #else
    149      1.1  mrg 	      hi += lo >> GMP_NAIL_BITS;
    150      1.1  mrg 	      rp[1] = hi & GMP_NUMB_MASK;
    151      1.1  mrg 	      rp[2] = (hi1 + ul1 * ul1 + (hi >> GMP_NUMB_BITS)) & GMP_NUMB_MASK;
    152      1.1  mrg #endif
    153      1.1  mrg 	    }
    154      1.1  mrg #endif
    155      1.1  mrg 	}
    156      1.1  mrg #endif
    157      1.1  mrg     }
    158      1.1  mrg   else
    159      1.1  mrg     {
    160      1.1  mrg       mp_limb_t tp[SQRLO_BASECASE_ALLOC];
    161      1.1  mrg       mp_size_t i;
    162      1.1  mrg 
    163      1.1  mrg       /* must fit n-1 limbs in tp */
    164      1.1  mrg       ASSERT (n <= SQRLO_DC_THRESHOLD_LIMIT);
    165      1.1  mrg 
    166      1.1  mrg       --n;
    167      1.1  mrg #if SQRLO_SHORTCUT_MULTIPLICATIONS
    168      1.1  mrg       {
    169      1.1  mrg 	mp_limb_t cy;
    170      1.1  mrg 
    171      1.1  mrg 	cy = ul * up[n] + mpn_mul_1 (tp, up + 1, n - 1, ul);
    172      1.1  mrg 	for (i = 1; 2 * i + 1 < n; ++i)
    173      1.1  mrg 	  {
    174      1.1  mrg 	    ul = up[i];
    175      1.1  mrg 	    cy += ul * up[n - i] + mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i - 1, ul);
    176      1.1  mrg 	  }
    177      1.1  mrg 	tp [n-1] = (cy + ((n & 1)?up[i] * up[i + 1]:0)) & GMP_NUMB_MASK;
    178      1.1  mrg       }
    179      1.1  mrg #else
    180      1.1  mrg       mpn_mul_1 (tp, up + 1, n, ul);
    181      1.1  mrg       for (i = 1; 2 * i < n; ++i)
    182      1.1  mrg 	mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i, up[i]);
    183      1.1  mrg #endif
    184      1.1  mrg 
    185      1.1  mrg       MPN_SQRLO_DIAG_ADDLSH1 (rp, tp, up, n + 1);
    186      1.1  mrg     }
    187      1.1  mrg }
    188      1.1  mrg #undef SQRLO_SPECIAL_CASES
    189  1.1.1.2  mrg #undef MAYBE_special_cases
    190  1.1.1.2  mrg #undef SQRLO_BASECASE_ALLOC
    191  1.1.1.2  mrg #undef SQRLO_SHORTCUT_MULTIPLICATIONS
    192  1.1.1.2  mrg #undef MPN_SQR_DIAGONAL
    193  1.1.1.2  mrg #undef MPN_SQRLO_DIAGONAL
    194  1.1.1.2  mrg #undef MPN_SQRLO_DIAG_ADDLSH1
    195