Home | History | Annotate | Line # | Download | only in generic
      1 /* mpn_sqrlo_basecase -- Internal routine to square a natural number
      2    of length n.
      3 
      4    THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
      5    SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
      6 
      7 
      8 Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011, 2015,
      9 2016 Free Software Foundation, Inc.
     10 
     11 This file is part of the GNU MP Library.
     12 
     13 The GNU MP Library is free software; you can redistribute it and/or modify
     14 it under the terms of either:
     15 
     16   * the GNU Lesser General Public License as published by the Free
     17     Software Foundation; either version 3 of the License, or (at your
     18     option) any later version.
     19 
     20 or
     21 
     22   * the GNU General Public License as published by the Free Software
     23     Foundation; either version 2 of the License, or (at your option) any
     24     later version.
     25 
     26 or both in parallel, as here.
     27 
     28 The GNU MP Library is distributed in the hope that it will be useful, but
     29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     30 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     31 for more details.
     32 
     33 You should have received copies of the GNU General Public License and the
     34 GNU Lesser General Public License along with the GNU MP Library.  If not,
     35 see https://www.gnu.org/licenses/.  */
     36 
     37 #include "gmp-impl.h"
     38 #include "longlong.h"
     39 
     40 #ifndef SQRLO_SHORTCUT_MULTIPLICATIONS
     41 #if HAVE_NATIVE_mpn_addmul_1
     42 #define SQRLO_SHORTCUT_MULTIPLICATIONS 0
     43 #else
     44 #define SQRLO_SHORTCUT_MULTIPLICATIONS 1
     45 #endif
     46 #endif
     47 
     48 #if HAVE_NATIVE_mpn_sqr_diagonal
     49 #define MPN_SQR_DIAGONAL(rp, up, n)					\
     50   mpn_sqr_diagonal (rp, up, n)
     51 #else
     52 #define MPN_SQR_DIAGONAL(rp, up, n)					\
     53   do {									\
     54     mp_size_t _i;							\
     55     for (_i = 0; _i < (n); _i++)					\
     56       {									\
     57 	mp_limb_t ul, lpl;						\
     58 	ul = (up)[_i];							\
     59 	umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);	\
     60 	(rp)[2 * _i] = lpl >> GMP_NAIL_BITS;				\
     61       }									\
     62   } while (0)
     63 #endif
     64 
     65 #define MPN_SQRLO_DIAGONAL(rp, up, n)					\
     66   do {									\
     67     mp_size_t nhalf;							\
     68     nhalf = (n) >> 1;							\
     69     MPN_SQR_DIAGONAL ((rp), (up), nhalf);				\
     70     if (((n) & 1) != 0)							\
     71       {									\
     72 	mp_limb_t op;							\
     73 	op = (up)[nhalf];						\
     74 	(rp)[(n) - 1] = (op * op) & GMP_NUMB_MASK;			\
     75       }									\
     76   } while (0)
     77 
     78 #if HAVE_NATIVE_mpn_addlsh1_n_ip1
     79 #define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n)				\
     80   do {									\
     81     MPN_SQRLO_DIAGONAL((rp), (up), (n));				\
     82     mpn_addlsh1_n_ip1 ((rp) + 1, (tp), (n) - 1);			\
     83   } while (0)
     84 #else
     85 #define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n)				\
     86   do {									\
     87     MPN_SQRLO_DIAGONAL((rp), (up), (n));				\
     88     mpn_lshift ((tp), (tp), (n) - 1, 1);				\
     89     mpn_add_n ((rp) + 1, (rp) + 1, (tp), (n) - 1);			\
     90   } while (0)
     91 #endif
     92 
     93 /* Avoid zero allocations when SQRLO_LO_THRESHOLD is 0 (this code not used). */
     94 #define SQRLO_BASECASE_ALLOC						\
     95   (SQRLO_DC_THRESHOLD_LIMIT < 2 ? 1 : SQRLO_DC_THRESHOLD_LIMIT - 1)
     96 
     97 /* Default mpn_sqrlo_basecase using mpn_addmul_1.  */
     98 #ifndef SQRLO_SPECIAL_CASES
     99 #define SQRLO_SPECIAL_CASES 2
    100 #endif
    101 
    102 #if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
    103 #define MAYBE_special_cases 1
    104 #else
    105 #define MAYBE_special_cases \
    106   ((SQRLO_BASECASE_THRESHOLD <= SQRLO_SPECIAL_CASES) && (SQRLO_DC_THRESHOLD != 0))
    107 #endif
    108 
    109 void
    110 mpn_sqrlo_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
    111 {
    112   mp_limb_t ul;
    113 
    114   ASSERT (n >= 1);
    115   ASSERT (! MPN_OVERLAP_P (rp, n, up, n));
    116 
    117   ul = up[0];
    118 
    119   if (MAYBE_special_cases && n <= SQRLO_SPECIAL_CASES)
    120     {
    121 #if SQRLO_SPECIAL_CASES == 1
    122       rp[0] = (ul * ul) & GMP_NUMB_MASK;
    123 #else
    124       if (n == 1)
    125 	rp[0] = (ul * ul) & GMP_NUMB_MASK;
    126       else
    127 	{
    128 	  mp_limb_t hi, lo, ul1;
    129 	  umul_ppmm (hi, lo, ul, ul << GMP_NAIL_BITS);
    130 	  rp[0] = lo >> GMP_NAIL_BITS;
    131 	  ul1 = up[1];
    132 #if SQRLO_SPECIAL_CASES == 2
    133 	  rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK;
    134 #else
    135 	  if (n == 2)
    136 	    rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK;
    137 	  else
    138 	    {
    139 	      mp_limb_t hi1;
    140 #if GMP_NAIL_BITS != 0
    141 	      ul <<= 1;
    142 #endif
    143 	      umul_ppmm (hi1, lo, ul1 << GMP_NAIL_BITS, ul);
    144 	      hi1 += ul * up[2];
    145 #if GMP_NAIL_BITS == 0
    146 	      hi1 = (hi1 << 1) | (lo >> (GMP_LIMB_BITS - 1));
    147 	      add_ssaaaa(rp[2], rp[1], hi1, lo << 1, ul1 * ul1, hi);
    148 #else
    149 	      hi += lo >> GMP_NAIL_BITS;
    150 	      rp[1] = hi & GMP_NUMB_MASK;
    151 	      rp[2] = (hi1 + ul1 * ul1 + (hi >> GMP_NUMB_BITS)) & GMP_NUMB_MASK;
    152 #endif
    153 	    }
    154 #endif
    155 	}
    156 #endif
    157     }
    158   else
    159     {
    160       mp_limb_t tp[SQRLO_BASECASE_ALLOC];
    161       mp_size_t i;
    162 
    163       /* must fit n-1 limbs in tp */
    164       ASSERT (n <= SQRLO_DC_THRESHOLD_LIMIT);
    165 
    166       --n;
    167 #if SQRLO_SHORTCUT_MULTIPLICATIONS
    168       {
    169 	mp_limb_t cy;
    170 
    171 	cy = ul * up[n] + mpn_mul_1 (tp, up + 1, n - 1, ul);
    172 	for (i = 1; 2 * i + 1 < n; ++i)
    173 	  {
    174 	    ul = up[i];
    175 	    cy += ul * up[n - i] + mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i - 1, ul);
    176 	  }
    177 	tp [n-1] = (cy + ((n & 1)?up[i] * up[i + 1]:0)) & GMP_NUMB_MASK;
    178       }
    179 #else
    180       mpn_mul_1 (tp, up + 1, n, ul);
    181       for (i = 1; 2 * i < n; ++i)
    182 	mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i, up[i]);
    183 #endif
    184 
    185       MPN_SQRLO_DIAG_ADDLSH1 (rp, tp, up, n + 1);
    186     }
    187 }
    188 #undef SQRLO_SPECIAL_CASES
    189 #undef MAYBE_special_cases
    190 #undef SQRLO_BASECASE_ALLOC
    191 #undef SQRLO_SHORTCUT_MULTIPLICATIONS
    192 #undef MPN_SQR_DIAGONAL
    193 #undef MPN_SQRLO_DIAGONAL
    194 #undef MPN_SQRLO_DIAG_ADDLSH1
    195