Home | History | Annotate | Line # | Download | only in generic
sqr_basecase.c revision 1.1.1.3
      1 /* mpn_sqr_basecase -- Internal routine to square a natural number
      2    of length n.
      3 
      4    THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
      5    SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
      6 
      7 
      8 Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011 Free Software
      9 Foundation, Inc.
     10 
     11 This file is part of the GNU MP Library.
     12 
     13 The GNU MP Library is free software; you can redistribute it and/or modify
     14 it under the terms of either:
     15 
     16   * the GNU Lesser General Public License as published by the Free
     17     Software Foundation; either version 3 of the License, or (at your
     18     option) any later version.
     19 
     20 or
     21 
     22   * the GNU General Public License as published by the Free Software
     23     Foundation; either version 2 of the License, or (at your option) any
     24     later version.
     25 
     26 or both in parallel, as here.
     27 
     28 The GNU MP Library is distributed in the hope that it will be useful, but
     29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     30 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     31 for more details.
     32 
     33 You should have received copies of the GNU General Public License and the
     34 GNU Lesser General Public License along with the GNU MP Library.  If not,
     35 see https://www.gnu.org/licenses/.  */
     36 
     37 #include "gmp.h"
     38 #include "gmp-impl.h"
     39 #include "longlong.h"
     40 
     41 
     42 #if HAVE_NATIVE_mpn_sqr_diagonal
     43 #define MPN_SQR_DIAGONAL(rp, up, n)					\
     44   mpn_sqr_diagonal (rp, up, n)
     45 #else
     46 #define MPN_SQR_DIAGONAL(rp, up, n)					\
     47   do {									\
     48     mp_size_t _i;							\
     49     for (_i = 0; _i < (n); _i++)					\
     50       {									\
     51 	mp_limb_t ul, lpl;						\
     52 	ul = (up)[_i];							\
     53 	umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);	\
     54 	(rp)[2 * _i] = lpl >> GMP_NAIL_BITS;				\
     55       }									\
     56   } while (0)
     57 #endif
     58 
     59 #if HAVE_NATIVE_mpn_sqr_diag_addlsh1
     60 #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
     61   mpn_sqr_diag_addlsh1 (rp, tp, up, n)
     62 #else
     63 #if HAVE_NATIVE_mpn_addlsh1_n
     64 #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
     65   do {									\
     66     mp_limb_t cy;							\
     67     MPN_SQR_DIAGONAL (rp, up, n);					\
     68     cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);			\
     69     rp[2 * n - 1] += cy;						\
     70   } while (0)
     71 #else
     72 #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
     73   do {									\
     74     mp_limb_t cy;							\
     75     MPN_SQR_DIAGONAL (rp, up, n);					\
     76     cy = mpn_lshift (tp, tp, 2 * n - 2, 1);				\
     77     cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);			\
     78     rp[2 * n - 1] += cy;						\
     79   } while (0)
     80 #endif
     81 #endif
     82 
     83 
     84 #undef READY_WITH_mpn_sqr_basecase
     85 
     86 
     87 #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s
     88 void
     89 mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
     90 {
     91   mp_size_t i;
     92   mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
     93   mp_ptr tp = tarr;
     94   mp_limb_t cy;
     95 
     96   /* must fit 2*n limbs in tarr */
     97   ASSERT (n <= SQR_TOOM2_THRESHOLD);
     98 
     99   if ((n & 1) != 0)
    100     {
    101       if (n == 1)
    102 	{
    103 	  mp_limb_t ul, lpl;
    104 	  ul = up[0];
    105 	  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
    106 	  rp[0] = lpl >> GMP_NAIL_BITS;
    107 	  return;
    108 	}
    109 
    110       MPN_ZERO (tp, n);
    111 
    112       for (i = 0; i <= n - 2; i += 2)
    113 	{
    114 	  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
    115 	  tp[n + i] = cy;
    116 	}
    117     }
    118   else
    119     {
    120       if (n == 2)
    121 	{
    122 #if HAVE_NATIVE_mpn_mul_2
    123 	  rp[3] = mpn_mul_2 (rp, up, 2, up);
    124 #else
    125 	  rp[0] = 0;
    126 	  rp[1] = 0;
    127 	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
    128 #endif
    129 	  return;
    130 	}
    131 
    132       MPN_ZERO (tp, n);
    133 
    134       for (i = 0; i <= n - 4; i += 2)
    135 	{
    136 	  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
    137 	  tp[n + i] = cy;
    138 	}
    139       cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
    140       tp[2 * n - 3] = cy;
    141     }
    142 
    143   MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
    144 }
    145 #define READY_WITH_mpn_sqr_basecase
    146 #endif
    147 
    148 
    149 #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2
    150 
    151 /* mpn_sqr_basecase using plain mpn_addmul_2.
    152 
    153    This is tricky, since we have to let mpn_addmul_2 make some undesirable
    154    multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle.
    155    This forces us to conditionally add or subtract the mpn_sqr_diagonal
    156    results.  Examples of the product we form:
    157 
    158    n = 4              n = 5		n = 6
    159    u1u0 * u3u2u1      u1u0 * u4u3u2u1	u1u0 * u5u4u3u2u1
    160    u2 * u3	      u3u2 * u4u3	u3u2 * u5u4u3
    161 					u4 * u5
    162    add: u0 u2 u3      add: u0 u2 u4	add: u0 u2 u4 u5
    163    sub: u1	      sub: u1 u3	sub: u1 u3
    164 */
    165 
    166 void
    167 mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
    168 {
    169   mp_size_t i;
    170   mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
    171   mp_ptr tp = tarr;
    172   mp_limb_t cy;
    173 
    174   /* must fit 2*n limbs in tarr */
    175   ASSERT (n <= SQR_TOOM2_THRESHOLD);
    176 
    177   if ((n & 1) != 0)
    178     {
    179       mp_limb_t x0, x1;
    180 
    181       if (n == 1)
    182 	{
    183 	  mp_limb_t ul, lpl;
    184 	  ul = up[0];
    185 	  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
    186 	  rp[0] = lpl >> GMP_NAIL_BITS;
    187 	  return;
    188 	}
    189 
    190       /* The code below doesn't like unnormalized operands.  Since such
    191 	 operands are unusual, handle them with a dumb recursion.  */
    192       if (up[n - 1] == 0)
    193 	{
    194 	  rp[2 * n - 2] = 0;
    195 	  rp[2 * n - 1] = 0;
    196 	  mpn_sqr_basecase (rp, up, n - 1);
    197 	  return;
    198 	}
    199 
    200       MPN_ZERO (tp, n);
    201 
    202       for (i = 0; i <= n - 2; i += 2)
    203 	{
    204 	  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
    205 	  tp[n + i] = cy;
    206 	}
    207 
    208       MPN_SQR_DIAGONAL (rp, up, n);
    209 
    210       for (i = 2;; i += 4)
    211 	{
    212 	  x0 = rp[i + 0];
    213 	  rp[i + 0] = (-x0) & GMP_NUMB_MASK;
    214 	  x1 = rp[i + 1];
    215 	  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
    216 	  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
    217 	  if (i + 4 >= 2 * n)
    218 	    break;
    219 	  mpn_incr_u (rp + i + 4, cy);
    220 	}
    221     }
    222   else
    223     {
    224       mp_limb_t x0, x1;
    225 
    226       if (n == 2)
    227 	{
    228 #if HAVE_NATIVE_mpn_mul_2
    229 	  rp[3] = mpn_mul_2 (rp, up, 2, up);
    230 #else
    231 	  rp[0] = 0;
    232 	  rp[1] = 0;
    233 	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
    234 #endif
    235 	  return;
    236 	}
    237 
    238       /* The code below doesn't like unnormalized operands.  Since such
    239 	 operands are unusual, handle them with a dumb recursion.  */
    240       if (up[n - 1] == 0)
    241 	{
    242 	  rp[2 * n - 2] = 0;
    243 	  rp[2 * n - 1] = 0;
    244 	  mpn_sqr_basecase (rp, up, n - 1);
    245 	  return;
    246 	}
    247 
    248       MPN_ZERO (tp, n);
    249 
    250       for (i = 0; i <= n - 4; i += 2)
    251 	{
    252 	  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
    253 	  tp[n + i] = cy;
    254 	}
    255       cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
    256       tp[2 * n - 3] = cy;
    257 
    258       MPN_SQR_DIAGONAL (rp, up, n);
    259 
    260       for (i = 2;; i += 4)
    261 	{
    262 	  x0 = rp[i + 0];
    263 	  rp[i + 0] = (-x0) & GMP_NUMB_MASK;
    264 	  x1 = rp[i + 1];
    265 	  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
    266 	  if (i + 6 >= 2 * n)
    267 	    break;
    268 	  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
    269 	  mpn_incr_u (rp + i + 4, cy);
    270 	}
    271       mpn_decr_u (rp + i + 2, (x1 | x0) != 0);
    272     }
    273 
    274 #if HAVE_NATIVE_mpn_addlsh1_n
    275   cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
    276 #else
    277   cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
    278   cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
    279 #endif
    280   rp[2 * n - 1] += cy;
    281 }
    282 #define READY_WITH_mpn_sqr_basecase
    283 #endif
    284 
    285 
    286 #if ! defined (READY_WITH_mpn_sqr_basecase)
    287 
    288 /* Default mpn_sqr_basecase using mpn_addmul_1.  */
    289 
    290 void
    291 mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
    292 {
    293   mp_size_t i;
    294 
    295   ASSERT (n >= 1);
    296   ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));
    297 
    298   {
    299     mp_limb_t ul, lpl;
    300     ul = up[0];
    301     umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
    302     rp[0] = lpl >> GMP_NAIL_BITS;
    303   }
    304   if (n > 1)
    305     {
    306       mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
    307       mp_ptr tp = tarr;
    308       mp_limb_t cy;
    309 
    310       /* must fit 2*n limbs in tarr */
    311       ASSERT (n <= SQR_TOOM2_THRESHOLD);
    312 
    313       cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
    314       tp[n - 1] = cy;
    315       for (i = 2; i < n; i++)
    316 	{
    317 	  mp_limb_t cy;
    318 	  cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
    319 	  tp[n + i - 2] = cy;
    320 	}
    321 
    322       MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
    323     }
    324 }
    325 #endif
    326