Home | History | Annotate | Line # | Download | only in generic
sqr_basecase.c revision 1.1.1.1.2.1
      1 /* mpn_sqr_basecase -- Internal routine to square a natural number
      2    of length n.
      3 
      4    THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
      5    SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
      6 
      7 
      8 Copyright 1991, 1992, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004,
      9 2005, 2008, 2010, 2011 Free Software Foundation, Inc.
     10 
     11 This file is part of the GNU MP Library.
     12 
     13 The GNU MP Library is free software; you can redistribute it and/or modify
     14 it under the terms of the GNU Lesser General Public License as published by
     15 the Free Software Foundation; either version 3 of the License, or (at your
     16 option) any later version.
     17 
     18 The GNU MP Library is distributed in the hope that it will be useful, but
     19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     20 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     21 License for more details.
     22 
     23 You should have received a copy of the GNU Lesser General Public License
     24 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
     25 
     26 #include "gmp.h"
     27 #include "gmp-impl.h"
     28 #include "longlong.h"
     29 
     30 
     31 #if HAVE_NATIVE_mpn_sqr_diagonal
     32 #define MPN_SQR_DIAGONAL(rp, up, n)					\
     33   mpn_sqr_diagonal (rp, up, n)
     34 #else
     35 #define MPN_SQR_DIAGONAL(rp, up, n)					\
     36   do {									\
     37     mp_size_t _i;							\
     38     for (_i = 0; _i < (n); _i++)					\
     39       {									\
     40 	mp_limb_t ul, lpl;						\
     41 	ul = (up)[_i];							\
     42 	umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);	\
     43 	(rp)[2 * _i] = lpl >> GMP_NAIL_BITS;				\
     44       }									\
     45   } while (0)
     46 #endif
     47 
     48 #if HAVE_NATIVE_mpn_sqr_diag_addlsh1
     49 #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
     50   mpn_sqr_diag_addlsh1 (rp, tp, up, n)
     51 #else
     52 #if HAVE_NATIVE_mpn_addlsh1_n
     53 #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
     54   do {									\
     55     mp_limb_t cy;							\
     56     MPN_SQR_DIAGONAL (rp, up, n);					\
     57     cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);			\
     58     rp[2 * n - 1] += cy;						\
     59   } while (0)
     60 #else
     61 #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
     62   do {									\
     63     mp_limb_t cy;							\
     64     MPN_SQR_DIAGONAL (rp, up, n);					\
     65     cy = mpn_lshift (tp, tp, 2 * n - 2, 1);				\
     66     cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);			\
     67     rp[2 * n - 1] += cy;						\
     68   } while (0)
     69 #endif
     70 #endif
     71 
     72 
     73 #undef READY_WITH_mpn_sqr_basecase
     74 
     75 
     76 #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s
     77 void
     78 mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
     79 {
     80   mp_size_t i;
     81   mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
     82   mp_ptr tp = tarr;
     83   mp_limb_t cy;
     84 
     85   /* must fit 2*n limbs in tarr */
     86   ASSERT (n <= SQR_TOOM2_THRESHOLD);
     87 
     88   if ((n & 1) != 0)
     89     {
     90       if (n == 1)
     91 	{
     92 	  mp_limb_t ul, lpl;
     93 	  ul = up[0];
     94 	  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
     95 	  rp[0] = lpl >> GMP_NAIL_BITS;
     96 	  return;
     97 	}
     98 
     99       MPN_ZERO (tp, n);
    100 
    101       for (i = 0; i <= n - 2; i += 2)
    102 	{
    103 	  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
    104 	  tp[n + i] = cy;
    105 	}
    106     }
    107   else
    108     {
    109       if (n == 2)
    110 	{
    111 #if HAVE_NATIVE_mpn_mul_2
    112 	  rp[3] = mpn_mul_2 (rp, up, 2, up);
    113 #else
    114 	  rp[0] = 0;
    115 	  rp[1] = 0;
    116 	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
    117 #endif
    118 	  return;
    119 	}
    120 
    121       MPN_ZERO (tp, n);
    122 
    123       for (i = 0; i <= n - 4; i += 2)
    124 	{
    125 	  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
    126 	  tp[n + i] = cy;
    127 	}
    128       cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
    129       tp[2 * n - 3] = cy;
    130     }
    131 
    132   MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
    133 }
    134 #define READY_WITH_mpn_sqr_basecase
    135 #endif
    136 
    137 
    138 #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2
    139 
    140 /* mpn_sqr_basecase using plain mpn_addmul_2.
    141 
    142    This is tricky, since we have to let mpn_addmul_2 make some undesirable
    143    multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle.
    144    This forces us to conditionally add or subtract the mpn_sqr_diagonal
    145    results.  Examples of the product we form:
    146 
    147    n = 4              n = 5		n = 6
    148    u1u0 * u3u2u1      u1u0 * u4u3u2u1	u1u0 * u5u4u3u2u1
    149    u2 * u3	      u3u2 * u4u3	u3u2 * u5u4u3
    150 					u4 * u5
    151    add: u0 u2 u3      add: u0 u2 u4	add: u0 u2 u4 u5
    152    sub: u1	      sub: u1 u3	sub: u1 u3
    153 */
    154 
    155 void
    156 mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
    157 {
    158   mp_size_t i;
    159   mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
    160   mp_ptr tp = tarr;
    161   mp_limb_t cy;
    162 
    163   /* must fit 2*n limbs in tarr */
    164   ASSERT (n <= SQR_TOOM2_THRESHOLD);
    165 
    166   if ((n & 1) != 0)
    167     {
    168       mp_limb_t x0, x1;
    169 
    170       if (n == 1)
    171 	{
    172 	  mp_limb_t ul, lpl;
    173 	  ul = up[0];
    174 	  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
    175 	  rp[0] = lpl >> GMP_NAIL_BITS;
    176 	  return;
    177 	}
    178 
    179       /* The code below doesn't like unnormalized operands.  Since such
    180 	 operands are unusual, handle them with a dumb recursion.  */
    181       if (up[n - 1] == 0)
    182 	{
    183 	  rp[2 * n - 2] = 0;
    184 	  rp[2 * n - 1] = 0;
    185 	  mpn_sqr_basecase (rp, up, n - 1);
    186 	  return;
    187 	}
    188 
    189       MPN_ZERO (tp, n);
    190 
    191       for (i = 0; i <= n - 2; i += 2)
    192 	{
    193 	  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
    194 	  tp[n + i] = cy;
    195 	}
    196 
    197       MPN_SQR_DIAGONAL (rp, up, n);
    198 
    199       for (i = 2;; i += 4)
    200 	{
    201 	  x0 = rp[i + 0];
    202 	  rp[i + 0] = (-x0) & GMP_NUMB_MASK;
    203 	  x1 = rp[i + 1];
    204 	  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
    205 	  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
    206 	  if (i + 4 >= 2 * n)
    207 	    break;
    208 	  mpn_incr_u (rp + i + 4, cy);
    209 	}
    210     }
    211   else
    212     {
    213       mp_limb_t x0, x1;
    214 
    215       if (n == 2)
    216 	{
    217 #if HAVE_NATIVE_mpn_mul_2
    218 	  rp[3] = mpn_mul_2 (rp, up, 2, up);
    219 #else
    220 	  rp[0] = 0;
    221 	  rp[1] = 0;
    222 	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
    223 #endif
    224 	  return;
    225 	}
    226 
    227       /* The code below doesn't like unnormalized operands.  Since such
    228 	 operands are unusual, handle them with a dumb recursion.  */
    229       if (up[n - 1] == 0)
    230 	{
    231 	  rp[2 * n - 2] = 0;
    232 	  rp[2 * n - 1] = 0;
    233 	  mpn_sqr_basecase (rp, up, n - 1);
    234 	  return;
    235 	}
    236 
    237       MPN_ZERO (tp, n);
    238 
    239       for (i = 0; i <= n - 4; i += 2)
    240 	{
    241 	  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
    242 	  tp[n + i] = cy;
    243 	}
    244       cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
    245       tp[2 * n - 3] = cy;
    246 
    247       MPN_SQR_DIAGONAL (rp, up, n);
    248 
    249       for (i = 2;; i += 4)
    250 	{
    251 	  x0 = rp[i + 0];
    252 	  rp[i + 0] = (-x0) & GMP_NUMB_MASK;
    253 	  x1 = rp[i + 1];
    254 	  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
    255 	  if (i + 6 >= 2 * n)
    256 	    break;
    257 	  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
    258 	  mpn_incr_u (rp + i + 4, cy);
    259 	}
    260       mpn_decr_u (rp + i + 2, (x1 | x0) != 0);
    261     }
    262 
    263 #if HAVE_NATIVE_mpn_addlsh1_n
    264   cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
    265 #else
    266   cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
    267   cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
    268 #endif
    269   rp[2 * n - 1] += cy;
    270 }
    271 #define READY_WITH_mpn_sqr_basecase
    272 #endif
    273 
    274 
    275 #if ! defined (READY_WITH_mpn_sqr_basecase)
    276 
    277 /* Default mpn_sqr_basecase using mpn_addmul_1.  */
    278 
    279 void
    280 mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
    281 {
    282   mp_size_t i;
    283 
    284   ASSERT (n >= 1);
    285   ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));
    286 
    287   {
    288     mp_limb_t ul, lpl;
    289     ul = up[0];
    290     umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
    291     rp[0] = lpl >> GMP_NAIL_BITS;
    292   }
    293   if (n > 1)
    294     {
    295       mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
    296       mp_ptr tp = tarr;
    297       mp_limb_t cy;
    298 
    299       /* must fit 2*n limbs in tarr */
    300       ASSERT (n <= SQR_TOOM2_THRESHOLD);
    301 
    302       cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
    303       tp[n - 1] = cy;
    304       for (i = 2; i < n; i++)
    305 	{
    306 	  mp_limb_t cy;
    307 	  cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
    308 	  tp[n + i - 2] = cy;
    309 	}
    310 
    311       MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
    312     }
    313 }
    314 #endif
    315