Home | History | Annotate | Line # | Download | only in generic
sqr_basecase.c revision 1.1.1.1
      1 /* mpn_sqr_basecase -- Internal routine to square a natural number
      2    of length n.
      3 
      4    THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
      5    SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
      6 
      7 
      8 Copyright 1991, 1992, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004,
      9 2005, 2008 Free Software Foundation, Inc.
     10 
     11 This file is part of the GNU MP Library.
     12 
     13 The GNU MP Library is free software; you can redistribute it and/or modify
     14 it under the terms of the GNU Lesser General Public License as published by
     15 the Free Software Foundation; either version 3 of the License, or (at your
     16 option) any later version.
     17 
     18 The GNU MP Library is distributed in the hope that it will be useful, but
     19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     20 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     21 License for more details.
     22 
     23 You should have received a copy of the GNU Lesser General Public License
     24 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
     25 
     26 #include "gmp.h"
     27 #include "gmp-impl.h"
     28 #include "longlong.h"
     29 
     30 
     31 #if HAVE_NATIVE_mpn_sqr_diagonal
     32 #define MPN_SQR_DIAGONAL(rp, up, n)					\
     33   mpn_sqr_diagonal (rp, up, n)
     34 #else
     35 #define MPN_SQR_DIAGONAL(rp, up, n)					\
     36   do {									\
     37     mp_size_t _i;							\
     38     for (_i = 0; _i < (n); _i++)					\
     39       {									\
     40 	mp_limb_t ul, lpl;						\
     41 	ul = (up)[_i];							\
     42 	umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);	\
     43 	(rp)[2 * _i] = lpl >> GMP_NAIL_BITS;				\
     44       }									\
     45   } while (0)
     46 #endif
     47 
     48 
     49 #undef READY_WITH_mpn_sqr_basecase
     50 
     51 
     52 #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s
     53 void
     54 mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
     55 {
     56   mp_size_t i;
     57   mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
     58   mp_ptr tp = tarr;
     59   mp_limb_t cy;
     60 
     61   /* must fit 2*n limbs in tarr */
     62   ASSERT (n <= SQR_TOOM2_THRESHOLD);
     63 
     64   if ((n & 1) != 0)
     65     {
     66       if (n == 1)
     67 	{
     68 	  mp_limb_t ul, lpl;
     69 	  ul = up[0];
     70 	  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
     71 	  rp[0] = lpl >> GMP_NAIL_BITS;
     72 	  return;
     73 	}
     74 
     75       MPN_ZERO (tp, n);
     76 
     77       for (i = 0; i <= n - 2; i += 2)
     78 	{
     79 	  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
     80 	  tp[n + i] = cy;
     81 	}
     82     }
     83   else
     84     {
     85       if (n == 2)
     86 	{
     87 	  rp[0] = 0;
     88 	  rp[1] = 0;
     89 	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
     90 	  return;
     91 	}
     92 
     93       MPN_ZERO (tp, n);
     94 
     95       for (i = 0; i <= n - 4; i += 2)
     96 	{
     97 	  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
     98 	  tp[n + i] = cy;
     99 	}
    100       cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
    101       tp[2 * n - 3] = cy;
    102     }
    103 
    104   MPN_SQR_DIAGONAL (rp, up, n);
    105 
    106 #if HAVE_NATIVE_mpn_addlsh1_n
    107   cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
    108 #else
    109   cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
    110   cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
    111 #endif
    112   rp[2 * n - 1] += cy;
    113 }
    114 #define READY_WITH_mpn_sqr_basecase
    115 #endif
    116 
    117 
    118 #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2
    119 
    120 /* mpn_sqr_basecase using plain mpn_addmul_2.
    121 
    122    This is tricky, since we have to let mpn_addmul_2 make some undesirable
    123    multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle.
    124    This forces us to conditionally add or subtract the mpn_sqr_diagonal
    125    results.  Examples of the product we form:
    126 
    127    n = 4              n = 5		n = 6
    128    u1u0 * u3u2u1      u1u0 * u4u3u2u1	u1u0 * u5u4u3u2u1
    129    u2 * u3	      u3u2 * u4u3	u3u2 * u5u4u3
    130 					u4 * u5
    131    add: u0 u2 u3      add: u0 u2 u4	add: u0 u2 u4 u5
    132    sub: u1	      sub: u1 u3	sub: u1 u3
    133 */
    134 
    135 void
    136 mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
    137 {
    138   mp_size_t i;
    139   mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
    140   mp_ptr tp = tarr;
    141   mp_limb_t cy;
    142 
    143   /* must fit 2*n limbs in tarr */
    144   ASSERT (n <= SQR_TOOM2_THRESHOLD);
    145 
    146   if ((n & 1) != 0)
    147     {
    148       mp_limb_t x0, x1;
    149 
    150       if (n == 1)
    151 	{
    152 	  mp_limb_t ul, lpl;
    153 	  ul = up[0];
    154 	  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
    155 	  rp[0] = lpl >> GMP_NAIL_BITS;
    156 	  return;
    157 	}
    158 
    159       /* The code below doesn't like unnormalized operands.  Since such
    160 	 operands are unusual, handle them with a dumb recursion.  */
    161       if (up[n - 1] == 0)
    162 	{
    163 	  rp[2 * n - 2] = 0;
    164 	  rp[2 * n - 1] = 0;
    165 	  mpn_sqr_basecase (rp, up, n - 1);
    166 	  return;
    167 	}
    168 
    169       MPN_ZERO (tp, n);
    170 
    171       for (i = 0; i <= n - 2; i += 2)
    172 	{
    173 	  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
    174 	  tp[n + i] = cy;
    175 	}
    176 
    177       MPN_SQR_DIAGONAL (rp, up, n);
    178 
    179       for (i = 2;; i += 4)
    180 	{
    181 	  x0 = rp[i + 0];
    182 	  rp[i + 0] = (-x0) & GMP_NUMB_MASK;
    183 	  x1 = rp[i + 1];
    184 	  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
    185 	  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
    186 	  if (i + 4 >= 2 * n)
    187 	    break;
    188 	  mpn_incr_u (rp + i + 4, cy);
    189 	}
    190     }
    191   else
    192     {
    193       mp_limb_t x0, x1;
    194 
    195       if (n == 2)
    196 	{
    197 	  rp[0] = 0;
    198 	  rp[1] = 0;
    199 	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
    200 	  return;
    201 	}
    202 
    203       /* The code below doesn't like unnormalized operands.  Since such
    204 	 operands are unusual, handle them with a dumb recursion.  */
    205       if (up[n - 1] == 0)
    206 	{
    207 	  rp[2 * n - 2] = 0;
    208 	  rp[2 * n - 1] = 0;
    209 	  mpn_sqr_basecase (rp, up, n - 1);
    210 	  return;
    211 	}
    212 
    213       MPN_ZERO (tp, n);
    214 
    215       for (i = 0; i <= n - 4; i += 2)
    216 	{
    217 	  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
    218 	  tp[n + i] = cy;
    219 	}
    220       cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
    221       tp[2 * n - 3] = cy;
    222 
    223       MPN_SQR_DIAGONAL (rp, up, n);
    224 
    225       for (i = 2;; i += 4)
    226 	{
    227 	  x0 = rp[i + 0];
    228 	  rp[i + 0] = (-x0) & GMP_NUMB_MASK;
    229 	  x1 = rp[i + 1];
    230 	  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
    231 	  if (i + 6 >= 2 * n)
    232 	    break;
    233 	  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
    234 	  mpn_incr_u (rp + i + 4, cy);
    235 	}
    236       mpn_decr_u (rp + i + 2, (x1 | x0) != 0);
    237     }
    238 
    239 #if HAVE_NATIVE_mpn_addlsh1_n
    240   cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
    241 #else
    242   cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
    243   cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
    244 #endif
    245   rp[2 * n - 1] += cy;
    246 }
    247 #define READY_WITH_mpn_sqr_basecase
    248 #endif
    249 
    250 
    251 #if ! defined (READY_WITH_mpn_sqr_basecase)
    252 
    253 /* Default mpn_sqr_basecase using mpn_addmul_1.  */
    254 
    255 void
    256 mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
    257 {
    258   mp_size_t i;
    259 
    260   ASSERT (n >= 1);
    261   ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));
    262 
    263   {
    264     mp_limb_t ul, lpl;
    265     ul = up[0];
    266     umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
    267     rp[0] = lpl >> GMP_NAIL_BITS;
    268   }
    269   if (n > 1)
    270     {
    271       mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
    272       mp_ptr tp = tarr;
    273       mp_limb_t cy;
    274 
    275       /* must fit 2*n limbs in tarr */
    276       ASSERT (n <= SQR_TOOM2_THRESHOLD);
    277 
    278       cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
    279       tp[n - 1] = cy;
    280       for (i = 2; i < n; i++)
    281 	{
    282 	  mp_limb_t cy;
    283 	  cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
    284 	  tp[n + i - 2] = cy;
    285 	}
    286       MPN_SQR_DIAGONAL (rp + 2, up + 1, n - 1);
    287 
    288       {
    289 	mp_limb_t cy;
    290 #if HAVE_NATIVE_mpn_addlsh1_n
    291 	cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
    292 #else
    293 	cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
    294 	cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
    295 #endif
    296 	rp[2 * n - 1] += cy;
    297       }
    298     }
    299 }
    300 #endif
    301