Home | History | Annotate | Line # | Download | only in generic
sqr_basecase.c revision 1.1
      1  1.1  mrg /* mpn_sqr_basecase -- Internal routine to square a natural number
      2  1.1  mrg    of length n.
      3  1.1  mrg 
      4  1.1  mrg    THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
      5  1.1  mrg    SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
      6  1.1  mrg 
      7  1.1  mrg 
      8  1.1  mrg Copyright 1991, 1992, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004,
      9  1.1  mrg 2005, 2008 Free Software Foundation, Inc.
     10  1.1  mrg 
     11  1.1  mrg This file is part of the GNU MP Library.
     12  1.1  mrg 
     13  1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
     14  1.1  mrg it under the terms of the GNU Lesser General Public License as published by
     15  1.1  mrg the Free Software Foundation; either version 3 of the License, or (at your
     16  1.1  mrg option) any later version.
     17  1.1  mrg 
     18  1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
     19  1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     20  1.1  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     21  1.1  mrg License for more details.
     22  1.1  mrg 
     23  1.1  mrg You should have received a copy of the GNU Lesser General Public License
     24  1.1  mrg along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
     25  1.1  mrg 
     26  1.1  mrg #include "gmp.h"
     27  1.1  mrg #include "gmp-impl.h"
     28  1.1  mrg #include "longlong.h"
     29  1.1  mrg 
     30  1.1  mrg 
     31  1.1  mrg #if HAVE_NATIVE_mpn_sqr_diagonal
     32  1.1  mrg #define MPN_SQR_DIAGONAL(rp, up, n)					\
     33  1.1  mrg   mpn_sqr_diagonal (rp, up, n)
     34  1.1  mrg #else
     35  1.1  mrg #define MPN_SQR_DIAGONAL(rp, up, n)					\
     36  1.1  mrg   do {									\
     37  1.1  mrg     mp_size_t _i;							\
     38  1.1  mrg     for (_i = 0; _i < (n); _i++)					\
     39  1.1  mrg       {									\
     40  1.1  mrg 	mp_limb_t ul, lpl;						\
     41  1.1  mrg 	ul = (up)[_i];							\
     42  1.1  mrg 	umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);	\
     43  1.1  mrg 	(rp)[2 * _i] = lpl >> GMP_NAIL_BITS;				\
     44  1.1  mrg       }									\
     45  1.1  mrg   } while (0)
     46  1.1  mrg #endif
     47  1.1  mrg 
     48  1.1  mrg 
     49  1.1  mrg #undef READY_WITH_mpn_sqr_basecase
     50  1.1  mrg 
     51  1.1  mrg 
     52  1.1  mrg #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s
     53  1.1  mrg void
     54  1.1  mrg mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
     55  1.1  mrg {
     56  1.1  mrg   mp_size_t i;
     57  1.1  mrg   mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
     58  1.1  mrg   mp_ptr tp = tarr;
     59  1.1  mrg   mp_limb_t cy;
     60  1.1  mrg 
     61  1.1  mrg   /* must fit 2*n limbs in tarr */
     62  1.1  mrg   ASSERT (n <= SQR_TOOM2_THRESHOLD);
     63  1.1  mrg 
     64  1.1  mrg   if ((n & 1) != 0)
     65  1.1  mrg     {
     66  1.1  mrg       if (n == 1)
     67  1.1  mrg 	{
     68  1.1  mrg 	  mp_limb_t ul, lpl;
     69  1.1  mrg 	  ul = up[0];
     70  1.1  mrg 	  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
     71  1.1  mrg 	  rp[0] = lpl >> GMP_NAIL_BITS;
     72  1.1  mrg 	  return;
     73  1.1  mrg 	}
     74  1.1  mrg 
     75  1.1  mrg       MPN_ZERO (tp, n);
     76  1.1  mrg 
     77  1.1  mrg       for (i = 0; i <= n - 2; i += 2)
     78  1.1  mrg 	{
     79  1.1  mrg 	  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
     80  1.1  mrg 	  tp[n + i] = cy;
     81  1.1  mrg 	}
     82  1.1  mrg     }
     83  1.1  mrg   else
     84  1.1  mrg     {
     85  1.1  mrg       if (n == 2)
     86  1.1  mrg 	{
     87  1.1  mrg 	  rp[0] = 0;
     88  1.1  mrg 	  rp[1] = 0;
     89  1.1  mrg 	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
     90  1.1  mrg 	  return;
     91  1.1  mrg 	}
     92  1.1  mrg 
     93  1.1  mrg       MPN_ZERO (tp, n);
     94  1.1  mrg 
     95  1.1  mrg       for (i = 0; i <= n - 4; i += 2)
     96  1.1  mrg 	{
     97  1.1  mrg 	  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
     98  1.1  mrg 	  tp[n + i] = cy;
     99  1.1  mrg 	}
    100  1.1  mrg       cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
    101  1.1  mrg       tp[2 * n - 3] = cy;
    102  1.1  mrg     }
    103  1.1  mrg 
    104  1.1  mrg   MPN_SQR_DIAGONAL (rp, up, n);
    105  1.1  mrg 
    106  1.1  mrg #if HAVE_NATIVE_mpn_addlsh1_n
    107  1.1  mrg   cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
    108  1.1  mrg #else
    109  1.1  mrg   cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
    110  1.1  mrg   cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
    111  1.1  mrg #endif
    112  1.1  mrg   rp[2 * n - 1] += cy;
    113  1.1  mrg }
    114  1.1  mrg #define READY_WITH_mpn_sqr_basecase
    115  1.1  mrg #endif
    116  1.1  mrg 
    117  1.1  mrg 
    118  1.1  mrg #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2
    119  1.1  mrg 
    120  1.1  mrg /* mpn_sqr_basecase using plain mpn_addmul_2.
    121  1.1  mrg 
    122  1.1  mrg    This is tricky, since we have to let mpn_addmul_2 make some undesirable
    123  1.1  mrg    multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle.
    124  1.1  mrg    This forces us to conditionally add or subtract the mpn_sqr_diagonal
    125  1.1  mrg    results.  Examples of the product we form:
    126  1.1  mrg 
    127  1.1  mrg    n = 4              n = 5		n = 6
    128  1.1  mrg    u1u0 * u3u2u1      u1u0 * u4u3u2u1	u1u0 * u5u4u3u2u1
    129  1.1  mrg    u2 * u3	      u3u2 * u4u3	u3u2 * u5u4u3
    130  1.1  mrg 					u4 * u5
    131  1.1  mrg    add: u0 u2 u3      add: u0 u2 u4	add: u0 u2 u4 u5
    132  1.1  mrg    sub: u1	      sub: u1 u3	sub: u1 u3
    133  1.1  mrg */
    134  1.1  mrg 
    135  1.1  mrg void
    136  1.1  mrg mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
    137  1.1  mrg {
    138  1.1  mrg   mp_size_t i;
    139  1.1  mrg   mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
    140  1.1  mrg   mp_ptr tp = tarr;
    141  1.1  mrg   mp_limb_t cy;
    142  1.1  mrg 
    143  1.1  mrg   /* must fit 2*n limbs in tarr */
    144  1.1  mrg   ASSERT (n <= SQR_TOOM2_THRESHOLD);
    145  1.1  mrg 
    146  1.1  mrg   if ((n & 1) != 0)
    147  1.1  mrg     {
    148  1.1  mrg       mp_limb_t x0, x1;
    149  1.1  mrg 
    150  1.1  mrg       if (n == 1)
    151  1.1  mrg 	{
    152  1.1  mrg 	  mp_limb_t ul, lpl;
    153  1.1  mrg 	  ul = up[0];
    154  1.1  mrg 	  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
    155  1.1  mrg 	  rp[0] = lpl >> GMP_NAIL_BITS;
    156  1.1  mrg 	  return;
    157  1.1  mrg 	}
    158  1.1  mrg 
    159  1.1  mrg       /* The code below doesn't like unnormalized operands.  Since such
    160  1.1  mrg 	 operands are unusual, handle them with a dumb recursion.  */
    161  1.1  mrg       if (up[n - 1] == 0)
    162  1.1  mrg 	{
    163  1.1  mrg 	  rp[2 * n - 2] = 0;
    164  1.1  mrg 	  rp[2 * n - 1] = 0;
    165  1.1  mrg 	  mpn_sqr_basecase (rp, up, n - 1);
    166  1.1  mrg 	  return;
    167  1.1  mrg 	}
    168  1.1  mrg 
    169  1.1  mrg       MPN_ZERO (tp, n);
    170  1.1  mrg 
    171  1.1  mrg       for (i = 0; i <= n - 2; i += 2)
    172  1.1  mrg 	{
    173  1.1  mrg 	  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
    174  1.1  mrg 	  tp[n + i] = cy;
    175  1.1  mrg 	}
    176  1.1  mrg 
    177  1.1  mrg       MPN_SQR_DIAGONAL (rp, up, n);
    178  1.1  mrg 
    179  1.1  mrg       for (i = 2;; i += 4)
    180  1.1  mrg 	{
    181  1.1  mrg 	  x0 = rp[i + 0];
    182  1.1  mrg 	  rp[i + 0] = (-x0) & GMP_NUMB_MASK;
    183  1.1  mrg 	  x1 = rp[i + 1];
    184  1.1  mrg 	  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
    185  1.1  mrg 	  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
    186  1.1  mrg 	  if (i + 4 >= 2 * n)
    187  1.1  mrg 	    break;
    188  1.1  mrg 	  mpn_incr_u (rp + i + 4, cy);
    189  1.1  mrg 	}
    190  1.1  mrg     }
    191  1.1  mrg   else
    192  1.1  mrg     {
    193  1.1  mrg       mp_limb_t x0, x1;
    194  1.1  mrg 
    195  1.1  mrg       if (n == 2)
    196  1.1  mrg 	{
    197  1.1  mrg 	  rp[0] = 0;
    198  1.1  mrg 	  rp[1] = 0;
    199  1.1  mrg 	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
    200  1.1  mrg 	  return;
    201  1.1  mrg 	}
    202  1.1  mrg 
    203  1.1  mrg       /* The code below doesn't like unnormalized operands.  Since such
    204  1.1  mrg 	 operands are unusual, handle them with a dumb recursion.  */
    205  1.1  mrg       if (up[n - 1] == 0)
    206  1.1  mrg 	{
    207  1.1  mrg 	  rp[2 * n - 2] = 0;
    208  1.1  mrg 	  rp[2 * n - 1] = 0;
    209  1.1  mrg 	  mpn_sqr_basecase (rp, up, n - 1);
    210  1.1  mrg 	  return;
    211  1.1  mrg 	}
    212  1.1  mrg 
    213  1.1  mrg       MPN_ZERO (tp, n);
    214  1.1  mrg 
    215  1.1  mrg       for (i = 0; i <= n - 4; i += 2)
    216  1.1  mrg 	{
    217  1.1  mrg 	  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
    218  1.1  mrg 	  tp[n + i] = cy;
    219  1.1  mrg 	}
    220  1.1  mrg       cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
    221  1.1  mrg       tp[2 * n - 3] = cy;
    222  1.1  mrg 
    223  1.1  mrg       MPN_SQR_DIAGONAL (rp, up, n);
    224  1.1  mrg 
    225  1.1  mrg       for (i = 2;; i += 4)
    226  1.1  mrg 	{
    227  1.1  mrg 	  x0 = rp[i + 0];
    228  1.1  mrg 	  rp[i + 0] = (-x0) & GMP_NUMB_MASK;
    229  1.1  mrg 	  x1 = rp[i + 1];
    230  1.1  mrg 	  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
    231  1.1  mrg 	  if (i + 6 >= 2 * n)
    232  1.1  mrg 	    break;
    233  1.1  mrg 	  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
    234  1.1  mrg 	  mpn_incr_u (rp + i + 4, cy);
    235  1.1  mrg 	}
    236  1.1  mrg       mpn_decr_u (rp + i + 2, (x1 | x0) != 0);
    237  1.1  mrg     }
    238  1.1  mrg 
    239  1.1  mrg #if HAVE_NATIVE_mpn_addlsh1_n
    240  1.1  mrg   cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
    241  1.1  mrg #else
    242  1.1  mrg   cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
    243  1.1  mrg   cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
    244  1.1  mrg #endif
    245  1.1  mrg   rp[2 * n - 1] += cy;
    246  1.1  mrg }
    247  1.1  mrg #define READY_WITH_mpn_sqr_basecase
    248  1.1  mrg #endif
    249  1.1  mrg 
    250  1.1  mrg 
    251  1.1  mrg #if ! defined (READY_WITH_mpn_sqr_basecase)
    252  1.1  mrg 
    253  1.1  mrg /* Default mpn_sqr_basecase using mpn_addmul_1.  */
    254  1.1  mrg 
    255  1.1  mrg void
    256  1.1  mrg mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
    257  1.1  mrg {
    258  1.1  mrg   mp_size_t i;
    259  1.1  mrg 
    260  1.1  mrg   ASSERT (n >= 1);
    261  1.1  mrg   ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));
    262  1.1  mrg 
    263  1.1  mrg   {
    264  1.1  mrg     mp_limb_t ul, lpl;
    265  1.1  mrg     ul = up[0];
    266  1.1  mrg     umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
    267  1.1  mrg     rp[0] = lpl >> GMP_NAIL_BITS;
    268  1.1  mrg   }
    269  1.1  mrg   if (n > 1)
    270  1.1  mrg     {
    271  1.1  mrg       mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
    272  1.1  mrg       mp_ptr tp = tarr;
    273  1.1  mrg       mp_limb_t cy;
    274  1.1  mrg 
    275  1.1  mrg       /* must fit 2*n limbs in tarr */
    276  1.1  mrg       ASSERT (n <= SQR_TOOM2_THRESHOLD);
    277  1.1  mrg 
    278  1.1  mrg       cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
    279  1.1  mrg       tp[n - 1] = cy;
    280  1.1  mrg       for (i = 2; i < n; i++)
    281  1.1  mrg 	{
    282  1.1  mrg 	  mp_limb_t cy;
    283  1.1  mrg 	  cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
    284  1.1  mrg 	  tp[n + i - 2] = cy;
    285  1.1  mrg 	}
    286  1.1  mrg       MPN_SQR_DIAGONAL (rp + 2, up + 1, n - 1);
    287  1.1  mrg 
    288  1.1  mrg       {
    289  1.1  mrg 	mp_limb_t cy;
    290  1.1  mrg #if HAVE_NATIVE_mpn_addlsh1_n
    291  1.1  mrg 	cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
    292  1.1  mrg #else
    293  1.1  mrg 	cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
    294  1.1  mrg 	cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
    295  1.1  mrg #endif
    296  1.1  mrg 	rp[2 * n - 1] += cy;
    297  1.1  mrg       }
    298  1.1  mrg     }
    299  1.1  mrg }
    300  1.1  mrg #endif
    301