Home | History | Annotate | Line # | Download | only in generic
      1 /* mpn_powm -- Compute R = U^E mod M.
      2 
      3    Contributed to the GNU project by Torbjorn Granlund.
      4 
      5    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
      6    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
      7    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
      8 
      9 Copyright 2007-2012, 2019 Free Software Foundation, Inc.
     10 
     11 This file is part of the GNU MP Library.
     12 
     13 The GNU MP Library is free software; you can redistribute it and/or modify
     14 it under the terms of either:
     15 
     16   * the GNU Lesser General Public License as published by the Free
     17     Software Foundation; either version 3 of the License, or (at your
     18     option) any later version.
     19 
     20 or
     21 
     22   * the GNU General Public License as published by the Free Software
     23     Foundation; either version 2 of the License, or (at your option) any
     24     later version.
     25 
     26 or both in parallel, as here.
     27 
     28 The GNU MP Library is distributed in the hope that it will be useful, but
     29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     30 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     31 for more details.
     32 
     33 You should have received copies of the GNU General Public License and the
     34 GNU Lesser General Public License along with the GNU MP Library.  If not,
     35 see https://www.gnu.org/licenses/.  */
     36 
     37 
     38 /*
     39   BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
     40 
     41   1. W <- U
     42 
     43   2. T <- (B^n * U) mod M                Convert to REDC form
     44 
     45   3. Compute table U^1, U^3, U^5... of E-dependent size
     46 
     47   4. While there are more bits in E
     48        W <- power left-to-right base-k
     49 
     50 
     51   TODO:
     52 
     53    * Make getbits a macro, thereby allowing it to update the index operand.
     54      That will simplify the code using getbits.  (Perhaps make getbits' sibling
     55      getbit then have similar form, for symmetry.)
     56 
     57    * Write an itch function.  Or perhaps get rid of tp parameter since the huge
     58      pp area is allocated locally anyway?
     59 
     60    * Choose window size without looping.  (Superoptimize or think(tm).)
     61 
     62    * Handle small bases with initial, reduction-free exponentiation.
     63 
     64    * Call new division functions, not mpn_tdiv_qr.
     65 
     66    * Consider special code for one-limb M.
     67 
     68    * How should we handle the redc1/redc2/redc_n choice?
     69      - redc1:  T(binvert_1limb)  + e * (n)   * (T(mullo-1x1) + n*T(addmul_1))
     70      - redc2:  T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2))
     71      - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n)))
     72      This disregards the addmul_N constant term, but we could think of
     73      that as part of the respective mullo.
     74 
     75    * When U (the base) is small, we should start the exponentiation with plain
     76      operations, then convert that partial result to REDC form.
     77 
     78    * When U is just one limb, should it be handled without the k-ary tricks?
     79      We could keep a factor of B^n in W, but use U' = BU as base.  After
     80      multiplying by this (pseudo two-limb) number, we need to multiply by 1/B
     81      mod M.
     82 */
     83 
     84 #include "gmp-impl.h"
     85 #include "longlong.h"
     86 
     87 #undef MPN_REDC_0
     88 #define MPN_REDC_0(rp, up, mp, invm)					\
     89   do {									\
     90     mp_limb_t p1, r0, u0, _dummy;					\
     91     u0 = *(up);								\
     92     umul_ppmm (p1, _dummy, *(mp), (u0 * (invm)) & GMP_NUMB_MASK);	\
     93     ASSERT (((u0 + _dummy) & GMP_NUMB_MASK) == 0);			\
     94     p1 += (u0 != 0);							\
     95     r0 = (up)[1] + p1;							\
     96     if (p1 > r0)							\
     97       r0 -= *(mp);							\
     98     *(rp) = r0;								\
     99   } while (0)
    100 
    101 #undef MPN_REDC_1
    102 #if HAVE_NATIVE_mpn_sbpi1_bdiv_r
    103 #define MPN_REDC_1(rp, up, mp, n, invm)					\
    104   do {									\
    105     mp_limb_t cy;							\
    106     cy = mpn_sbpi1_bdiv_r (up, 2 * n, mp, n, invm);			\
    107     if (cy != 0)							\
    108       mpn_sub_n (rp, up + n, mp, n);					\
    109     else								\
    110       MPN_COPY (rp, up + n, n);						\
    111   } while (0)
    112 #else
    113 #define MPN_REDC_1(rp, up, mp, n, invm)					\
    114   do {									\
    115     mp_limb_t cy;							\
    116     cy = mpn_redc_1 (rp, up, mp, n, invm);				\
    117     if (cy != 0)							\
    118       mpn_sub_n (rp, rp, mp, n);					\
    119   } while (0)
    120 #endif
    121 
    122 #undef MPN_REDC_2
    123 #define MPN_REDC_2(rp, up, mp, n, mip)					\
    124   do {									\
    125     mp_limb_t cy;							\
    126     cy = mpn_redc_2 (rp, up, mp, n, mip);				\
    127     if (cy != 0)							\
    128       mpn_sub_n (rp, rp, mp, n);					\
    129   } while (0)
    130 
    131 #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
    132 #define WANT_REDC_2 1
    133 #endif
    134 
    135 #define getbit(p,bi) \
    136   ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
    137 
    138 static inline mp_limb_t
    139 getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
    140 {
    141   int nbits_in_r;
    142   mp_limb_t r;
    143   mp_size_t i;
    144 
    145   if (bi < nbits)
    146     {
    147       return p[0] & (((mp_limb_t) 1 << bi) - 1);
    148     }
    149   else
    150     {
    151       bi -= nbits;			/* bit index of low bit to extract */
    152       i = bi / GMP_NUMB_BITS;		/* word index of low bit to extract */
    153       bi %= GMP_NUMB_BITS;		/* bit index in low word */
    154       r = p[i] >> bi;			/* extract (low) bits */
    155       nbits_in_r = GMP_NUMB_BITS - bi;	/* number of bits now in r */
    156       if (nbits_in_r < nbits)		/* did we get enough bits? */
    157 	r += p[i + 1] << nbits_in_r;	/* prepend bits from higher word */
    158       return r & (((mp_limb_t) 1 << nbits) - 1);
    159     }
    160 }
    161 
    162 static inline int
    163 win_size (mp_bitcnt_t eb)
    164 {
    165   int k;
    166   static mp_bitcnt_t x[] = {0,7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
    167   for (k = 1; eb > x[k]; k++)
    168     ;
    169   return k;
    170 }
    171 
    172 /* Convert U to REDC form, U_r = B^n * U mod M */
    173 static void
    174 redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
    175 {
    176   mp_ptr tp, qp;
    177   TMP_DECL;
    178   TMP_MARK;
    179 
    180   TMP_ALLOC_LIMBS_2 (tp, un + n, qp, un + 1);
    181 
    182   MPN_ZERO (tp, n);
    183   MPN_COPY (tp + n, up, un);
    184   mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);
    185   TMP_FREE;
    186 }
    187 
    188 /* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
    189    Requires that mp[n-1..0] is odd.
    190    Requires that ep[en-1..0] is > 1.
    191    Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs.  */
    192 void
    193 mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
    194 	  mp_srcptr ep, mp_size_t en,
    195 	  mp_srcptr mp, mp_size_t n, mp_ptr tp)
    196 {
    197   mp_limb_t ip[2], *mip;
    198   int cnt;
    199   mp_bitcnt_t ebi;
    200   int windowsize, this_windowsize;
    201   mp_limb_t expbits;
    202   mp_ptr pp, this_pp;
    203   long i;
    204   TMP_DECL;
    205 
    206   ASSERT (en > 1 || (en == 1 && ep[0] > 1));
    207   ASSERT (n >= 1 && ((mp[0] & 1) != 0));
    208 
    209   TMP_MARK;
    210 
    211   MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
    212 
    213 #if 0
    214   if (bn < n)
    215     {
    216       /* Do the first few exponent bits without mod reductions,
    217 	 until the result is greater than the mod argument.  */
    218       for (;;)
    219 	{
    220 	  mpn_sqr (tp, this_pp, tn);
    221 	  tn = tn * 2 - 1,  tn += tp[tn] != 0;
    222 	  if (getbit (ep, ebi) != 0)
    223 	    mpn_mul (..., tp, tn, bp, bn);
    224 	  ebi--;
    225 	}
    226     }
    227 #endif
    228 
    229   windowsize = win_size (ebi);
    230 
    231 #if WANT_REDC_2
    232   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
    233     {
    234       mip = ip;
    235       binvert_limb (mip[0], mp[0]);
    236       mip[0] = -mip[0];
    237     }
    238   else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
    239     {
    240       mip = ip;
    241       mpn_binvert (mip, mp, 2, tp);
    242       mip[0] = -mip[0]; mip[1] = ~mip[1];
    243     }
    244 #else
    245   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
    246     {
    247       mip = ip;
    248       binvert_limb (mip[0], mp[0]);
    249       mip[0] = -mip[0];
    250     }
    251 #endif
    252   else
    253     {
    254       mip = TMP_ALLOC_LIMBS (n);
    255       mpn_binvert (mip, mp, n, tp);
    256     }
    257 
    258   pp = TMP_ALLOC_LIMBS (n << (windowsize - 1));
    259 
    260   this_pp = pp;
    261   redcify (this_pp, bp, bn, mp, n);
    262 
    263   /* Store b^2 at rp.  */
    264   mpn_sqr (tp, this_pp, n);
    265 #if 0
    266   if (n == 1) {
    267     MPN_REDC_0 (rp, tp, mp, mip[0]);
    268   } else
    269 #endif
    270 #if WANT_REDC_2
    271   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
    272     MPN_REDC_1 (rp, tp, mp, n, mip[0]);
    273   else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
    274     MPN_REDC_2 (rp, tp, mp, n, mip);
    275 #else
    276   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
    277     MPN_REDC_1 (rp, tp, mp, n, mip[0]);
    278 #endif
    279   else
    280     mpn_redc_n (rp, tp, mp, n, mip);
    281 
    282   /* Precompute odd powers of b and put them in the temporary area at pp.  */
    283   for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
    284 #if 1
    285     if (n == 1) {
    286       umul_ppmm((tp)[1], *(tp), *(this_pp), *(rp));
    287       ++this_pp ;
    288       MPN_REDC_0 (this_pp, tp, mp, mip[0]);
    289     } else
    290 #endif
    291     {
    292       mpn_mul_n (tp, this_pp, rp, n);
    293       this_pp += n;
    294 #if WANT_REDC_2
    295       if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
    296 	MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
    297       else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
    298 	MPN_REDC_2 (this_pp, tp, mp, n, mip);
    299 #else
    300       if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
    301 	MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
    302 #endif
    303       else
    304 	mpn_redc_n (this_pp, tp, mp, n, mip);
    305     }
    306 
    307   expbits = getbits (ep, ebi, windowsize);
    308   if (ebi < windowsize)
    309     ebi = 0;
    310   else
    311     ebi -= windowsize;
    312 
    313   count_trailing_zeros (cnt, expbits);
    314   ebi += cnt;
    315   expbits >>= cnt;
    316 
    317   MPN_COPY (rp, pp + n * (expbits >> 1), n);
    318 
    319 #define INNERLOOP							\
    320   while (ebi != 0)							\
    321     {									\
    322       while (getbit (ep, ebi) == 0)					\
    323 	{								\
    324 	  MPN_SQR (tp, rp, n);						\
    325 	  MPN_REDUCE (rp, tp, mp, n, mip);				\
    326 	  if (--ebi == 0)						\
    327 	    goto done;							\
    328 	}								\
    329 									\
    330       /* The next bit of the exponent is 1.  Now extract the largest	\
    331 	 block of bits <= windowsize, and such that the least		\
    332 	 significant bit is 1.  */					\
    333 									\
    334       expbits = getbits (ep, ebi, windowsize);				\
    335       this_windowsize = windowsize;					\
    336       if (ebi < windowsize)						\
    337 	{								\
    338 	  this_windowsize -= windowsize - ebi;				\
    339 	  ebi = 0;							\
    340 	}								\
    341       else								\
    342         ebi -= windowsize;						\
    343 									\
    344       count_trailing_zeros (cnt, expbits);				\
    345       this_windowsize -= cnt;						\
    346       ebi += cnt;							\
    347       expbits >>= cnt;							\
    348 									\
    349       do								\
    350 	{								\
    351 	  MPN_SQR (tp, rp, n);						\
    352 	  MPN_REDUCE (rp, tp, mp, n, mip);				\
    353 	}								\
    354       while (--this_windowsize != 0);					\
    355 									\
    356       MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n);			\
    357       MPN_REDUCE (rp, tp, mp, n, mip);					\
    358     }
    359 
    360 
    361   if (n == 1)
    362     {
    363 #undef MPN_MUL_N
    364 #undef MPN_SQR
    365 #undef MPN_REDUCE
    366 #define MPN_MUL_N(r,a,b,n)		umul_ppmm((r)[1], *(r), *(a), *(b))
    367 #define MPN_SQR(r,a,n)			umul_ppmm((r)[1], *(r), *(a), *(a))
    368 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_0(rp, tp, mp, mip[0])
    369       INNERLOOP;
    370     }
    371   else
    372 #if WANT_REDC_2
    373   if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
    374     {
    375       if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
    376 	{
    377 	  if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
    378 	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
    379 	    {
    380 #undef MPN_MUL_N
    381 #undef MPN_SQR
    382 #undef MPN_REDUCE
    383 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
    384 #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
    385 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
    386 	      INNERLOOP;
    387 	    }
    388 	  else
    389 	    {
    390 #undef MPN_MUL_N
    391 #undef MPN_SQR
    392 #undef MPN_REDUCE
    393 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
    394 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
    395 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
    396 	      INNERLOOP;
    397 	    }
    398 	}
    399       else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
    400 	{
    401 	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
    402 	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
    403 	    {
    404 #undef MPN_MUL_N
    405 #undef MPN_SQR
    406 #undef MPN_REDUCE
    407 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
    408 #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
    409 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
    410 	      INNERLOOP;
    411 	    }
    412 	  else
    413 	    {
    414 #undef MPN_MUL_N
    415 #undef MPN_SQR
    416 #undef MPN_REDUCE
    417 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
    418 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
    419 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
    420 	      INNERLOOP;
    421 	    }
    422 	}
    423       else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
    424 	{
    425 #undef MPN_MUL_N
    426 #undef MPN_SQR
    427 #undef MPN_REDUCE
    428 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
    429 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
    430 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
    431 	  INNERLOOP;
    432 	}
    433       else
    434 	{
    435 #undef MPN_MUL_N
    436 #undef MPN_SQR
    437 #undef MPN_REDUCE
    438 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
    439 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
    440 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
    441 	  INNERLOOP;
    442 	}
    443     }
    444   else
    445     {
    446       if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
    447 	{
    448 	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
    449 	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
    450 	    {
    451 #undef MPN_MUL_N
    452 #undef MPN_SQR
    453 #undef MPN_REDUCE
    454 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
    455 #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
    456 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
    457 	      INNERLOOP;
    458 	    }
    459 	  else
    460 	    {
    461 #undef MPN_MUL_N
    462 #undef MPN_SQR
    463 #undef MPN_REDUCE
    464 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
    465 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
    466 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
    467 	      INNERLOOP;
    468 	    }
    469 	}
    470       else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
    471 	{
    472 #undef MPN_MUL_N
    473 #undef MPN_SQR
    474 #undef MPN_REDUCE
    475 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
    476 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
    477 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
    478 	  INNERLOOP;
    479 	}
    480       else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
    481 	{
    482 #undef MPN_MUL_N
    483 #undef MPN_SQR
    484 #undef MPN_REDUCE
    485 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
    486 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
    487 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
    488 	  INNERLOOP;
    489 	}
    490       else
    491 	{
    492 #undef MPN_MUL_N
    493 #undef MPN_SQR
    494 #undef MPN_REDUCE
    495 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
    496 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
    497 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
    498 	  INNERLOOP;
    499 	}
    500     }
    501 
    502 #else  /* WANT_REDC_2 */
    503 
    504   if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
    505     {
    506       if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
    507 	{
    508 	  if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
    509 	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
    510 	    {
    511 #undef MPN_MUL_N
    512 #undef MPN_SQR
    513 #undef MPN_REDUCE
    514 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
    515 #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
    516 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
    517 	      INNERLOOP;
    518 	    }
    519 	  else
    520 	    {
    521 #undef MPN_MUL_N
    522 #undef MPN_SQR
    523 #undef MPN_REDUCE
    524 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
    525 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
    526 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
    527 	      INNERLOOP;
    528 	    }
    529 	}
    530       else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
    531 	{
    532 	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
    533 	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
    534 	    {
    535 #undef MPN_MUL_N
    536 #undef MPN_SQR
    537 #undef MPN_REDUCE
    538 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
    539 #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
    540 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
    541 	      INNERLOOP;
    542 	    }
    543 	  else
    544 	    {
    545 #undef MPN_MUL_N
    546 #undef MPN_SQR
    547 #undef MPN_REDUCE
    548 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
    549 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
    550 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
    551 	      INNERLOOP;
    552 	    }
    553 	}
    554       else
    555 	{
    556 #undef MPN_MUL_N
    557 #undef MPN_SQR
    558 #undef MPN_REDUCE
    559 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
    560 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
    561 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
    562 	  INNERLOOP;
    563 	}
    564     }
    565   else
    566     {
    567       if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
    568 	{
    569 	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
    570 	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
    571 	    {
    572 #undef MPN_MUL_N
    573 #undef MPN_SQR
    574 #undef MPN_REDUCE
    575 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
    576 #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
    577 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
    578 	      INNERLOOP;
    579 	    }
    580 	  else
    581 	    {
    582 #undef MPN_MUL_N
    583 #undef MPN_SQR
    584 #undef MPN_REDUCE
    585 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
    586 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
    587 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
    588 	      INNERLOOP;
    589 	    }
    590 	}
    591       else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
    592 	{
    593 #undef MPN_MUL_N
    594 #undef MPN_SQR
    595 #undef MPN_REDUCE
    596 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
    597 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
    598 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
    599 	  INNERLOOP;
    600 	}
    601       else
    602 	{
    603 #undef MPN_MUL_N
    604 #undef MPN_SQR
    605 #undef MPN_REDUCE
    606 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
    607 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
    608 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
    609 	  INNERLOOP;
    610 	}
    611     }
    612 #endif  /* WANT_REDC_2 */
    613 
    614  done:
    615 
    616   MPN_COPY (tp, rp, n);
    617   MPN_ZERO (tp + n, n);
    618 
    619 #if WANT_REDC_2
    620   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
    621     MPN_REDC_1 (rp, tp, mp, n, mip[0]);
    622   else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
    623     MPN_REDC_2 (rp, tp, mp, n, mip);
    624 #else
    625   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
    626     MPN_REDC_1 (rp, tp, mp, n, mip[0]);
    627 #endif
    628   else
    629     mpn_redc_n (rp, tp, mp, n, mip);
    630 
    631   if (mpn_cmp (rp, mp, n) >= 0)
    632     mpn_sub_n (rp, rp, mp, n);
    633 
    634   TMP_FREE;
    635 }
    636