Home | History | Annotate | Line # | Download | only in generic
      1 /* mulmod_bnm1.c -- multiplication mod B^n-1.
      2 
      3    Contributed to the GNU project by Niels Mller, Torbjorn Granlund and
      4    Marco Bodrato.
      5 
      6    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
      7    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
      8    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
      9 
     10 Copyright 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
     11 
     12 This file is part of the GNU MP Library.
     13 
     14 The GNU MP Library is free software; you can redistribute it and/or modify
     15 it under the terms of either:
     16 
     17   * the GNU Lesser General Public License as published by the Free
     18     Software Foundation; either version 3 of the License, or (at your
     19     option) any later version.
     20 
     21 or
     22 
     23   * the GNU General Public License as published by the Free Software
     24     Foundation; either version 2 of the License, or (at your option) any
     25     later version.
     26 
     27 or both in parallel, as here.
     28 
     29 The GNU MP Library is distributed in the hope that it will be useful, but
     30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     31 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     32 for more details.
     33 
     34 You should have received copies of the GNU General Public License and the
     35 GNU Lesser General Public License along with the GNU MP Library.  If not,
     36 see https://www.gnu.org/licenses/.  */
     37 
     38 
     39 #include "gmp-impl.h"
     40 #include "longlong.h"
     41 
     42 /* Inputs are {ap,rn} and {bp,rn}; output is {rp,rn}, computation is
     43    mod B^rn - 1, and values are semi-normalised; zero is represented
     44    as either 0 or B^n - 1.  Needs a scratch of 2rn limbs at tp.
     45    tp==rp is allowed. */
     46 void
     47 mpn_bc_mulmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
     48 		    mp_ptr tp)
     49 {
     50   mp_limb_t cy;
     51 
     52   ASSERT (0 < rn);
     53 
     54   mpn_mul_n (tp, ap, bp, rn);
     55   cy = mpn_add_n (rp, tp, tp + rn, rn);
     56   /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
     57    * be no overflow when adding in the carry. */
     58   MPN_INCR_U (rp, rn, cy);
     59 }
     60 
     61 
     62 /* Inputs are {ap,rn+1} and {bp,rn+1}; output is {rp,rn+1}, in
     63    semi-normalised representation, computation is mod B^rn + 1. Needs
     64    a scratch area of 2rn + 2 limbs at tp; tp == rp is allowed.
     65    Output is normalised. */
     66 static void
     67 mpn_bc_mulmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
     68 		    mp_ptr tp)
     69 {
     70   mp_limb_t cy;
     71 
     72   ASSERT (0 < rn);
     73 
     74   mpn_mul_n (tp, ap, bp, rn + 1);
     75   ASSERT (tp[2*rn+1] == 0);
     76   ASSERT (tp[2*rn] < GMP_NUMB_MAX);
     77   cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn);
     78   rp[rn] = 0;
     79   MPN_INCR_U (rp, rn+1, cy);
     80 }
     81 
     82 
     83 /* Computes {rp,MIN(rn,an+bn)} <- {ap,an}*{bp,bn} Mod(B^rn-1)
     84  *
     85  * The result is expected to be ZERO if and only if one of the operand
     86  * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
     87  * B^rn-1. This should not be a problem if mulmod_bnm1 is used to
     88  * combine results and obtain a natural number when one knows in
     89  * advance that the final value is less than (B^rn-1).
     90  * Moreover it should not be a problem if mulmod_bnm1 is used to
     91  * compute the full product with an+bn <= rn, because this condition
     92  * implies (B^an-1)(B^bn-1) < (B^rn-1) .
     93  *
     94  * Requires 0 < bn <= an <= rn and an + bn > rn/2
     95  * Scratch need: rn + (need for recursive call OR rn + 4). This gives
     96  *
     97  * S(n) <= rn + MAX (rn + 4, S(n/2)) <= 2rn + 4
     98  */
     99 void
    100 mpn_mulmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr tp)
    101 {
    102   ASSERT (0 < bn);
    103   ASSERT (bn <= an);
    104   ASSERT (an <= rn);
    105 
    106   if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, MULMOD_BNM1_THRESHOLD))
    107     {
    108       if (UNLIKELY (bn < rn))
    109 	{
    110 	  if (UNLIKELY (an + bn <= rn))
    111 	    {
    112 	      mpn_mul (rp, ap, an, bp, bn);
    113 	    }
    114 	  else
    115 	    {
    116 	      mp_limb_t cy;
    117 	      mpn_mul (tp, ap, an, bp, bn);
    118 	      cy = mpn_add (rp, tp, rn, tp + rn, an + bn - rn);
    119 	      MPN_INCR_U (rp, rn, cy);
    120 	    }
    121 	}
    122       else
    123 	mpn_bc_mulmod_bnm1 (rp, ap, bp, rn, tp);
    124     }
    125   else
    126     {
    127       mp_size_t n;
    128       mp_limb_t cy;
    129       mp_limb_t hi;
    130 
    131       n = rn >> 1;
    132 
    133       /* We need at least an + bn >= n, to be able to fit one of the
    134 	 recursive products at rp. Requiring strict inequality makes
    135 	 the code slightly simpler. If desired, we could avoid this
    136 	 restriction by initially halving rn as long as rn is even and
    137 	 an + bn <= rn/2. */
    138 
    139       ASSERT (an + bn > n);
    140 
    141       /* Compute xm = a*b mod (B^n - 1), xp = a*b mod (B^n + 1)
    142 	 and crt together as
    143 
    144 	 x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
    145       */
    146 
    147 #define a0 ap
    148 #define a1 (ap + n)
    149 #define b0 bp
    150 #define b1 (bp + n)
    151 
    152 #define xp  tp	/* 2n + 2 */
    153       /* am1  maybe in {xp, n} */
    154       /* bm1  maybe in {xp + n, n} */
    155 #define sp1 (tp + 2*n + 2)
    156       /* ap1  maybe in {sp1, n + 1} */
    157       /* bp1  maybe in {sp1 + n + 1, n + 1} */
    158 
    159       {
    160 	mp_srcptr am1, bm1;
    161 	mp_size_t anm, bnm;
    162 	mp_ptr so;
    163 
    164 	bm1 = b0;
    165 	bnm = bn;
    166 	if (LIKELY (an > n))
    167 	  {
    168 	    am1 = xp;
    169 	    cy = mpn_add (xp, a0, n, a1, an - n);
    170 	    MPN_INCR_U (xp, n, cy);
    171 	    anm = n;
    172 	    so = xp + n;
    173 	    if (LIKELY (bn > n))
    174 	      {
    175 		bm1 = so;
    176 		cy = mpn_add (so, b0, n, b1, bn - n);
    177 		MPN_INCR_U (so, n, cy);
    178 		bnm = n;
    179 		so += n;
    180 	      }
    181 	  }
    182 	else
    183 	  {
    184 	    so = xp;
    185 	    am1 = a0;
    186 	    anm = an;
    187 	  }
    188 
    189 	mpn_mulmod_bnm1 (rp, n, am1, anm, bm1, bnm, so);
    190       }
    191 
    192       {
    193 	int       k;
    194 	mp_srcptr ap1, bp1;
    195 	mp_size_t anp, bnp;
    196 
    197 	bp1 = b0;
    198 	bnp = bn;
    199 	if (LIKELY (an > n)) {
    200 	  ap1 = sp1;
    201 	  cy = mpn_sub (sp1, a0, n, a1, an - n);
    202 	  sp1[n] = 0;
    203 	  MPN_INCR_U (sp1, n + 1, cy);
    204 	  anp = n + ap1[n];
    205 	  if (LIKELY (bn > n)) {
    206 	    bp1 = sp1 + n + 1;
    207 	    cy = mpn_sub (sp1 + n + 1, b0, n, b1, bn - n);
    208 	    sp1[2*n+1] = 0;
    209 	    MPN_INCR_U (sp1 + n + 1, n + 1, cy);
    210 	    bnp = n + bp1[n];
    211 	  }
    212 	} else {
    213 	  ap1 = a0;
    214 	  anp = an;
    215 	}
    216 
    217 	if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
    218 	  k=0;
    219 	else
    220 	  {
    221 	    int mask;
    222 	    k = mpn_fft_best_k (n, 0);
    223 	    mask = (1<<k) - 1;
    224 	    while (n & mask) {k--; mask >>=1;};
    225 	  }
    226 	if (k >= FFT_FIRST_K)
    227 	  xp[n] = mpn_mul_fft (xp, n, ap1, anp, bp1, bnp, k);
    228 	else if (UNLIKELY (bp1 == b0))
    229 	  {
    230 	    ASSERT (anp + bnp <= 2*n+1);
    231 	    ASSERT (anp + bnp > n);
    232 	    ASSERT (anp >= bnp);
    233 	    mpn_mul (xp, ap1, anp, bp1, bnp);
    234 	    anp = anp + bnp - n;
    235 	    ASSERT (anp <= n || xp[2*n]==0);
    236 	    anp-= anp > n;
    237 	    cy = mpn_sub (xp, xp, n, xp + n, anp);
    238 	    xp[n] = 0;
    239 	    MPN_INCR_U (xp, n+1, cy);
    240 	  }
    241 	else
    242 	  mpn_bc_mulmod_bnp1 (xp, ap1, bp1, n, xp);
    243       }
    244 
    245       /* Here the CRT recomposition begins.
    246 
    247 	 xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
    248 	 Division by 2 is a bitwise rotation.
    249 
    250 	 Assumes xp normalised mod (B^n+1).
    251 
    252 	 The residue class [0] is represented by [B^n-1]; except when
    253 	 both input are ZERO.
    254       */
    255 
    256 #if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
    257 #if HAVE_NATIVE_mpn_rsh1add_nc
    258       cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
    259       hi = cy << (GMP_NUMB_BITS - 1);
    260       cy = 0;
    261       /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
    262 	 overflows, i.e. a further increment will not overflow again. */
    263 #else /* ! _nc */
    264       cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
    265       hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
    266       cy >>= 1;
    267       /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
    268 	 the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
    269 #endif
    270 #if GMP_NAIL_BITS == 0
    271       add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi);
    272 #else
    273       cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
    274       rp[n-1] ^= hi;
    275 #endif
    276 #else /* ! HAVE_NATIVE_mpn_rsh1add_n */
    277 #if HAVE_NATIVE_mpn_add_nc
    278       cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
    279 #else /* ! _nc */
    280       cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
    281 #endif
    282       cy += (rp[0]&1);
    283       mpn_rshift(rp, rp, n, 1);
    284       ASSERT (cy <= 2);
    285       hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
    286       cy >>= 1;
    287       /* We can have cy != 0 only if hi = 0... */
    288       ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
    289       rp[n-1] |= hi;
    290       /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
    291 #endif
    292       ASSERT (cy <= 1);
    293       /* Next increment can not overflow, read the previous comments about cy. */
    294       ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
    295       MPN_INCR_U(rp, n, cy);
    296 
    297       /* Compute the highest half:
    298 	 ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
    299        */
    300       if (UNLIKELY (an + bn < rn))
    301 	{
    302 	  /* Note that in this case, the only way the result can equal
    303 	     zero mod B^{rn} - 1 is if one of the inputs is zero, and
    304 	     then the output of both the recursive calls and this CRT
    305 	     reconstruction is zero, not B^{rn} - 1. Which is good,
    306 	     since the latter representation doesn't fit in the output
    307 	     area.*/
    308 	  cy = mpn_sub_n (rp + n, rp, xp, an + bn - n);
    309 
    310 	  /* FIXME: This subtraction of the high parts is not really
    311 	     necessary, we do it to get the carry out, and for sanity
    312 	     checking. */
    313 	  cy = xp[n] + mpn_sub_nc (xp + an + bn - n, rp + an + bn - n,
    314 				   xp + an + bn - n, rn - (an + bn), cy);
    315 	  ASSERT (an + bn == rn - 1 ||
    316 		  mpn_zero_p (xp + an + bn - n + 1, rn - 1 - (an + bn)));
    317 	  cy = mpn_sub_1 (rp, rp, an + bn, cy);
    318 	  ASSERT (cy == (xp + an + bn - n)[0]);
    319 	}
    320       else
    321 	{
    322 	  cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
    323 	  /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
    324 	     DECR will affect _at most_ the lowest n limbs. */
    325 	  MPN_DECR_U (rp, 2*n, cy);
    326 	}
    327 #undef a0
    328 #undef a1
    329 #undef b0
    330 #undef b1
    331 #undef xp
    332 #undef sp1
    333     }
    334 }
    335 
    336 mp_size_t
    337 mpn_mulmod_bnm1_next_size (mp_size_t n)
    338 {
    339   mp_size_t nh;
    340 
    341   if (BELOW_THRESHOLD (n,     MULMOD_BNM1_THRESHOLD))
    342     return n;
    343   if (BELOW_THRESHOLD (n, 4 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
    344     return (n + (2-1)) & (-2);
    345   if (BELOW_THRESHOLD (n, 8 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
    346     return (n + (4-1)) & (-4);
    347 
    348   nh = (n + 1) >> 1;
    349 
    350   if (BELOW_THRESHOLD (nh, MUL_FFT_MODF_THRESHOLD))
    351     return (n + (8-1)) & (-8);
    352 
    353   return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 0));
    354 }
    355