1 /* Reference mpn functions, designed to be simple, portable and independent 2 of the normal gmp code. Speed isn't a consideration. 3 4 Copyright 1996-2009, 2011-2014 Free Software Foundation, Inc. 5 6 This file is part of the GNU MP Library test suite. 7 8 The GNU MP Library test suite is free software; you can redistribute it 9 and/or modify it under the terms of the GNU General Public License as 10 published by the Free Software Foundation; either version 3 of the License, 11 or (at your option) any later version. 12 13 The GNU MP Library test suite is distributed in the hope that it will be 14 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 16 Public License for more details. 17 18 You should have received a copy of the GNU General Public License along with 19 the GNU MP Library test suite. If not, see https://www.gnu.org/licenses/. */ 20 21 22 /* Most routines have assertions representing what the mpn routines are 23 supposed to accept. Many of these reference routines do sensible things 24 outside these ranges (eg. for size==0), but the assertions are present to 25 pick up bad parameters passed here that are about to be passed the same 26 to a real mpn routine being compared. */ 27 28 /* always do assertion checking */ 29 #define WANT_ASSERT 1 30 31 #include <stdio.h> /* for NULL */ 32 #include <stdlib.h> /* for malloc */ 33 34 #include "gmp-impl.h" 35 #include "longlong.h" 36 37 #include "tests.h" 38 39 40 41 /* Return non-zero if regions {xp,xsize} and {yp,ysize} overlap, with sizes 42 in bytes. */ 43 int 44 byte_overlap_p (const void *v_xp, mp_size_t xsize, 45 const void *v_yp, mp_size_t ysize) 46 { 47 const char *xp = (const char *) v_xp; 48 const char *yp = (const char *) v_yp; 49 50 ASSERT (xsize >= 0); 51 ASSERT (ysize >= 0); 52 53 /* no wraparounds */ 54 ASSERT (xp+xsize >= xp); 55 ASSERT (yp+ysize >= yp); 56 57 if (xp + xsize <= yp) 58 return 0; 59 60 if (yp + ysize <= xp) 61 return 0; 62 63 return 1; 64 } 65 66 /* Return non-zero if limb regions {xp,xsize} and {yp,ysize} overlap. */ 67 int 68 refmpn_overlap_p (mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize) 69 { 70 return byte_overlap_p (xp, xsize * GMP_LIMB_BYTES, 71 yp, ysize * GMP_LIMB_BYTES); 72 } 73 74 /* Check overlap for a routine defined to work low to high. */ 75 int 76 refmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size) 77 { 78 return (dst <= src || ! refmpn_overlap_p (dst, size, src, size)); 79 } 80 81 /* Check overlap for a routine defined to work high to low. */ 82 int 83 refmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size) 84 { 85 return (dst >= src || ! refmpn_overlap_p (dst, size, src, size)); 86 } 87 88 /* Check overlap for a standard routine requiring equal or separate. */ 89 int 90 refmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size) 91 { 92 return (dst == src || ! refmpn_overlap_p (dst, size, src, size)); 93 } 94 int 95 refmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2, 96 mp_size_t size) 97 { 98 return (refmpn_overlap_fullonly_p (dst, src1, size) 99 && refmpn_overlap_fullonly_p (dst, src2, size)); 100 } 101 102 103 mp_ptr 104 refmpn_malloc_limbs (mp_size_t size) 105 { 106 mp_ptr p; 107 ASSERT (size >= 0); 108 if (size == 0) 109 size = 1; 110 p = (mp_ptr) malloc ((size_t) (size * GMP_LIMB_BYTES)); 111 ASSERT (p != NULL); 112 return p; 113 } 114 115 /* Free limbs allocated by refmpn_malloc_limbs. NOTE: Can't free 116 * memory allocated by refmpn_malloc_limbs_aligned. */ 117 void 118 refmpn_free_limbs (mp_ptr p) 119 { 120 free (p); 121 } 122 123 mp_ptr 124 refmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size) 125 { 126 mp_ptr p; 127 p = refmpn_malloc_limbs (size); 128 refmpn_copyi (p, ptr, size); 129 return p; 130 } 131 132 /* malloc n limbs on a multiple of m bytes boundary */ 133 mp_ptr 134 refmpn_malloc_limbs_aligned (mp_size_t n, size_t m) 135 { 136 return (mp_ptr) align_pointer (refmpn_malloc_limbs (n + m-1), m); 137 } 138 139 140 void 141 refmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value) 142 { 143 mp_size_t i; 144 ASSERT (size >= 0); 145 for (i = 0; i < size; i++) 146 ptr[i] = value; 147 } 148 149 void 150 refmpn_zero (mp_ptr ptr, mp_size_t size) 151 { 152 refmpn_fill (ptr, size, CNST_LIMB(0)); 153 } 154 155 void 156 refmpn_zero_extend (mp_ptr ptr, mp_size_t oldsize, mp_size_t newsize) 157 { 158 ASSERT (newsize >= oldsize); 159 refmpn_zero (ptr+oldsize, newsize-oldsize); 160 } 161 162 int 163 refmpn_zero_p (mp_srcptr ptr, mp_size_t size) 164 { 165 mp_size_t i; 166 for (i = 0; i < size; i++) 167 if (ptr[i] != 0) 168 return 0; 169 return 1; 170 } 171 172 mp_size_t 173 refmpn_normalize (mp_srcptr ptr, mp_size_t size) 174 { 175 ASSERT (size >= 0); 176 while (size > 0 && ptr[size-1] == 0) 177 size--; 178 return size; 179 } 180 181 /* the highest one bit in x */ 182 mp_limb_t 183 refmpn_msbone (mp_limb_t x) 184 { 185 mp_limb_t n = (mp_limb_t) 1 << (GMP_LIMB_BITS-1); 186 187 while (n != 0) 188 { 189 if (x & n) 190 break; 191 n >>= 1; 192 } 193 return n; 194 } 195 196 /* a mask of the highest one bit plus and all bits below */ 197 mp_limb_t 198 refmpn_msbone_mask (mp_limb_t x) 199 { 200 if (x == 0) 201 return 0; 202 203 return (refmpn_msbone (x) << 1) - 1; 204 } 205 206 /* How many digits in the given base will fit in a limb. 207 Notice that the product b is allowed to be equal to the limit 208 2^GMP_NUMB_BITS, this ensures the result for base==2 will be 209 GMP_NUMB_BITS (and similarly other powers of 2). */ 210 int 211 refmpn_chars_per_limb (int base) 212 { 213 mp_limb_t limit[2], b[2]; 214 int chars_per_limb; 215 216 ASSERT (base >= 2); 217 218 limit[0] = 0; /* limit = 2^GMP_NUMB_BITS */ 219 limit[1] = 1; 220 b[0] = 1; /* b = 1 */ 221 b[1] = 0; 222 223 chars_per_limb = 0; 224 for (;;) 225 { 226 if (refmpn_mul_1 (b, b, (mp_size_t) 2, (mp_limb_t) base)) 227 break; 228 if (refmpn_cmp (b, limit, (mp_size_t) 2) > 0) 229 break; 230 chars_per_limb++; 231 } 232 return chars_per_limb; 233 } 234 235 /* The biggest value base**n which fits in GMP_NUMB_BITS. */ 236 mp_limb_t 237 refmpn_big_base (int base) 238 { 239 int chars_per_limb = refmpn_chars_per_limb (base); 240 int i; 241 mp_limb_t bb; 242 243 ASSERT (base >= 2); 244 bb = 1; 245 for (i = 0; i < chars_per_limb; i++) 246 bb *= base; 247 return bb; 248 } 249 250 251 void 252 refmpn_setbit (mp_ptr ptr, unsigned long bit) 253 { 254 ptr[bit/GMP_NUMB_BITS] |= CNST_LIMB(1) << (bit%GMP_NUMB_BITS); 255 } 256 257 void 258 refmpn_clrbit (mp_ptr ptr, unsigned long bit) 259 { 260 ptr[bit/GMP_NUMB_BITS] &= ~ (CNST_LIMB(1) << (bit%GMP_NUMB_BITS)); 261 } 262 263 #define REFMPN_TSTBIT(ptr,bit) \ 264 (((ptr)[(bit)/GMP_NUMB_BITS] & (CNST_LIMB(1) << ((bit)%GMP_NUMB_BITS))) != 0) 265 266 int 267 refmpn_tstbit (mp_srcptr ptr, unsigned long bit) 268 { 269 return REFMPN_TSTBIT (ptr, bit); 270 } 271 272 unsigned long 273 refmpn_scan0 (mp_srcptr ptr, unsigned long bit) 274 { 275 while (REFMPN_TSTBIT (ptr, bit) != 0) 276 bit++; 277 return bit; 278 } 279 280 unsigned long 281 refmpn_scan1 (mp_srcptr ptr, unsigned long bit) 282 { 283 while (REFMPN_TSTBIT (ptr, bit) == 0) 284 bit++; 285 return bit; 286 } 287 288 void 289 refmpn_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size) 290 { 291 ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); 292 refmpn_copyi (rp, sp, size); 293 } 294 295 void 296 refmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size) 297 { 298 mp_size_t i; 299 300 ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size)); 301 ASSERT (size >= 0); 302 303 for (i = 0; i < size; i++) 304 rp[i] = sp[i]; 305 } 306 307 void 308 refmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t size) 309 { 310 mp_size_t i; 311 312 ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size)); 313 ASSERT (size >= 0); 314 315 for (i = size-1; i >= 0; i--) 316 rp[i] = sp[i]; 317 } 318 319 /* Copy {xp,xsize} to {wp,wsize}. If x is shorter, then pad w with low 320 zeros to wsize. If x is longer, then copy just the high wsize limbs. */ 321 void 322 refmpn_copy_extend (mp_ptr wp, mp_size_t wsize, mp_srcptr xp, mp_size_t xsize) 323 { 324 ASSERT (wsize >= 0); 325 ASSERT (xsize >= 0); 326 327 /* high part of x if x bigger than w */ 328 if (xsize > wsize) 329 { 330 xp += xsize - wsize; 331 xsize = wsize; 332 } 333 334 refmpn_copy (wp + wsize-xsize, xp, xsize); 335 refmpn_zero (wp, wsize-xsize); 336 } 337 338 int 339 refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size) 340 { 341 mp_size_t i; 342 343 ASSERT (size >= 1); 344 ASSERT_MPN (xp, size); 345 ASSERT_MPN (yp, size); 346 347 for (i = size-1; i >= 0; i--) 348 { 349 if (xp[i] > yp[i]) return 1; 350 if (xp[i] < yp[i]) return -1; 351 } 352 return 0; 353 } 354 355 int 356 refmpn_cmp_allowzero (mp_srcptr xp, mp_srcptr yp, mp_size_t size) 357 { 358 if (size == 0) 359 return 0; 360 else 361 return refmpn_cmp (xp, yp, size); 362 } 363 364 int 365 refmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize, 366 mp_srcptr yp, mp_size_t ysize) 367 { 368 int opp, cmp; 369 370 ASSERT_MPN (xp, xsize); 371 ASSERT_MPN (yp, ysize); 372 373 opp = (xsize < ysize); 374 if (opp) 375 MPN_SRCPTR_SWAP (xp,xsize, yp,ysize); 376 377 if (! refmpn_zero_p (xp+ysize, xsize-ysize)) 378 cmp = 1; 379 else 380 cmp = refmpn_cmp (xp, yp, ysize); 381 382 return (opp ? -cmp : cmp); 383 } 384 385 int 386 refmpn_equal_anynail (mp_srcptr xp, mp_srcptr yp, mp_size_t size) 387 { 388 mp_size_t i; 389 ASSERT (size >= 0); 390 391 for (i = 0; i < size; i++) 392 if (xp[i] != yp[i]) 393 return 0; 394 return 1; 395 } 396 397 398 #define LOGOPS(operation) \ 399 { \ 400 mp_size_t i; \ 401 \ 402 ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size)); \ 403 ASSERT (size >= 1); \ 404 ASSERT_MPN (s1p, size); \ 405 ASSERT_MPN (s2p, size); \ 406 \ 407 for (i = 0; i < size; i++) \ 408 rp[i] = operation; \ 409 } 410 411 void 412 refmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 413 { 414 LOGOPS (s1p[i] & s2p[i]); 415 } 416 void 417 refmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 418 { 419 LOGOPS (s1p[i] & ~s2p[i]); 420 } 421 void 422 refmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 423 { 424 LOGOPS ((s1p[i] & s2p[i]) ^ GMP_NUMB_MASK); 425 } 426 void 427 refmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 428 { 429 LOGOPS (s1p[i] | s2p[i]); 430 } 431 void 432 refmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 433 { 434 LOGOPS (s1p[i] | (s2p[i] ^ GMP_NUMB_MASK)); 435 } 436 void 437 refmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 438 { 439 LOGOPS ((s1p[i] | s2p[i]) ^ GMP_NUMB_MASK); 440 } 441 void 442 refmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 443 { 444 LOGOPS (s1p[i] ^ s2p[i]); 445 } 446 void 447 refmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 448 { 449 LOGOPS ((s1p[i] ^ s2p[i]) ^ GMP_NUMB_MASK); 450 } 451 452 453 /* set *dh,*dl to mh:ml - sh:sl, in full limbs */ 454 void 455 refmpn_sub_ddmmss (mp_limb_t *dh, mp_limb_t *dl, 456 mp_limb_t mh, mp_limb_t ml, mp_limb_t sh, mp_limb_t sl) 457 { 458 *dl = ml - sl; 459 *dh = mh - sh - (ml < sl); 460 } 461 462 463 /* set *w to x+y, return 0 or 1 carry */ 464 mp_limb_t 465 ref_addc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y) 466 { 467 mp_limb_t sum, cy; 468 469 ASSERT_LIMB (x); 470 ASSERT_LIMB (y); 471 472 sum = x + y; 473 #if GMP_NAIL_BITS == 0 474 *w = sum; 475 cy = (sum < x); 476 #else 477 *w = sum & GMP_NUMB_MASK; 478 cy = (sum >> GMP_NUMB_BITS); 479 #endif 480 return cy; 481 } 482 483 /* set *w to x-y, return 0 or 1 borrow */ 484 mp_limb_t 485 ref_subc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y) 486 { 487 mp_limb_t diff, cy; 488 489 ASSERT_LIMB (x); 490 ASSERT_LIMB (y); 491 492 diff = x - y; 493 #if GMP_NAIL_BITS == 0 494 *w = diff; 495 cy = (diff > x); 496 #else 497 *w = diff & GMP_NUMB_MASK; 498 cy = (diff >> GMP_NUMB_BITS) & 1; 499 #endif 500 return cy; 501 } 502 503 /* set *w to x+y+c (where c is 0 or 1), return 0 or 1 carry */ 504 mp_limb_t 505 adc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c) 506 { 507 mp_limb_t r; 508 509 ASSERT_LIMB (x); 510 ASSERT_LIMB (y); 511 ASSERT (c == 0 || c == 1); 512 513 r = ref_addc_limb (w, x, y); 514 return r + ref_addc_limb (w, *w, c); 515 } 516 517 /* set *w to x-y-c (where c is 0 or 1), return 0 or 1 borrow */ 518 mp_limb_t 519 sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c) 520 { 521 mp_limb_t r; 522 523 ASSERT_LIMB (x); 524 ASSERT_LIMB (y); 525 ASSERT (c == 0 || c == 1); 526 527 r = ref_subc_limb (w, x, y); 528 return r + ref_subc_limb (w, *w, c); 529 } 530 531 532 #define AORS_1(operation) \ 533 { \ 534 mp_size_t i; \ 535 \ 536 ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); \ 537 ASSERT (size >= 1); \ 538 ASSERT_MPN (sp, size); \ 539 ASSERT_LIMB (n); \ 540 \ 541 for (i = 0; i < size; i++) \ 542 n = operation (&rp[i], sp[i], n); \ 543 return n; \ 544 } 545 546 mp_limb_t 547 refmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n) 548 { 549 AORS_1 (ref_addc_limb); 550 } 551 mp_limb_t 552 refmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n) 553 { 554 AORS_1 (ref_subc_limb); 555 } 556 557 #define AORS_NC(operation) \ 558 { \ 559 mp_size_t i; \ 560 \ 561 ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size)); \ 562 ASSERT (carry == 0 || carry == 1); \ 563 ASSERT (size >= 1); \ 564 ASSERT_MPN (s1p, size); \ 565 ASSERT_MPN (s2p, size); \ 566 \ 567 for (i = 0; i < size; i++) \ 568 carry = operation (&rp[i], s1p[i], s2p[i], carry); \ 569 return carry; \ 570 } 571 572 mp_limb_t 573 refmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, 574 mp_limb_t carry) 575 { 576 AORS_NC (adc); 577 } 578 mp_limb_t 579 refmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, 580 mp_limb_t carry) 581 { 582 AORS_NC (sbb); 583 } 584 585 586 mp_limb_t 587 refmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 588 { 589 return refmpn_add_nc (rp, s1p, s2p, size, CNST_LIMB(0)); 590 } 591 mp_limb_t 592 refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 593 { 594 return refmpn_sub_nc (rp, s1p, s2p, size, CNST_LIMB(0)); 595 } 596 597 mp_limb_t 598 refmpn_cnd_add_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 599 { 600 if (cnd != 0) 601 return refmpn_add_n (rp, s1p, s2p, size); 602 else 603 { 604 refmpn_copyi (rp, s1p, size); 605 return 0; 606 } 607 } 608 mp_limb_t 609 refmpn_cnd_sub_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 610 { 611 if (cnd != 0) 612 return refmpn_sub_n (rp, s1p, s2p, size); 613 else 614 { 615 refmpn_copyi (rp, s1p, size); 616 return 0; 617 } 618 } 619 620 621 #define AORS_ERR1_N(operation) \ 622 { \ 623 mp_size_t i; \ 624 mp_limb_t carry2; \ 625 \ 626 ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size)); \ 627 ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size)); \ 628 ASSERT (! refmpn_overlap_p (rp, size, yp, size)); \ 629 ASSERT (! refmpn_overlap_p (ep, 2, s1p, size)); \ 630 ASSERT (! refmpn_overlap_p (ep, 2, s2p, size)); \ 631 ASSERT (! refmpn_overlap_p (ep, 2, yp, size)); \ 632 ASSERT (! refmpn_overlap_p (ep, 2, rp, size)); \ 633 \ 634 ASSERT (carry == 0 || carry == 1); \ 635 ASSERT (size >= 1); \ 636 ASSERT_MPN (s1p, size); \ 637 ASSERT_MPN (s2p, size); \ 638 ASSERT_MPN (yp, size); \ 639 \ 640 ep[0] = ep[1] = CNST_LIMB(0); \ 641 \ 642 for (i = 0; i < size; i++) \ 643 { \ 644 carry = operation (&rp[i], s1p[i], s2p[i], carry); \ 645 if (carry == 1) \ 646 { \ 647 carry2 = ref_addc_limb (&ep[0], ep[0], yp[size - 1 - i]); \ 648 carry2 = ref_addc_limb (&ep[1], ep[1], carry2); \ 649 ASSERT (carry2 == 0); \ 650 } \ 651 } \ 652 return carry; \ 653 } 654 655 mp_limb_t 656 refmpn_add_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, 657 mp_ptr ep, mp_srcptr yp, 658 mp_size_t size, mp_limb_t carry) 659 { 660 AORS_ERR1_N (adc); 661 } 662 mp_limb_t 663 refmpn_sub_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, 664 mp_ptr ep, mp_srcptr yp, 665 mp_size_t size, mp_limb_t carry) 666 { 667 AORS_ERR1_N (sbb); 668 } 669 670 671 #define AORS_ERR2_N(operation) \ 672 { \ 673 mp_size_t i; \ 674 mp_limb_t carry2; \ 675 \ 676 ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size)); \ 677 ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size)); \ 678 ASSERT (! refmpn_overlap_p (rp, size, y1p, size)); \ 679 ASSERT (! refmpn_overlap_p (rp, size, y2p, size)); \ 680 ASSERT (! refmpn_overlap_p (ep, 4, s1p, size)); \ 681 ASSERT (! refmpn_overlap_p (ep, 4, s2p, size)); \ 682 ASSERT (! refmpn_overlap_p (ep, 4, y1p, size)); \ 683 ASSERT (! refmpn_overlap_p (ep, 4, y2p, size)); \ 684 ASSERT (! refmpn_overlap_p (ep, 4, rp, size)); \ 685 \ 686 ASSERT (carry == 0 || carry == 1); \ 687 ASSERT (size >= 1); \ 688 ASSERT_MPN (s1p, size); \ 689 ASSERT_MPN (s2p, size); \ 690 ASSERT_MPN (y1p, size); \ 691 ASSERT_MPN (y2p, size); \ 692 \ 693 ep[0] = ep[1] = CNST_LIMB(0); \ 694 ep[2] = ep[3] = CNST_LIMB(0); \ 695 \ 696 for (i = 0; i < size; i++) \ 697 { \ 698 carry = operation (&rp[i], s1p[i], s2p[i], carry); \ 699 if (carry == 1) \ 700 { \ 701 carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]); \ 702 carry2 = ref_addc_limb (&ep[1], ep[1], carry2); \ 703 ASSERT (carry2 == 0); \ 704 carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]); \ 705 carry2 = ref_addc_limb (&ep[3], ep[3], carry2); \ 706 ASSERT (carry2 == 0); \ 707 } \ 708 } \ 709 return carry; \ 710 } 711 712 mp_limb_t 713 refmpn_add_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, 714 mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, 715 mp_size_t size, mp_limb_t carry) 716 { 717 AORS_ERR2_N (adc); 718 } 719 mp_limb_t 720 refmpn_sub_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, 721 mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, 722 mp_size_t size, mp_limb_t carry) 723 { 724 AORS_ERR2_N (sbb); 725 } 726 727 728 #define AORS_ERR3_N(operation) \ 729 { \ 730 mp_size_t i; \ 731 mp_limb_t carry2; \ 732 \ 733 ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size)); \ 734 ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size)); \ 735 ASSERT (! refmpn_overlap_p (rp, size, y1p, size)); \ 736 ASSERT (! refmpn_overlap_p (rp, size, y2p, size)); \ 737 ASSERT (! refmpn_overlap_p (rp, size, y3p, size)); \ 738 ASSERT (! refmpn_overlap_p (ep, 6, s1p, size)); \ 739 ASSERT (! refmpn_overlap_p (ep, 6, s2p, size)); \ 740 ASSERT (! refmpn_overlap_p (ep, 6, y1p, size)); \ 741 ASSERT (! refmpn_overlap_p (ep, 6, y2p, size)); \ 742 ASSERT (! refmpn_overlap_p (ep, 6, y3p, size)); \ 743 ASSERT (! refmpn_overlap_p (ep, 6, rp, size)); \ 744 \ 745 ASSERT (carry == 0 || carry == 1); \ 746 ASSERT (size >= 1); \ 747 ASSERT_MPN (s1p, size); \ 748 ASSERT_MPN (s2p, size); \ 749 ASSERT_MPN (y1p, size); \ 750 ASSERT_MPN (y2p, size); \ 751 ASSERT_MPN (y3p, size); \ 752 \ 753 ep[0] = ep[1] = CNST_LIMB(0); \ 754 ep[2] = ep[3] = CNST_LIMB(0); \ 755 ep[4] = ep[5] = CNST_LIMB(0); \ 756 \ 757 for (i = 0; i < size; i++) \ 758 { \ 759 carry = operation (&rp[i], s1p[i], s2p[i], carry); \ 760 if (carry == 1) \ 761 { \ 762 carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]); \ 763 carry2 = ref_addc_limb (&ep[1], ep[1], carry2); \ 764 ASSERT (carry2 == 0); \ 765 carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]); \ 766 carry2 = ref_addc_limb (&ep[3], ep[3], carry2); \ 767 ASSERT (carry2 == 0); \ 768 carry2 = ref_addc_limb (&ep[4], ep[4], y3p[size - 1 - i]); \ 769 carry2 = ref_addc_limb (&ep[5], ep[5], carry2); \ 770 ASSERT (carry2 == 0); \ 771 } \ 772 } \ 773 return carry; \ 774 } 775 776 mp_limb_t 777 refmpn_add_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, 778 mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p, 779 mp_size_t size, mp_limb_t carry) 780 { 781 AORS_ERR3_N (adc); 782 } 783 mp_limb_t 784 refmpn_sub_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, 785 mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p, 786 mp_size_t size, mp_limb_t carry) 787 { 788 AORS_ERR3_N (sbb); 789 } 790 791 792 mp_limb_t 793 refmpn_addlsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, 794 mp_size_t n, unsigned int s) 795 { 796 mp_limb_t cy; 797 mp_ptr tp; 798 799 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n)); 800 ASSERT (n >= 1); 801 ASSERT (0 < s && s < GMP_NUMB_BITS); 802 ASSERT_MPN (up, n); 803 ASSERT_MPN (vp, n); 804 805 tp = refmpn_malloc_limbs (n); 806 cy = refmpn_lshift (tp, vp, n, s); 807 cy += refmpn_add_n (rp, up, tp, n); 808 free (tp); 809 return cy; 810 } 811 mp_limb_t 812 refmpn_addlsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 813 { 814 return refmpn_addlsh_n (rp, up, vp, n, 1); 815 } 816 mp_limb_t 817 refmpn_addlsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 818 { 819 return refmpn_addlsh_n (rp, up, vp, n, 2); 820 } 821 mp_limb_t 822 refmpn_addlsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s) 823 { 824 return refmpn_addlsh_n (rp, rp, vp, n, s); 825 } 826 mp_limb_t 827 refmpn_addlsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n) 828 { 829 return refmpn_addlsh_n (rp, rp, vp, n, 1); 830 } 831 mp_limb_t 832 refmpn_addlsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n) 833 { 834 return refmpn_addlsh_n (rp, rp, vp, n, 2); 835 } 836 mp_limb_t 837 refmpn_addlsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s) 838 { 839 return refmpn_addlsh_n (rp, vp, rp, n, s); 840 } 841 mp_limb_t 842 refmpn_addlsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n) 843 { 844 return refmpn_addlsh_n (rp, vp, rp, n, 1); 845 } 846 mp_limb_t 847 refmpn_addlsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n) 848 { 849 return refmpn_addlsh_n (rp, vp, rp, n, 2); 850 } 851 mp_limb_t 852 refmpn_addlsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, 853 mp_size_t n, unsigned int s, mp_limb_t carry) 854 { 855 mp_limb_t cy; 856 857 ASSERT (carry <= (CNST_LIMB(1) << s)); 858 859 cy = refmpn_addlsh_n (rp, up, vp, n, s); 860 cy += refmpn_add_1 (rp, rp, n, carry); 861 return cy; 862 } 863 mp_limb_t 864 refmpn_addlsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry) 865 { 866 return refmpn_addlsh_nc (rp, up, vp, n, 1, carry); 867 } 868 mp_limb_t 869 refmpn_addlsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry) 870 { 871 return refmpn_addlsh_nc (rp, up, vp, n, 2, carry); 872 } 873 874 mp_limb_t 875 refmpn_sublsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, 876 mp_size_t n, unsigned int s) 877 { 878 mp_limb_t cy; 879 mp_ptr tp; 880 881 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n)); 882 ASSERT (n >= 1); 883 ASSERT (0 < s && s < GMP_NUMB_BITS); 884 ASSERT_MPN (up, n); 885 ASSERT_MPN (vp, n); 886 887 tp = refmpn_malloc_limbs (n); 888 cy = mpn_lshift (tp, vp, n, s); 889 cy += mpn_sub_n (rp, up, tp, n); 890 free (tp); 891 return cy; 892 } 893 mp_limb_t 894 refmpn_sublsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 895 { 896 return refmpn_sublsh_n (rp, up, vp, n, 1); 897 } 898 mp_limb_t 899 refmpn_sublsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 900 { 901 return refmpn_sublsh_n (rp, up, vp, n, 2); 902 } 903 mp_limb_t 904 refmpn_sublsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s) 905 { 906 return refmpn_sublsh_n (rp, rp, vp, n, s); 907 } 908 mp_limb_t 909 refmpn_sublsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n) 910 { 911 return refmpn_sublsh_n (rp, rp, vp, n, 1); 912 } 913 mp_limb_t 914 refmpn_sublsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n) 915 { 916 return refmpn_sublsh_n (rp, rp, vp, n, 2); 917 } 918 mp_limb_t 919 refmpn_sublsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s) 920 { 921 return refmpn_sublsh_n (rp, vp, rp, n, s); 922 } 923 mp_limb_t 924 refmpn_sublsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n) 925 { 926 return refmpn_sublsh_n (rp, vp, rp, n, 1); 927 } 928 mp_limb_t 929 refmpn_sublsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n) 930 { 931 return refmpn_sublsh_n (rp, vp, rp, n, 2); 932 } 933 mp_limb_t 934 refmpn_sublsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, 935 mp_size_t n, unsigned int s, mp_limb_t carry) 936 { 937 mp_limb_t cy; 938 939 ASSERT (carry <= (CNST_LIMB(1) << s)); 940 941 cy = refmpn_sublsh_n (rp, up, vp, n, s); 942 cy += refmpn_sub_1 (rp, rp, n, carry); 943 return cy; 944 } 945 mp_limb_t 946 refmpn_sublsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry) 947 { 948 return refmpn_sublsh_nc (rp, up, vp, n, 1, carry); 949 } 950 mp_limb_t 951 refmpn_sublsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry) 952 { 953 return refmpn_sublsh_nc (rp, up, vp, n, 2, carry); 954 } 955 956 mp_limb_signed_t 957 refmpn_rsblsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, 958 mp_size_t n, unsigned int s) 959 { 960 mp_limb_signed_t cy; 961 mp_ptr tp; 962 963 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n)); 964 ASSERT (n >= 1); 965 ASSERT (0 < s && s < GMP_NUMB_BITS); 966 ASSERT_MPN (up, n); 967 ASSERT_MPN (vp, n); 968 969 tp = refmpn_malloc_limbs (n); 970 cy = mpn_lshift (tp, vp, n, s); 971 cy -= mpn_sub_n (rp, tp, up, n); 972 free (tp); 973 return cy; 974 } 975 mp_limb_signed_t 976 refmpn_rsblsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 977 { 978 return refmpn_rsblsh_n (rp, up, vp, n, 1); 979 } 980 mp_limb_signed_t 981 refmpn_rsblsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 982 { 983 return refmpn_rsblsh_n (rp, up, vp, n, 2); 984 } 985 mp_limb_signed_t 986 refmpn_rsblsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, 987 mp_size_t n, unsigned int s, mp_limb_signed_t carry) 988 { 989 mp_limb_signed_t cy; 990 991 ASSERT (carry == -1 || (carry >> s) == 0); 992 993 cy = refmpn_rsblsh_n (rp, up, vp, n, s); 994 if (carry > 0) 995 cy += refmpn_add_1 (rp, rp, n, carry); 996 else 997 cy -= refmpn_sub_1 (rp, rp, n, -carry); 998 return cy; 999 } 1000 mp_limb_signed_t 1001 refmpn_rsblsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry) 1002 { 1003 return refmpn_rsblsh_nc (rp, up, vp, n, 1, carry); 1004 } 1005 mp_limb_signed_t 1006 refmpn_rsblsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry) 1007 { 1008 return refmpn_rsblsh_nc (rp, up, vp, n, 2, carry); 1009 } 1010 1011 mp_limb_t 1012 refmpn_rsh1add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 1013 { 1014 mp_limb_t cya, cys; 1015 1016 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n)); 1017 ASSERT (n >= 1); 1018 ASSERT_MPN (up, n); 1019 ASSERT_MPN (vp, n); 1020 1021 cya = mpn_add_n (rp, up, vp, n); 1022 cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1); 1023 rp[n - 1] |= cya << (GMP_NUMB_BITS - 1); 1024 return cys; 1025 } 1026 mp_limb_t 1027 refmpn_rsh1sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 1028 { 1029 mp_limb_t cya, cys; 1030 1031 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n)); 1032 ASSERT (n >= 1); 1033 ASSERT_MPN (up, n); 1034 ASSERT_MPN (vp, n); 1035 1036 cya = mpn_sub_n (rp, up, vp, n); 1037 cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1); 1038 rp[n - 1] |= cya << (GMP_NUMB_BITS - 1); 1039 return cys; 1040 } 1041 1042 /* Twos complement, return borrow. */ 1043 mp_limb_t 1044 refmpn_neg (mp_ptr dst, mp_srcptr src, mp_size_t size) 1045 { 1046 mp_ptr zeros; 1047 mp_limb_t ret; 1048 1049 ASSERT (size >= 1); 1050 1051 zeros = refmpn_malloc_limbs (size); 1052 refmpn_fill (zeros, size, CNST_LIMB(0)); 1053 ret = refmpn_sub_n (dst, zeros, src, size); 1054 free (zeros); 1055 return ret; 1056 } 1057 1058 1059 #define AORS(aors_n, aors_1) \ 1060 { \ 1061 mp_limb_t c; \ 1062 ASSERT (s1size >= s2size); \ 1063 ASSERT (s2size >= 1); \ 1064 c = aors_n (rp, s1p, s2p, s2size); \ 1065 if (s1size-s2size != 0) \ 1066 c = aors_1 (rp+s2size, s1p+s2size, s1size-s2size, c); \ 1067 return c; \ 1068 } 1069 mp_limb_t 1070 refmpn_add (mp_ptr rp, 1071 mp_srcptr s1p, mp_size_t s1size, 1072 mp_srcptr s2p, mp_size_t s2size) 1073 { 1074 AORS (refmpn_add_n, refmpn_add_1); 1075 } 1076 mp_limb_t 1077 refmpn_sub (mp_ptr rp, 1078 mp_srcptr s1p, mp_size_t s1size, 1079 mp_srcptr s2p, mp_size_t s2size) 1080 { 1081 AORS (refmpn_sub_n, refmpn_sub_1); 1082 } 1083 1084 1085 #define SHIFTHIGH(x) ((x) << GMP_LIMB_BITS/2) 1086 #define SHIFTLOW(x) ((x) >> GMP_LIMB_BITS/2) 1087 1088 #define LOWMASK (((mp_limb_t) 1 << GMP_LIMB_BITS/2)-1) 1089 #define HIGHMASK SHIFTHIGH(LOWMASK) 1090 1091 #define LOWPART(x) ((x) & LOWMASK) 1092 #define HIGHPART(x) SHIFTLOW((x) & HIGHMASK) 1093 1094 /* Set return:*lo to x*y, using full limbs not nails. */ 1095 mp_limb_t 1096 refmpn_umul_ppmm (mp_limb_t *lo, mp_limb_t x, mp_limb_t y) 1097 { 1098 mp_limb_t hi, s; 1099 1100 *lo = LOWPART(x) * LOWPART(y); 1101 hi = HIGHPART(x) * HIGHPART(y); 1102 1103 s = LOWPART(x) * HIGHPART(y); 1104 hi += HIGHPART(s); 1105 s = SHIFTHIGH(LOWPART(s)); 1106 *lo += s; 1107 hi += (*lo < s); 1108 1109 s = HIGHPART(x) * LOWPART(y); 1110 hi += HIGHPART(s); 1111 s = SHIFTHIGH(LOWPART(s)); 1112 *lo += s; 1113 hi += (*lo < s); 1114 1115 return hi; 1116 } 1117 1118 mp_limb_t 1119 refmpn_umul_ppmm_r (mp_limb_t x, mp_limb_t y, mp_limb_t *lo) 1120 { 1121 return refmpn_umul_ppmm (lo, x, y); 1122 } 1123 1124 mp_limb_t 1125 refmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier, 1126 mp_limb_t carry) 1127 { 1128 mp_size_t i; 1129 mp_limb_t hi, lo; 1130 1131 ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size)); 1132 ASSERT (size >= 1); 1133 ASSERT_MPN (sp, size); 1134 ASSERT_LIMB (multiplier); 1135 ASSERT_LIMB (carry); 1136 1137 multiplier <<= GMP_NAIL_BITS; 1138 for (i = 0; i < size; i++) 1139 { 1140 hi = refmpn_umul_ppmm (&lo, sp[i], multiplier); 1141 lo >>= GMP_NAIL_BITS; 1142 ASSERT_NOCARRY (ref_addc_limb (&hi, hi, ref_addc_limb (&lo, lo, carry))); 1143 rp[i] = lo; 1144 carry = hi; 1145 } 1146 return carry; 1147 } 1148 1149 mp_limb_t 1150 refmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier) 1151 { 1152 return refmpn_mul_1c (rp, sp, size, multiplier, CNST_LIMB(0)); 1153 } 1154 1155 1156 mp_limb_t 1157 refmpn_mul_N (mp_ptr dst, mp_srcptr src, mp_size_t size, 1158 mp_srcptr mult, mp_size_t msize) 1159 { 1160 mp_ptr src_copy; 1161 mp_limb_t ret; 1162 mp_size_t i; 1163 1164 ASSERT (refmpn_overlap_fullonly_p (dst, src, size)); 1165 ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize)); 1166 ASSERT (size >= msize); 1167 ASSERT_MPN (mult, msize); 1168 1169 /* in case dst==src */ 1170 src_copy = refmpn_malloc_limbs (size); 1171 refmpn_copyi (src_copy, src, size); 1172 src = src_copy; 1173 1174 dst[size] = refmpn_mul_1 (dst, src, size, mult[0]); 1175 for (i = 1; i < msize-1; i++) 1176 dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]); 1177 ret = refmpn_addmul_1 (dst+i, src, size, mult[i]); 1178 1179 free (src_copy); 1180 return ret; 1181 } 1182 1183 mp_limb_t 1184 refmpn_mul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 1185 { 1186 return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 2); 1187 } 1188 mp_limb_t 1189 refmpn_mul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 1190 { 1191 return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 3); 1192 } 1193 mp_limb_t 1194 refmpn_mul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 1195 { 1196 return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 4); 1197 } 1198 mp_limb_t 1199 refmpn_mul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 1200 { 1201 return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 5); 1202 } 1203 mp_limb_t 1204 refmpn_mul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 1205 { 1206 return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 6); 1207 } 1208 1209 #define AORSMUL_1C(operation_n) \ 1210 { \ 1211 mp_ptr p; \ 1212 mp_limb_t ret; \ 1213 \ 1214 ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); \ 1215 \ 1216 p = refmpn_malloc_limbs (size); \ 1217 ret = refmpn_mul_1c (p, sp, size, multiplier, carry); \ 1218 ret += operation_n (rp, rp, p, size); \ 1219 \ 1220 free (p); \ 1221 return ret; \ 1222 } 1223 1224 mp_limb_t 1225 refmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, 1226 mp_limb_t multiplier, mp_limb_t carry) 1227 { 1228 AORSMUL_1C (refmpn_add_n); 1229 } 1230 mp_limb_t 1231 refmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, 1232 mp_limb_t multiplier, mp_limb_t carry) 1233 { 1234 AORSMUL_1C (refmpn_sub_n); 1235 } 1236 1237 1238 mp_limb_t 1239 refmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier) 1240 { 1241 return refmpn_addmul_1c (rp, sp, size, multiplier, CNST_LIMB(0)); 1242 } 1243 mp_limb_t 1244 refmpn_submul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier) 1245 { 1246 return refmpn_submul_1c (rp, sp, size, multiplier, CNST_LIMB(0)); 1247 } 1248 1249 1250 mp_limb_t 1251 refmpn_addmul_N (mp_ptr dst, mp_srcptr src, mp_size_t size, 1252 mp_srcptr mult, mp_size_t msize) 1253 { 1254 mp_ptr src_copy; 1255 mp_limb_t ret; 1256 mp_size_t i; 1257 1258 ASSERT (dst == src || ! refmpn_overlap_p (dst, size+msize-1, src, size)); 1259 ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize)); 1260 ASSERT (size >= msize); 1261 ASSERT_MPN (mult, msize); 1262 1263 /* in case dst==src */ 1264 src_copy = refmpn_malloc_limbs (size); 1265 refmpn_copyi (src_copy, src, size); 1266 src = src_copy; 1267 1268 for (i = 0; i < msize-1; i++) 1269 dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]); 1270 ret = refmpn_addmul_1 (dst+i, src, size, mult[i]); 1271 1272 free (src_copy); 1273 return ret; 1274 } 1275 1276 mp_limb_t 1277 refmpn_addmul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 1278 { 1279 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 2); 1280 } 1281 mp_limb_t 1282 refmpn_addmul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 1283 { 1284 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 3); 1285 } 1286 mp_limb_t 1287 refmpn_addmul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 1288 { 1289 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 4); 1290 } 1291 mp_limb_t 1292 refmpn_addmul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 1293 { 1294 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 5); 1295 } 1296 mp_limb_t 1297 refmpn_addmul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 1298 { 1299 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 6); 1300 } 1301 mp_limb_t 1302 refmpn_addmul_7 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 1303 { 1304 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 7); 1305 } 1306 mp_limb_t 1307 refmpn_addmul_8 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 1308 { 1309 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 8); 1310 } 1311 1312 mp_limb_t 1313 refmpn_add_n_sub_nc (mp_ptr r1p, mp_ptr r2p, 1314 mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, 1315 mp_limb_t carry) 1316 { 1317 mp_ptr p; 1318 mp_limb_t acy, scy; 1319 1320 /* Destinations can't overlap. */ 1321 ASSERT (! refmpn_overlap_p (r1p, size, r2p, size)); 1322 ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size)); 1323 ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size)); 1324 ASSERT (size >= 1); 1325 1326 /* in case r1p==s1p or r1p==s2p */ 1327 p = refmpn_malloc_limbs (size); 1328 1329 acy = refmpn_add_nc (p, s1p, s2p, size, carry >> 1); 1330 scy = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1); 1331 refmpn_copyi (r1p, p, size); 1332 1333 free (p); 1334 return 2 * acy + scy; 1335 } 1336 1337 mp_limb_t 1338 refmpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p, 1339 mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 1340 { 1341 return refmpn_add_n_sub_nc (r1p, r2p, s1p, s2p, size, CNST_LIMB(0)); 1342 } 1343 1344 1345 /* Right shift hi,lo and return the low limb of the result. 1346 Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */ 1347 mp_limb_t 1348 rshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift) 1349 { 1350 ASSERT (shift < GMP_NUMB_BITS); 1351 if (shift == 0) 1352 return lo; 1353 else 1354 return ((hi << (GMP_NUMB_BITS-shift)) | (lo >> shift)) & GMP_NUMB_MASK; 1355 } 1356 1357 /* Left shift hi,lo and return the high limb of the result. 1358 Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */ 1359 mp_limb_t 1360 lshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift) 1361 { 1362 ASSERT (shift < GMP_NUMB_BITS); 1363 if (shift == 0) 1364 return hi; 1365 else 1366 return ((hi << shift) | (lo >> (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK; 1367 } 1368 1369 1370 mp_limb_t 1371 refmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) 1372 { 1373 mp_limb_t ret; 1374 mp_size_t i; 1375 1376 ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size)); 1377 ASSERT (size >= 1); 1378 ASSERT (shift >= 1 && shift < GMP_NUMB_BITS); 1379 ASSERT_MPN (sp, size); 1380 1381 ret = rshift_make (sp[0], CNST_LIMB(0), shift); 1382 1383 for (i = 0; i < size-1; i++) 1384 rp[i] = rshift_make (sp[i+1], sp[i], shift); 1385 1386 rp[i] = rshift_make (CNST_LIMB(0), sp[i], shift); 1387 return ret; 1388 } 1389 1390 mp_limb_t 1391 refmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) 1392 { 1393 mp_limb_t ret; 1394 mp_size_t i; 1395 1396 ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size)); 1397 ASSERT (size >= 1); 1398 ASSERT (shift >= 1 && shift < GMP_NUMB_BITS); 1399 ASSERT_MPN (sp, size); 1400 1401 ret = lshift_make (CNST_LIMB(0), sp[size-1], shift); 1402 1403 for (i = size-2; i >= 0; i--) 1404 rp[i+1] = lshift_make (sp[i+1], sp[i], shift); 1405 1406 rp[i+1] = lshift_make (sp[i+1], CNST_LIMB(0), shift); 1407 return ret; 1408 } 1409 1410 void 1411 refmpn_com (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1412 { 1413 mp_size_t i; 1414 1415 /* We work downwards since mpn_lshiftc needs that. */ 1416 ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size)); 1417 1418 for (i = size - 1; i >= 0; i--) 1419 rp[i] = (~sp[i]) & GMP_NUMB_MASK; 1420 } 1421 1422 mp_limb_t 1423 refmpn_lshiftc (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) 1424 { 1425 mp_limb_t res; 1426 1427 /* No asserts here, refmpn_lshift will assert what we need. */ 1428 1429 res = refmpn_lshift (rp, sp, size, shift); 1430 refmpn_com (rp, rp, size); 1431 return res; 1432 } 1433 1434 /* accepting shift==0 and doing a plain copyi or copyd in that case */ 1435 mp_limb_t 1436 refmpn_rshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) 1437 { 1438 if (shift == 0) 1439 { 1440 refmpn_copyi (rp, sp, size); 1441 return 0; 1442 } 1443 else 1444 { 1445 return refmpn_rshift (rp, sp, size, shift); 1446 } 1447 } 1448 mp_limb_t 1449 refmpn_lshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) 1450 { 1451 if (shift == 0) 1452 { 1453 refmpn_copyd (rp, sp, size); 1454 return 0; 1455 } 1456 else 1457 { 1458 return refmpn_lshift (rp, sp, size, shift); 1459 } 1460 } 1461 1462 /* accepting size==0 too */ 1463 mp_limb_t 1464 refmpn_rshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size, 1465 unsigned shift) 1466 { 1467 return (size == 0 ? 0 : refmpn_rshift_or_copy (rp, sp, size, shift)); 1468 } 1469 mp_limb_t 1470 refmpn_lshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size, 1471 unsigned shift) 1472 { 1473 return (size == 0 ? 0 : refmpn_lshift_or_copy (rp, sp, size, shift)); 1474 } 1475 1476 /* Divide h,l by d, return quotient, store remainder to *rp. 1477 Operates on full limbs, not nails. 1478 Must have h < d. 1479 __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works. */ 1480 mp_limb_t 1481 refmpn_udiv_qrnnd (mp_limb_t *rp, mp_limb_t h, mp_limb_t l, mp_limb_t d) 1482 { 1483 mp_limb_t q, r; 1484 int n; 1485 1486 ASSERT (d != 0); 1487 ASSERT (h < d); 1488 1489 #if 0 1490 udiv_qrnnd (q, r, h, l, d); 1491 *rp = r; 1492 return q; 1493 #endif 1494 1495 n = refmpn_count_leading_zeros (d); 1496 d <<= n; 1497 1498 if (n != 0) 1499 { 1500 h = (h << n) | (l >> (GMP_LIMB_BITS - n)); 1501 l <<= n; 1502 } 1503 1504 __udiv_qrnnd_c (q, r, h, l, d); 1505 r >>= n; 1506 *rp = r; 1507 return q; 1508 } 1509 1510 mp_limb_t 1511 refmpn_udiv_qrnnd_r (mp_limb_t h, mp_limb_t l, mp_limb_t d, mp_limb_t *rp) 1512 { 1513 return refmpn_udiv_qrnnd (rp, h, l, d); 1514 } 1515 1516 /* This little subroutine avoids some bad code generation from i386 gcc 3.0 1517 -fPIC -O2 -fomit-frame-pointer (%ebp being used uninitialized). */ 1518 static mp_limb_t 1519 refmpn_divmod_1c_workaround (mp_ptr rp, mp_srcptr sp, mp_size_t size, 1520 mp_limb_t divisor, mp_limb_t carry) 1521 { 1522 mp_size_t i; 1523 mp_limb_t rem[1]; 1524 for (i = size-1; i >= 0; i--) 1525 { 1526 rp[i] = refmpn_udiv_qrnnd (rem, carry, 1527 sp[i] << GMP_NAIL_BITS, 1528 divisor << GMP_NAIL_BITS); 1529 carry = *rem >> GMP_NAIL_BITS; 1530 } 1531 return carry; 1532 } 1533 1534 mp_limb_t 1535 refmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, 1536 mp_limb_t divisor, mp_limb_t carry) 1537 { 1538 mp_ptr sp_orig; 1539 mp_ptr prod; 1540 mp_limb_t carry_orig; 1541 1542 ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); 1543 ASSERT (size >= 0); 1544 ASSERT (carry < divisor); 1545 ASSERT_MPN (sp, size); 1546 ASSERT_LIMB (divisor); 1547 ASSERT_LIMB (carry); 1548 1549 if (size == 0) 1550 return carry; 1551 1552 sp_orig = refmpn_memdup_limbs (sp, size); 1553 prod = refmpn_malloc_limbs (size); 1554 carry_orig = carry; 1555 1556 carry = refmpn_divmod_1c_workaround (rp, sp, size, divisor, carry); 1557 1558 /* check by multiplying back */ 1559 #if 0 1560 printf ("size=%ld divisor=0x%lX carry=0x%lX remainder=0x%lX\n", 1561 size, divisor, carry_orig, carry); 1562 mpn_trace("s",sp_copy,size); 1563 mpn_trace("r",rp,size); 1564 printf ("mul_1c %lX\n", refmpn_mul_1c (prod, rp, size, divisor, carry)); 1565 mpn_trace("p",prod,size); 1566 #endif 1567 ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig); 1568 ASSERT (refmpn_cmp (prod, sp_orig, size) == 0); 1569 free (sp_orig); 1570 free (prod); 1571 1572 return carry; 1573 } 1574 1575 mp_limb_t 1576 refmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor) 1577 { 1578 return refmpn_divmod_1c (rp, sp, size, divisor, CNST_LIMB(0)); 1579 } 1580 1581 1582 mp_limb_t 1583 refmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor, 1584 mp_limb_t carry) 1585 { 1586 mp_ptr p = refmpn_malloc_limbs (size); 1587 carry = refmpn_divmod_1c (p, sp, size, divisor, carry); 1588 free (p); 1589 return carry; 1590 } 1591 1592 mp_limb_t 1593 refmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor) 1594 { 1595 return refmpn_mod_1c (sp, size, divisor, CNST_LIMB(0)); 1596 } 1597 1598 mp_limb_t 1599 refmpn_preinv_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor, 1600 mp_limb_t inverse) 1601 { 1602 ASSERT (divisor & GMP_NUMB_HIGHBIT); 1603 ASSERT (inverse == refmpn_invert_limb (divisor)); 1604 return refmpn_mod_1 (sp, size, divisor); 1605 } 1606 1607 /* This implementation will be rather slow, but has the advantage of being 1608 in a different style than the libgmp versions. */ 1609 mp_limb_t 1610 refmpn_mod_34lsub1 (mp_srcptr p, mp_size_t n) 1611 { 1612 ASSERT ((GMP_NUMB_BITS % 4) == 0); 1613 return mpn_mod_1 (p, n, (CNST_LIMB(1) << (3 * GMP_NUMB_BITS / 4)) - 1); 1614 } 1615 1616 1617 mp_limb_t 1618 refmpn_divrem_1c (mp_ptr rp, mp_size_t xsize, 1619 mp_srcptr sp, mp_size_t size, mp_limb_t divisor, 1620 mp_limb_t carry) 1621 { 1622 mp_ptr z; 1623 1624 z = refmpn_malloc_limbs (xsize); 1625 refmpn_fill (z, xsize, CNST_LIMB(0)); 1626 1627 carry = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry); 1628 carry = refmpn_divmod_1c (rp, z, xsize, divisor, carry); 1629 1630 free (z); 1631 return carry; 1632 } 1633 1634 mp_limb_t 1635 refmpn_divrem_1 (mp_ptr rp, mp_size_t xsize, 1636 mp_srcptr sp, mp_size_t size, mp_limb_t divisor) 1637 { 1638 return refmpn_divrem_1c (rp, xsize, sp, size, divisor, CNST_LIMB(0)); 1639 } 1640 1641 mp_limb_t 1642 refmpn_preinv_divrem_1 (mp_ptr rp, mp_size_t xsize, 1643 mp_srcptr sp, mp_size_t size, 1644 mp_limb_t divisor, mp_limb_t inverse, unsigned shift) 1645 { 1646 ASSERT (size >= 0); 1647 ASSERT (shift == refmpn_count_leading_zeros (divisor)); 1648 ASSERT (inverse == refmpn_invert_limb (divisor << shift)); 1649 1650 return refmpn_divrem_1 (rp, xsize, sp, size, divisor); 1651 } 1652 1653 mp_limb_t 1654 refmpn_divrem_2 (mp_ptr qp, mp_size_t qxn, 1655 mp_ptr np, mp_size_t nn, 1656 mp_srcptr dp) 1657 { 1658 mp_ptr tp; 1659 mp_limb_t qh; 1660 1661 tp = refmpn_malloc_limbs (nn + qxn); 1662 refmpn_zero (tp, qxn); 1663 refmpn_copyi (tp + qxn, np, nn); 1664 qh = refmpn_sb_div_qr (qp, tp, nn + qxn, dp, 2); 1665 refmpn_copyi (np, tp, 2); 1666 free (tp); 1667 return qh; 1668 } 1669 1670 /* Inverse is floor((b*(b-d)-1) / d), per division by invariant integers 1671 paper, figure 8.1 m', where b=2^GMP_LIMB_BITS. Note that -d-1 < d 1672 since d has the high bit set. */ 1673 1674 mp_limb_t 1675 refmpn_invert_limb (mp_limb_t d) 1676 { 1677 mp_limb_t r; 1678 ASSERT (d & GMP_LIMB_HIGHBIT); 1679 return refmpn_udiv_qrnnd (&r, -d-1, MP_LIMB_T_MAX, d); 1680 } 1681 1682 void 1683 refmpn_invert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch) 1684 { 1685 mp_ptr qp, tp; 1686 TMP_DECL; 1687 TMP_MARK; 1688 1689 tp = TMP_ALLOC_LIMBS (2 * n); 1690 qp = TMP_ALLOC_LIMBS (n + 1); 1691 1692 MPN_ZERO (tp, 2 * n); mpn_sub_1 (tp, tp, 2 * n, 1); 1693 1694 refmpn_tdiv_qr (qp, rp, 0, tp, 2 * n, up, n); 1695 refmpn_copyi (rp, qp, n); 1696 1697 TMP_FREE; 1698 } 1699 1700 void 1701 refmpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch) 1702 { 1703 mp_ptr tp; 1704 mp_limb_t binv; 1705 TMP_DECL; 1706 TMP_MARK; 1707 1708 /* We use the library mpn_sbpi1_bdiv_q here, which isn't kosher in testing 1709 code. To make up for it, we check that the inverse is correct using a 1710 multiply. */ 1711 1712 tp = TMP_ALLOC_LIMBS (2 * n); 1713 1714 MPN_ZERO (tp, n); 1715 tp[0] = 1; 1716 binvert_limb (binv, up[0]); 1717 mpn_sbpi1_bdiv_q (rp, tp, n, up, n, -binv); 1718 1719 refmpn_mul_n (tp, rp, up, n); 1720 ASSERT_ALWAYS (tp[0] == 1 && mpn_zero_p (tp + 1, n - 1)); 1721 1722 TMP_FREE; 1723 } 1724 1725 /* The aim is to produce a dst quotient and return a remainder c, satisfying 1726 c*b^n + src-i == 3*dst, where i is the incoming carry. 1727 1728 Some value c==0, c==1 or c==2 will satisfy, so just try each. 1729 1730 If GMP_NUMB_BITS is even then 2^GMP_NUMB_BITS==1mod3 and a non-zero 1731 remainder from the first division attempt determines the correct 1732 remainder (3-c), but don't bother with that, since we can't guarantee 1733 anything about GMP_NUMB_BITS when using nails. 1734 1735 If the initial src-i produces a borrow then refmpn_sub_1 leaves a twos 1736 complement negative, ie. b^n+a-i, and the calculation produces c1 1737 satisfying c1*b^n + b^n+src-i == 3*dst, from which clearly c=c1+1. This 1738 means it's enough to just add any borrow back at the end. 1739 1740 A borrow only occurs when a==0 or a==1, and, by the same reasoning as in 1741 mpn/generic/diveby3.c, the c1 that results in those cases will only be 0 1742 or 1 respectively, so with 1 added the final return value is still in the 1743 prescribed range 0 to 2. */ 1744 1745 mp_limb_t 1746 refmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry) 1747 { 1748 mp_ptr spcopy; 1749 mp_limb_t c, cs; 1750 1751 ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); 1752 ASSERT (size >= 1); 1753 ASSERT (carry <= 2); 1754 ASSERT_MPN (sp, size); 1755 1756 spcopy = refmpn_malloc_limbs (size); 1757 cs = refmpn_sub_1 (spcopy, sp, size, carry); 1758 1759 for (c = 0; c <= 2; c++) 1760 if (refmpn_divmod_1c (rp, spcopy, size, CNST_LIMB(3), c) == 0) 1761 goto done; 1762 ASSERT_FAIL (no value of c satisfies); 1763 1764 done: 1765 c += cs; 1766 ASSERT (c <= 2); 1767 1768 free (spcopy); 1769 return c; 1770 } 1771 1772 mp_limb_t 1773 refmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1774 { 1775 return refmpn_divexact_by3c (rp, sp, size, CNST_LIMB(0)); 1776 } 1777 1778 1779 /* The same as mpn/generic/mul_basecase.c, but using refmpn functions. */ 1780 void 1781 refmpn_mul_basecase (mp_ptr prodp, 1782 mp_srcptr up, mp_size_t usize, 1783 mp_srcptr vp, mp_size_t vsize) 1784 { 1785 mp_size_t i; 1786 1787 ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize)); 1788 ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize)); 1789 ASSERT (usize >= vsize); 1790 ASSERT (vsize >= 1); 1791 ASSERT_MPN (up, usize); 1792 ASSERT_MPN (vp, vsize); 1793 1794 prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]); 1795 for (i = 1; i < vsize; i++) 1796 prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]); 1797 } 1798 1799 1800 /* The same as mpn/generic/mulmid_basecase.c, but using refmpn functions. */ 1801 void 1802 refmpn_mulmid_basecase (mp_ptr rp, 1803 mp_srcptr up, mp_size_t un, 1804 mp_srcptr vp, mp_size_t vn) 1805 { 1806 mp_limb_t cy; 1807 mp_size_t i; 1808 1809 ASSERT (un >= vn); 1810 ASSERT (vn >= 1); 1811 ASSERT (! refmpn_overlap_p (rp, un - vn + 3, up, un)); 1812 ASSERT (! refmpn_overlap_p (rp, un - vn + 3, vp, vn)); 1813 ASSERT_MPN (up, un); 1814 ASSERT_MPN (vp, vn); 1815 1816 rp[un - vn + 1] = refmpn_mul_1 (rp, up + vn - 1, un - vn + 1, vp[0]); 1817 rp[un - vn + 2] = CNST_LIMB (0); 1818 for (i = 1; i < vn; i++) 1819 { 1820 cy = refmpn_addmul_1 (rp, up + vn - i - 1, un - vn + 1, vp[i]); 1821 cy = ref_addc_limb (&rp[un - vn + 1], rp[un - vn + 1], cy); 1822 cy = ref_addc_limb (&rp[un - vn + 2], rp[un - vn + 2], cy); 1823 ASSERT (cy == 0); 1824 } 1825 } 1826 1827 void 1828 refmpn_toom42_mulmid (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, 1829 mp_ptr scratch) 1830 { 1831 refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n); 1832 } 1833 1834 void 1835 refmpn_mulmid_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 1836 { 1837 /* FIXME: this could be made faster by using refmpn_mul and then subtracting 1838 off products near the middle product region boundary */ 1839 refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n); 1840 } 1841 1842 void 1843 refmpn_mulmid (mp_ptr rp, mp_srcptr up, mp_size_t un, 1844 mp_srcptr vp, mp_size_t vn) 1845 { 1846 /* FIXME: this could be made faster by using refmpn_mul and then subtracting 1847 off products near the middle product region boundary */ 1848 refmpn_mulmid_basecase (rp, up, un, vp, vn); 1849 } 1850 1851 1852 1853 #define TOOM3_THRESHOLD (MAX (MUL_TOOM33_THRESHOLD, SQR_TOOM3_THRESHOLD)) 1854 #define TOOM4_THRESHOLD (MAX (MUL_TOOM44_THRESHOLD, SQR_TOOM4_THRESHOLD)) 1855 #define TOOM6_THRESHOLD (MAX (MUL_TOOM6H_THRESHOLD, SQR_TOOM6_THRESHOLD)) 1856 #if WANT_FFT 1857 #define FFT_THRESHOLD (MAX (MUL_FFT_THRESHOLD, SQR_FFT_THRESHOLD)) 1858 #else 1859 #define FFT_THRESHOLD MP_SIZE_T_MAX /* don't use toom44 here */ 1860 #endif 1861 1862 void 1863 refmpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn) 1864 { 1865 mp_ptr tp, rp; 1866 mp_size_t tn; 1867 1868 if (vn < TOOM3_THRESHOLD) 1869 { 1870 /* In the mpn_mul_basecase and toom2 ranges, use our own mul_basecase. */ 1871 if (vn != 0) 1872 refmpn_mul_basecase (wp, up, un, vp, vn); 1873 else 1874 MPN_ZERO (wp, un); 1875 return; 1876 } 1877 1878 MPN_ZERO (wp, vn); 1879 rp = refmpn_malloc_limbs (2 * vn); 1880 1881 if (vn < TOOM4_THRESHOLD) 1882 tn = mpn_toom22_mul_itch (vn, vn); 1883 else if (vn < TOOM6_THRESHOLD) 1884 tn = mpn_toom33_mul_itch (vn, vn); 1885 else if (vn < FFT_THRESHOLD) 1886 tn = mpn_toom44_mul_itch (vn, vn); 1887 else 1888 tn = mpn_toom6h_mul_itch (vn, vn); 1889 tp = refmpn_malloc_limbs (tn); 1890 1891 while (un >= vn) 1892 { 1893 if (vn < TOOM4_THRESHOLD) 1894 /* In the toom3 range, use mpn_toom22_mul. */ 1895 mpn_toom22_mul (rp, up, vn, vp, vn, tp); 1896 else if (vn < TOOM6_THRESHOLD) 1897 /* In the toom4 range, use mpn_toom33_mul. */ 1898 mpn_toom33_mul (rp, up, vn, vp, vn, tp); 1899 else if (vn < FFT_THRESHOLD) 1900 /* In the toom6 range, use mpn_toom44_mul. */ 1901 mpn_toom44_mul (rp, up, vn, vp, vn, tp); 1902 else 1903 /* For the largest operands, use mpn_toom6h_mul. */ 1904 mpn_toom6h_mul (rp, up, vn, vp, vn, tp); 1905 1906 ASSERT_NOCARRY (refmpn_add (wp, rp, 2 * vn, wp, vn)); 1907 wp += vn; 1908 1909 up += vn; 1910 un -= vn; 1911 } 1912 1913 free (tp); 1914 1915 if (un != 0) 1916 { 1917 refmpn_mul (rp, vp, vn, up, un); 1918 ASSERT_NOCARRY (refmpn_add (wp, rp, un + vn, wp, vn)); 1919 } 1920 free (rp); 1921 } 1922 1923 void 1924 refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size) 1925 { 1926 refmpn_mul (prodp, up, size, vp, size); 1927 } 1928 1929 void 1930 refmpn_mullo_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size) 1931 { 1932 mp_ptr tp = refmpn_malloc_limbs (2*size); 1933 refmpn_mul (tp, up, size, vp, size); 1934 refmpn_copyi (prodp, tp, size); 1935 free (tp); 1936 } 1937 1938 void 1939 refmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size) 1940 { 1941 refmpn_mul (dst, src, size, src, size); 1942 } 1943 1944 void 1945 refmpn_sqrlo (mp_ptr dst, mp_srcptr src, mp_size_t size) 1946 { 1947 refmpn_mullo_n (dst, src, src, size); 1948 } 1949 1950 /* Allowing usize<vsize, usize==0 or vsize==0. */ 1951 void 1952 refmpn_mul_any (mp_ptr prodp, 1953 mp_srcptr up, mp_size_t usize, 1954 mp_srcptr vp, mp_size_t vsize) 1955 { 1956 ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize)); 1957 ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize)); 1958 ASSERT (usize >= 0); 1959 ASSERT (vsize >= 0); 1960 ASSERT_MPN (up, usize); 1961 ASSERT_MPN (vp, vsize); 1962 1963 if (usize == 0) 1964 { 1965 refmpn_fill (prodp, vsize, CNST_LIMB(0)); 1966 return; 1967 } 1968 1969 if (vsize == 0) 1970 { 1971 refmpn_fill (prodp, usize, CNST_LIMB(0)); 1972 return; 1973 } 1974 1975 if (usize >= vsize) 1976 refmpn_mul (prodp, up, usize, vp, vsize); 1977 else 1978 refmpn_mul (prodp, vp, vsize, up, usize); 1979 } 1980 1981 1982 mp_limb_t 1983 refmpn_gcd_11 (mp_limb_t x, mp_limb_t y) 1984 { 1985 /* The non-ref function also requires input operands to be odd, but 1986 below refmpn_gcd_1 doesn't guarantee that. */ 1987 ASSERT (x > 0); 1988 ASSERT (y > 0); 1989 do 1990 { 1991 while ((x & 1) == 0) x >>= 1; 1992 while ((y & 1) == 0) y >>= 1; 1993 1994 if (x < y) 1995 MP_LIMB_T_SWAP (x, y); 1996 1997 x -= y; 1998 } 1999 while (x != 0); 2000 2001 return y; 2002 } 2003 2004 mp_double_limb_t 2005 refmpn_gcd_22 (mp_limb_t x1, mp_limb_t x0, mp_limb_t y1, mp_limb_t y0) 2006 { 2007 mp_double_limb_t g; 2008 mp_limb_t cy; 2009 ASSERT ((x0 & 1) != 0); 2010 ASSERT ((y0 & 1) != 0); 2011 2012 do 2013 { 2014 while ((x0 & 1) == 0) 2015 { 2016 x0 = (x1 << (GMP_NUMB_BITS - 1)) | (x0 >> 1); 2017 x1 >>= 1; 2018 } 2019 while ((y0 & 1) == 0) 2020 { 2021 y0 = (y1 << (GMP_NUMB_BITS - 1)) | (y0 >> 1); 2022 y1 >>= 1; 2023 } 2024 2025 2026 if (x1 < y1 || (x1 == y1 && x0 < y0)) 2027 { 2028 mp_limb_t t; 2029 t = x1; x1 = y1; y1 = t; 2030 t = x0; x0 = y0; y0 = t; 2031 } 2032 2033 cy = (x0 < y0); 2034 x0 -= y0; 2035 x1 -= y1 + cy; 2036 } 2037 while ((x1 | x0) != 0); 2038 2039 g.d1 = y1; 2040 g.d0 = y0; 2041 return g; 2042 } 2043 2044 mp_limb_t 2045 refmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y) 2046 { 2047 mp_limb_t x; 2048 int twos; 2049 2050 ASSERT (y != 0); 2051 ASSERT (! refmpn_zero_p (xp, xsize)); 2052 ASSERT_MPN (xp, xsize); 2053 ASSERT_LIMB (y); 2054 2055 x = refmpn_mod_1 (xp, xsize, y); 2056 if (x == 0) 2057 return y; 2058 2059 twos = 0; 2060 while ((x & 1) == 0 && (y & 1) == 0) 2061 { 2062 x >>= 1; 2063 y >>= 1; 2064 twos++; 2065 } 2066 2067 return refmpn_gcd_11 (x, y) << twos; 2068 } 2069 2070 2071 /* Based on the full limb x, not nails. */ 2072 unsigned 2073 refmpn_count_leading_zeros (mp_limb_t x) 2074 { 2075 unsigned n = 0; 2076 2077 ASSERT (x != 0); 2078 2079 while ((x & GMP_LIMB_HIGHBIT) == 0) 2080 { 2081 x <<= 1; 2082 n++; 2083 } 2084 return n; 2085 } 2086 2087 /* Full limbs allowed, not limited to nails. */ 2088 unsigned 2089 refmpn_count_trailing_zeros (mp_limb_t x) 2090 { 2091 unsigned n = 0; 2092 2093 ASSERT (x != 0); 2094 ASSERT_LIMB (x); 2095 2096 while ((x & 1) == 0) 2097 { 2098 x >>= 1; 2099 n++; 2100 } 2101 return n; 2102 } 2103 2104 /* Strip factors of two (low zero bits) from {p,size} by right shifting. 2105 The return value is the number of twos stripped. */ 2106 mp_size_t 2107 refmpn_strip_twos (mp_ptr p, mp_size_t size) 2108 { 2109 mp_size_t limbs; 2110 unsigned shift; 2111 2112 ASSERT (size >= 1); 2113 ASSERT (! refmpn_zero_p (p, size)); 2114 ASSERT_MPN (p, size); 2115 2116 for (limbs = 0; p[0] == 0; limbs++) 2117 { 2118 refmpn_copyi (p, p+1, size-1); 2119 p[size-1] = 0; 2120 } 2121 2122 shift = refmpn_count_trailing_zeros (p[0]); 2123 if (shift) 2124 refmpn_rshift (p, p, size, shift); 2125 2126 return limbs*GMP_NUMB_BITS + shift; 2127 } 2128 2129 mp_limb_t 2130 refmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize) 2131 { 2132 int cmp; 2133 2134 ASSERT (ysize >= 1); 2135 ASSERT (xsize >= ysize); 2136 ASSERT ((xp[0] & 1) != 0); 2137 ASSERT ((yp[0] & 1) != 0); 2138 /* ASSERT (xp[xsize-1] != 0); */ /* don't think x needs to be odd */ 2139 ASSERT (yp[ysize-1] != 0); 2140 ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize)); 2141 ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize)); 2142 ASSERT (! refmpn_overlap_p (xp, xsize, yp, ysize)); 2143 if (xsize == ysize) 2144 ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1])); 2145 ASSERT_MPN (xp, xsize); 2146 ASSERT_MPN (yp, ysize); 2147 2148 refmpn_strip_twos (xp, xsize); 2149 MPN_NORMALIZE (xp, xsize); 2150 MPN_NORMALIZE (yp, ysize); 2151 2152 for (;;) 2153 { 2154 cmp = refmpn_cmp_twosizes (xp, xsize, yp, ysize); 2155 if (cmp == 0) 2156 break; 2157 if (cmp < 0) 2158 MPN_PTR_SWAP (xp,xsize, yp,ysize); 2159 2160 ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize)); 2161 2162 refmpn_strip_twos (xp, xsize); 2163 MPN_NORMALIZE (xp, xsize); 2164 } 2165 2166 refmpn_copyi (gp, xp, xsize); 2167 return xsize; 2168 } 2169 2170 unsigned long 2171 ref_popc_limb (mp_limb_t src) 2172 { 2173 unsigned long count; 2174 int i; 2175 2176 count = 0; 2177 for (i = 0; i < GMP_LIMB_BITS; i++) 2178 { 2179 count += (src & 1); 2180 src >>= 1; 2181 } 2182 return count; 2183 } 2184 2185 unsigned long 2186 refmpn_popcount (mp_srcptr sp, mp_size_t size) 2187 { 2188 unsigned long count = 0; 2189 mp_size_t i; 2190 2191 ASSERT (size >= 0); 2192 ASSERT_MPN (sp, size); 2193 2194 for (i = 0; i < size; i++) 2195 count += ref_popc_limb (sp[i]); 2196 return count; 2197 } 2198 2199 unsigned long 2200 refmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 2201 { 2202 mp_ptr d; 2203 unsigned long count; 2204 2205 ASSERT (size >= 0); 2206 ASSERT_MPN (s1p, size); 2207 ASSERT_MPN (s2p, size); 2208 2209 if (size == 0) 2210 return 0; 2211 2212 d = refmpn_malloc_limbs (size); 2213 refmpn_xor_n (d, s1p, s2p, size); 2214 count = refmpn_popcount (d, size); 2215 free (d); 2216 return count; 2217 } 2218 2219 2220 /* set r to a%d */ 2221 void 2222 refmpn_mod2 (mp_limb_t r[2], const mp_limb_t a[2], const mp_limb_t d[2]) 2223 { 2224 mp_limb_t D[2]; 2225 int n; 2226 2227 ASSERT (! refmpn_overlap_p (r, (mp_size_t) 2, d, (mp_size_t) 2)); 2228 ASSERT_MPN (a, 2); 2229 ASSERT_MPN (d, 2); 2230 2231 D[1] = d[1], D[0] = d[0]; 2232 r[1] = a[1], r[0] = a[0]; 2233 n = 0; 2234 2235 for (;;) 2236 { 2237 if (D[1] & GMP_NUMB_HIGHBIT) 2238 break; 2239 if (refmpn_cmp (r, D, (mp_size_t) 2) <= 0) 2240 break; 2241 refmpn_lshift (D, D, (mp_size_t) 2, 1); 2242 n++; 2243 ASSERT (n <= GMP_NUMB_BITS); 2244 } 2245 2246 while (n >= 0) 2247 { 2248 if (refmpn_cmp (r, D, (mp_size_t) 2) >= 0) 2249 ASSERT_NOCARRY (refmpn_sub_n (r, r, D, (mp_size_t) 2)); 2250 refmpn_rshift (D, D, (mp_size_t) 2, 1); 2251 n--; 2252 } 2253 2254 ASSERT (refmpn_cmp (r, d, (mp_size_t) 2) < 0); 2255 } 2256 2257 2258 2259 /* Similar to the old mpn/generic/sb_divrem_mn.c, but somewhat simplified, in 2260 particular the trial quotient is allowed to be 2 too big. */ 2261 mp_limb_t 2262 refmpn_sb_div_qr (mp_ptr qp, 2263 mp_ptr np, mp_size_t nsize, 2264 mp_srcptr dp, mp_size_t dsize) 2265 { 2266 mp_limb_t retval = 0; 2267 mp_size_t i; 2268 mp_limb_t d1 = dp[dsize-1]; 2269 mp_ptr np_orig = refmpn_memdup_limbs (np, nsize); 2270 2271 ASSERT (nsize >= dsize); 2272 /* ASSERT (dsize > 2); */ 2273 ASSERT (dsize >= 2); 2274 ASSERT (dp[dsize-1] & GMP_NUMB_HIGHBIT); 2275 ASSERT (! refmpn_overlap_p (qp, nsize-dsize, np, nsize) || qp+dsize >= np); 2276 ASSERT_MPN (np, nsize); 2277 ASSERT_MPN (dp, dsize); 2278 2279 i = nsize-dsize; 2280 if (refmpn_cmp (np+i, dp, dsize) >= 0) 2281 { 2282 ASSERT_NOCARRY (refmpn_sub_n (np+i, np+i, dp, dsize)); 2283 retval = 1; 2284 } 2285 2286 for (i--; i >= 0; i--) 2287 { 2288 mp_limb_t n0 = np[i+dsize]; 2289 mp_limb_t n1 = np[i+dsize-1]; 2290 mp_limb_t q, dummy_r; 2291 2292 ASSERT (n0 <= d1); 2293 if (n0 == d1) 2294 q = GMP_NUMB_MAX; 2295 else 2296 q = refmpn_udiv_qrnnd (&dummy_r, n0, n1 << GMP_NAIL_BITS, 2297 d1 << GMP_NAIL_BITS); 2298 2299 n0 -= refmpn_submul_1 (np+i, dp, dsize, q); 2300 ASSERT (n0 == 0 || n0 == MP_LIMB_T_MAX); 2301 if (n0) 2302 { 2303 q--; 2304 if (! refmpn_add_n (np+i, np+i, dp, dsize)) 2305 { 2306 q--; 2307 ASSERT_CARRY (refmpn_add_n (np+i, np+i, dp, dsize)); 2308 } 2309 } 2310 np[i+dsize] = 0; 2311 2312 qp[i] = q; 2313 } 2314 2315 /* remainder < divisor */ 2316 #if 0 /* ASSERT triggers gcc 4.2.1 bug */ 2317 ASSERT (refmpn_cmp (np, dp, dsize) < 0); 2318 #endif 2319 2320 /* multiply back to original */ 2321 { 2322 mp_ptr mp = refmpn_malloc_limbs (nsize); 2323 2324 refmpn_mul_any (mp, qp, nsize-dsize, dp, dsize); 2325 if (retval) 2326 ASSERT_NOCARRY (refmpn_add_n (mp+nsize-dsize,mp+nsize-dsize, dp, dsize)); 2327 ASSERT_NOCARRY (refmpn_add (mp, mp, nsize, np, dsize)); 2328 ASSERT (refmpn_cmp (mp, np_orig, nsize) == 0); 2329 2330 free (mp); 2331 } 2332 2333 free (np_orig); 2334 return retval; 2335 } 2336 2337 /* Similar to the old mpn/generic/sb_divrem_mn.c, but somewhat simplified, in 2338 particular the trial quotient is allowed to be 2 too big. */ 2339 void 2340 refmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn, 2341 mp_ptr np, mp_size_t nsize, 2342 mp_srcptr dp, mp_size_t dsize) 2343 { 2344 ASSERT (qxn == 0); 2345 ASSERT_MPN (np, nsize); 2346 ASSERT_MPN (dp, dsize); 2347 ASSERT (dsize > 0); 2348 ASSERT (dp[dsize-1] != 0); 2349 2350 if (dsize == 1) 2351 { 2352 rp[0] = refmpn_divmod_1 (qp, np, nsize, dp[0]); 2353 return; 2354 } 2355 else 2356 { 2357 mp_ptr n2p = refmpn_malloc_limbs (nsize+1); 2358 mp_ptr d2p = refmpn_malloc_limbs (dsize); 2359 int norm = refmpn_count_leading_zeros (dp[dsize-1]) - GMP_NAIL_BITS; 2360 2361 n2p[nsize] = refmpn_lshift_or_copy (n2p, np, nsize, norm); 2362 ASSERT_NOCARRY (refmpn_lshift_or_copy (d2p, dp, dsize, norm)); 2363 2364 refmpn_sb_div_qr (qp, n2p, nsize+1, d2p, dsize); 2365 refmpn_rshift_or_copy (rp, n2p, dsize, norm); 2366 2367 /* ASSERT (refmpn_zero_p (tp+dsize, nsize-dsize)); */ 2368 free (n2p); 2369 free (d2p); 2370 } 2371 } 2372 2373 mp_limb_t 2374 refmpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm) 2375 { 2376 mp_size_t j; 2377 mp_limb_t cy; 2378 2379 ASSERT_MPN (up, 2*n); 2380 /* ASSERT about directed overlap rp, up */ 2381 /* ASSERT about overlap rp, mp */ 2382 /* ASSERT about overlap up, mp */ 2383 2384 for (j = n - 1; j >= 0; j--) 2385 { 2386 up[0] = refmpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK); 2387 up++; 2388 } 2389 cy = mpn_add_n (rp, up, up - n, n); 2390 return cy; 2391 } 2392 2393 size_t 2394 refmpn_get_str (unsigned char *dst, int base, mp_ptr src, mp_size_t size) 2395 { 2396 unsigned char *d; 2397 size_t dsize; 2398 2399 ASSERT (size >= 0); 2400 ASSERT (base >= 2); 2401 ASSERT (base < numberof (mp_bases)); 2402 ASSERT (size == 0 || src[size-1] != 0); 2403 ASSERT_MPN (src, size); 2404 2405 MPN_SIZEINBASE (dsize, src, size, base); 2406 ASSERT (dsize >= 1); 2407 ASSERT (! byte_overlap_p (dst, (mp_size_t) dsize, src, size * GMP_LIMB_BYTES)); 2408 2409 if (size == 0) 2410 { 2411 dst[0] = 0; 2412 return 1; 2413 } 2414 2415 /* don't clobber input for power of 2 bases */ 2416 if (POW2_P (base)) 2417 src = refmpn_memdup_limbs (src, size); 2418 2419 d = dst + dsize; 2420 do 2421 { 2422 d--; 2423 ASSERT (d >= dst); 2424 *d = refmpn_divrem_1 (src, (mp_size_t) 0, src, size, (mp_limb_t) base); 2425 size -= (src[size-1] == 0); 2426 } 2427 while (size != 0); 2428 2429 /* Move result back and decrement dsize if we didn't generate 2430 the maximum possible digits. */ 2431 if (d != dst) 2432 { 2433 size_t i; 2434 dsize -= d - dst; 2435 for (i = 0; i < dsize; i++) 2436 dst[i] = d[i]; 2437 } 2438 2439 if (POW2_P (base)) 2440 free (src); 2441 2442 return dsize; 2443 } 2444 2445 2446 mp_limb_t 2447 ref_bswap_limb (mp_limb_t src) 2448 { 2449 mp_limb_t dst; 2450 int i; 2451 2452 dst = 0; 2453 for (i = 0; i < GMP_LIMB_BYTES; i++) 2454 { 2455 dst = (dst << 8) + (src & 0xFF); 2456 src >>= 8; 2457 } 2458 return dst; 2459 } 2460 2461 2462 /* These random functions are mostly for transitional purposes while adding 2463 nail support, since they're independent of the normal mpn routines. They 2464 can probably be removed when those normal routines are reliable, though 2465 perhaps something independent would still be useful at times. */ 2466 2467 #if GMP_LIMB_BITS == 32 2468 #define RAND_A CNST_LIMB(0x29CF535) 2469 #endif 2470 #if GMP_LIMB_BITS == 64 2471 #define RAND_A CNST_LIMB(0xBAECD515DAF0B49D) 2472 #endif 2473 2474 mp_limb_t refmpn_random_seed; 2475 2476 mp_limb_t 2477 refmpn_random_half (void) 2478 { 2479 refmpn_random_seed = refmpn_random_seed * RAND_A + 1; 2480 return (refmpn_random_seed >> GMP_LIMB_BITS/2); 2481 } 2482 2483 mp_limb_t 2484 refmpn_random_limb (void) 2485 { 2486 return ((refmpn_random_half () << (GMP_LIMB_BITS/2)) 2487 | refmpn_random_half ()) & GMP_NUMB_MASK; 2488 } 2489 2490 void 2491 refmpn_random (mp_ptr ptr, mp_size_t size) 2492 { 2493 mp_size_t i; 2494 if (GMP_NAIL_BITS == 0) 2495 { 2496 mpn_random (ptr, size); 2497 return; 2498 } 2499 2500 for (i = 0; i < size; i++) 2501 ptr[i] = refmpn_random_limb (); 2502 } 2503 2504 void 2505 refmpn_random2 (mp_ptr ptr, mp_size_t size) 2506 { 2507 mp_size_t i; 2508 mp_limb_t bit, mask, limb; 2509 int run; 2510 2511 if (GMP_NAIL_BITS == 0) 2512 { 2513 mpn_random2 (ptr, size); 2514 return; 2515 } 2516 2517 #define RUN_MODULUS 32 2518 2519 /* start with ones at a random pos in the high limb */ 2520 bit = CNST_LIMB(1) << (refmpn_random_half () % GMP_NUMB_BITS); 2521 mask = 0; 2522 run = 0; 2523 2524 for (i = size-1; i >= 0; i--) 2525 { 2526 limb = 0; 2527 do 2528 { 2529 if (run == 0) 2530 { 2531 run = (refmpn_random_half () % RUN_MODULUS) + 1; 2532 mask = ~mask; 2533 } 2534 2535 limb |= (bit & mask); 2536 bit >>= 1; 2537 run--; 2538 } 2539 while (bit != 0); 2540 2541 ptr[i] = limb; 2542 bit = GMP_NUMB_HIGHBIT; 2543 } 2544 } 2545 2546 /* This is a simple bitwise algorithm working high to low across "s" and 2547 testing each time whether setting the bit would make s^2 exceed n. */ 2548 mp_size_t 2549 refmpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nsize) 2550 { 2551 mp_ptr tp, dp; 2552 mp_size_t ssize, talloc, tsize, dsize, ret, ilimbs; 2553 unsigned ibit; 2554 long i; 2555 mp_limb_t c; 2556 2557 ASSERT (nsize >= 0); 2558 2559 /* If n==0, then s=0 and r=0. */ 2560 if (nsize == 0) 2561 return 0; 2562 2563 ASSERT (np[nsize - 1] != 0); 2564 ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nsize)); 2565 ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nsize + 1) / 2, rp, nsize)); 2566 ASSERT (! MPN_OVERLAP_P (sp, (nsize + 1) / 2, np, nsize)); 2567 2568 /* root */ 2569 ssize = (nsize+1)/2; 2570 refmpn_zero (sp, ssize); 2571 2572 /* the remainder so far */ 2573 dp = refmpn_memdup_limbs (np, nsize); 2574 dsize = nsize; 2575 2576 /* temporary */ 2577 talloc = 2*ssize + 1; 2578 tp = refmpn_malloc_limbs (talloc); 2579 2580 for (i = GMP_NUMB_BITS * ssize - 1; i >= 0; i--) 2581 { 2582 /* t = 2*s*2^i + 2^(2*i), being the amount s^2 will increase by if 2^i 2583 is added to it */ 2584 2585 ilimbs = (i+1) / GMP_NUMB_BITS; 2586 ibit = (i+1) % GMP_NUMB_BITS; 2587 refmpn_zero (tp, ilimbs); 2588 c = refmpn_lshift_or_copy (tp+ilimbs, sp, ssize, ibit); 2589 tsize = ilimbs + ssize; 2590 tp[tsize] = c; 2591 tsize += (c != 0); 2592 2593 ilimbs = (2*i) / GMP_NUMB_BITS; 2594 ibit = (2*i) % GMP_NUMB_BITS; 2595 if (ilimbs + 1 > tsize) 2596 { 2597 refmpn_zero_extend (tp, tsize, ilimbs + 1); 2598 tsize = ilimbs + 1; 2599 } 2600 c = refmpn_add_1 (tp+ilimbs, tp+ilimbs, tsize-ilimbs, 2601 CNST_LIMB(1) << ibit); 2602 ASSERT (tsize < talloc); 2603 tp[tsize] = c; 2604 tsize += (c != 0); 2605 2606 if (refmpn_cmp_twosizes (dp, dsize, tp, tsize) >= 0) 2607 { 2608 /* set this bit in s and subtract from the remainder */ 2609 refmpn_setbit (sp, i); 2610 2611 ASSERT_NOCARRY (refmpn_sub_n (dp, dp, tp, dsize)); 2612 dsize = refmpn_normalize (dp, dsize); 2613 } 2614 } 2615 2616 if (rp == NULL) 2617 { 2618 ret = ! refmpn_zero_p (dp, dsize); 2619 } 2620 else 2621 { 2622 ASSERT (dsize == 0 || dp[dsize-1] != 0); 2623 refmpn_copy (rp, dp, dsize); 2624 ret = dsize; 2625 } 2626 2627 free (dp); 2628 free (tp); 2629 return ret; 2630 } 2631