/* Run some tests on various mpn routines.
2
3 THIS IS A TEST PROGRAM USED ONLY FOR DEVELOPMENT. IT'S ALMOST CERTAIN TO
4 BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE VERSIONS OF GMP.
5
6 Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2009 Free Software
7 Foundation, Inc.
8
9 This file is part of the GNU MP Library.
10
11 The GNU MP Library is free software; you can redistribute it and/or modify
12 it under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 3 of the License, or (at your
14 option) any later version.
15
16 The GNU MP Library is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
19 License for more details.
20
21 You should have received a copy of the GNU Lesser General Public License
22 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
23
24
25 /* Usage: try [options] <function>...
26
27 For example, "./try mpn_add_n" to run tests of that function.
28
29 Combinations of alignments and overlaps are tested, with redzones above
30 or below the destinations, and with the sources write-protected.
31
32 The number of tests performed becomes ridiculously large with all the
33 combinations, and for that reason this can't be a part of a "make check",
34 it's meant only for development. The code isn't very pretty either.
35
36 During development it can help to disable the redzones, since seeing the
37 rest of the destination written can show where the wrong part is, or if
38 the dst pointers are off by 1 or whatever. The magic DEADVAL initial
39 fill (see below) will show locations never written.
40
41 The -s option can be used to test only certain size operands, which is
42 useful if some new code doesn't yet support say sizes less than the
43 unrolling, or whatever.
44
45 When a problem occurs it'll of course be necessary to run the program
46 under gdb to find out quite where, how and why it's going wrong. Disable
47 the spinner with the -W option when doing this, or single stepping won't
48 work. Using the "-1" option to run with simple data can be useful.
49
50 New functions to test can be added in try_array[]. If a new TYPE is
51 required then add it to the existing constants, set up its parameters in
52 param_init(), and add it to the call() function. Extra parameter fields
53 can be added if necessary, or further interpretations given to existing
54 fields.
55
56
57 Portability:
58
   This program is not designed for use on Cray vector systems under Unicos;
   it will fail to compile there due to the missing _SC_PAGE_SIZE.  Those
   systems don't really have pages or mprotect.  We could arrange to run the
   tests without the redzones, but so far we haven't bothered.
63
64
65 Enhancements:
66
67 umul_ppmm support is not very good, lots of source data is generated
68 whereas only two limbs are needed.
69
70 Make a little scheme for interpreting the "SIZE" selections uniformly.
71
72 Make tr->size==SIZE_2 work, for the benefit of find_a which wants just 2
73 source limbs. Possibly increase the default repetitions in that case.
74
75 Automatically detect gdb and disable the spinner (use -W for now).
76
77 Make a way to re-run a failing case in the debugger. Have an option to
78 snapshot each test case before it's run so the data is available if a
79 segv occurs. (This should be more reliable than the current print_all()
80 in the signal handler.)
81
82 When alignment means a dst isn't hard against the redzone, check the
83 space in between remains unchanged.
84
85 When a source overlaps a destination, don't run both s[i].high 0 and 1,
86 as s[i].high has no effect. Maybe encode s[i].high into overlap->s[i].
87
88 When partial overlaps aren't done, don't loop over source alignments
89 during overlaps.
90
91 Try to make the looping code a bit less horrible. Right now it's pretty
92 hard to see what iterations are actually done.
93
94 Perhaps specific setups and loops for each style of function under test
95 would be clearer than a parameterized general loop. There's lots of
96 stuff common to all functions, but the exceptions get messy.
97
98 When there's no overlap, run with both src>dst and src<dst. A subtle
99 calling-conventions violation occurred in a P6 copy which depended on the
100 relative location of src and dst.
101
102 multiplier_N is more or less a third source region for the addmul_N
103 routines, and could be done with the redzoned region scheme.
104
105 */
106
107
108 /* always do assertion checking */
109 #define WANT_ASSERT 1
110
111 #include "config.h"
112
113 #include <errno.h>
114 #include <limits.h>
115 #include <signal.h>
116 #include <stdio.h>
117 #include <stdlib.h>
118 #include <string.h>
119 #include <time.h>
120
121 #if HAVE_UNISTD_H
122 #include <unistd.h>
123 #endif
124
125 #if HAVE_SYS_MMAN_H
126 #include <sys/mman.h>
127 #endif
128
129 #include "gmp.h"
130 #include "gmp-impl.h"
131 #include "longlong.h"
132 #include "tests.h"
133
134
/* Fallback declarations, each one emitted only when the corresponding
   HAVE_DECL_xxx configuration macro is zero or undefined.  */

#if ! HAVE_DECL_OPTARG
extern char  *optarg;
extern int   optind;
extern int   opterr;
#endif

#if ! HAVE_DECL_SYS_NERR
extern int   sys_nerr;
#endif

#if ! HAVE_DECL_SYS_ERRLIST
extern char  *sys_errlist[];
#endif
147
148 #if ! HAVE_STRERROR
149 char *
150 strerror (int n)
151 {
152 if (n < 0 || n >= sys_nerr)
153 return "errno out of range";
154 else
155 return sys_errlist[n];
156 }
157 #endif
158
/* Rumour has it some systems lack a define of PROT_NONE. */
#if ! defined (PROT_NONE)
#define PROT_NONE   0
#endif

/* Dummy defines for when mprotect doesn't exist. */
#if ! defined (PROT_READ)
#define PROT_READ   0
#endif
#if ! defined (PROT_WRITE)
#define PROT_WRITE  0
#endif

/* _SC_PAGESIZE is standard, but hpux 9 and possibly other systems have
   _SC_PAGE_SIZE instead. */
#if ! defined (_SC_PAGESIZE) && defined (_SC_PAGE_SIZE)
#define _SC_PAGESIZE  _SC_PAGE_SIZE
#endif
177
178
/* Hooks for additional prototypes: when EXTRA_PROTOS or EXTRA_PROTOS2 is
   defined, its expansion is placed here verbatim.  */
#if defined (EXTRA_PROTOS)
EXTRA_PROTOS
#endif
#if defined (EXTRA_PROTOS2)
EXTRA_PROTOS2
#endif
185
186
#define DEFAULT_REPETITIONS  10

/* Run-time settings, shown with their default values.  */
int  option_repetitions = DEFAULT_REPETITIONS;
int  option_spinner     = 1;
int  option_redzones    = 1;
int  option_firstsize   = 0;
int  option_lastsize    = 500;
int  option_firstsize2  = 0;
int  option_print       = 0;

/* Iteration limits (NOTE(review): exact meaning of each is taken from
   the name only; confirm against the looping code).  */
#define ALIGNMENTS          4
#define OVERLAPS            4
#define CARRY_RANDOMS       5
#define MULTIPLIER_RANDOMS  5
#define DIVISOR_RANDOMS     5
#define FRACTION_COUNT      4

/* Values for option_data.  */
#define DATA_TRAND  0
#define DATA_ZEROS  1
#define DATA_SEQ    2
#define DATA_FFS    3
#define DATA_2FD    4
int  option_data = DATA_TRAND;
211
212
213 mp_size_t pagesize;
214 #define PAGESIZE_LIMBS (pagesize / BYTES_PER_MP_LIMB)
215
216 /* must be a multiple of the page size */
217 #define REDZONE_BYTES (pagesize * 16)
218 #define REDZONE_LIMBS (REDZONE_BYTES / BYTES_PER_MP_LIMB)
219
220
221 #define MAX3(x,y,z) (MAX (x, MAX (y, z)))
222
223 #if GMP_LIMB_BITS == 32
224 #define DEADVAL CNST_LIMB(0xDEADBEEF)
225 #else
226 #define DEADVAL CNST_LIMB(0xDEADBEEFBADDCAFE)
227 #endif
228
229
230 struct region_t {
231 mp_ptr ptr;
232 mp_size_t size;
233 };
234
235
/* Values for trap_location, which starts out as TRAP_NOWHERE.  */
#define TRAP_NOWHERE  0
#define TRAP_REF      1
#define TRAP_FUN      2
#define TRAP_SETUPS   3
int  trap_location = TRAP_NOWHERE;
241
242
243 #define NUM_SOURCES 2
244 #define NUM_DESTS 2
245
246 struct source_t {
247 struct region_t region;
248 int high;
249 mp_size_t align;
250 mp_ptr p;
251 };
252
253 struct source_t s[NUM_SOURCES];
254
255 struct dest_t {
256 int high;
257 mp_size_t align;
258 mp_size_t size;
259 };
260
261 struct dest_t d[NUM_DESTS];
262
263 struct source_each_t {
264 mp_ptr p;
265 };
266
267 struct dest_each_t {
268 struct region_t region;
269 mp_ptr p;
270 };
271
272 mp_size_t size;
273 mp_size_t size2;
274 unsigned long shift;
275 mp_limb_t carry;
276 mp_limb_t divisor;
277 mp_limb_t multiplier;
278 mp_limb_t multiplier_N[8];
279
280 struct each_t {
281 const char *name;
282 struct dest_each_t d[NUM_DESTS];
283 struct source_each_t s[NUM_SOURCES];
284 mp_limb_t retval;
285 };
286
287 struct each_t ref = { "Ref" };
288 struct each_t fun = { "Fun" };
289
290 #define SRC_SIZE(n) ((n) == 1 && tr->size2 ? size2 : size)
291
292 void validate_fail __GMP_PROTO ((void));
293
294
295 #if HAVE_TRY_NEW_C
296 #include "try-new.c"
297 #endif
298
299
300 typedef mp_limb_t (*tryfun_t) __GMP_PROTO ((ANYARGS));
301
302 struct try_t {
303 char retval;
304
305 char src[2];
306 char dst[2];
307
308 #define SIZE_YES 1
309 #define SIZE_ALLOW_ZERO 2
310 #define SIZE_1 3 /* 1 limb */
311 #define SIZE_2 4 /* 2 limbs */
312 #define SIZE_3 5 /* 3 limbs */
313 #define SIZE_FRACTION 6 /* size2 is fraction for divrem etc */
314 #define SIZE_SIZE2 7
315 #define SIZE_PLUS_1 8
316 #define SIZE_SUM 9
317 #define SIZE_DIFF 10
318 #define SIZE_DIFF_PLUS_1 11
319 #define SIZE_RETVAL 12
320 #define SIZE_CEIL_HALF 13
321 #define SIZE_GET_STR 14
322 #define SIZE_PLUS_MSIZE_SUB_1 15 /* size+msize-1 */
323 char size;
324 char size2;
325 char dst_size[2];
326
327 /* multiplier_N size in limbs */
328 mp_size_t msize;
329
330 char dst_bytes[2];
331
332 char dst0_from_src1;
333
334 #define CARRY_BIT 1 /* single bit 0 or 1 */
335 #define CARRY_3 2 /* 0, 1, 2 */
336 #define CARRY_4 3 /* 0 to 3 */
337 #define CARRY_LIMB 4 /* any limb value */
338 #define CARRY_DIVISOR 5 /* carry<divisor */
339 char carry;
340
341 /* a fudge to tell the output when to print negatives */
342 char carry_sign;
343
344 char multiplier;
345 char shift;
346
347 #define DIVISOR_LIMB 1
348 #define DIVISOR_NORM 2
349 #define DIVISOR_ODD 3
350 char divisor;
351
352 #define DATA_NON_ZERO 1
353 #define DATA_GCD 2
354 #define DATA_SRC0_ODD 3
355 #define DATA_SRC0_HIGHBIT 4
356 #define DATA_SRC1_ODD 5
357 #define DATA_SRC1_HIGHBIT 6
358 #define DATA_MULTIPLE_DIVISOR 7
359 #define DATA_UDIV_QRNND 8
360 char data;
361
362 /* Default is allow full overlap. */
363 #define OVERLAP_NONE 1
364 #define OVERLAP_LOW_TO_HIGH 2
365 #define OVERLAP_HIGH_TO_LOW 3
366 #define OVERLAP_NOT_SRCS 4
367 #define OVERLAP_NOT_SRC2 8
368 char overlap;
369
370 tryfun_t reference;
371 const char *reference_name;
372
373 void (*validate) __GMP_PROTO ((void));
374 const char *validate_name;
375 };
376
377 struct try_t *tr;
378
379
380 void
381 validate_mod_34lsub1 (void)
382 {
383 #define CNST_34LSUB1 ((CNST_LIMB(1) << (3 * (GMP_NUMB_BITS / 4))) - 1)
384
385 mp_srcptr ptr = s[0].p;
386 int error = 0;
387 mp_limb_t got, got_mod, want, want_mod;
388
389 ASSERT (size >= 1);
390
391 got = fun.retval;
392 got_mod = got % CNST_34LSUB1;
393
394 want = refmpn_mod_34lsub1 (ptr, size);
395 want_mod = want % CNST_34LSUB1;
396
397 if (got_mod != want_mod)
398 {
399 gmp_printf ("got 0x%MX reduced from 0x%MX\n", got_mod, got);
400 gmp_printf ("want 0x%MX reduced from 0x%MX\n", want_mod, want);
401 error = 1;
402 }
403
404 if (error)
405 validate_fail ();
406 }
407
408 void
409 validate_divexact_1 (void)
410 {
411 mp_srcptr src = s[0].p;
412 mp_srcptr dst = fun.d[0].p;
413 int error = 0;
414
415 ASSERT (size >= 1);
416
417 {
418 mp_ptr tp = refmpn_malloc_limbs (size);
419 mp_limb_t rem;
420
421 rem = refmpn_divrem_1 (tp, 0, src, size, divisor);
422 if (rem != 0)
423 {
424 gmp_printf ("Remainder a%%d == 0x%MX, mpn_divexact_1 undefined\n", rem);
425 error = 1;
426 }
427 if (! refmpn_equal_anynail (tp, dst, size))
428 {
429 printf ("Quotient a/d wrong\n");
430 mpn_trace ("fun ", dst, size);
431 mpn_trace ("want", tp, size);
432 error = 1;
433 }
434 free (tp);
435 }
436
437 if (error)
438 validate_fail ();
439 }
440
441
442 void
443 validate_modexact_1c_odd (void)
444 {
445 mp_srcptr ptr = s[0].p;
446 mp_limb_t r = fun.retval;
447 int error = 0;
448
449 ASSERT (size >= 1);
450 ASSERT (divisor & 1);
451
452 if ((r & GMP_NAIL_MASK) != 0)
453 printf ("r has non-zero nail\n");
454
455 if (carry < divisor)
456 {
457 if (! (r < divisor))
458 {
459 printf ("Don't have r < divisor\n");
460 error = 1;
461 }
462 }
463 else /* carry >= divisor */
464 {
465 if (! (r <= divisor))
466 {
467 printf ("Don't have r <= divisor\n");
468 error = 1;
469 }
470 }
471
472 {
473 mp_limb_t c = carry % divisor;
474 mp_ptr tp = refmpn_malloc_limbs (size+1);
475 mp_size_t k;
476
477 for (k = size-1; k <= size; k++)
478 {
479 /* set {tp,size+1} to r*b^k + a - c */
480 refmpn_copyi (tp, ptr, size);
481 tp[size] = 0;
482 ASSERT_NOCARRY (refmpn_add_1 (tp+k, tp+k, size+1-k, r));
483 if (refmpn_sub_1 (tp, tp, size+1, c))
484 ASSERT_CARRY (mpn_add_1 (tp, tp, size+1, divisor));
485
486 if (refmpn_mod_1 (tp, size+1, divisor) == 0)
487 goto good_remainder;
488 }
489 printf ("Remainder matches neither r*b^(size-1) nor r*b^size\n");
490 error = 1;
491
492 good_remainder:
493 free (tp);
494 }
495
496 if (error)
497 validate_fail ();
498 }
499
500 void
501 validate_modexact_1_odd (void)
502 {
503 carry = 0;
504 validate_modexact_1c_odd ();
505 }
506
507
508 void
509 validate_sqrtrem (void)
510 {
511 mp_srcptr orig_ptr = s[0].p;
512 mp_size_t orig_size = size;
513 mp_size_t root_size = (size+1)/2;
514 mp_srcptr root_ptr = fun.d[0].p;
515 mp_size_t rem_size = fun.retval;
516 mp_srcptr rem_ptr = fun.d[1].p;
517 mp_size_t prod_size = 2*root_size;
518 mp_ptr p;
519 int error = 0;
520
521 if (rem_size < 0 || rem_size > size)
522 {
523 printf ("Bad remainder size retval %ld\n", (long) rem_size);
524 validate_fail ();
525 }
526
527 p = refmpn_malloc_limbs (prod_size);
528
529 p[root_size] = refmpn_lshift (p, root_ptr, root_size, 1);
530 if (refmpn_cmp_twosizes (p,root_size+1, rem_ptr,rem_size) < 0)
531 {
532 printf ("Remainder bigger than 2*root\n");
533 error = 1;
534 }
535
536 refmpn_sqr (p, root_ptr, root_size);
537 if (rem_size != 0)
538 refmpn_add (p, p, prod_size, rem_ptr, rem_size);
539 if (refmpn_cmp_twosizes (p,prod_size, orig_ptr,orig_size) != 0)
540 {
541 printf ("root^2+rem != original\n");
542 mpn_trace ("prod", p, prod_size);
543 error = 1;
544 }
545 free (p);
546
547 if (error)
548 validate_fail ();
549 }
550
551
/* These types are indexes into the param[] array and are arbitrary so long
   as they're all distinct and within the size of param[].  Renumber
   whenever necessary or desired.  */

#define TYPE_ADD                1
#define TYPE_ADD_N              2
#define TYPE_ADD_NC             3
#define TYPE_SUB                4
#define TYPE_SUB_N              5
#define TYPE_SUB_NC             6

#define TYPE_MUL_1              7
#define TYPE_MUL_1C             8

#define TYPE_MUL_2              9
#define TYPE_MUL_3             92
#define TYPE_MUL_4             93

#define TYPE_ADDMUL_1          10
#define TYPE_ADDMUL_1C         11
#define TYPE_SUBMUL_1          12
#define TYPE_SUBMUL_1C         13

#define TYPE_ADDMUL_2          14
#define TYPE_ADDMUL_3          15
#define TYPE_ADDMUL_4          16
#define TYPE_ADDMUL_5          17
#define TYPE_ADDMUL_6          18
#define TYPE_ADDMUL_7          19
#define TYPE_ADDMUL_8          20

#define TYPE_ADDSUB_N          21
#define TYPE_ADDSUB_NC         22

#define TYPE_RSHIFT            23
#define TYPE_LSHIFT            24
#define TYPE_LSHIFTC           25

#define TYPE_COPY              26
#define TYPE_COPYI             27
#define TYPE_COPYD             28
#define TYPE_COM               29

#define TYPE_ADDLSH1_N         30
#define TYPE_ADDLSH2_N         48
#define TYPE_ADDLSH_N          49
#define TYPE_SUBLSH1_N         31
#define TYPE_SUBLSH_N         130
#define TYPE_RSBLSH1_N         34
#define TYPE_RSBLSH2_N         46
#define TYPE_RSBLSH_N          47
#define TYPE_RSH1ADD_N         32
#define TYPE_RSH1SUB_N         33

#define TYPE_MOD_1             35
#define TYPE_MOD_1C            36
#define TYPE_DIVMOD_1          37
#define TYPE_DIVMOD_1C         38
#define TYPE_DIVREM_1          39
#define TYPE_DIVREM_1C         40
#define TYPE_PREINV_DIVREM_1   41
#define TYPE_PREINV_MOD_1      42
#define TYPE_MOD_34LSUB1       43
#define TYPE_UDIV_QRNND        44
#define TYPE_UDIV_QRNND_R      45

#define TYPE_DIVEXACT_1        50
#define TYPE_DIVEXACT_BY3      51
#define TYPE_DIVEXACT_BY3C     52
#define TYPE_MODEXACT_1_ODD    53
#define TYPE_MODEXACT_1C_ODD   54

#define TYPE_INVERT            55
#define TYPE_BINVERT           56

#define TYPE_GCD               60
#define TYPE_GCD_1             61
#define TYPE_GCD_FINDA         62
#define TYPE_MPZ_JACOBI        63
#define TYPE_MPZ_KRONECKER     64
#define TYPE_MPZ_KRONECKER_UI  65
#define TYPE_MPZ_KRONECKER_SI  66
#define TYPE_MPZ_UI_KRONECKER  67
#define TYPE_MPZ_SI_KRONECKER  68

#define TYPE_AND_N             70
#define TYPE_NAND_N            71
#define TYPE_ANDN_N            72
#define TYPE_IOR_N             73
#define TYPE_IORN_N            74
#define TYPE_NIOR_N            75
#define TYPE_XOR_N             76
#define TYPE_XNOR_N            77

#define TYPE_MUL_MN            80
#define TYPE_MUL_N             81
#define TYPE_SQR               82
#define TYPE_UMUL_PPMM         83
#define TYPE_UMUL_PPMM_R       84
#define TYPE_MULLO_N           85

#define TYPE_SBPI1_DIV_QR      90
#define TYPE_TDIV_QR           91

#define TYPE_SQRTREM          100
#define TYPE_ZERO             101
#define TYPE_GET_STR          102
#define TYPE_POPCOUNT         103
#define TYPE_HAMDIST          104

#define TYPE_EXTRA            110
663
664 struct try_t param[150];
665
666
667 void
668 param_init (void)
669 {
670 struct try_t *p;
671
672 #define COPY(index) memcpy (p, ¶m[index], sizeof (*p))
673
674 #if HAVE_STRINGIZE
675 #define REFERENCE(fun) \
676 p->reference = (tryfun_t) fun; \
677 p->reference_name = #fun
678 #define VALIDATE(fun) \
679 p->validate = fun; \
680 p->validate_name = #fun
681 #else
682 #define REFERENCE(fun) \
683 p->reference = (tryfun_t) fun; \
684 p->reference_name = "fun"
685 #define VALIDATE(fun) \
686 p->validate = fun; \
687 p->validate_name = "fun"
688 #endif
689
690
691 p = ¶m[TYPE_ADD_N];
692 p->retval = 1;
693 p->dst[0] = 1;
694 p->src[0] = 1;
695 p->src[1] = 1;
696 REFERENCE (refmpn_add_n);
697
698 p = ¶m[TYPE_ADD_NC];
699 COPY (TYPE_ADD_N);
700 p->carry = CARRY_BIT;
701 REFERENCE (refmpn_add_nc);
702
703 p = ¶m[TYPE_SUB_N];
704 COPY (TYPE_ADD_N);
705 REFERENCE (refmpn_sub_n);
706
707 p = ¶m[TYPE_SUB_NC];
708 COPY (TYPE_ADD_NC);
709 REFERENCE (refmpn_sub_nc);
710
711 p = ¶m[TYPE_ADD];
712 COPY (TYPE_ADD_N);
713 p->size = SIZE_ALLOW_ZERO;
714 p->size2 = 1;
715 REFERENCE (refmpn_add);
716
717 p = ¶m[TYPE_SUB];
718 COPY (TYPE_ADD);
719 REFERENCE (refmpn_sub);
720
721
722 p = ¶m[TYPE_MUL_1];
723 p->retval = 1;
724 p->dst[0] = 1;
725 p->src[0] = 1;
726 p->multiplier = 1;
727 p->overlap = OVERLAP_LOW_TO_HIGH;
728 REFERENCE (refmpn_mul_1);
729
730 p = ¶m[TYPE_MUL_1C];
731 COPY (TYPE_MUL_1);
732 p->carry = CARRY_LIMB;
733 REFERENCE (refmpn_mul_1c);
734
735
736 p = ¶m[TYPE_MUL_2];
737 p->retval = 1;
738 p->dst[0] = 1;
739 p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1;
740 p->src[0] = 1;
741 p->src[1] = 1;
742 p->msize = 2;
743 p->overlap = OVERLAP_NOT_SRC2;
744 REFERENCE (refmpn_mul_2);
745
746 p = ¶m[TYPE_MUL_3];
747 COPY (TYPE_MUL_2);
748 p->msize = 3;
749 REFERENCE (refmpn_mul_3);
750
751 p = ¶m[TYPE_MUL_4];
752 COPY (TYPE_MUL_2);
753 p->msize = 4;
754 REFERENCE (refmpn_mul_4);
755
756
757 p = ¶m[TYPE_ADDMUL_1];
758 p->retval = 1;
759 p->dst[0] = 1;
760 p->src[0] = 1;
761 p->multiplier = 1;
762 p->dst0_from_src1 = 1;
763 REFERENCE (refmpn_addmul_1);
764
765 p = ¶m[TYPE_ADDMUL_1C];
766 COPY (TYPE_ADDMUL_1);
767 p->carry = CARRY_LIMB;
768 REFERENCE (refmpn_addmul_1c);
769
770 p = ¶m[TYPE_SUBMUL_1];
771 COPY (TYPE_ADDMUL_1);
772 REFERENCE (refmpn_submul_1);
773
774 p = ¶m[TYPE_SUBMUL_1C];
775 COPY (TYPE_ADDMUL_1C);
776 REFERENCE (refmpn_submul_1c);
777
778
779 p = ¶m[TYPE_ADDMUL_2];
780 p->retval = 1;
781 p->dst[0] = 1;
782 p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1;
783 p->src[0] = 1;
784 p->src[1] = 1;
785 p->msize = 2;
786 p->dst0_from_src1 = 1;
787 p->overlap = OVERLAP_NOT_SRC2;
788 REFERENCE (refmpn_addmul_2);
789
790 p = ¶m[TYPE_ADDMUL_3];
791 COPY (TYPE_ADDMUL_2);
792 p->msize = 3;
793 REFERENCE (refmpn_addmul_3);
794
795 p = ¶m[TYPE_ADDMUL_4];
796 COPY (TYPE_ADDMUL_2);
797 p->msize = 4;
798 REFERENCE (refmpn_addmul_4);
799
800 p = ¶m[TYPE_ADDMUL_5];
801 COPY (TYPE_ADDMUL_2);
802 p->msize = 5;
803 REFERENCE (refmpn_addmul_5);
804
805 p = ¶m[TYPE_ADDMUL_6];
806 COPY (TYPE_ADDMUL_2);
807 p->msize = 6;
808 REFERENCE (refmpn_addmul_6);
809
810 p = ¶m[TYPE_ADDMUL_7];
811 COPY (TYPE_ADDMUL_2);
812 p->msize = 7;
813 REFERENCE (refmpn_addmul_7);
814
815 p = ¶m[TYPE_ADDMUL_8];
816 COPY (TYPE_ADDMUL_2);
817 p->msize = 8;
818 REFERENCE (refmpn_addmul_8);
819
820
821 p = ¶m[TYPE_AND_N];
822 p->dst[0] = 1;
823 p->src[0] = 1;
824 p->src[1] = 1;
825 REFERENCE (refmpn_and_n);
826
827 p = ¶m[TYPE_ANDN_N];
828 COPY (TYPE_AND_N);
829 REFERENCE (refmpn_andn_n);
830
831 p = ¶m[TYPE_NAND_N];
832 COPY (TYPE_AND_N);
833 REFERENCE (refmpn_nand_n);
834
835 p = ¶m[TYPE_IOR_N];
836 COPY (TYPE_AND_N);
837 REFERENCE (refmpn_ior_n);
838
839 p = ¶m[TYPE_IORN_N];
840 COPY (TYPE_AND_N);
841 REFERENCE (refmpn_iorn_n);
842
843 p = ¶m[TYPE_NIOR_N];
844 COPY (TYPE_AND_N);
845 REFERENCE (refmpn_nior_n);
846
847 p = ¶m[TYPE_XOR_N];
848 COPY (TYPE_AND_N);
849 REFERENCE (refmpn_xor_n);
850
851 p = ¶m[TYPE_XNOR_N];
852 COPY (TYPE_AND_N);
853 REFERENCE (refmpn_xnor_n);
854
855
856 p = ¶m[TYPE_ADDSUB_N];
857 p->retval = 1;
858 p->dst[0] = 1;
859 p->dst[1] = 1;
860 p->src[0] = 1;
861 p->src[1] = 1;
862 REFERENCE (refmpn_add_n_sub_n);
863
864 p = ¶m[TYPE_ADDSUB_NC];
865 COPY (TYPE_ADDSUB_N);
866 p->carry = CARRY_4;
867 REFERENCE (refmpn_add_n_sub_nc);
868
869
870 p = ¶m[TYPE_COPY];
871 p->dst[0] = 1;
872 p->src[0] = 1;
873 p->overlap = OVERLAP_NONE;
874 p->size = SIZE_ALLOW_ZERO;
875 REFERENCE (refmpn_copy);
876
877 p = ¶m[TYPE_COPYI];
878 p->dst[0] = 1;
879 p->src[0] = 1;
880 p->overlap = OVERLAP_LOW_TO_HIGH;
881 p->size = SIZE_ALLOW_ZERO;
882 REFERENCE (refmpn_copyi);
883
884 p = ¶m[TYPE_COPYD];
885 p->dst[0] = 1;
886 p->src[0] = 1;
887 p->overlap = OVERLAP_HIGH_TO_LOW;
888 p->size = SIZE_ALLOW_ZERO;
889 REFERENCE (refmpn_copyd);
890
891 p = ¶m[TYPE_COM];
892 p->dst[0] = 1;
893 p->src[0] = 1;
894 REFERENCE (refmpn_com);
895
896
897 p = ¶m[TYPE_ADDLSH1_N];
898 COPY (TYPE_ADD_N);
899 REFERENCE (refmpn_addlsh1_n);
900
901 p = ¶m[TYPE_ADDLSH2_N];
902 COPY (TYPE_ADD_N);
903 REFERENCE (refmpn_addlsh2_n);
904
905 p = ¶m[TYPE_ADDLSH_N];
906 COPY (TYPE_ADD_N);
907 p->shift = 1;
908 REFERENCE (refmpn_addlsh_n);
909
910 p = ¶m[TYPE_SUBLSH1_N];
911 COPY (TYPE_ADD_N);
912 REFERENCE (refmpn_sublsh1_n);
913
914 p = ¶m[TYPE_SUBLSH_N];
915 COPY (TYPE_ADDLSH_N);
916 REFERENCE (refmpn_sublsh_n);
917
918 p = ¶m[TYPE_RSBLSH1_N];
919 COPY (TYPE_ADD_N);
920 REFERENCE (refmpn_rsblsh1_n);
921
922 p = ¶m[TYPE_RSBLSH2_N];
923 COPY (TYPE_ADD_N);
924 REFERENCE (refmpn_rsblsh2_n);
925
926 p = ¶m[TYPE_RSBLSH_N];
927 COPY (TYPE_ADDLSH_N);
928 REFERENCE (refmpn_rsblsh_n);
929
930 p = ¶m[TYPE_RSH1ADD_N];
931 COPY (TYPE_ADD_N);
932 REFERENCE (refmpn_rsh1add_n);
933
934 p = ¶m[TYPE_RSH1SUB_N];
935 COPY (TYPE_ADD_N);
936 REFERENCE (refmpn_rsh1sub_n);
937
938
939 p = ¶m[TYPE_MOD_1];
940 p->retval = 1;
941 p->src[0] = 1;
942 p->size = SIZE_ALLOW_ZERO;
943 p->divisor = DIVISOR_LIMB;
944 REFERENCE (refmpn_mod_1);
945
946 p = ¶m[TYPE_MOD_1C];
947 COPY (TYPE_MOD_1);
948 p->carry = CARRY_DIVISOR;
949 REFERENCE (refmpn_mod_1c);
950
951 p = ¶m[TYPE_DIVMOD_1];
952 COPY (TYPE_MOD_1);
953 p->dst[0] = 1;
954 REFERENCE (refmpn_divmod_1);
955
956 p = ¶m[TYPE_DIVMOD_1C];
957 COPY (TYPE_DIVMOD_1);
958 p->carry = CARRY_DIVISOR;
959 REFERENCE (refmpn_divmod_1c);
960
961 p = ¶m[TYPE_DIVREM_1];
962 COPY (TYPE_DIVMOD_1);
963 p->size2 = SIZE_FRACTION;
964 p->dst_size[0] = SIZE_SUM;
965 REFERENCE (refmpn_divrem_1);
966
967 p = ¶m[TYPE_DIVREM_1C];
968 COPY (TYPE_DIVREM_1);
969 p->carry = CARRY_DIVISOR;
970 REFERENCE (refmpn_divrem_1c);
971
972 p = ¶m[TYPE_PREINV_DIVREM_1];
973 COPY (TYPE_DIVREM_1);
974 p->size = SIZE_YES; /* ie. no size==0 */
975 REFERENCE (refmpn_preinv_divrem_1);
976
977 p = ¶m[TYPE_PREINV_MOD_1];
978 p->retval = 1;
979 p->src[0] = 1;
980 p->divisor = DIVISOR_NORM;
981 REFERENCE (refmpn_preinv_mod_1);
982
983 p = ¶m[TYPE_MOD_34LSUB1];
984 p->retval = 1;
985 p->src[0] = 1;
986 VALIDATE (validate_mod_34lsub1);
987
988 p = ¶m[TYPE_UDIV_QRNND];
989 p->retval = 1;
990 p->src[0] = 1;
991 p->dst[0] = 1;
992 p->dst_size[0] = SIZE_1;
993 p->divisor = UDIV_NEEDS_NORMALIZATION ? DIVISOR_NORM : DIVISOR_LIMB;
994 p->data = DATA_UDIV_QRNND;
995 p->overlap = OVERLAP_NONE;
996 REFERENCE (refmpn_udiv_qrnnd);
997
998 p = ¶m[TYPE_UDIV_QRNND_R];
999 COPY (TYPE_UDIV_QRNND);
1000 REFERENCE (refmpn_udiv_qrnnd_r);
1001
1002
1003 p = ¶m[TYPE_DIVEXACT_1];
1004 p->dst[0] = 1;
1005 p->src[0] = 1;
1006 p->divisor = DIVISOR_LIMB;
1007 p->data = DATA_MULTIPLE_DIVISOR;
1008 VALIDATE (validate_divexact_1);
1009 REFERENCE (refmpn_divmod_1);
1010
1011
1012 p = ¶m[TYPE_DIVEXACT_BY3];
1013 p->retval = 1;
1014 p->dst[0] = 1;
1015 p->src[0] = 1;
1016 REFERENCE (refmpn_divexact_by3);
1017
1018 p = ¶m[TYPE_DIVEXACT_BY3C];
1019 COPY (TYPE_DIVEXACT_BY3);
1020 p->carry = CARRY_3;
1021 REFERENCE (refmpn_divexact_by3c);
1022
1023
1024 p = ¶m[TYPE_MODEXACT_1_ODD];
1025 p->retval = 1;
1026 p->src[0] = 1;
1027 p->divisor = DIVISOR_ODD;
1028 VALIDATE (validate_modexact_1_odd);
1029
1030 p = ¶m[TYPE_MODEXACT_1C_ODD];
1031 COPY (TYPE_MODEXACT_1_ODD);
1032 p->carry = CARRY_LIMB;
1033 VALIDATE (validate_modexact_1c_odd);
1034
1035
1036 p = ¶m[TYPE_GCD_1];
1037 p->retval = 1;
1038 p->src[0] = 1;
1039 p->data = DATA_NON_ZERO;
1040 p->divisor = DIVISOR_LIMB;
1041 REFERENCE (refmpn_gcd_1);
1042
1043 p = ¶m[TYPE_GCD];
1044 p->retval = 1;
1045 p->dst[0] = 1;
1046 p->src[0] = 1;
1047 p->src[1] = 1;
1048 p->size2 = 1;
1049 p->dst_size[0] = SIZE_RETVAL;
1050 p->overlap = OVERLAP_NOT_SRCS;
1051 p->data = DATA_GCD;
1052 REFERENCE (refmpn_gcd);
1053
1054
1055 p = ¶m[TYPE_MPZ_JACOBI];
1056 p->retval = 1;
1057 p->src[0] = 1;
1058 p->size = SIZE_ALLOW_ZERO;
1059 p->src[1] = 1;
1060 p->data = DATA_SRC1_ODD;
1061 p->size2 = 1;
1062 p->carry = CARRY_4;
1063 p->carry_sign = 1;
1064 REFERENCE (refmpz_jacobi);
1065
1066 p = ¶m[TYPE_MPZ_KRONECKER];
1067 COPY (TYPE_MPZ_JACOBI);
1068 p->data = 0; /* clear inherited DATA_SRC1_ODD */
1069 REFERENCE (refmpz_kronecker);
1070
1071
1072 p = ¶m[TYPE_MPZ_KRONECKER_UI];
1073 p->retval = 1;
1074 p->src[0] = 1;
1075 p->size = SIZE_ALLOW_ZERO;
1076 p->multiplier = 1;
1077 p->carry = CARRY_BIT;
1078 REFERENCE (refmpz_kronecker_ui);
1079
1080 p = ¶m[TYPE_MPZ_KRONECKER_SI];
1081 COPY (TYPE_MPZ_KRONECKER_UI);
1082 REFERENCE (refmpz_kronecker_si);
1083
1084 p = ¶m[TYPE_MPZ_UI_KRONECKER];
1085 COPY (TYPE_MPZ_KRONECKER_UI);
1086 REFERENCE (refmpz_ui_kronecker);
1087
1088 p = ¶m[TYPE_MPZ_SI_KRONECKER];
1089 COPY (TYPE_MPZ_KRONECKER_UI);
1090 REFERENCE (refmpz_si_kronecker);
1091
1092
1093 p = ¶m[TYPE_SQR];
1094 p->dst[0] = 1;
1095 p->src[0] = 1;
1096 p->dst_size[0] = SIZE_SUM;
1097 p->overlap = OVERLAP_NONE;
1098 REFERENCE (refmpn_sqr);
1099
1100 p = ¶m[TYPE_MUL_N];
1101 COPY (TYPE_SQR);
1102 p->src[1] = 1;
1103 REFERENCE (refmpn_mul_n);
1104
1105 p = ¶m[TYPE_MULLO_N];
1106 COPY (TYPE_MUL_N);
1107 p->dst_size[0] = 0;
1108 REFERENCE (refmpn_mullo_n);
1109
1110 p = ¶m[TYPE_MUL_MN];
1111 COPY (TYPE_MUL_N);
1112 p->size2 = 1;
1113 REFERENCE (refmpn_mul_basecase);
1114
1115 p = ¶m[TYPE_UMUL_PPMM];
1116 p->retval = 1;
1117 p->src[0] = 1;
1118 p->dst[0] = 1;
1119 p->dst_size[0] = SIZE_1;
1120 p->overlap = OVERLAP_NONE;
1121 REFERENCE (refmpn_umul_ppmm);
1122
1123 p = ¶m[TYPE_UMUL_PPMM_R];
1124 COPY (TYPE_UMUL_PPMM);
1125 REFERENCE (refmpn_umul_ppmm_r);
1126
1127
1128 p = ¶m[TYPE_RSHIFT];
1129 p->retval = 1;
1130 p->dst[0] = 1;
1131 p->src[0] = 1;
1132 p->shift = 1;
1133 p->overlap = OVERLAP_LOW_TO_HIGH;
1134 REFERENCE (refmpn_rshift);
1135
1136 p = ¶m[TYPE_LSHIFT];
1137 COPY (TYPE_RSHIFT);
1138 p->overlap = OVERLAP_HIGH_TO_LOW;
1139 REFERENCE (refmpn_lshift);
1140
1141 p = ¶m[TYPE_LSHIFTC];
1142 COPY (TYPE_RSHIFT);
1143 p->overlap = OVERLAP_HIGH_TO_LOW;
1144 REFERENCE (refmpn_lshiftc);
1145
1146
1147 p = ¶m[TYPE_POPCOUNT];
1148 p->retval = 1;
1149 p->src[0] = 1;
1150 REFERENCE (refmpn_popcount);
1151
1152 p = ¶m[TYPE_HAMDIST];
1153 COPY (TYPE_POPCOUNT);
1154 p->src[1] = 1;
1155 REFERENCE (refmpn_hamdist);
1156
1157
1158 p = ¶m[TYPE_SBPI1_DIV_QR];
1159 p->retval = 1;
1160 p->dst[0] = 1;
1161 p->dst[1] = 1;
1162 p->src[0] = 1;
1163 p->src[1] = 1;
1164 p->data = DATA_SRC1_HIGHBIT;
1165 p->size2 = 1;
1166 p->dst_size[0] = SIZE_DIFF;
1167 p->overlap = OVERLAP_NONE;
1168 REFERENCE (refmpn_sb_div_qr);
1169
1170 p = ¶m[TYPE_TDIV_QR];
1171 p->dst[0] = 1;
1172 p->dst[1] = 1;
1173 p->src[0] = 1;
1174 p->src[1] = 1;
1175 p->size2 = 1;
1176 p->dst_size[0] = SIZE_DIFF_PLUS_1;
1177 p->dst_size[1] = SIZE_SIZE2;
1178 p->overlap = OVERLAP_NONE;
1179 REFERENCE (refmpn_tdiv_qr);
1180
1181 p = ¶m[TYPE_SQRTREM];
1182 p->retval = 1;
1183 p->dst[0] = 1;
1184 p->dst[1] = 1;
1185 p->src[0] = 1;
1186 p->dst_size[0] = SIZE_CEIL_HALF;
1187 p->dst_size[1] = SIZE_RETVAL;
1188 p->overlap = OVERLAP_NONE;
1189 VALIDATE (validate_sqrtrem);
1190 REFERENCE (refmpn_sqrtrem);
1191
1192 p = ¶m[TYPE_ZERO];
1193 p->dst[0] = 1;
1194 p->size = SIZE_ALLOW_ZERO;
1195 REFERENCE (refmpn_zero);
1196
1197 p = ¶m[TYPE_GET_STR];
1198 p->retval = 1;
1199 p->src[0] = 1;
1200 p->size = SIZE_ALLOW_ZERO;
1201 p->dst[0] = 1;
1202 p->dst[1] = 1;
1203 p->dst_size[0] = SIZE_GET_STR;
1204 p->dst_bytes[0] = 1;
1205 p->overlap = OVERLAP_NONE;
1206 REFERENCE (refmpn_get_str);
1207
1208 p = ¶m[TYPE_BINVERT];
1209 p->dst[0] = 1;
1210 p->src[0] = 1;
1211 p->data = DATA_SRC0_ODD;
1212 p->overlap = OVERLAP_NONE;
1213 REFERENCE (refmpn_binvert);
1214
1215 p = ¶m[TYPE_INVERT];
1216 p->dst[0] = 1;
1217 p->src[0] = 1;
1218 p->data = DATA_SRC0_HIGHBIT;
1219 p->overlap = OVERLAP_NONE;
1220 REFERENCE (refmpn_invert);
1221
1222 #ifdef EXTRA_PARAM_INIT
1223 EXTRA_PARAM_INIT
1224 #endif
1225 }
1226
1227
1228 /* The following are macros if there's no native versions, so wrap them in
1229 functions that can be in try_array[]. */
1230
1231 void
1232 MPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1233 { MPN_COPY (rp, sp, size); }
1234
1235 void
1236 MPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1237 { MPN_COPY_INCR (rp, sp, size); }
1238
1239 void
1240 MPN_COPY_DECR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1241 { MPN_COPY_DECR (rp, sp, size); }
1242
1243 void
1244 __GMPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1245 { __GMPN_COPY (rp, sp, size); }
1246
#if defined (__GMPN_COPY_INCR)
/* __GMPN_COPY_INCR as a function, provided only when that macro exists. */
void
__GMPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  __GMPN_COPY_INCR (rp, sp, size);
}
#endif
1252
1253 void
1254 mpn_com_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1255 { mpn_com (rp, sp, size); }
1256
1257 void
1258 mpn_and_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1259 { mpn_and_n (rp, s1, s2, size); }
1260
1261 void
1262 mpn_andn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1263 { mpn_andn_n (rp, s1, s2, size); }
1264
1265 void
1266 mpn_nand_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1267 { mpn_nand_n (rp, s1, s2, size); }
1268
1269 void
1270 mpn_ior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1271 { mpn_ior_n (rp, s1, s2, size); }
1272
1273 void
1274 mpn_iorn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1275 { mpn_iorn_n (rp, s1, s2, size); }
1276
1277 void
1278 mpn_nior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1279 { mpn_nior_n (rp, s1, s2, size); }
1280
1281 void
1282 mpn_xor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1283 { mpn_xor_n (rp, s1, s2, size); }
1284
1285 void
1286 mpn_xnor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1287 { mpn_xnor_n (rp, s1, s2, size); }
1288
1289 mp_limb_t
1290 udiv_qrnnd_fun (mp_limb_t *remptr, mp_limb_t n1, mp_limb_t n0, mp_limb_t d)
1291 {
1292 mp_limb_t q;
1293 udiv_qrnnd (q, *remptr, n1, n0, d);
1294 return q;
1295 }
1296
1297 mp_limb_t
1298 mpn_divexact_by3_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1299 {
1300 return mpn_divexact_by3 (rp, sp, size);
1301 }
1302
1303 mp_limb_t
1304 mpn_modexact_1_odd_fun (mp_srcptr ptr, mp_size_t size, mp_limb_t divisor)
1305 {
1306 return mpn_modexact_1_odd (ptr, size, divisor);
1307 }
1308
1309 void
1310 mpn_toom22_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1311 {
1312 mp_ptr tspace;
1313 TMP_DECL;
1314 TMP_MARK;
1315 tspace = TMP_ALLOC_LIMBS (mpn_toom22_mul_itch (size, size));
1316 mpn_toom22_mul (dst, src1, size, src2, size, tspace);
1317 TMP_FREE;
1318 }
1319 void
1320 mpn_toom2_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1321 {
1322 mp_ptr tspace;
1323 TMP_DECL;
1324 TMP_MARK;
1325 tspace = TMP_ALLOC_LIMBS (mpn_toom2_sqr_itch (size));
1326 mpn_toom2_sqr (dst, src, size, tspace);
1327 TMP_FREE;
1328 }
1329 void
1330 mpn_toom33_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1331 {
1332 mp_ptr tspace;
1333 TMP_DECL;
1334 TMP_MARK;
1335 tspace = TMP_ALLOC_LIMBS (mpn_toom33_mul_itch (size, size));
1336 mpn_toom33_mul (dst, src1, size, src2, size, tspace);
1337 TMP_FREE;
1338 }
1339 void
1340 mpn_toom3_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1341 {
1342 mp_ptr tspace;
1343 TMP_DECL;
1344 TMP_MARK;
1345 tspace = TMP_ALLOC_LIMBS (mpn_toom3_sqr_itch (size));
1346 mpn_toom3_sqr (dst, src, size, tspace);
1347 TMP_FREE;
1348 }
1349 void
1350 mpn_toom44_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1351 {
1352 mp_ptr tspace;
1353 TMP_DECL;
1354 TMP_MARK;
1355 tspace = TMP_ALLOC_LIMBS (mpn_toom44_mul_itch (size, size));
1356 mpn_toom44_mul (dst, src1, size, src2, size, tspace);
1357 TMP_FREE;
1358 }
1359 void
1360 mpn_toom4_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1361 {
1362 mp_ptr tspace;
1363 TMP_DECL;
1364 TMP_MARK;
1365 tspace = TMP_ALLOC_LIMBS (mpn_toom4_sqr_itch (size));
1366 mpn_toom4_sqr (dst, src, size, tspace);
1367 TMP_FREE;
1368 }
1369
1370 mp_limb_t
1371 umul_ppmm_fun (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2)
1372 {
1373 mp_limb_t high;
1374 umul_ppmm (high, *lowptr, m1, m2);
1375 return high;
1376 }
1377
1378 void
1379 MPN_ZERO_fun (mp_ptr ptr, mp_size_t size)
1380 { MPN_ZERO (ptr, size); }
1381
1382
1383 struct choice_t {
1384 const char *name;
1385 tryfun_t function;
1386 int type;
1387 mp_size_t minsize;
1388 };
1389
1390 #if HAVE_STRINGIZE
1391 #define TRY(fun) #fun, (tryfun_t) fun
1392 #define TRY_FUNFUN(fun) #fun, (tryfun_t) fun##_fun
1393 #else
1394 #define TRY(fun) "fun", (tryfun_t) fun
1395 #define TRY_FUNFUN(fun) "fun", (tryfun_t) fun/**/_fun
1396 #endif
1397
1398 const struct choice_t choice_array[] = {
1399 { TRY(mpn_add), TYPE_ADD },
1400 { TRY(mpn_sub), TYPE_SUB },
1401
1402 { TRY(mpn_add_n), TYPE_ADD_N },
1403 { TRY(mpn_sub_n), TYPE_SUB_N },
1404
1405 #if HAVE_NATIVE_mpn_add_nc
1406 { TRY(mpn_add_nc), TYPE_ADD_NC },
1407 #endif
1408 #if HAVE_NATIVE_mpn_sub_nc
1409 { TRY(mpn_sub_nc), TYPE_SUB_NC },
1410 #endif
1411
1412 #if HAVE_NATIVE_mpn_add_n_sub_n
1413 { TRY(mpn_add_n_sub_n), TYPE_ADDSUB_N },
1414 #endif
1415 #if HAVE_NATIVE_mpn_add_n_sub_nc
1416 { TRY(mpn_add_n_sub_nc), TYPE_ADDSUB_NC },
1417 #endif
1418
1419 { TRY(mpn_addmul_1), TYPE_ADDMUL_1 },
1420 { TRY(mpn_submul_1), TYPE_SUBMUL_1 },
1421 #if HAVE_NATIVE_mpn_addmul_1c
1422 { TRY(mpn_addmul_1c), TYPE_ADDMUL_1C },
1423 #endif
1424 #if HAVE_NATIVE_mpn_submul_1c
1425 { TRY(mpn_submul_1c), TYPE_SUBMUL_1C },
1426 #endif
1427
1428 #if HAVE_NATIVE_mpn_addmul_2
1429 { TRY(mpn_addmul_2), TYPE_ADDMUL_2, 2 },
1430 #endif
1431 #if HAVE_NATIVE_mpn_addmul_3
1432 { TRY(mpn_addmul_3), TYPE_ADDMUL_3, 3 },
1433 #endif
1434 #if HAVE_NATIVE_mpn_addmul_4
1435 { TRY(mpn_addmul_4), TYPE_ADDMUL_4, 4 },
1436 #endif
1437 #if HAVE_NATIVE_mpn_addmul_5
1438 { TRY(mpn_addmul_5), TYPE_ADDMUL_5, 5 },
1439 #endif
1440 #if HAVE_NATIVE_mpn_addmul_6
1441 { TRY(mpn_addmul_6), TYPE_ADDMUL_6, 6 },
1442 #endif
1443 #if HAVE_NATIVE_mpn_addmul_7
1444 { TRY(mpn_addmul_7), TYPE_ADDMUL_7, 7 },
1445 #endif
1446 #if HAVE_NATIVE_mpn_addmul_8
1447 { TRY(mpn_addmul_8), TYPE_ADDMUL_8, 8 },
1448 #endif
1449
1450 { TRY_FUNFUN(mpn_com), TYPE_COM },
1451
1452 { TRY_FUNFUN(MPN_COPY), TYPE_COPY },
1453 { TRY_FUNFUN(MPN_COPY_INCR), TYPE_COPYI },
1454 { TRY_FUNFUN(MPN_COPY_DECR), TYPE_COPYD },
1455
1456 { TRY_FUNFUN(__GMPN_COPY), TYPE_COPY },
1457 #ifdef __GMPN_COPY_INCR
1458 { TRY_FUNFUN(__GMPN_COPY_INCR), TYPE_COPYI },
1459 #endif
1460
1461 #if HAVE_NATIVE_mpn_copyi
1462 { TRY(mpn_copyi), TYPE_COPYI },
1463 #endif
1464 #if HAVE_NATIVE_mpn_copyd
1465 { TRY(mpn_copyd), TYPE_COPYD },
1466 #endif
1467
1468 #if HAVE_NATIVE_mpn_addlsh1_n
1469 { TRY(mpn_addlsh1_n), TYPE_ADDLSH1_N },
1470 #endif
1471 #if HAVE_NATIVE_mpn_addlsh2_n
1472 { TRY(mpn_addlsh2_n), TYPE_ADDLSH2_N },
1473 #endif
1474 #if HAVE_NATIVE_mpn_addlsh_n
1475 { TRY(mpn_addlsh_n), TYPE_ADDLSH_N },
1476 #endif
1477 #if HAVE_NATIVE_mpn_sublsh1_n
1478 { TRY(mpn_sublsh1_n), TYPE_SUBLSH1_N },
1479 #endif
1480 #if HAVE_NATIVE_mpn_sublsh_n
1481 { TRY(mpn_sublsh_n), TYPE_SUBLSH_N },
1482 #endif
1483 #if HAVE_NATIVE_mpn_rsblsh1_n
1484 { TRY(mpn_rsblsh1_n), TYPE_RSBLSH1_N },
1485 #endif
1486 #if HAVE_NATIVE_mpn_rsblsh2_n
1487 { TRY(mpn_rsblsh2_n), TYPE_RSBLSH2_N },
1488 #endif
1489 #if HAVE_NATIVE_mpn_rsblsh_n
1490 { TRY(mpn_rsblsh_n), TYPE_RSBLSH_N },
1491 #endif
1492 #if HAVE_NATIVE_mpn_rsh1add_n
1493 { TRY(mpn_rsh1add_n), TYPE_RSH1ADD_N },
1494 #endif
1495 #if HAVE_NATIVE_mpn_rsh1sub_n
1496 { TRY(mpn_rsh1sub_n), TYPE_RSH1SUB_N },
1497 #endif
1498
1499 { TRY_FUNFUN(mpn_and_n), TYPE_AND_N },
1500 { TRY_FUNFUN(mpn_andn_n), TYPE_ANDN_N },
1501 { TRY_FUNFUN(mpn_nand_n), TYPE_NAND_N },
1502 { TRY_FUNFUN(mpn_ior_n), TYPE_IOR_N },
1503 { TRY_FUNFUN(mpn_iorn_n), TYPE_IORN_N },
1504 { TRY_FUNFUN(mpn_nior_n), TYPE_NIOR_N },
1505 { TRY_FUNFUN(mpn_xor_n), TYPE_XOR_N },
1506 { TRY_FUNFUN(mpn_xnor_n), TYPE_XNOR_N },
1507
1508 { TRY(mpn_divrem_1), TYPE_DIVREM_1 },
1509 #if USE_PREINV_DIVREM_1
1510 { TRY(mpn_preinv_divrem_1), TYPE_PREINV_DIVREM_1 },
1511 #endif
1512 { TRY(mpn_mod_1), TYPE_MOD_1 },
1513 #if USE_PREINV_MOD_1
1514 { TRY(mpn_preinv_mod_1), TYPE_PREINV_MOD_1 },
1515 #endif
1516 #if HAVE_NATIVE_mpn_divrem_1c
1517 { TRY(mpn_divrem_1c), TYPE_DIVREM_1C },
1518 #endif
1519 #if HAVE_NATIVE_mpn_mod_1c
1520 { TRY(mpn_mod_1c), TYPE_MOD_1C },
1521 #endif
1522 #if GMP_NUMB_BITS % 4 == 0
1523 { TRY(mpn_mod_34lsub1), TYPE_MOD_34LSUB1 },
1524 #endif
1525
1526 { TRY_FUNFUN(udiv_qrnnd), TYPE_UDIV_QRNND, 2 },
1527 #if HAVE_NATIVE_mpn_udiv_qrnnd
1528 { TRY(mpn_udiv_qrnnd), TYPE_UDIV_QRNND, 2 },
1529 #endif
1530 #if HAVE_NATIVE_mpn_udiv_qrnnd_r
1531 { TRY(mpn_udiv_qrnnd_r), TYPE_UDIV_QRNND_R, 2 },
1532 #endif
1533
1534 { TRY(mpn_divexact_1), TYPE_DIVEXACT_1 },
1535 { TRY_FUNFUN(mpn_divexact_by3), TYPE_DIVEXACT_BY3 },
1536 { TRY(mpn_divexact_by3c), TYPE_DIVEXACT_BY3C },
1537
1538 { TRY_FUNFUN(mpn_modexact_1_odd), TYPE_MODEXACT_1_ODD },
1539 { TRY(mpn_modexact_1c_odd), TYPE_MODEXACT_1C_ODD },
1540
1541
1542 { TRY(mpn_sbpi1_div_qr), TYPE_SBPI1_DIV_QR, 3},
1543 { TRY(mpn_tdiv_qr), TYPE_TDIV_QR },
1544
1545 { TRY(mpn_mul_1), TYPE_MUL_1 },
1546 #if HAVE_NATIVE_mpn_mul_1c
1547 { TRY(mpn_mul_1c), TYPE_MUL_1C },
1548 #endif
1549 #if HAVE_NATIVE_mpn_mul_2
1550 { TRY(mpn_mul_2), TYPE_MUL_2, 2 },
1551 #endif
1552 #if HAVE_NATIVE_mpn_mul_3
1553 { TRY(mpn_mul_3), TYPE_MUL_3, 3 },
1554 #endif
1555 #if HAVE_NATIVE_mpn_mul_4
1556 { TRY(mpn_mul_4), TYPE_MUL_4, 4 },
1557 #endif
1558
1559 { TRY(mpn_rshift), TYPE_RSHIFT },
1560 { TRY(mpn_lshift), TYPE_LSHIFT },
1561 { TRY(mpn_lshiftc), TYPE_LSHIFTC },
1562
1563
1564 { TRY(mpn_mul_basecase), TYPE_MUL_MN },
1565 { TRY(mpn_mullo_basecase), TYPE_MULLO_N },
1566 #if SQR_TOOM2_THRESHOLD > 0
1567 { TRY(mpn_sqr_basecase), TYPE_SQR },
1568 #endif
1569
1570 { TRY(mpn_mul), TYPE_MUL_MN },
1571 { TRY(mpn_mul_n), TYPE_MUL_N },
1572 { TRY(mpn_sqr), TYPE_SQR },
1573
1574 { TRY_FUNFUN(umul_ppmm), TYPE_UMUL_PPMM, 2 },
1575 #if HAVE_NATIVE_mpn_umul_ppmm
1576 { TRY(mpn_umul_ppmm), TYPE_UMUL_PPMM, 2 },
1577 #endif
1578 #if HAVE_NATIVE_mpn_umul_ppmm_r
1579 { TRY(mpn_umul_ppmm_r), TYPE_UMUL_PPMM_R, 2 },
1580 #endif
1581
1582 { TRY_FUNFUN(mpn_toom22_mul), TYPE_MUL_N, MPN_TOOM22_MUL_MINSIZE },
1583 { TRY_FUNFUN(mpn_toom2_sqr), TYPE_SQR, MPN_TOOM2_SQR_MINSIZE },
1584 { TRY_FUNFUN(mpn_toom33_mul), TYPE_MUL_N, MPN_TOOM33_MUL_MINSIZE },
1585 { TRY_FUNFUN(mpn_toom3_sqr), TYPE_SQR, MPN_TOOM3_SQR_MINSIZE },
1586 { TRY_FUNFUN(mpn_toom44_mul), TYPE_MUL_N, MPN_TOOM44_MUL_MINSIZE },
1587 { TRY_FUNFUN(mpn_toom4_sqr), TYPE_SQR, MPN_TOOM4_SQR_MINSIZE },
1588
1589 { TRY(mpn_gcd_1), TYPE_GCD_1 },
1590 { TRY(mpn_gcd), TYPE_GCD },
1591 { TRY(mpz_jacobi), TYPE_MPZ_JACOBI },
1592 { TRY(mpz_kronecker_ui), TYPE_MPZ_KRONECKER_UI },
1593 { TRY(mpz_kronecker_si), TYPE_MPZ_KRONECKER_SI },
1594 { TRY(mpz_ui_kronecker), TYPE_MPZ_UI_KRONECKER },
1595 { TRY(mpz_si_kronecker), TYPE_MPZ_SI_KRONECKER },
1596
1597 { TRY(mpn_popcount), TYPE_POPCOUNT },
1598 { TRY(mpn_hamdist), TYPE_HAMDIST },
1599
1600 { TRY(mpn_sqrtrem), TYPE_SQRTREM },
1601
1602 { TRY_FUNFUN(MPN_ZERO), TYPE_ZERO },
1603
1604 { TRY(mpn_get_str), TYPE_GET_STR },
1605
1606 { TRY(mpn_binvert), TYPE_BINVERT },
1607 { TRY(mpn_invert), TYPE_INVERT },
1608
1609 #ifdef EXTRA_ROUTINES
1610 EXTRA_ROUTINES
1611 #endif
1612 };
1613
1614 const struct choice_t *choice = NULL;
1615
1616
1617 void
1618 mprotect_maybe (void *addr, size_t len, int prot)
1619 {
1620 if (!option_redzones)
1621 return;
1622
1623 #if HAVE_MPROTECT
1624 if (mprotect (addr, len, prot) != 0)
1625 {
1626 fprintf (stderr, "Cannot mprotect %p 0x%X 0x%X: %s\n",
1627 addr, (unsigned) len, prot, strerror (errno));
1628 exit (1);
1629 }
1630 #else
1631 {
1632 static int warned = 0;
1633 if (!warned)
1634 {
1635 fprintf (stderr,
1636 "mprotect not available, bounds testing not performed\n");
1637 warned = 1;
1638 }
1639 }
1640 #endif
1641 }
1642
/* Round "a" up to a multiple of "m".  A value that is already a multiple
   (including 0) is returned unchanged.  "m" must be non-zero.

   The remainder is kept in a size_t so it cannot be truncated on systems
   where unsigned long is narrower than size_t.  */
size_t
round_up_multiple (size_t a, size_t m)
{
  size_t  rem = a % m;

  return rem == 0 ? a : a + (m - rem);
}
1655
1656
1657 /* On some systems it seems that only an mmap'ed region can be mprotect'ed,
1658 for instance HP-UX 10.
1659
1660 mmap will almost certainly return a pointer already aligned to a page
1661 boundary, but it's easy enough to share the alignment handling with the
1662 malloc case. */
1663
1664 void
1665 malloc_region (struct region_t *r, mp_size_t n)
1666 {
1667 mp_ptr p;
1668 size_t nbytes;
1669
1670 ASSERT ((pagesize % BYTES_PER_MP_LIMB) == 0);
1671
1672 n = round_up_multiple (n, PAGESIZE_LIMBS);
1673 r->size = n;
1674
1675 nbytes = n*BYTES_PER_MP_LIMB + 2*REDZONE_BYTES + pagesize;
1676
1677 #if defined (MAP_ANONYMOUS) && ! defined (MAP_ANON)
1678 #define MAP_ANON MAP_ANONYMOUS
1679 #endif
1680
1681 #if HAVE_MMAP && defined (MAP_ANON)
1682 /* note must pass fd=-1 for MAP_ANON on BSD */
1683 p = mmap (NULL, nbytes, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
1684 if (p == (void *) -1)
1685 {
1686 fprintf (stderr, "Cannot mmap %#x anon bytes: %s\n",
1687 (unsigned) nbytes, strerror (errno));
1688 exit (1);
1689 }
1690 #else
1691 p = (mp_ptr) malloc (nbytes);
1692 ASSERT_ALWAYS (p != NULL);
1693 #endif
1694
1695 p = align_pointer (p, pagesize);
1696
1697 mprotect_maybe (p, REDZONE_BYTES, PROT_NONE);
1698 p += REDZONE_LIMBS;
1699 r->ptr = p;
1700
1701 mprotect_maybe (p + n, REDZONE_BYTES, PROT_NONE);
1702 }
1703
1704 void
1705 mprotect_region (const struct region_t *r, int prot)
1706 {
1707 mprotect_maybe (r->ptr, r->size, prot);
1708 }
1709
1710
1711 /* First four entries must be 0,1,2,3 for the benefit of CARRY_BIT, CARRY_3,
1712 and CARRY_4 */
1713 mp_limb_t carry_array[] = {
1714 0, 1, 2, 3,
1715 4,
1716 CNST_LIMB(1) << 8,
1717 CNST_LIMB(1) << 16,
1718 GMP_NUMB_MAX
1719 };
1720 int carry_index;
1721
1722 #define CARRY_COUNT \
1723 ((tr->carry == CARRY_BIT) ? 2 \
1724 : tr->carry == CARRY_3 ? 3 \
1725 : tr->carry == CARRY_4 ? 4 \
1726 : (tr->carry == CARRY_LIMB || tr->carry == CARRY_DIVISOR) \
1727 ? numberof(carry_array) + CARRY_RANDOMS \
1728 : 1)
1729
1730 #define MPN_RANDOM_ALT(index,dst,size) \
1731 (((index) & 1) ? refmpn_random (dst, size) : refmpn_random2 (dst, size))
1732
1733 /* The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
1734 the same type */
1735 #define CARRY_ITERATION \
1736 for (carry_index = 0; \
1737 (carry_index < numberof (carry_array) \
1738 ? (carry = carry_array[carry_index]) \
1739 : (MPN_RANDOM_ALT (carry_index, &carry, 1), (mp_limb_t) 0)), \
1740 (tr->carry == CARRY_DIVISOR ? carry %= divisor : 0), \
1741 carry_index < CARRY_COUNT; \
1742 carry_index++)
1743
1744
1745 mp_limb_t multiplier_array[] = {
1746 0, 1, 2, 3,
1747 CNST_LIMB(1) << 8,
1748 CNST_LIMB(1) << 16,
1749 GMP_NUMB_MAX - 2,
1750 GMP_NUMB_MAX - 1,
1751 GMP_NUMB_MAX
1752 };
1753 int multiplier_index;
1754
1755 mp_limb_t divisor_array[] = {
1756 1, 2, 3,
1757 CNST_LIMB(1) << 8,
1758 CNST_LIMB(1) << 16,
1759 CNST_LIMB(1) << (GMP_NUMB_BITS/2 - 1),
1760 GMP_NUMB_MAX >> (GMP_NUMB_BITS/2),
1761 GMP_NUMB_HIGHBIT,
1762 GMP_NUMB_HIGHBIT + 1,
1763 GMP_NUMB_MAX - 2,
1764 GMP_NUMB_MAX - 1,
1765 GMP_NUMB_MAX
1766 };
1767
1768 int divisor_index;
1769
1770 /* The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
1771 the same type */
1772 #define ARRAY_ITERATION(var, index, limit, array, randoms, cond) \
1773 for (index = 0; \
1774 (index < numberof (array) \
1775 ? (var = array[index]) \
1776 : (MPN_RANDOM_ALT (index, &var, 1), (mp_limb_t) 0)), \
1777 index < limit; \
1778 index++)
1779
1780 #define MULTIPLIER_COUNT \
1781 (tr->multiplier \
1782 ? numberof (multiplier_array) + MULTIPLIER_RANDOMS \
1783 : 1)
1784
1785 #define MULTIPLIER_ITERATION \
1786 ARRAY_ITERATION(multiplier, multiplier_index, MULTIPLIER_COUNT, \
1787 multiplier_array, MULTIPLIER_RANDOMS, TRY_MULTIPLIER)
1788
1789 #define DIVISOR_COUNT \
1790 (tr->divisor \
1791 ? numberof (divisor_array) + DIVISOR_RANDOMS \
1792 : 1)
1793
1794 #define DIVISOR_ITERATION \
1795 ARRAY_ITERATION(divisor, divisor_index, DIVISOR_COUNT, divisor_array, \
1796 DIVISOR_RANDOMS, TRY_DIVISOR)
1797
1798
1799 /* overlap_array[].s[i] is where s[i] should be, 0 or 1 means overlapping
1800 d[0] or d[1] respectively, -1 means a separate (write-protected)
1801 location. */
1802
1803 struct overlap_t {
1804 int s[NUM_SOURCES];
1805 } overlap_array[] = {
1806 { { -1, -1 } },
1807 { { 0, -1 } },
1808 { { -1, 0 } },
1809 { { 0, 0 } },
1810 { { 1, -1 } },
1811 { { -1, 1 } },
1812 { { 1, 1 } },
1813 { { 0, 1 } },
1814 { { 1, 0 } },
1815 };
1816
1817 struct overlap_t *overlap, *overlap_limit;
1818
1819 #define OVERLAP_COUNT \
1820 (tr->overlap & OVERLAP_NONE ? 1 \
1821 : tr->overlap & OVERLAP_NOT_SRCS ? 3 \
1822 : tr->overlap & OVERLAP_NOT_SRC2 ? 2 \
1823 : tr->dst[1] ? 9 \
1824 : tr->src[1] ? 4 \
1825 : tr->dst[0] ? 2 \
1826 : 1)
1827
1828 #define OVERLAP_ITERATION \
1829 for (overlap = &overlap_array[0], \
1830 overlap_limit = &overlap_array[OVERLAP_COUNT]; \
1831 overlap < overlap_limit; \
1832 overlap++)
1833
1834
1835 int base = 10;
1836
1837 #define T_RAND_COUNT 2
1838 int t_rand;
1839
1840 void
1841 t_random (mp_ptr ptr, mp_size_t n)
1842 {
1843 if (n == 0)
1844 return;
1845
1846 switch (option_data) {
1847 case DATA_TRAND:
1848 switch (t_rand) {
1849 case 0: refmpn_random (ptr, n); break;
1850 case 1: refmpn_random2 (ptr, n); break;
1851 default: abort();
1852 }
1853 break;
1854 case DATA_SEQ:
1855 {
1856 static mp_limb_t counter = 0;
1857 mp_size_t i;
1858 for (i = 0; i < n; i++)
1859 ptr[i] = ++counter;
1860 }
1861 break;
1862 case DATA_ZEROS:
1863 refmpn_zero (ptr, n);
1864 break;
1865 case DATA_FFS:
1866 refmpn_fill (ptr, n, GMP_NUMB_MAX);
1867 break;
1868 case DATA_2FD:
1869 /* Special value 0x2FFF...FFFD, which divided by 3 gives 0xFFF...FFF,
1870 inducing the q1_ff special case in the mul-by-inverse part of some
1871 versions of divrem_1 and mod_1. */
1872 refmpn_fill (ptr, n, (mp_limb_t) -1);
1873 ptr[n-1] = 2;
1874 ptr[0] -= 2;
1875 break;
1876
1877 default:
1878 abort();
1879 }
1880 }
1881 #define T_RAND_ITERATION \
1882 for (t_rand = 0; t_rand < T_RAND_COUNT; t_rand++)
1883
1884
1885 void
1886 print_each (const struct each_t *e)
1887 {
1888 int i;
1889
1890 printf ("%s %s\n", e->name, e == &ref ? tr->reference_name : choice->name);
1891 if (tr->retval)
1892 mpn_trace (" retval", &e->retval, 1);
1893
1894 for (i = 0; i < NUM_DESTS; i++)
1895 {
1896 if (tr->dst[i])
1897 {
1898 if (tr->dst_bytes[i])
1899 byte_tracen (" d[%d]", i, e->d[i].p, d[i].size);
1900 else
1901 mpn_tracen (" d[%d]", i, e->d[i].p, d[i].size);
1902 printf (" located %p\n", (void *) (e->d[i].p));
1903 }
1904 }
1905
1906 for (i = 0; i < NUM_SOURCES; i++)
1907 if (tr->src[i])
1908 printf (" s[%d] located %p\n", i, (void *) (e->s[i].p));
1909 }
1910
1911
1912 void
1913 print_all (void)
1914 {
1915 int i;
1916
1917 printf ("\n");
1918 printf ("size %ld\n", (long) size);
1919 if (tr->size2)
1920 printf ("size2 %ld\n", (long) size2);
1921
1922 for (i = 0; i < NUM_DESTS; i++)
1923 if (d[i].size != size)
1924 printf ("d[%d].size %ld\n", i, (long) d[i].size);
1925
1926 if (tr->multiplier)
1927 mpn_trace (" multiplier", &multiplier, 1);
1928 if (tr->divisor)
1929 mpn_trace (" divisor", &divisor, 1);
1930 if (tr->shift)
1931 printf (" shift %lu\n", shift);
1932 if (tr->carry)
1933 mpn_trace (" carry", &carry, 1);
1934 if (tr->msize)
1935 mpn_trace (" multiplier_N", multiplier_N, tr->msize);
1936
1937 for (i = 0; i < NUM_DESTS; i++)
1938 if (tr->dst[i])
1939 printf (" d[%d] %s, align %ld, size %ld\n",
1940 i, d[i].high ? "high" : "low",
1941 (long) d[i].align, (long) d[i].size);
1942
1943 for (i = 0; i < NUM_SOURCES; i++)
1944 {
1945 if (tr->src[i])
1946 {
1947 printf (" s[%d] %s, align %ld, ",
1948 i, s[i].high ? "high" : "low", (long) s[i].align);
1949 switch (overlap->s[i]) {
1950 case -1:
1951 printf ("no overlap\n");
1952 break;
1953 default:
1954 printf ("==d[%d]%s\n",
1955 overlap->s[i],
1956 tr->overlap == OVERLAP_LOW_TO_HIGH ? "+a"
1957 : tr->overlap == OVERLAP_HIGH_TO_LOW ? "-a"
1958 : "");
1959 break;
1960 }
1961 printf (" s[%d]=", i);
1962 if (tr->carry_sign && (carry & (1 << i)))
1963 printf ("-");
1964 mpn_trace (NULL, s[i].p, SRC_SIZE(i));
1965 }
1966 }
1967
1968 if (tr->dst0_from_src1)
1969 mpn_trace (" d[0]", s[1].region.ptr, size);
1970
1971 if (tr->reference)
1972 print_each (&ref);
1973 print_each (&fun);
1974 }
1975
1976 void
1977 compare (void)
1978 {
1979 int error = 0;
1980 int i;
1981
1982 if (tr->retval && ref.retval != fun.retval)
1983 {
1984 gmp_printf ("Different return values (%Mu, %Mu)\n",
1985 ref.retval, fun.retval);
1986 error = 1;
1987 }
1988
1989 for (i = 0; i < NUM_DESTS; i++)
1990 {
1991 switch (tr->dst_size[i]) {
1992 case SIZE_RETVAL:
1993 case SIZE_GET_STR:
1994 d[i].size = ref.retval;
1995 break;
1996 }
1997 }
1998
1999 for (i = 0; i < NUM_DESTS; i++)
2000 {
2001 if (! tr->dst[i])
2002 continue;
2003
2004 if (tr->dst_bytes[i])
2005 {
2006 if (memcmp (ref.d[i].p, fun.d[i].p, d[i].size) != 0)
2007 {
2008 printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
2009 i,
2010 (long) byte_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),
2011 (long) byte_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));
2012 error = 1;
2013 }
2014 }
2015 else
2016 {
2017 if (d[i].size != 0
2018 && ! refmpn_equal_anynail (ref.d[i].p, fun.d[i].p, d[i].size))
2019 {
2020 printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
2021 i,
2022 (long) mpn_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),
2023 (long) mpn_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));
2024 error = 1;
2025 }
2026 }
2027 }
2028
2029 if (error)
2030 {
2031 print_all();
2032 abort();
2033 }
2034 }
2035
2036
2037 /* The functions are cast if the return value should be a long rather than
2038 the default mp_limb_t. This is necessary under _LONG_LONG_LIMB. This
2039 might not be enough if some actual calling conventions checking is
2040 implemented on a long long limb system. */
2041
2042 void
2043 call (struct each_t *e, tryfun_t function)
2044 {
2045 switch (choice->type) {
2046 case TYPE_ADD:
2047 case TYPE_SUB:
2048 e->retval = CALLING_CONVENTIONS (function)
2049 (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
2050 break;
2051
2052 case TYPE_ADD_N:
2053 case TYPE_SUB_N:
2054 case TYPE_ADDLSH1_N:
2055 case TYPE_ADDLSH2_N:
2056 case TYPE_SUBLSH1_N:
2057 case TYPE_RSBLSH1_N:
2058 case TYPE_RSBLSH2_N:
2059 case TYPE_RSH1ADD_N:
2060 case TYPE_RSH1SUB_N:
2061 e->retval = CALLING_CONVENTIONS (function)
2062 (e->d[0].p, e->s[0].p, e->s[1].p, size);
2063 break;
2064 case TYPE_ADDLSH_N:
2065 case TYPE_SUBLSH_N:
2066 case TYPE_RSBLSH_N:
2067 e->retval = CALLING_CONVENTIONS (function)
2068 (e->d[0].p, e->s[0].p, e->s[1].p, size, shift);
2069 break;
2070 case TYPE_ADD_NC:
2071 case TYPE_SUB_NC:
2072 e->retval = CALLING_CONVENTIONS (function)
2073 (e->d[0].p, e->s[0].p, e->s[1].p, size, carry);
2074 break;
2075
2076 case TYPE_MUL_1:
2077 case TYPE_ADDMUL_1:
2078 case TYPE_SUBMUL_1:
2079 e->retval = CALLING_CONVENTIONS (function)
2080 (e->d[0].p, e->s[0].p, size, multiplier);
2081 break;
2082 case TYPE_MUL_1C:
2083 case TYPE_ADDMUL_1C:
2084 case TYPE_SUBMUL_1C:
2085 e->retval = CALLING_CONVENTIONS (function)
2086 (e->d[0].p, e->s[0].p, size, multiplier, carry);
2087 break;
2088
2089 case TYPE_MUL_2:
2090 case TYPE_MUL_3:
2091 case TYPE_MUL_4:
2092 if (size == 1)
2093 abort ();
2094 e->retval = CALLING_CONVENTIONS (function)
2095 (e->d[0].p, e->s[0].p, size, multiplier_N);
2096 break;
2097
2098 case TYPE_ADDMUL_2:
2099 case TYPE_ADDMUL_3:
2100 case TYPE_ADDMUL_4:
2101 case TYPE_ADDMUL_5:
2102 case TYPE_ADDMUL_6:
2103 case TYPE_ADDMUL_7:
2104 case TYPE_ADDMUL_8:
2105 if (size == 1)
2106 abort ();
2107 e->retval = CALLING_CONVENTIONS (function)
2108 (e->d[0].p, e->s[0].p, size, multiplier_N);
2109 break;
2110
2111 case TYPE_AND_N:
2112 case TYPE_ANDN_N:
2113 case TYPE_NAND_N:
2114 case TYPE_IOR_N:
2115 case TYPE_IORN_N:
2116 case TYPE_NIOR_N:
2117 case TYPE_XOR_N:
2118 case TYPE_XNOR_N:
2119 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
2120 break;
2121
2122 case TYPE_ADDSUB_N:
2123 e->retval = CALLING_CONVENTIONS (function)
2124 (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size);
2125 break;
2126 case TYPE_ADDSUB_NC:
2127 e->retval = CALLING_CONVENTIONS (function)
2128 (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size, carry);
2129 break;
2130
2131 case TYPE_COPY:
2132 case TYPE_COPYI:
2133 case TYPE_COPYD:
2134 case TYPE_COM:
2135 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2136 break;
2137
2138
2139 case TYPE_DIVEXACT_BY3:
2140 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2141 break;
2142 case TYPE_DIVEXACT_BY3C:
2143 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size,
2144 carry);
2145 break;
2146
2147
2148 case TYPE_DIVMOD_1:
2149 case TYPE_DIVEXACT_1:
2150 e->retval = CALLING_CONVENTIONS (function)
2151 (e->d[0].p, e->s[0].p, size, divisor);
2152 break;
2153 case TYPE_DIVMOD_1C:
2154 e->retval = CALLING_CONVENTIONS (function)
2155 (e->d[0].p, e->s[0].p, size, divisor, carry);
2156 break;
2157 case TYPE_DIVREM_1:
2158 e->retval = CALLING_CONVENTIONS (function)
2159 (e->d[0].p, size2, e->s[0].p, size, divisor);
2160 break;
2161 case TYPE_DIVREM_1C:
2162 e->retval = CALLING_CONVENTIONS (function)
2163 (e->d[0].p, size2, e->s[0].p, size, divisor, carry);
2164 break;
2165 case TYPE_PREINV_DIVREM_1:
2166 {
2167 mp_limb_t dinv;
2168 unsigned shift;
2169 shift = refmpn_count_leading_zeros (divisor);
2170 dinv = refmpn_invert_limb (divisor << shift);
2171 e->retval = CALLING_CONVENTIONS (function)
2172 (e->d[0].p, size2, e->s[0].p, size, divisor, dinv, shift);
2173 }
2174 break;
2175 case TYPE_MOD_1:
2176 case TYPE_MODEXACT_1_ODD:
2177 e->retval = CALLING_CONVENTIONS (function)
2178 (e->s[0].p, size, divisor);
2179 break;
2180 case TYPE_MOD_1C:
2181 case TYPE_MODEXACT_1C_ODD:
2182 e->retval = CALLING_CONVENTIONS (function)
2183 (e->s[0].p, size, divisor, carry);
2184 break;
2185 case TYPE_PREINV_MOD_1:
2186 e->retval = CALLING_CONVENTIONS (function)
2187 (e->s[0].p, size, divisor, refmpn_invert_limb (divisor));
2188 break;
2189 case TYPE_MOD_34LSUB1:
2190 e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size);
2191 break;
2192
2193 case TYPE_UDIV_QRNND:
2194 e->retval = CALLING_CONVENTIONS (function)
2195 (e->d[0].p, e->s[0].p[1], e->s[0].p[0], divisor);
2196 break;
2197 case TYPE_UDIV_QRNND_R:
2198 e->retval = CALLING_CONVENTIONS (function)
2199 (e->s[0].p[1], e->s[0].p[0], divisor, e->d[0].p);
2200 break;
2201
2202 case TYPE_SBPI1_DIV_QR:
2203 {
2204 gmp_pi1_t dinv;
2205 invert_pi1 (dinv, e->s[1].p[size2-1], e->s[1].p[size2-2]); /* FIXME: use refinvert_pi1 */
2206 refmpn_copyi (e->d[1].p, e->s[0].p, size); /* dividend */
2207 refmpn_fill (e->d[0].p, size-size2, 0x98765432); /* quotient */
2208 e->retval = CALLING_CONVENTIONS (function)
2209 (e->d[0].p, e->d[1].p, size, e->s[1].p, size2, dinv.inv32);
2210 refmpn_zero (e->d[1].p+size2, size-size2); /* excess over remainder */
2211 }
2212 break;
2213
2214 case TYPE_TDIV_QR:
2215 CALLING_CONVENTIONS (function) (e->d[0].p, e->d[1].p, 0,
2216 e->s[0].p, size, e->s[1].p, size2);
2217 break;
2218
2219 case TYPE_GCD_1:
2220 /* Must have a non-zero src, but this probably isn't the best way to do
2221 it. */
2222 if (refmpn_zero_p (e->s[0].p, size))
2223 e->retval = 0;
2224 else
2225 e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size, divisor);
2226 break;
2227
2228 case TYPE_GCD:
2229 /* Sources are destroyed, so they're saved and replaced, but a general
2230 approach to this might be better. Note that it's still e->s[0].p and
2231 e->s[1].p that are passed, to get the desired alignments. */
2232 {
2233 mp_ptr s0 = refmpn_malloc_limbs (size);
2234 mp_ptr s1 = refmpn_malloc_limbs (size2);
2235 refmpn_copyi (s0, e->s[0].p, size);
2236 refmpn_copyi (s1, e->s[1].p, size2);
2237
2238 mprotect_region (&s[0].region, PROT_READ|PROT_WRITE);
2239 mprotect_region (&s[1].region, PROT_READ|PROT_WRITE);
2240 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p,
2241 e->s[0].p, size,
2242 e->s[1].p, size2);
2243 refmpn_copyi (e->s[0].p, s0, size);
2244 refmpn_copyi (e->s[1].p, s1, size2);
2245 free (s0);
2246 free (s1);
2247 }
2248 break;
2249
2250 case TYPE_GCD_FINDA:
2251 {
2252 /* FIXME: do this with a flag */
2253 mp_limb_t c[2];
2254 c[0] = e->s[0].p[0];
2255 c[0] += (c[0] == 0);
2256 c[1] = e->s[0].p[0];
2257 c[1] += (c[1] == 0);
2258 e->retval = CALLING_CONVENTIONS (function) (c);
2259 }
2260 break;
2261
2262 case TYPE_MPZ_JACOBI:
2263 case TYPE_MPZ_KRONECKER:
2264 {
2265 mpz_t a, b;
2266 PTR(a) = e->s[0].p; SIZ(a) = ((carry&1)==0 ? size : -size);
2267 PTR(b) = e->s[1].p; SIZ(b) = ((carry&2)==0 ? size2 : -size2);
2268 e->retval = CALLING_CONVENTIONS (function) (a, b);
2269 }
2270 break;
2271 case TYPE_MPZ_KRONECKER_UI:
2272 {
2273 mpz_t a;
2274 PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
2275 e->retval = CALLING_CONVENTIONS(function) (a, (unsigned long)multiplier);
2276 }
2277 break;
2278 case TYPE_MPZ_KRONECKER_SI:
2279 {
2280 mpz_t a;
2281 PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
2282 e->retval = CALLING_CONVENTIONS (function) (a, (long) multiplier);
2283 }
2284 break;
2285 case TYPE_MPZ_UI_KRONECKER:
2286 {
2287 mpz_t b;
2288 PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
2289 e->retval = CALLING_CONVENTIONS(function) ((unsigned long)multiplier, b);
2290 }
2291 break;
2292 case TYPE_MPZ_SI_KRONECKER:
2293 {
2294 mpz_t b;
2295 PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
2296 e->retval = CALLING_CONVENTIONS (function) ((long) multiplier, b);
2297 }
2298 break;
2299
2300 case TYPE_MUL_MN:
2301 CALLING_CONVENTIONS (function)
2302 (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
2303 break;
2304 case TYPE_MUL_N:
2305 case TYPE_MULLO_N:
2306 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
2307 break;
2308 case TYPE_SQR:
2309 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2310 break;
2311
2312 case TYPE_UMUL_PPMM:
2313 e->retval = CALLING_CONVENTIONS (function)
2314 (e->d[0].p, e->s[0].p[0], e->s[0].p[1]);
2315 break;
2316 case TYPE_UMUL_PPMM_R:
2317 e->retval = CALLING_CONVENTIONS (function)
2318 (e->s[0].p[0], e->s[0].p[1], e->d[0].p);
2319 break;
2320
2321 case TYPE_LSHIFT:
2322 case TYPE_LSHIFTC:
2323 case TYPE_RSHIFT:
2324 e->retval = CALLING_CONVENTIONS (function)
2325 (e->d[0].p, e->s[0].p, size, shift);
2326 break;
2327
2328 case TYPE_POPCOUNT:
2329 e->retval = (* (unsigned long (*)(ANYARGS))
2330 CALLING_CONVENTIONS (function)) (e->s[0].p, size);
2331 break;
2332 case TYPE_HAMDIST:
2333 e->retval = (* (unsigned long (*)(ANYARGS))
2334 CALLING_CONVENTIONS (function)) (e->s[0].p, e->s[1].p, size);
2335 break;
2336
2337 case TYPE_SQRTREM:
2338 e->retval = (* (long (*)(ANYARGS)) CALLING_CONVENTIONS (function))
2339 (e->d[0].p, e->d[1].p, e->s[0].p, size);
2340 break;
2341
2342 case TYPE_ZERO:
2343 CALLING_CONVENTIONS (function) (e->d[0].p, size);
2344 break;
2345
2346 case TYPE_GET_STR:
2347 {
2348 size_t sizeinbase, fill;
2349 char *dst;
2350 MPN_SIZEINBASE (sizeinbase, e->s[0].p, size, base);
2351 ASSERT_ALWAYS (sizeinbase <= d[0].size);
2352 fill = d[0].size - sizeinbase;
2353 if (d[0].high)
2354 {
2355 memset (e->d[0].p, 0xBA, fill);
2356 dst = (char *) e->d[0].p + fill;
2357 }
2358 else
2359 {
2360 dst = (char *) e->d[0].p;
2361 memset (dst + sizeinbase, 0xBA, fill);
2362 }
2363 if (POW2_P (base))
2364 {
2365 e->retval = CALLING_CONVENTIONS (function) (dst, base,
2366 e->s[0].p, size);
2367 }
2368 else
2369 {
2370 refmpn_copy (e->d[1].p, e->s[0].p, size);
2371 e->retval = CALLING_CONVENTIONS (function) (dst, base,
2372 e->d[1].p, size);
2373 }
2374 refmpn_zero (e->d[1].p, size); /* clobbered or unused */
2375 }
2376 break;
2377
2378 case TYPE_INVERT:
2379 {
2380 mp_ptr scratch;
2381 TMP_DECL;
2382 TMP_MARK;
2383 scratch = TMP_ALLOC_LIMBS (mpn_invert_itch (size));
2384 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch);
2385 TMP_FREE;
2386 }
2387 break;
2388 case TYPE_BINVERT:
2389 {
2390 mp_ptr scratch;
2391 TMP_DECL;
2392 TMP_MARK;
2393 scratch = TMP_ALLOC_LIMBS (mpn_binvert_itch (size));
2394 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch);
2395 TMP_FREE;
2396 }
2397 break;
2398
2399 #ifdef EXTRA_CALL
2400 EXTRA_CALL
2401 #endif
2402
2403 default:
2404 printf ("Unknown routine type %d\n", choice->type);
2405 abort ();
2406 break;
2407 }
2408 }
2409
2410
2411 void
2412 pointer_setup (struct each_t *e)
2413 {
2414 int i, j;
2415
2416 for (i = 0; i < NUM_DESTS; i++)
2417 {
2418 switch (tr->dst_size[i]) {
2419 case 0:
2420 case SIZE_RETVAL: /* will be adjusted later */
2421 d[i].size = size;
2422 break;
2423
2424 case SIZE_1:
2425 d[i].size = 1;
2426 break;
2427 case SIZE_2:
2428 d[i].size = 2;
2429 break;
2430 case SIZE_3:
2431 d[i].size = 3;
2432 break;
2433
2434 case SIZE_PLUS_1:
2435 d[i].size = size+1;
2436 break;
2437 case SIZE_PLUS_MSIZE_SUB_1:
2438 d[i].size = size + tr->msize - 1;
2439 break;
2440
2441 case SIZE_SUM:
2442 if (tr->size2)
2443 d[i].size = size + size2;
2444 else
2445 d[i].size = 2*size;
2446 break;
2447
2448 case SIZE_SIZE2:
2449 d[i].size = size2;
2450 break;
2451
2452 case SIZE_DIFF:
2453 d[i].size = size - size2;
2454 break;
2455
2456 case SIZE_DIFF_PLUS_1:
2457 d[i].size = size - size2 + 1;
2458 break;
2459
2460 case SIZE_CEIL_HALF:
2461 d[i].size = (size+1)/2;
2462 break;
2463
2464 case SIZE_GET_STR:
2465 {
2466 mp_limb_t ff = GMP_NUMB_MAX;
2467 MPN_SIZEINBASE (d[i].size, &ff - (size-1), size, base);
2468 }
2469 break;
2470
2471 default:
2472 printf ("Unrecognised dst_size type %d\n", tr->dst_size[i]);
2473 abort ();
2474 }
2475 }
2476
2477 /* establish e->d[].p destinations */
2478 for (i = 0; i < NUM_DESTS; i++)
2479 {
2480 mp_size_t offset = 0;
2481
2482 /* possible room for overlapping sources */
2483 for (j = 0; j < numberof (overlap->s); j++)
2484 if (overlap->s[j] == i)
2485 offset = MAX (offset, s[j].align);
2486
2487 if (d[i].high)
2488 {
2489 if (tr->dst_bytes[i])
2490 {
2491 e->d[i].p = (mp_ptr)
2492 ((char *) (e->d[i].region.ptr + e->d[i].region.size)
2493 - d[i].size - d[i].align);
2494 }
2495 else
2496 {
2497 e->d[i].p = e->d[i].region.ptr + e->d[i].region.size
2498 - d[i].size - d[i].align;
2499 if (tr->overlap == OVERLAP_LOW_TO_HIGH)
2500 e->d[i].p -= offset;
2501 }
2502 }
2503 else
2504 {
2505 if (tr->dst_bytes[i])
2506 {
2507 e->d[i].p = (mp_ptr) ((char *) e->d[i].region.ptr + d[i].align);
2508 }
2509 else
2510 {
2511 e->d[i].p = e->d[i].region.ptr + d[i].align;
2512 if (tr->overlap == OVERLAP_HIGH_TO_LOW)
2513 e->d[i].p += offset;
2514 }
2515 }
2516 }
2517
2518 /* establish e->s[].p sources */
2519 for (i = 0; i < NUM_SOURCES; i++)
2520 {
2521 int o = overlap->s[i];
2522 switch (o) {
2523 case -1:
2524 /* no overlap */
2525 e->s[i].p = s[i].p;
2526 break;
2527 case 0:
2528 case 1:
2529 /* overlap with d[o] */
2530 if (tr->overlap == OVERLAP_HIGH_TO_LOW)
2531 e->s[i].p = e->d[o].p - s[i].align;
2532 else if (tr->overlap == OVERLAP_LOW_TO_HIGH)
2533 e->s[i].p = e->d[o].p + s[i].align;
2534 else if (tr->size2 == SIZE_FRACTION)
2535 e->s[i].p = e->d[o].p + size2;
2536 else
2537 e->s[i].p = e->d[o].p;
2538 break;
2539 default:
2540 abort();
2541 break;
2542 }
2543 }
2544 }
2545
2546
2547 void
2548 validate_fail (void)
2549 {
2550 if (tr->reference)
2551 {
2552 trap_location = TRAP_REF;
2553 call (&ref, tr->reference);
2554 trap_location = TRAP_NOWHERE;
2555 }
2556
2557 print_all();
2558 abort();
2559 }
2560
2561
2562 void
2563 try_one (void)
2564 {
2565 int i;
2566
2567 if (option_spinner)
2568 spinner();
2569 spinner_count++;
2570
2571 trap_location = TRAP_SETUPS;
2572
2573 if (tr->divisor == DIVISOR_NORM)
2574 divisor |= GMP_NUMB_HIGHBIT;
2575 if (tr->divisor == DIVISOR_ODD)
2576 divisor |= 1;
2577
2578 for (i = 0; i < NUM_SOURCES; i++)
2579 {
2580 if (s[i].high)
2581 s[i].p = s[i].region.ptr + s[i].region.size - SRC_SIZE(i) - s[i].align;
2582 else
2583 s[i].p = s[i].region.ptr + s[i].align;
2584 }
2585
2586 pointer_setup (&ref);
2587 pointer_setup (&fun);
2588
2589 ref.retval = 0x04152637;
2590 fun.retval = 0x8C9DAEBF;
2591
2592 t_random (multiplier_N, tr->msize);
2593
2594 for (i = 0; i < NUM_SOURCES; i++)
2595 {
2596 if (! tr->src[i])
2597 continue;
2598
2599 mprotect_region (&s[i].region, PROT_READ|PROT_WRITE);
2600 t_random (s[i].p, SRC_SIZE(i));
2601
2602 switch (tr->data) {
2603 case DATA_NON_ZERO:
2604 if (refmpn_zero_p (s[i].p, SRC_SIZE(i)))
2605 s[i].p[0] = 1;
2606 break;
2607
2608 case DATA_MULTIPLE_DIVISOR:
2609 /* same number of low zero bits as divisor */
2610 s[i].p[0] &= ~ LOW_ZEROS_MASK (divisor);
2611 refmpn_sub_1 (s[i].p, s[i].p, size,
2612 refmpn_mod_1 (s[i].p, size, divisor));
2613 break;
2614
2615 case DATA_GCD:
2616 /* s[1] no more bits than s[0] */
2617 if (i == 1 && size2 == size)
2618 s[1].p[size-1] &= refmpn_msbone_mask (s[0].p[size-1]);
2619
2620 /* high limb non-zero */
2621 s[i].p[SRC_SIZE(i)-1] += (s[i].p[SRC_SIZE(i)-1] == 0);
2622
2623 /* odd */
2624 s[i].p[0] |= 1;
2625 break;
2626
2627 case DATA_SRC0_ODD:
2628 if (i == 0)
2629 s[i].p[0] |= 1;
2630 break;
2631
2632 case DATA_SRC1_ODD:
2633 if (i == 1)
2634 s[i].p[0] |= 1;
2635 break;
2636
2637 case DATA_SRC1_HIGHBIT:
2638 if (i == 1)
2639 {
2640 if (tr->size2)
2641 s[i].p[size2-1] |= GMP_NUMB_HIGHBIT;
2642 else
2643 s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
2644 }
2645 break;
2646
2647 case DATA_SRC0_HIGHBIT:
2648 if (i == 0)
2649 {
2650 s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
2651 }
2652 break;
2653
2654 case DATA_UDIV_QRNND:
2655 s[i].p[1] %= divisor;
2656 break;
2657 }
2658
2659 mprotect_region (&s[i].region, PROT_READ);
2660 }
2661
2662 for (i = 0; i < NUM_DESTS; i++)
2663 {
2664 if (! tr->dst[i])
2665 continue;
2666
2667 if (tr->dst0_from_src1 && i==0)
2668 {
2669 mp_size_t copy = MIN (d[0].size, SRC_SIZE(1));
2670 mp_size_t fill = MAX (0, d[0].size - copy);
2671 MPN_COPY (fun.d[0].p, s[1].region.ptr, copy);
2672 MPN_COPY (ref.d[0].p, s[1].region.ptr, copy);
2673 refmpn_fill (fun.d[0].p + copy, fill, DEADVAL);
2674 refmpn_fill (ref.d[0].p + copy, fill, DEADVAL);
2675 }
2676 else if (tr->dst_bytes[i])
2677 {
2678 memset (ref.d[i].p, 0xBA, d[i].size);
2679 memset (fun.d[i].p, 0xBA, d[i].size);
2680 }
2681 else
2682 {
2683 refmpn_fill (ref.d[i].p, d[i].size, DEADVAL);
2684 refmpn_fill (fun.d[i].p, d[i].size, DEADVAL);
2685 }
2686 }
2687
2688 for (i = 0; i < NUM_SOURCES; i++)
2689 {
2690 if (! tr->src[i])
2691 continue;
2692
2693 if (ref.s[i].p != s[i].p)
2694 {
2695 refmpn_copyi (ref.s[i].p, s[i].p, SRC_SIZE(i));
2696 refmpn_copyi (fun.s[i].p, s[i].p, SRC_SIZE(i));
2697 }
2698 }
2699
2700 if (option_print)
2701 print_all();
2702
2703 if (tr->validate != NULL)
2704 {
2705 trap_location = TRAP_FUN;
2706 call (&fun, choice->function);
2707 trap_location = TRAP_NOWHERE;
2708
2709 if (! CALLING_CONVENTIONS_CHECK ())
2710 {
2711 print_all();
2712 abort();
2713 }
2714
2715 (*tr->validate) ();
2716 }
2717 else
2718 {
2719 trap_location = TRAP_REF;
2720 call (&ref, tr->reference);
2721 trap_location = TRAP_FUN;
2722 call (&fun, choice->function);
2723 trap_location = TRAP_NOWHERE;
2724
2725 if (! CALLING_CONVENTIONS_CHECK ())
2726 {
2727 print_all();
2728 abort();
2729 }
2730
2731 compare ();
2732 }
2733 }
2734
2735
/* Loop "size" up to option_lastsize, starting from the largest of
   option_firstsize, the routine's minsize, and 0 or 1 according to whether
   the test type allows a zero size.  */
#define SIZE_ITERATION                                          \
  for (size = MAX3 (option_firstsize,                           \
                    choice->minsize,                            \
                    (tr->size == SIZE_ALLOW_ZERO) ? 0 : 1);     \
       size <= option_lastsize;                                 \
       size++)

/* First value of "size2": fixed at 2 for SIZE_2, option_firstsize2 for
   SIZE_FRACTION, for other two-size types the larger of the routine's
   minsize and option_firstsize2 (or 1 if that option is 0), and 0 when
   there's no second size.  */
#define SIZE2_FIRST                                             \
  (tr->size2 == SIZE_2 ? 2                                      \
   : tr->size2 == SIZE_FRACTION ? option_firstsize2             \
   : tr->size2 ?                                                \
   MAX (choice->minsize, (option_firstsize2 != 0                \
                          ? option_firstsize2 : 1))             \
   : 0)

/* Last value of "size2": 2 for SIZE_2, FRACTION_COUNT-1 for SIZE_FRACTION,
   the current "size" for other two-size types, and 0 when there's no
   second size.  */
#define SIZE2_LAST                                      \
  (tr->size2 == SIZE_2 ? 2                              \
   : tr->size2 == SIZE_FRACTION ? FRACTION_COUNT-1      \
   : tr->size2 ? size                                   \
   : 0)

#define SIZE2_ITERATION \
  for (size2 = SIZE2_FIRST; size2 <= SIZE2_LAST; size2++)

/* Alignments 0 to ALIGNMENTS-1 are tried for an operand in use (cond
   non-zero), just alignment 0 otherwise.  */
#define ALIGN_COUNT(cond)  ((cond) ? ALIGNMENTS : 1)
#define ALIGN_ITERATION(w,n,cond) \
  for ((w)[n].align = 0; (w)[n].align < ALIGN_COUNT(cond); (w)[n].align++)

/* Both low (0) and high (1) placements are tried for an operand in use
   (cond non-zero), just low otherwise.  */
#define HIGH_LIMIT(cond)  ((cond) != 0)
#define HIGH_COUNT(cond)  (HIGH_LIMIT (cond) + 1)
#define HIGH_ITERATION(w,n,cond) \
  for ((w)[n].high = 0; (w)[n].high <= HIGH_LIMIT(cond); (w)[n].high++)

/* Shifts 1 to GMP_NUMB_BITS-1 for routines taking a shift, otherwise a
   single pass with shift==1.  */
#define SHIFT_LIMIT \
  ((unsigned long) (tr->shift ? GMP_NUMB_BITS -1 : 1))

#define SHIFT_ITERATION \
  for (shift = 1; shift <= SHIFT_LIMIT; shift++)
2774
2775
2776 void
2777 try_many (void)
2778 {
2779 int i;
2780
2781 {
2782 unsigned long total = 1;
2783
2784 total *= option_repetitions;
2785 total *= option_lastsize;
2786 if (tr->size2 == SIZE_FRACTION) total *= FRACTION_COUNT;
2787 else if (tr->size2) total *= (option_lastsize+1)/2;
2788
2789 total *= SHIFT_LIMIT;
2790 total *= MULTIPLIER_COUNT;
2791 total *= DIVISOR_COUNT;
2792 total *= CARRY_COUNT;
2793 total *= T_RAND_COUNT;
2794
2795 total *= HIGH_COUNT (tr->dst[0]);
2796 total *= HIGH_COUNT (tr->dst[1]);
2797 total *= HIGH_COUNT (tr->src[0]);
2798 total *= HIGH_COUNT (tr->src[1]);
2799
2800 total *= ALIGN_COUNT (tr->dst[0]);
2801 total *= ALIGN_COUNT (tr->dst[1]);
2802 total *= ALIGN_COUNT (tr->src[0]);
2803 total *= ALIGN_COUNT (tr->src[1]);
2804
2805 total *= OVERLAP_COUNT;
2806
2807 printf ("%s %lu\n", choice->name, total);
2808 }
2809
2810 spinner_count = 0;
2811
2812 for (i = 0; i < option_repetitions; i++)
2813 SIZE_ITERATION
2814 SIZE2_ITERATION
2815
2816 SHIFT_ITERATION
2817 MULTIPLIER_ITERATION
2818 DIVISOR_ITERATION
2819 CARRY_ITERATION /* must be after divisor */
2820 T_RAND_ITERATION
2821
2822 HIGH_ITERATION(d,0, tr->dst[0])
2823 HIGH_ITERATION(d,1, tr->dst[1])
2824 HIGH_ITERATION(s,0, tr->src[0])
2825 HIGH_ITERATION(s,1, tr->src[1])
2826
2827 ALIGN_ITERATION(d,0, tr->dst[0])
2828 ALIGN_ITERATION(d,1, tr->dst[1])
2829 ALIGN_ITERATION(s,0, tr->src[0])
2830 ALIGN_ITERATION(s,1, tr->src[1])
2831
2832 OVERLAP_ITERATION
2833 try_one();
2834
2835 printf("\n");
2836 }
2837
2838
2839 /* Usually print_all() doesn't show much, but it might give a hint as to
2840 where the function was up to when it died. */
2841 void
2842 trap (int sig)
2843 {
2844 const char *name = "noname";
2845
2846 switch (sig) {
2847 case SIGILL: name = "SIGILL"; break;
2848 #ifdef SIGBUS
2849 case SIGBUS: name = "SIGBUS"; break;
2850 #endif
2851 case SIGSEGV: name = "SIGSEGV"; break;
2852 case SIGFPE: name = "SIGFPE"; break;
2853 }
2854
2855 printf ("\n\nSIGNAL TRAP: %s\n", name);
2856
2857 switch (trap_location) {
2858 case TRAP_REF:
2859 printf (" in reference function: %s\n", tr->reference_name);
2860 break;
2861 case TRAP_FUN:
2862 printf (" in test function: %s\n", choice->name);
2863 print_all ();
2864 break;
2865 case TRAP_SETUPS:
2866 printf (" in parameter setups\n");
2867 print_all ();
2868 break;
2869 default:
2870 printf (" somewhere unknown\n");
2871 break;
2872 }
2873 exit (1);
2874 }
2875
2876
2877 void
2878 try_init (void)
2879 {
2880 #if HAVE_GETPAGESIZE
2881 /* Prefer getpagesize() over sysconf(), since on SunOS 4 sysconf() doesn't
2882 know _SC_PAGESIZE. */
2883 pagesize = getpagesize ();
2884 #else
2885 #if HAVE_SYSCONF
2886 if ((pagesize = sysconf (_SC_PAGESIZE)) == -1)
2887 {
2888 /* According to the linux man page, sysconf doesn't set errno */
2889 fprintf (stderr, "Cannot get sysconf _SC_PAGESIZE\n");
2890 exit (1);
2891 }
2892 #else
2893 Error, error, cannot get page size
2894 #endif
2895 #endif
2896
2897 printf ("pagesize is 0x%lX bytes\n", pagesize);
2898
2899 signal (SIGILL, trap);
2900 #ifdef SIGBUS
2901 signal (SIGBUS, trap);
2902 #endif
2903 signal (SIGSEGV, trap);
2904 signal (SIGFPE, trap);
2905
2906 {
2907 int i;
2908
2909 for (i = 0; i < NUM_SOURCES; i++)
2910 {
2911 malloc_region (&s[i].region, 2*option_lastsize+ALIGNMENTS-1);
2912 printf ("s[%d] %p to %p (0x%lX bytes)\n",
2913 i, (void *) (s[i].region.ptr),
2914 (void *) (s[i].region.ptr + s[i].region.size),
2915 (long) s[i].region.size * BYTES_PER_MP_LIMB);
2916 }
2917
2918 #define INIT_EACH(e,es) \
2919 for (i = 0; i < NUM_DESTS; i++) \
2920 { \
2921 malloc_region (&e.d[i].region, 2*option_lastsize+ALIGNMENTS-1); \
2922 printf ("%s d[%d] %p to %p (0x%lX bytes)\n", \
2923 es, i, (void *) (e.d[i].region.ptr), \
2924 (void *) (e.d[i].region.ptr + e.d[i].region.size), \
2925 (long) e.d[i].region.size * BYTES_PER_MP_LIMB); \
2926 }
2927
2928 INIT_EACH(ref, "ref");
2929 INIT_EACH(fun, "fun");
2930 }
2931 }
2932
/* Return 1 if STR matches PATTERN, 0 if not.

   PATTERN can have a single "*" wildcard, either as its first character
   (the rest of the pattern must then be a suffix of STR) or as its last
   character (the part before it must then be a prefix of STR).  A leading
   "*" takes precedence and the remainder is then compared literally.
   Without a wildcard the two strings must be equal.  */
int
strmatch_wild (const char *pattern, const char *str)
{
  size_t  plen, slen;

  /* wildcard at start */
  if (pattern[0] == '*')
    {
      const char  *suffix = pattern + 1;

      plen = strlen (suffix);
      slen = strlen (str);
      if (plen > slen)
        return 0;
      /* an empty suffix compares equal, so a lone "*" matches anything */
      return (memcmp (suffix, str + slen - plen, plen) == 0);
    }

  /* wildcard at end; strncmp rather than memcmp so a str shorter than the
     prefix is never read beyond its terminating null */
  plen = strlen (pattern);
  if (plen >= 1 && pattern[plen-1] == '*')
    return (strncmp (pattern, str, plen-1) == 0);

  /* no wildcards */
  return (strcmp (pattern, str) == 0);
}
2956
2957 void
2958 try_name (const char *name)
2959 {
2960 int found = 0;
2961 int i;
2962
2963 for (i = 0; i < numberof (choice_array); i++)
2964 {
2965 if (strmatch_wild (name, choice_array[i].name))
2966 {
2967 choice = &choice_array[i];
2968 tr = ¶m[choice->type];
2969 try_many ();
2970 found = 1;
2971 }
2972 }
2973
2974 if (!found)
2975 {
2976 printf ("%s unknown\n", name);
2977 /* exit (1); */
2978 }
2979 }
2980
2981
2982 void
2983 usage (const char *prog)
2984 {
2985 int col = 0;
2986 int i;
2987
2988 printf ("Usage: %s [options] function...\n", prog);
2989 printf (" -1 use limb data 1,2,3,etc\n");
2990 printf (" -9 use limb data all 0xFF..FFs\n");
2991 printf (" -a zeros use limb data all zeros\n");
2992 printf (" -a ffs use limb data all 0xFF..FFs (same as -9)\n");
2993 printf (" -a 2fd use data 0x2FFF...FFFD\n");
2994 printf (" -p print each case tried (try this if seg faulting)\n");
2995 printf (" -R seed random numbers from time()\n");
2996 printf (" -r reps set repetitions (default %d)\n", DEFAULT_REPETITIONS);
2997 printf (" -s size starting size to test\n");
2998 printf (" -S size2 starting size2 to test\n");
2999 printf (" -s s1-s2 range of sizes to test\n");
3000 printf (" -W don't show the spinner (use this in gdb)\n");
3001 printf (" -z disable mprotect() redzones\n");
3002 printf ("Default data is refmpn_random() and refmpn_random2().\n");
3003 printf ("\n");
3004 printf ("Functions that can be tested:\n");
3005
3006 for (i = 0; i < numberof (choice_array); i++)
3007 {
3008 if (col + 1 + strlen (choice_array[i].name) > 79)
3009 {
3010 printf ("\n");
3011 col = 0;
3012 }
3013 printf (" %s", choice_array[i].name);
3014 col += 1 + strlen (choice_array[i].name);
3015 }
3016 printf ("\n");
3017
3018 exit(1);
3019 }
3020
3021
3022 int
3023 main (int argc, char *argv[])
3024 {
3025 int i;
3026
3027 /* unbuffered output */
3028 setbuf (stdout, NULL);
3029 setbuf (stderr, NULL);
3030
3031 /* default trace in hex, and in upper-case so can paste into bc */
3032 mp_trace_base = -16;
3033
3034 param_init ();
3035
3036 {
3037 unsigned long seed = 123;
3038 int opt;
3039
3040 while ((opt = getopt(argc, argv, "19a:b:E:pRr:S:s:Wz")) != EOF)
3041 {
3042 switch (opt) {
3043 case '1':
3044 /* use limb data values 1, 2, 3, ... etc */
3045 option_data = DATA_SEQ;
3046 break;
3047 case '9':
3048 /* use limb data values 0xFFF...FFF always */
3049 option_data = DATA_FFS;
3050 break;
3051 case 'a':
3052 if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS;
3053 else if (strcmp (optarg, "seq") == 0) option_data = DATA_SEQ;
3054 else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS;
3055 else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD;
3056 else
3057 {
3058 fprintf (stderr, "unrecognised data option: %s\n", optarg);
3059 exit (1);
3060 }
3061 break;
3062 case 'b':
3063 mp_trace_base = atoi (optarg);
3064 break;
3065 case 'E':
3066 /* re-seed */
3067 sscanf (optarg, "%lu", &seed);
3068 printf ("Re-seeding with %lu\n", seed);
3069 break;
3070 case 'p':
3071 option_print = 1;
3072 break;
3073 case 'R':
3074 /* randomize */
3075 seed = time (NULL);
3076 printf ("Seeding with %lu, re-run using \"-E %lu\"\n", seed, seed);
3077 break;
3078 case 'r':
3079 option_repetitions = atoi (optarg);
3080 break;
3081 case 's':
3082 {
3083 char *p;
3084 option_firstsize = strtol (optarg, 0, 0);
3085 if ((p = strchr (optarg, '-')) != NULL)
3086 option_lastsize = strtol (p+1, 0, 0);
3087 }
3088 break;
3089 case 'S':
3090 /* -S <size> sets the starting size for the second of a two size
3091 routine (like mpn_mul_basecase) */
3092 option_firstsize2 = strtol (optarg, 0, 0);
3093 break;
3094 case 'W':
3095 /* use this when running in the debugger */
3096 option_spinner = 0;
3097 break;
3098 case 'z':
3099 /* disable redzones */
3100 option_redzones = 0;
3101 break;
3102 case '?':
3103 usage (argv[0]);
3104 break;
3105 }
3106 }
3107
3108 gmp_randinit_default (__gmp_rands);
3109 __gmp_rands_initialized = 1;
3110 gmp_randseed_ui (__gmp_rands, seed);
3111 }
3112
3113 try_init();
3114
3115 if (argc <= optind)
3116 usage (argv[0]);
3117
3118 for (i = optind; i < argc; i++)
3119 try_name (argv[i]);
3120
3121 return 0;
3122 }
3123