/* Run some tests on various mpn routines.
2
3 THIS IS A TEST PROGRAM USED ONLY FOR DEVELOPMENT. IT'S ALMOST CERTAIN TO
4 BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE VERSIONS OF GMP.
5
6 Copyright 2000-2006, 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
7
8 This file is part of the GNU MP Library test suite.
9
10 The GNU MP Library test suite is free software; you can redistribute it
11 and/or modify it under the terms of the GNU General Public License as
12 published by the Free Software Foundation; either version 3 of the License,
13 or (at your option) any later version.
14
15 The GNU MP Library test suite is distributed in the hope that it will be
16 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
18 Public License for more details.
19
20 You should have received a copy of the GNU General Public License along with
21 the GNU MP Library test suite. If not, see https://www.gnu.org/licenses/. */
22
23
24 /* Usage: try [options] <function>...
25
26 For example, "./try mpn_add_n" to run tests of that function.
27
28 Combinations of alignments and overlaps are tested, with redzones above
29 or below the destinations, and with the sources write-protected.
30
31 The number of tests performed becomes ridiculously large with all the
32 combinations, and for that reason this can't be a part of a "make check",
33 it's meant only for development. The code isn't very pretty either.
34
35 During development it can help to disable the redzones, since seeing the
36 rest of the destination written can show where the wrong part is, or if
37 the dst pointers are off by 1 or whatever. The magic DEADVAL initial
38 fill (see below) will show locations never written.
39
40 The -s option can be used to test only certain size operands, which is
41 useful if some new code doesn't yet support say sizes less than the
42 unrolling, or whatever.
43
44 When a problem occurs it'll of course be necessary to run the program
45 under gdb to find out quite where, how and why it's going wrong. Disable
46 the spinner with the -W option when doing this, or single stepping won't
47 work. Using the "-1" option to run with simple data can be useful.
48
49 New functions to test can be added in try_array[]. If a new TYPE is
50 required then add it to the existing constants, set up its parameters in
51 param_init(), and add it to the call() function. Extra parameter fields
52 can be added if necessary, or further interpretations given to existing
53 fields.
54
55
56 Portability:
57
58 This program is not designed for use on Cray vector systems under Unicos,
59 it will fail to compile due to missing _SC_PAGE_SIZE. Those systems
60 don't really have pages or mprotect. We could arrange to run the tests
61 without the redzones, but we haven't bothered currently.
62
63
64 Enhancements:
65
66 umul_ppmm support is not very good, lots of source data is generated
67 whereas only two limbs are needed.
68
69 Make a little scheme for interpreting the "SIZE" selections uniformly.
70
71 Make tr->size==SIZE_2 work, for the benefit of find_a which wants just 2
72 source limbs. Possibly increase the default repetitions in that case.
73
74 Automatically detect gdb and disable the spinner (use -W for now).
75
76 Make a way to re-run a failing case in the debugger. Have an option to
77 snapshot each test case before it's run so the data is available if a
78 segv occurs. (This should be more reliable than the current print_all()
79 in the signal handler.)
80
81 When alignment means a dst isn't hard against the redzone, check the
82 space in between remains unchanged.
83
84 When a source overlaps a destination, don't run both s[i].high 0 and 1,
85 as s[i].high has no effect. Maybe encode s[i].high into overlap->s[i].
86
87 When partial overlaps aren't done, don't loop over source alignments
88 during overlaps.
89
90 Try to make the looping code a bit less horrible. Right now it's pretty
91 hard to see what iterations are actually done.
92
93 Perhaps specific setups and loops for each style of function under test
94 would be clearer than a parameterized general loop. There's lots of
95 stuff common to all functions, but the exceptions get messy.
96
97 When there's no overlap, run with both src>dst and src<dst. A subtle
98 calling-conventions violation occurred in a P6 copy which depended on the
99 relative location of src and dst.
100
101 multiplier_N is more or less a third source region for the addmul_N
102 routines, and could be done with the redzoned region scheme.
103
104 */
105
106
107 /* always do assertion checking */
108 #define WANT_ASSERT 1
109
110 #include "config.h"
111
112 #include <errno.h>
113 #include <limits.h>
114 #include <signal.h>
115 #include <stdio.h>
116 #include <stdlib.h>
117 #include <string.h>
118 #include <time.h>
119
120 #if HAVE_UNISTD_H
121 #include <unistd.h>
122 #endif
123
124 #if HAVE_SYS_MMAN_H
125 #include <sys/mman.h>
126 #endif
127
128 #include "gmp.h"
129 #include "gmp-impl.h"
130 #include "longlong.h"
131 #include "tests.h"
132
133
134 #if !HAVE_DECL_OPTARG
135 extern char *optarg;
136 extern int optind, opterr;
137 #endif
138
139 #if ! HAVE_DECL_SYS_NERR
140 extern int sys_nerr;
141 #endif
142
143 #if ! HAVE_DECL_SYS_ERRLIST
144 extern char *sys_errlist[];
145 #endif
146
147 #if ! HAVE_STRERROR
148 char *
149 strerror (int n)
150 {
151 if (n < 0 || n >= sys_nerr)
152 return "errno out of range";
153 else
154 return sys_errlist[n];
155 }
156 #endif
157
158 /* Rumour has it some systems lack a define of PROT_NONE. */
159 #ifndef PROT_NONE
160 #define PROT_NONE 0
161 #endif
162
163 /* Dummy defines for when mprotect doesn't exist. */
164 #ifndef PROT_READ
165 #define PROT_READ 0
166 #endif
167 #ifndef PROT_WRITE
168 #define PROT_WRITE 0
169 #endif
170
171 /* _SC_PAGESIZE is standard, but hpux 9 and possibly other systems have
172 _SC_PAGE_SIZE instead. */
173 #if defined (_SC_PAGE_SIZE) && ! defined (_SC_PAGESIZE)
174 #define _SC_PAGESIZE _SC_PAGE_SIZE
175 #endif
176
177
178 #ifdef EXTRA_PROTOS
179 EXTRA_PROTOS
180 #endif
181 #ifdef EXTRA_PROTOS2
182 EXTRA_PROTOS2
183 #endif
184
185
186 #define DEFAULT_REPETITIONS 10
187
188 int option_repetitions = DEFAULT_REPETITIONS;
189 int option_spinner = 1;
190 int option_redzones = 1;
191 int option_firstsize = 0;
192 int option_lastsize = 500;
193 int option_firstsize2 = 0;
194
195 #define ALIGNMENTS 4
196 #define OVERLAPS 4
197 #define CARRY_RANDOMS 5
198 #define MULTIPLIER_RANDOMS 5
199 #define DIVISOR_RANDOMS 5
200 #define FRACTION_COUNT 4
201
202 int option_print = 0;
203
204 #define DATA_TRAND 0
205 #define DATA_ZEROS 1
206 #define DATA_SEQ 2
207 #define DATA_FFS 3
208 #define DATA_2FD 4
209 int option_data = DATA_TRAND;
210
211
212 mp_size_t pagesize;
213 #define PAGESIZE_LIMBS (pagesize / GMP_LIMB_BYTES)
214
215 /* must be a multiple of the page size */
216 #define REDZONE_BYTES (pagesize * 16)
217 #define REDZONE_LIMBS (REDZONE_BYTES / GMP_LIMB_BYTES)
218
219
220 #define MAX3(x,y,z) (MAX (x, MAX (y, z)))
221
222 #if GMP_LIMB_BITS == 32
223 #define DEADVAL CNST_LIMB(0xDEADBEEF)
224 #else
225 #define DEADVAL CNST_LIMB(0xDEADBEEFBADDCAFE)
226 #endif
227
228
229 struct region_t {
230 mp_ptr ptr;
231 mp_size_t size;
232 };
233
234
235 #define TRAP_NOWHERE 0
236 #define TRAP_REF 1
237 #define TRAP_FUN 2
238 #define TRAP_SETUPS 3
239 int trap_location = TRAP_NOWHERE;
240
241
242 #define NUM_SOURCES 5
243 #define NUM_DESTS 2
244
245 struct source_t {
246 struct region_t region;
247 int high;
248 mp_size_t align;
249 mp_ptr p;
250 };
251
252 struct source_t s[NUM_SOURCES];
253
254 struct dest_t {
255 int high;
256 mp_size_t align;
257 mp_size_t size;
258 };
259
260 struct dest_t d[NUM_DESTS];
261
262 struct source_each_t {
263 mp_ptr p;
264 };
265
266 struct dest_each_t {
267 struct region_t region;
268 mp_ptr p;
269 };
270
271 mp_size_t size;
272 mp_size_t size2;
273 unsigned long shift;
274 mp_limb_t carry;
275 mp_limb_t divisor;
276 mp_limb_t multiplier;
277 mp_limb_t multiplier_N[8];
278
279 struct each_t {
280 const char *name;
281 struct dest_each_t d[NUM_DESTS];
282 struct source_each_t s[NUM_SOURCES];
283 mp_limb_t retval;
284 };
285
286 struct each_t ref = { "Ref" };
287 struct each_t fun = { "Fun" };
288
289 #define SRC_SIZE(n) ((n) == 1 && tr->size2 ? size2 : size)
290
291 void validate_fail (void);
292
293
294 #if HAVE_TRY_NEW_C
295 #include "try-new.c"
296 #endif
297
298
299 typedef mp_limb_t (*tryfun_t) (ANYARGS);
300
301 struct try_t {
302 char retval;
303
304 char src[NUM_SOURCES];
305 char dst[NUM_DESTS];
306
307 #define SIZE_YES 1
308 #define SIZE_ALLOW_ZERO 2
309 #define SIZE_1 3 /* 1 limb */
310 #define SIZE_2 4 /* 2 limbs */
311 #define SIZE_3 5 /* 3 limbs */
312 #define SIZE_4 6 /* 4 limbs */
313 #define SIZE_6 7 /* 6 limbs */
314 #define SIZE_FRACTION 8 /* size2 is fraction for divrem etc */
315 #define SIZE_SIZE2 9
316 #define SIZE_PLUS_1 10
317 #define SIZE_SUM 11
318 #define SIZE_DIFF 12
319 #define SIZE_DIFF_PLUS_1 13
320 #define SIZE_DIFF_PLUS_3 14
321 #define SIZE_RETVAL 15
322 #define SIZE_CEIL_HALF 16
323 #define SIZE_GET_STR 17
324 #define SIZE_PLUS_MSIZE_SUB_1 18 /* size+msize-1 */
325 #define SIZE_ODD 19
326 char size;
327 char size2;
328 char dst_size[NUM_DESTS];
329
330 /* multiplier_N size in limbs */
331 mp_size_t msize;
332
333 char dst_bytes[NUM_DESTS];
334
335 char dst0_from_src1;
336
337 #define CARRY_BIT 1 /* single bit 0 or 1 */
338 #define CARRY_3 2 /* 0, 1, 2 */
339 #define CARRY_4 3 /* 0 to 3 */
340 #define CARRY_LIMB 4 /* any limb value */
341 #define CARRY_DIVISOR 5 /* carry<divisor */
342 char carry;
343
344 /* a fudge to tell the output when to print negatives */
345 char carry_sign;
346
347 char multiplier;
348 char shift;
349
350 #define DIVISOR_LIMB 1
351 #define DIVISOR_NORM 2
352 #define DIVISOR_ODD 3
353 char divisor;
354
355 #define DATA_NON_ZERO 1
356 #define DATA_GCD 2
357 #define DATA_SRC0_ODD 3
358 #define DATA_SRC0_HIGHBIT 4
359 #define DATA_SRC1_ODD 5
360 #define DATA_SRC1_ODD_PRIME 6
361 #define DATA_SRC1_HIGHBIT 7
362 #define DATA_MULTIPLE_DIVISOR 8
363 #define DATA_UDIV_QRNND 9
364 #define DATA_DIV_QR_1 10
365 char data;
366
367 /* Default is allow full overlap. */
368 #define OVERLAP_NONE 1
369 #define OVERLAP_LOW_TO_HIGH 2
370 #define OVERLAP_HIGH_TO_LOW 3
371 #define OVERLAP_NOT_SRCS 4
372 #define OVERLAP_NOT_SRC2 8
373 #define OVERLAP_NOT_DST2 16
374 char overlap;
375
376 tryfun_t reference;
377 const char *reference_name;
378
379 void (*validate) (void);
380 const char *validate_name;
381 };
382
383 struct try_t *tr;
384
385
386 void
387 validate_mod_34lsub1 (void)
388 {
389 #define CNST_34LSUB1 ((CNST_LIMB(1) << (3 * (GMP_NUMB_BITS / 4))) - 1)
390
391 mp_srcptr ptr = s[0].p;
392 int error = 0;
393 mp_limb_t got, got_mod, want, want_mod;
394
395 ASSERT (size >= 1);
396
397 got = fun.retval;
398 got_mod = got % CNST_34LSUB1;
399
400 want = refmpn_mod_34lsub1 (ptr, size);
401 want_mod = want % CNST_34LSUB1;
402
403 if (got_mod != want_mod)
404 {
405 gmp_printf ("got 0x%MX reduced from 0x%MX\n", got_mod, got);
406 gmp_printf ("want 0x%MX reduced from 0x%MX\n", want_mod, want);
407 error = 1;
408 }
409
410 if (error)
411 validate_fail ();
412 }
413
414 void
415 validate_divexact_1 (void)
416 {
417 mp_srcptr src = s[0].p;
418 mp_srcptr dst = fun.d[0].p;
419 int error = 0;
420
421 ASSERT (size >= 1);
422
423 {
424 mp_ptr tp = refmpn_malloc_limbs (size);
425 mp_limb_t rem;
426
427 rem = refmpn_divrem_1 (tp, 0, src, size, divisor);
428 if (rem != 0)
429 {
430 gmp_printf ("Remainder a%%d == 0x%MX, mpn_divexact_1 undefined\n", rem);
431 error = 1;
432 }
433 if (! refmpn_equal_anynail (tp, dst, size))
434 {
435 printf ("Quotient a/d wrong\n");
436 mpn_trace ("fun ", dst, size);
437 mpn_trace ("want", tp, size);
438 error = 1;
439 }
440 free (tp);
441 }
442
443 if (error)
444 validate_fail ();
445 }
446
447 void
448 validate_bdiv_q_1
449 (void)
450 {
451 mp_srcptr src = s[0].p;
452 mp_srcptr dst = fun.d[0].p;
453 int error = 0;
454
455 ASSERT (size >= 1);
456
457 {
458 mp_ptr tp = refmpn_malloc_limbs (size + 1);
459
460 refmpn_mul_1 (tp, dst, size, divisor);
461 /* Set ignored low bits */
462 tp[0] |= (src[0] & LOW_ZEROS_MASK (divisor));
463 if (! refmpn_equal_anynail (tp, src, size))
464 {
465 printf ("Bdiv wrong: res * divisor != src (mod B^size)\n");
466 mpn_trace ("res ", dst, size);
467 mpn_trace ("src ", src, size);
468 error = 1;
469 }
470 free (tp);
471 }
472
473 if (error)
474 validate_fail ();
475 }
476
477
478 void
479 validate_modexact_1c_odd (void)
480 {
481 mp_srcptr ptr = s[0].p;
482 mp_limb_t r = fun.retval;
483 int error = 0;
484
485 ASSERT (size >= 1);
486 ASSERT (divisor & 1);
487
488 if ((r & GMP_NAIL_MASK) != 0)
489 printf ("r has non-zero nail\n");
490
491 if (carry < divisor)
492 {
493 if (! (r < divisor))
494 {
495 printf ("Don't have r < divisor\n");
496 error = 1;
497 }
498 }
499 else /* carry >= divisor */
500 {
501 if (! (r <= divisor))
502 {
503 printf ("Don't have r <= divisor\n");
504 error = 1;
505 }
506 }
507
508 {
509 mp_limb_t c = carry % divisor;
510 mp_ptr tp = refmpn_malloc_limbs (size+1);
511 mp_size_t k;
512
513 for (k = size-1; k <= size; k++)
514 {
515 /* set {tp,size+1} to r*b^k + a - c */
516 refmpn_copyi (tp, ptr, size);
517 tp[size] = 0;
518 ASSERT_NOCARRY (refmpn_add_1 (tp+k, tp+k, size+1-k, r));
519 if (refmpn_sub_1 (tp, tp, size+1, c))
520 ASSERT_CARRY (mpn_add_1 (tp, tp, size+1, divisor));
521
522 if (refmpn_mod_1 (tp, size+1, divisor) == 0)
523 goto good_remainder;
524 }
525 printf ("Remainder matches neither r*b^(size-1) nor r*b^size\n");
526 error = 1;
527
528 good_remainder:
529 free (tp);
530 }
531
532 if (error)
533 validate_fail ();
534 }
535
536 void
537 validate_modexact_1_odd (void)
538 {
539 carry = 0;
540 validate_modexact_1c_odd ();
541 }
542
543 void
544 validate_div_qr_1_pi1 (void)
545 {
546 mp_srcptr up = ref.s[0].p;
547 mp_size_t un = size;
548 mp_size_t uh = ref.s[1].p[0];
549 mp_srcptr qp = fun.d[0].p;
550 mp_limb_t r = fun.retval;
551 mp_limb_t cy;
552 int cmp;
553 mp_ptr tp;
554 if (r >= divisor)
555 {
556 gmp_printf ("Bad remainder %Md, d = %Md\n", r, divisor);
557 validate_fail ();
558 }
559 tp = refmpn_malloc_limbs (un);
560 cy = refmpn_mul_1 (tp, qp, un, divisor);
561 cy += refmpn_add_1 (tp, tp, un, r);
562 if (cy != uh || refmpn_cmp (tp, up, un) != 0)
563 {
564 gmp_printf ("Incorrect result, size %ld.\n"
565 "d = %Mx, u = %Mx, %Nx\n"
566 "got: r = %Mx, q = %Nx\n"
567 "q d + r = %Mx, %Nx",
568 (long) un,
569 divisor, uh, up, un,
570 r, qp, un,
571 cy, tp, un);
572 validate_fail ();
573 }
574 free (tp);
575 }
576
577
578 void
579 validate_sqrtrem (void)
580 {
581 mp_srcptr orig_ptr = s[0].p;
582 mp_size_t orig_size = size;
583 mp_size_t root_size = (size+1)/2;
584 mp_srcptr root_ptr = fun.d[0].p;
585 mp_size_t rem_size = fun.retval;
586 mp_srcptr rem_ptr = fun.d[1].p;
587 mp_size_t prod_size = 2*root_size;
588 mp_ptr p;
589 int error = 0;
590
591 if (rem_size < 0 || rem_size > size)
592 {
593 printf ("Bad remainder size retval %ld\n", (long) rem_size);
594 validate_fail ();
595 }
596
597 p = refmpn_malloc_limbs (prod_size);
598
599 p[root_size] = refmpn_lshift (p, root_ptr, root_size, 1);
600 if (refmpn_cmp_twosizes (p,root_size+1, rem_ptr,rem_size) < 0)
601 {
602 printf ("Remainder bigger than 2*root\n");
603 error = 1;
604 }
605
606 refmpn_sqr (p, root_ptr, root_size);
607 if (rem_size != 0)
608 refmpn_add (p, p, prod_size, rem_ptr, rem_size);
609 if (refmpn_cmp_twosizes (p,prod_size, orig_ptr,orig_size) != 0)
610 {
611 printf ("root^2+rem != original\n");
612 mpn_trace ("prod", p, prod_size);
613 error = 1;
614 }
615 free (p);
616
617 if (error)
618 validate_fail ();
619 }
620
621 void
622 validate_sqrt (void)
623 {
624 mp_srcptr orig_ptr = s[0].p;
625 mp_size_t orig_size = size;
626 mp_size_t root_size = (size+1)/2;
627 mp_srcptr root_ptr = fun.d[0].p;
628 int perf_pow = (fun.retval == 0);
629 mp_size_t prod_size = 2*root_size;
630 mp_ptr p;
631 int error = 0;
632
633 p = refmpn_malloc_limbs (prod_size);
634
635 refmpn_sqr (p, root_ptr, root_size);
636 MPN_NORMALIZE (p, prod_size);
637 if (refmpn_cmp_twosizes (p,prod_size, orig_ptr,orig_size) != - !perf_pow)
638 {
639 printf ("root^2 bigger than original, or wrong return value.\n");
640 mpn_trace ("prod...", p, prod_size);
641 error = 1;
642 }
643
644 refmpn_sub (p, orig_ptr,orig_size, p,prod_size);
645 MPN_NORMALIZE (p, prod_size);
646 if (prod_size >= root_size &&
647 refmpn_sub (p, p,prod_size, root_ptr, root_size) == 0 &&
648 refmpn_cmp_twosizes (p, prod_size, root_ptr, root_size) > 0)
649 {
650 printf ("(root+1)^2 smaller than original.\n");
651 mpn_trace ("prod", p, prod_size);
652 error = 1;
653 }
654 free (p);
655
656 if (error)
657 validate_fail ();
658 }
659
660
661 /* These types are indexes into the param[] array and are arbitrary so long
662 as they're all distinct and within the size of param[]. Renumber
663 whenever necessary or desired. */
664
enum {
  /* Plain addition and subtraction.  Numbering starts at 1.  */
  TYPE_ADD = 1, TYPE_ADD_N, TYPE_ADD_NC,
  TYPE_SUB, TYPE_SUB_N, TYPE_SUB_NC,

  /* Addition and subtraction with error terms.  */
  TYPE_ADD_ERR1_N, TYPE_ADD_ERR2_N, TYPE_ADD_ERR3_N,
  TYPE_SUB_ERR1_N, TYPE_SUB_ERR2_N, TYPE_SUB_ERR3_N,

  /* Multiplication by 1 to 6 limbs.  */
  TYPE_MUL_1, TYPE_MUL_1C,
  TYPE_MUL_2, TYPE_MUL_3, TYPE_MUL_4, TYPE_MUL_5, TYPE_MUL_6,

  /* Multiply and accumulate.  */
  TYPE_ADDMUL_1, TYPE_ADDMUL_1C, TYPE_SUBMUL_1, TYPE_SUBMUL_1C,
  TYPE_ADDMUL_2, TYPE_ADDMUL_3, TYPE_ADDMUL_4, TYPE_ADDMUL_5,
  TYPE_ADDMUL_6, TYPE_ADDMUL_7, TYPE_ADDMUL_8,

  TYPE_ADDSUB_N, TYPE_ADDSUB_NC,

  /* Shifts, copies and complement.  */
  TYPE_RSHIFT, TYPE_LSHIFT, TYPE_LSHIFTC,
  TYPE_COPY, TYPE_COPYI, TYPE_COPYD, TYPE_COM,

  /* Combined shift with add, subtract or reverse subtract.  */
  TYPE_ADDLSH1_N, TYPE_ADDLSH2_N, TYPE_ADDLSH_N,
  TYPE_ADDLSH1_N_IP1, TYPE_ADDLSH2_N_IP1, TYPE_ADDLSH_N_IP1,
  TYPE_ADDLSH1_N_IP2, TYPE_ADDLSH2_N_IP2, TYPE_ADDLSH_N_IP2,
  TYPE_SUBLSH1_N, TYPE_SUBLSH2_N, TYPE_SUBLSH_N,
  TYPE_SUBLSH1_N_IP1, TYPE_SUBLSH2_N_IP1, TYPE_SUBLSH_N_IP1,
  TYPE_RSBLSH1_N, TYPE_RSBLSH2_N, TYPE_RSBLSH_N,
  TYPE_RSH1ADD_N, TYPE_RSH1SUB_N,

  /* The same with a carry-in.  */
  TYPE_ADDLSH1_NC, TYPE_ADDLSH2_NC, TYPE_ADDLSH_NC,
  TYPE_SUBLSH1_NC, TYPE_SUBLSH2_NC, TYPE_SUBLSH_NC,
  TYPE_RSBLSH1_NC, TYPE_RSBLSH2_NC, TYPE_RSBLSH_NC,

  TYPE_ADDCND_N, TYPE_SUBCND_N,

  /* Division and remainder.  */
  TYPE_MOD_1, TYPE_MOD_1C, TYPE_DIVMOD_1, TYPE_DIVMOD_1C,
  TYPE_DIVREM_1, TYPE_DIVREM_1C, TYPE_PREINV_DIVREM_1, TYPE_DIVREM_2,
  TYPE_PREINV_MOD_1,
  TYPE_DIV_QR_1N_PI1,
  TYPE_MOD_34LSUB1, TYPE_UDIV_QRNND, TYPE_UDIV_QRNND_R,

  /* Exact division.  */
  TYPE_DIVEXACT_1, TYPE_BDIV_Q_1, TYPE_DIVEXACT_BY3, TYPE_DIVEXACT_BY3C,
  TYPE_MODEXACT_1_ODD, TYPE_MODEXACT_1C_ODD,

  TYPE_INVERT, TYPE_BINVERT,

  /* GCD and Jacobi/Kronecker/Legendre symbols.  */
  TYPE_GCD, TYPE_GCD_1, TYPE_GCD_FINDA,
  TYPE_MPZ_JACOBI, TYPE_MPZ_KRONECKER,
  TYPE_MPZ_KRONECKER_UI, TYPE_MPZ_KRONECKER_SI,
  TYPE_MPZ_UI_KRONECKER, TYPE_MPZ_SI_KRONECKER,
  TYPE_MPZ_LEGENDRE,

  /* Bitwise logical operations.  */
  TYPE_AND_N, TYPE_NAND_N, TYPE_ANDN_N,
  TYPE_IOR_N, TYPE_IORN_N, TYPE_NIOR_N,
  TYPE_XOR_N, TYPE_XNOR_N,

  /* Multiplication and squaring.  */
  TYPE_MUL_MN, TYPE_MUL_N, TYPE_SQR, TYPE_UMUL_PPMM, TYPE_UMUL_PPMM_R,
  TYPE_MULLO_N, TYPE_SQRLO, TYPE_MULMID_MN, TYPE_MULMID_N,

  TYPE_SBPI1_DIV_QR, TYPE_TDIV_QR,

  TYPE_SQRTREM, TYPE_SQRT, TYPE_ZERO, TYPE_GET_STR,
  TYPE_POPCOUNT, TYPE_HAMDIST,

  /* One past the last type above; also the dimension of param[].  */
  TYPE_EXTRA
};
726
727 struct try_t param[TYPE_EXTRA];
728
729
730 void
731 param_init (void)
732 {
733 struct try_t *p;
734
735 #define COPY(index) memcpy (p, ¶m[index], sizeof (*p))
736
737 #define REFERENCE(fun) \
738 p->reference = (tryfun_t) fun; \
739 p->reference_name = #fun
740 #define VALIDATE(fun) \
741 p->validate = fun; \
742 p->validate_name = #fun
743
744
745 p = ¶m[TYPE_ADD_N];
746 p->retval = 1;
747 p->dst[0] = 1;
748 p->src[0] = 1;
749 p->src[1] = 1;
750 REFERENCE (refmpn_add_n);
751
752 p = ¶m[TYPE_ADD_NC];
753 COPY (TYPE_ADD_N);
754 p->carry = CARRY_BIT;
755 REFERENCE (refmpn_add_nc);
756
757 p = ¶m[TYPE_SUB_N];
758 COPY (TYPE_ADD_N);
759 REFERENCE (refmpn_sub_n);
760
761 p = ¶m[TYPE_SUB_NC];
762 COPY (TYPE_ADD_NC);
763 REFERENCE (refmpn_sub_nc);
764
765 p = ¶m[TYPE_ADD];
766 COPY (TYPE_ADD_N);
767 p->size = SIZE_ALLOW_ZERO;
768 p->size2 = 1;
769 REFERENCE (refmpn_add);
770
771 p = ¶m[TYPE_SUB];
772 COPY (TYPE_ADD);
773 REFERENCE (refmpn_sub);
774
775
776 p = ¶m[TYPE_ADD_ERR1_N];
777 p->retval = 1;
778 p->dst[0] = 1;
779 p->dst[1] = 1;
780 p->src[0] = 1;
781 p->src[1] = 1;
782 p->src[2] = 1;
783 p->dst_size[1] = SIZE_2;
784 p->carry = CARRY_BIT;
785 p->overlap = OVERLAP_NOT_DST2;
786 REFERENCE (refmpn_add_err1_n);
787
788 p = ¶m[TYPE_SUB_ERR1_N];
789 COPY (TYPE_ADD_ERR1_N);
790 REFERENCE (refmpn_sub_err1_n);
791
792 p = ¶m[TYPE_ADD_ERR2_N];
793 COPY (TYPE_ADD_ERR1_N);
794 p->src[3] = 1;
795 p->dst_size[1] = SIZE_4;
796 REFERENCE (refmpn_add_err2_n);
797
798 p = ¶m[TYPE_SUB_ERR2_N];
799 COPY (TYPE_ADD_ERR2_N);
800 REFERENCE (refmpn_sub_err2_n);
801
802 p = ¶m[TYPE_ADD_ERR3_N];
803 COPY (TYPE_ADD_ERR2_N);
804 p->src[4] = 1;
805 p->dst_size[1] = SIZE_6;
806 REFERENCE (refmpn_add_err3_n);
807
808 p = ¶m[TYPE_SUB_ERR3_N];
809 COPY (TYPE_ADD_ERR3_N);
810 REFERENCE (refmpn_sub_err3_n);
811
812 p = ¶m[TYPE_ADDCND_N];
813 COPY (TYPE_ADD_N);
814 p->carry = CARRY_BIT;
815 REFERENCE (refmpn_cnd_add_n);
816
817 p = ¶m[TYPE_SUBCND_N];
818 COPY (TYPE_ADD_N);
819 p->carry = CARRY_BIT;
820 REFERENCE (refmpn_cnd_sub_n);
821
822
823 p = ¶m[TYPE_MUL_1];
824 p->retval = 1;
825 p->dst[0] = 1;
826 p->src[0] = 1;
827 p->multiplier = 1;
828 p->overlap = OVERLAP_LOW_TO_HIGH;
829 REFERENCE (refmpn_mul_1);
830
831 p = ¶m[TYPE_MUL_1C];
832 COPY (TYPE_MUL_1);
833 p->carry = CARRY_LIMB;
834 REFERENCE (refmpn_mul_1c);
835
836
837 p = ¶m[TYPE_MUL_2];
838 p->retval = 1;
839 p->dst[0] = 1;
840 p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1;
841 p->src[0] = 1;
842 p->src[1] = 1;
843 p->msize = 2;
844 p->overlap = OVERLAP_NOT_SRC2;
845 REFERENCE (refmpn_mul_2);
846
847 p = ¶m[TYPE_MUL_3];
848 COPY (TYPE_MUL_2);
849 p->msize = 3;
850 REFERENCE (refmpn_mul_3);
851
852 p = ¶m[TYPE_MUL_4];
853 COPY (TYPE_MUL_2);
854 p->msize = 4;
855 REFERENCE (refmpn_mul_4);
856
857 p = ¶m[TYPE_MUL_5];
858 COPY (TYPE_MUL_2);
859 p->msize = 5;
860 REFERENCE (refmpn_mul_5);
861
862 p = ¶m[TYPE_MUL_6];
863 COPY (TYPE_MUL_2);
864 p->msize = 6;
865 REFERENCE (refmpn_mul_6);
866
867
868 p = ¶m[TYPE_ADDMUL_1];
869 p->retval = 1;
870 p->dst[0] = 1;
871 p->src[0] = 1;
872 p->multiplier = 1;
873 p->dst0_from_src1 = 1;
874 REFERENCE (refmpn_addmul_1);
875
876 p = ¶m[TYPE_ADDMUL_1C];
877 COPY (TYPE_ADDMUL_1);
878 p->carry = CARRY_LIMB;
879 REFERENCE (refmpn_addmul_1c);
880
881 p = ¶m[TYPE_SUBMUL_1];
882 COPY (TYPE_ADDMUL_1);
883 REFERENCE (refmpn_submul_1);
884
885 p = ¶m[TYPE_SUBMUL_1C];
886 COPY (TYPE_ADDMUL_1C);
887 REFERENCE (refmpn_submul_1c);
888
889
890 p = ¶m[TYPE_ADDMUL_2];
891 p->retval = 1;
892 p->dst[0] = 1;
893 p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1;
894 p->src[0] = 1;
895 p->src[1] = 1;
896 p->msize = 2;
897 p->dst0_from_src1 = 1;
898 p->overlap = OVERLAP_NONE;
899 REFERENCE (refmpn_addmul_2);
900
901 p = ¶m[TYPE_ADDMUL_3];
902 COPY (TYPE_ADDMUL_2);
903 p->msize = 3;
904 REFERENCE (refmpn_addmul_3);
905
906 p = ¶m[TYPE_ADDMUL_4];
907 COPY (TYPE_ADDMUL_2);
908 p->msize = 4;
909 REFERENCE (refmpn_addmul_4);
910
911 p = ¶m[TYPE_ADDMUL_5];
912 COPY (TYPE_ADDMUL_2);
913 p->msize = 5;
914 REFERENCE (refmpn_addmul_5);
915
916 p = ¶m[TYPE_ADDMUL_6];
917 COPY (TYPE_ADDMUL_2);
918 p->msize = 6;
919 REFERENCE (refmpn_addmul_6);
920
921 p = ¶m[TYPE_ADDMUL_7];
922 COPY (TYPE_ADDMUL_2);
923 p->msize = 7;
924 REFERENCE (refmpn_addmul_7);
925
926 p = ¶m[TYPE_ADDMUL_8];
927 COPY (TYPE_ADDMUL_2);
928 p->msize = 8;
929 REFERENCE (refmpn_addmul_8);
930
931
932 p = ¶m[TYPE_AND_N];
933 p->dst[0] = 1;
934 p->src[0] = 1;
935 p->src[1] = 1;
936 REFERENCE (refmpn_and_n);
937
938 p = ¶m[TYPE_ANDN_N];
939 COPY (TYPE_AND_N);
940 REFERENCE (refmpn_andn_n);
941
942 p = ¶m[TYPE_NAND_N];
943 COPY (TYPE_AND_N);
944 REFERENCE (refmpn_nand_n);
945
946 p = ¶m[TYPE_IOR_N];
947 COPY (TYPE_AND_N);
948 REFERENCE (refmpn_ior_n);
949
950 p = ¶m[TYPE_IORN_N];
951 COPY (TYPE_AND_N);
952 REFERENCE (refmpn_iorn_n);
953
954 p = ¶m[TYPE_NIOR_N];
955 COPY (TYPE_AND_N);
956 REFERENCE (refmpn_nior_n);
957
958 p = ¶m[TYPE_XOR_N];
959 COPY (TYPE_AND_N);
960 REFERENCE (refmpn_xor_n);
961
962 p = ¶m[TYPE_XNOR_N];
963 COPY (TYPE_AND_N);
964 REFERENCE (refmpn_xnor_n);
965
966
967 p = ¶m[TYPE_ADDSUB_N];
968 p->retval = 1;
969 p->dst[0] = 1;
970 p->dst[1] = 1;
971 p->src[0] = 1;
972 p->src[1] = 1;
973 REFERENCE (refmpn_add_n_sub_n);
974
975 p = ¶m[TYPE_ADDSUB_NC];
976 COPY (TYPE_ADDSUB_N);
977 p->carry = CARRY_4;
978 REFERENCE (refmpn_add_n_sub_nc);
979
980
981 p = ¶m[TYPE_COPY];
982 p->dst[0] = 1;
983 p->src[0] = 1;
984 p->overlap = OVERLAP_NONE;
985 p->size = SIZE_ALLOW_ZERO;
986 REFERENCE (refmpn_copy);
987
988 p = ¶m[TYPE_COPYI];
989 p->dst[0] = 1;
990 p->src[0] = 1;
991 p->overlap = OVERLAP_LOW_TO_HIGH;
992 p->size = SIZE_ALLOW_ZERO;
993 REFERENCE (refmpn_copyi);
994
995 p = ¶m[TYPE_COPYD];
996 p->dst[0] = 1;
997 p->src[0] = 1;
998 p->overlap = OVERLAP_HIGH_TO_LOW;
999 p->size = SIZE_ALLOW_ZERO;
1000 REFERENCE (refmpn_copyd);
1001
1002 p = ¶m[TYPE_COM];
1003 p->dst[0] = 1;
1004 p->src[0] = 1;
1005 REFERENCE (refmpn_com);
1006
1007
1008 p = ¶m[TYPE_ADDLSH1_N];
1009 COPY (TYPE_ADD_N);
1010 REFERENCE (refmpn_addlsh1_n);
1011
1012 p = ¶m[TYPE_ADDLSH2_N];
1013 COPY (TYPE_ADD_N);
1014 REFERENCE (refmpn_addlsh2_n);
1015
1016 p = ¶m[TYPE_ADDLSH_N];
1017 COPY (TYPE_ADD_N);
1018 p->shift = 1;
1019 REFERENCE (refmpn_addlsh_n);
1020
1021 p = ¶m[TYPE_ADDLSH1_N_IP1];
1022 p->retval = 1;
1023 p->dst[0] = 1;
1024 p->src[0] = 1;
1025 p->dst0_from_src1 = 1;
1026 REFERENCE (refmpn_addlsh1_n_ip1);
1027
1028 p = ¶m[TYPE_ADDLSH2_N_IP1];
1029 COPY (TYPE_ADDLSH1_N_IP1);
1030 REFERENCE (refmpn_addlsh2_n_ip1);
1031
1032 p = ¶m[TYPE_ADDLSH_N_IP1];
1033 COPY (TYPE_ADDLSH1_N_IP1);
1034 p->shift = 1;
1035 REFERENCE (refmpn_addlsh_n_ip1);
1036
1037 p = ¶m[TYPE_ADDLSH1_N_IP2];
1038 COPY (TYPE_ADDLSH1_N_IP1);
1039 REFERENCE (refmpn_addlsh1_n_ip2);
1040
1041 p = ¶m[TYPE_ADDLSH2_N_IP2];
1042 COPY (TYPE_ADDLSH1_N_IP1);
1043 REFERENCE (refmpn_addlsh2_n_ip2);
1044
1045 p = ¶m[TYPE_ADDLSH_N_IP2];
1046 COPY (TYPE_ADDLSH_N_IP1);
1047 REFERENCE (refmpn_addlsh_n_ip2);
1048
1049 p = ¶m[TYPE_SUBLSH1_N];
1050 COPY (TYPE_ADD_N);
1051 REFERENCE (refmpn_sublsh1_n);
1052
1053 p = ¶m[TYPE_SUBLSH2_N];
1054 COPY (TYPE_ADD_N);
1055 REFERENCE (refmpn_sublsh2_n);
1056
1057 p = ¶m[TYPE_SUBLSH_N];
1058 COPY (TYPE_ADDLSH_N);
1059 REFERENCE (refmpn_sublsh_n);
1060
1061 p = ¶m[TYPE_SUBLSH1_N_IP1];
1062 COPY (TYPE_ADDLSH1_N_IP1);
1063 REFERENCE (refmpn_sublsh1_n_ip1);
1064
1065 p = ¶m[TYPE_SUBLSH2_N_IP1];
1066 COPY (TYPE_ADDLSH1_N_IP1);
1067 REFERENCE (refmpn_sublsh2_n_ip1);
1068
1069 p = ¶m[TYPE_SUBLSH_N_IP1];
1070 COPY (TYPE_ADDLSH_N_IP1);
1071 REFERENCE (refmpn_sublsh_n_ip1);
1072
1073 p = ¶m[TYPE_RSBLSH1_N];
1074 COPY (TYPE_ADD_N);
1075 REFERENCE (refmpn_rsblsh1_n);
1076
1077 p = ¶m[TYPE_RSBLSH2_N];
1078 COPY (TYPE_ADD_N);
1079 REFERENCE (refmpn_rsblsh2_n);
1080
1081 p = ¶m[TYPE_RSBLSH_N];
1082 COPY (TYPE_ADDLSH_N);
1083 REFERENCE (refmpn_rsblsh_n);
1084
1085 p = ¶m[TYPE_RSH1ADD_N];
1086 COPY (TYPE_ADD_N);
1087 REFERENCE (refmpn_rsh1add_n);
1088
1089 p = ¶m[TYPE_RSH1SUB_N];
1090 COPY (TYPE_ADD_N);
1091 REFERENCE (refmpn_rsh1sub_n);
1092
1093
1094 p = ¶m[TYPE_ADDLSH1_NC];
1095 COPY (TYPE_ADDLSH1_N);
1096 p->carry = CARRY_3;
1097 REFERENCE (refmpn_addlsh1_nc);
1098
1099 p = ¶m[TYPE_ADDLSH2_NC];
1100 COPY (TYPE_ADDLSH2_N);
1101 p->carry = CARRY_4; /* FIXME */
1102 REFERENCE (refmpn_addlsh2_nc);
1103
1104 p = ¶m[TYPE_ADDLSH_NC];
1105 COPY (TYPE_ADDLSH_N);
1106 p->carry = CARRY_BIT; /* FIXME */
1107 REFERENCE (refmpn_addlsh_nc);
1108
1109 p = ¶m[TYPE_SUBLSH1_NC];
1110 COPY (TYPE_ADDLSH1_NC);
1111 REFERENCE (refmpn_sublsh1_nc);
1112
1113 p = ¶m[TYPE_SUBLSH2_NC];
1114 COPY (TYPE_ADDLSH2_NC);
1115 REFERENCE (refmpn_sublsh2_nc);
1116
1117 p = ¶m[TYPE_SUBLSH_NC];
1118 COPY (TYPE_ADDLSH_NC);
1119 REFERENCE (refmpn_sublsh_nc);
1120
1121 p = ¶m[TYPE_RSBLSH1_NC];
1122 COPY (TYPE_RSBLSH1_N);
1123 p->carry = CARRY_BIT; /* FIXME */
1124 REFERENCE (refmpn_rsblsh1_nc);
1125
1126 p = ¶m[TYPE_RSBLSH2_NC];
1127 COPY (TYPE_RSBLSH2_N);
1128 p->carry = CARRY_4; /* FIXME */
1129 REFERENCE (refmpn_rsblsh2_nc);
1130
1131 p = ¶m[TYPE_RSBLSH_NC];
1132 COPY (TYPE_RSBLSH_N);
1133 p->carry = CARRY_BIT; /* FIXME */
1134 REFERENCE (refmpn_rsblsh_nc);
1135
1136
1137 p = ¶m[TYPE_MOD_1];
1138 p->retval = 1;
1139 p->src[0] = 1;
1140 p->size = SIZE_ALLOW_ZERO;
1141 p->divisor = DIVISOR_LIMB;
1142 REFERENCE (refmpn_mod_1);
1143
1144 p = ¶m[TYPE_MOD_1C];
1145 COPY (TYPE_MOD_1);
1146 p->carry = CARRY_DIVISOR;
1147 REFERENCE (refmpn_mod_1c);
1148
1149 p = ¶m[TYPE_DIVMOD_1];
1150 COPY (TYPE_MOD_1);
1151 p->dst[0] = 1;
1152 REFERENCE (refmpn_divmod_1);
1153
1154 p = ¶m[TYPE_DIVMOD_1C];
1155 COPY (TYPE_DIVMOD_1);
1156 p->carry = CARRY_DIVISOR;
1157 REFERENCE (refmpn_divmod_1c);
1158
1159 p = ¶m[TYPE_DIVREM_1];
1160 COPY (TYPE_DIVMOD_1);
1161 p->size2 = SIZE_FRACTION;
1162 p->dst_size[0] = SIZE_SUM;
1163 REFERENCE (refmpn_divrem_1);
1164
1165 p = ¶m[TYPE_DIVREM_1C];
1166 COPY (TYPE_DIVREM_1);
1167 p->carry = CARRY_DIVISOR;
1168 REFERENCE (refmpn_divrem_1c);
1169
1170 p = ¶m[TYPE_PREINV_DIVREM_1];
1171 COPY (TYPE_DIVREM_1);
1172 p->size = SIZE_YES; /* ie. no size==0 */
1173 REFERENCE (refmpn_preinv_divrem_1);
1174
1175 p = ¶m[TYPE_DIV_QR_1N_PI1];
1176 p->retval = 1;
1177 p->src[0] = 1;
1178 p->src[1] = 1;
1179 /* SIZE_1 not supported. Always uses low limb only. */
1180 p->size2 = 1;
1181 p->dst[0] = 1;
1182 p->divisor = DIVISOR_NORM;
1183 p->data = DATA_DIV_QR_1;
1184 VALIDATE (validate_div_qr_1_pi1);
1185
1186 p = ¶m[TYPE_PREINV_MOD_1];
1187 p->retval = 1;
1188 p->src[0] = 1;
1189 p->divisor = DIVISOR_NORM;
1190 REFERENCE (refmpn_preinv_mod_1);
1191
1192 p = ¶m[TYPE_MOD_34LSUB1];
1193 p->retval = 1;
1194 p->src[0] = 1;
1195 VALIDATE (validate_mod_34lsub1);
1196
1197 p = ¶m[TYPE_UDIV_QRNND];
1198 p->retval = 1;
1199 p->src[0] = 1;
1200 p->dst[0] = 1;
1201 p->dst_size[0] = SIZE_1;
1202 p->divisor = UDIV_NEEDS_NORMALIZATION ? DIVISOR_NORM : DIVISOR_LIMB;
1203 p->data = DATA_UDIV_QRNND;
1204 p->overlap = OVERLAP_NONE;
1205 REFERENCE (refmpn_udiv_qrnnd);
1206
1207 p = ¶m[TYPE_UDIV_QRNND_R];
1208 COPY (TYPE_UDIV_QRNND);
1209 REFERENCE (refmpn_udiv_qrnnd_r);
1210
1211
1212 p = ¶m[TYPE_DIVEXACT_1];
1213 p->dst[0] = 1;
1214 p->src[0] = 1;
1215 p->divisor = DIVISOR_LIMB;
1216 p->data = DATA_MULTIPLE_DIVISOR;
1217 VALIDATE (validate_divexact_1);
1218 REFERENCE (refmpn_divmod_1);
1219
1220 p = ¶m[TYPE_BDIV_Q_1];
1221 p->dst[0] = 1;
1222 p->src[0] = 1;
1223 p->divisor = DIVISOR_LIMB;
1224 VALIDATE (validate_bdiv_q_1);
1225
1226 p = ¶m[TYPE_DIVEXACT_BY3];
1227 p->retval = 1;
1228 p->dst[0] = 1;
1229 p->src[0] = 1;
1230 REFERENCE (refmpn_divexact_by3);
1231
1232 p = ¶m[TYPE_DIVEXACT_BY3C];
1233 COPY (TYPE_DIVEXACT_BY3);
1234 p->carry = CARRY_3;
1235 REFERENCE (refmpn_divexact_by3c);
1236
1237
1238 p = ¶m[TYPE_MODEXACT_1_ODD];
1239 p->retval = 1;
1240 p->src[0] = 1;
1241 p->divisor = DIVISOR_ODD;
1242 VALIDATE (validate_modexact_1_odd);
1243
1244 p = ¶m[TYPE_MODEXACT_1C_ODD];
1245 COPY (TYPE_MODEXACT_1_ODD);
1246 p->carry = CARRY_LIMB;
1247 VALIDATE (validate_modexact_1c_odd);
1248
1249
1250 p = ¶m[TYPE_GCD_1];
1251 p->retval = 1;
1252 p->src[0] = 1;
1253 p->data = DATA_NON_ZERO;
1254 p->divisor = DIVISOR_LIMB;
1255 REFERENCE (refmpn_gcd_1);
1256
1257 p = ¶m[TYPE_GCD];
1258 p->retval = 1;
1259 p->dst[0] = 1;
1260 p->src[0] = 1;
1261 p->src[1] = 1;
1262 p->size2 = 1;
1263 p->dst_size[0] = SIZE_RETVAL;
1264 p->overlap = OVERLAP_NOT_SRCS;
1265 p->data = DATA_GCD;
1266 REFERENCE (refmpn_gcd);
1267
1268
1269 p = ¶m[TYPE_MPZ_LEGENDRE];
1270 p->retval = 1;
1271 p->src[0] = 1;
1272 p->size = SIZE_ALLOW_ZERO;
1273 p->src[1] = 1;
1274 p->data = DATA_SRC1_ODD_PRIME;
1275 p->size2 = 1;
1276 p->carry = CARRY_BIT;
1277 p->carry_sign = 1;
1278 REFERENCE (refmpz_legendre);
1279
1280 p = ¶m[TYPE_MPZ_JACOBI];
1281 p->retval = 1;
1282 p->src[0] = 1;
1283 p->size = SIZE_ALLOW_ZERO;
1284 p->src[1] = 1;
1285 p->data = DATA_SRC1_ODD;
1286 p->size2 = 1;
1287 p->carry = CARRY_BIT;
1288 p->carry_sign = 1;
1289 REFERENCE (refmpz_jacobi);
1290
1291 p = ¶m[TYPE_MPZ_KRONECKER];
1292 p->retval = 1;
1293 p->src[0] = 1;
1294 p->size = SIZE_ALLOW_ZERO;
1295 p->src[1] = 1;
1296 p->data = 0;
1297 p->size2 = 1;
1298 p->carry = CARRY_4;
1299 p->carry_sign = 1;
1300 REFERENCE (refmpz_kronecker);
1301
1302
1303 p = ¶m[TYPE_MPZ_KRONECKER_UI];
1304 p->retval = 1;
1305 p->src[0] = 1;
1306 p->size = SIZE_ALLOW_ZERO;
1307 p->multiplier = 1;
1308 p->carry = CARRY_BIT;
1309 REFERENCE (refmpz_kronecker_ui);
1310
1311 p = ¶m[TYPE_MPZ_KRONECKER_SI];
1312 COPY (TYPE_MPZ_KRONECKER_UI);
1313 REFERENCE (refmpz_kronecker_si);
1314
1315 p = ¶m[TYPE_MPZ_UI_KRONECKER];
1316 COPY (TYPE_MPZ_KRONECKER_UI);
1317 REFERENCE (refmpz_ui_kronecker);
1318
1319 p = ¶m[TYPE_MPZ_SI_KRONECKER];
1320 COPY (TYPE_MPZ_KRONECKER_UI);
1321 REFERENCE (refmpz_si_kronecker);
1322
1323
1324 p = ¶m[TYPE_SQR];
1325 p->dst[0] = 1;
1326 p->src[0] = 1;
1327 p->dst_size[0] = SIZE_SUM;
1328 p->overlap = OVERLAP_NONE;
1329 REFERENCE (refmpn_sqr);
1330
1331 p = ¶m[TYPE_MUL_N];
1332 COPY (TYPE_SQR);
1333 p->src[1] = 1;
1334 REFERENCE (refmpn_mul_n);
1335
1336 p = ¶m[TYPE_MULLO_N];
1337 COPY (TYPE_MUL_N);
1338 p->dst_size[0] = 0;
1339 REFERENCE (refmpn_mullo_n);
1340
1341 p = ¶m[TYPE_SQRLO];
1342 COPY (TYPE_SQR);
1343 p->dst_size[0] = 0;
1344 REFERENCE (refmpn_sqrlo);
1345
1346 p = ¶m[TYPE_MUL_MN];
1347 COPY (TYPE_MUL_N);
1348 p->size2 = 1;
1349 REFERENCE (refmpn_mul_basecase);
1350
1351 p = ¶m[TYPE_MULMID_MN];
1352 COPY (TYPE_MUL_MN);
1353 p->dst_size[0] = SIZE_DIFF_PLUS_3;
1354 REFERENCE (refmpn_mulmid_basecase);
1355
1356 p = ¶m[TYPE_MULMID_N];
1357 COPY (TYPE_MUL_N);
1358 p->size = SIZE_ODD;
1359 p->size2 = SIZE_CEIL_HALF;
1360 p->dst_size[0] = SIZE_DIFF_PLUS_3;
1361 REFERENCE (refmpn_mulmid_n);
1362
1363 p = ¶m[TYPE_UMUL_PPMM];
1364 p->retval = 1;
1365 p->src[0] = 1;
1366 p->dst[0] = 1;
1367 p->dst_size[0] = SIZE_1;
1368 p->overlap = OVERLAP_NONE;
1369 REFERENCE (refmpn_umul_ppmm);
1370
1371 p = ¶m[TYPE_UMUL_PPMM_R];
1372 COPY (TYPE_UMUL_PPMM);
1373 REFERENCE (refmpn_umul_ppmm_r);
1374
1375
1376 p = ¶m[TYPE_RSHIFT];
1377 p->retval = 1;
1378 p->dst[0] = 1;
1379 p->src[0] = 1;
1380 p->shift = 1;
1381 p->overlap = OVERLAP_LOW_TO_HIGH;
1382 REFERENCE (refmpn_rshift);
1383
1384 p = ¶m[TYPE_LSHIFT];
1385 COPY (TYPE_RSHIFT);
1386 p->overlap = OVERLAP_HIGH_TO_LOW;
1387 REFERENCE (refmpn_lshift);
1388
1389 p = ¶m[TYPE_LSHIFTC];
1390 COPY (TYPE_RSHIFT);
1391 p->overlap = OVERLAP_HIGH_TO_LOW;
1392 REFERENCE (refmpn_lshiftc);
1393
1394
1395 p = ¶m[TYPE_POPCOUNT];
1396 p->retval = 1;
1397 p->src[0] = 1;
1398 REFERENCE (refmpn_popcount);
1399
1400 p = ¶m[TYPE_HAMDIST];
1401 COPY (TYPE_POPCOUNT);
1402 p->src[1] = 1;
1403 REFERENCE (refmpn_hamdist);
1404
1405
1406 p = ¶m[TYPE_SBPI1_DIV_QR];
1407 p->retval = 1;
1408 p->dst[0] = 1;
1409 p->dst[1] = 1;
1410 p->src[0] = 1;
1411 p->src[1] = 1;
1412 p->data = DATA_SRC1_HIGHBIT;
1413 p->size2 = 1;
1414 p->dst_size[0] = SIZE_DIFF;
1415 p->overlap = OVERLAP_NONE;
1416 REFERENCE (refmpn_sb_div_qr);
1417
1418 p = ¶m[TYPE_TDIV_QR];
1419 p->dst[0] = 1;
1420 p->dst[1] = 1;
1421 p->src[0] = 1;
1422 p->src[1] = 1;
1423 p->size2 = 1;
1424 p->dst_size[0] = SIZE_DIFF_PLUS_1;
1425 p->dst_size[1] = SIZE_SIZE2;
1426 p->overlap = OVERLAP_NONE;
1427 REFERENCE (refmpn_tdiv_qr);
1428
1429 p = ¶m[TYPE_SQRTREM];
1430 p->retval = 1;
1431 p->dst[0] = 1;
1432 p->dst[1] = 1;
1433 p->src[0] = 1;
1434 p->dst_size[0] = SIZE_CEIL_HALF;
1435 p->dst_size[1] = SIZE_RETVAL;
1436 p->overlap = OVERLAP_NONE;
1437 VALIDATE (validate_sqrtrem);
1438 REFERENCE (refmpn_sqrtrem);
1439
1440 p = ¶m[TYPE_SQRT];
1441 p->retval = 1;
1442 p->dst[0] = 1;
1443 p->dst[1] = 0;
1444 p->src[0] = 1;
1445 p->dst_size[0] = SIZE_CEIL_HALF;
1446 p->overlap = OVERLAP_NONE;
1447 VALIDATE (validate_sqrt);
1448
1449 p = ¶m[TYPE_ZERO];
1450 p->dst[0] = 1;
1451 p->size = SIZE_ALLOW_ZERO;
1452 REFERENCE (refmpn_zero);
1453
1454 p = ¶m[TYPE_GET_STR];
1455 p->retval = 1;
1456 p->src[0] = 1;
1457 p->size = SIZE_ALLOW_ZERO;
1458 p->dst[0] = 1;
1459 p->dst[1] = 1;
1460 p->dst_size[0] = SIZE_GET_STR;
1461 p->dst_bytes[0] = 1;
1462 p->overlap = OVERLAP_NONE;
1463 REFERENCE (refmpn_get_str);
1464
1465 p = ¶m[TYPE_BINVERT];
1466 p->dst[0] = 1;
1467 p->src[0] = 1;
1468 p->data = DATA_SRC0_ODD;
1469 p->overlap = OVERLAP_NONE;
1470 REFERENCE (refmpn_binvert);
1471
1472 p = ¶m[TYPE_INVERT];
1473 p->dst[0] = 1;
1474 p->src[0] = 1;
1475 p->data = DATA_SRC0_HIGHBIT;
1476 p->overlap = OVERLAP_NONE;
1477 REFERENCE (refmpn_invert);
1478
1479 #ifdef EXTRA_PARAM_INIT
1480 EXTRA_PARAM_INIT
1481 #endif
1482 }
1483
1484
1485 /* The following are macros if there's no native versions, so wrap them in
1486 functions that can be in try_array[]. */
1487
1488 void
1489 MPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1490 { MPN_COPY (rp, sp, size); }
1491
1492 void
1493 MPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1494 { MPN_COPY_INCR (rp, sp, size); }
1495
1496 void
1497 MPN_COPY_DECR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1498 { MPN_COPY_DECR (rp, sp, size); }
1499
1500 void
1501 __GMPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1502 { __GMPN_COPY (rp, sp, size); }
1503
1504 #ifdef __GMPN_COPY_INCR
1505 void
1506 __GMPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1507 { __GMPN_COPY_INCR (rp, sp, size); }
1508 #endif
1509
1510 void
1511 mpn_com_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1512 { mpn_com (rp, sp, size); }
1513
1514 void
1515 mpn_and_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1516 { mpn_and_n (rp, s1, s2, size); }
1517
1518 void
1519 mpn_andn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1520 { mpn_andn_n (rp, s1, s2, size); }
1521
1522 void
1523 mpn_nand_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1524 { mpn_nand_n (rp, s1, s2, size); }
1525
1526 void
1527 mpn_ior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1528 { mpn_ior_n (rp, s1, s2, size); }
1529
1530 void
1531 mpn_iorn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1532 { mpn_iorn_n (rp, s1, s2, size); }
1533
1534 void
1535 mpn_nior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1536 { mpn_nior_n (rp, s1, s2, size); }
1537
1538 void
1539 mpn_xor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1540 { mpn_xor_n (rp, s1, s2, size); }
1541
1542 void
1543 mpn_xnor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1544 { mpn_xnor_n (rp, s1, s2, size); }
1545
1546 mp_limb_t
1547 udiv_qrnnd_fun (mp_limb_t *remptr, mp_limb_t n1, mp_limb_t n0, mp_limb_t d)
1548 {
1549 mp_limb_t q;
1550 udiv_qrnnd (q, *remptr, n1, n0, d);
1551 return q;
1552 }
1553
1554 mp_limb_t
1555 mpn_divexact_by3_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1556 {
1557 return mpn_divexact_by3 (rp, sp, size);
1558 }
1559
/* Function wrappers for the _ip1 and _ip2 forms of addlsh/sublsh.  Each
   exists only when the matching HAVE_NATIVE_ symbol is set, and forwards
   its arguments unchanged, returning whatever the wrapped routine gives.
   The plain "lsh" variants take the extra shift count "sh".  */

#if HAVE_NATIVE_mpn_addlsh1_n_ip1
mp_limb_t
mpn_addlsh1_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_limb_t  ret = mpn_addlsh1_n_ip1 (rp, sp, size);
  return ret;
}
#endif

#if HAVE_NATIVE_mpn_addlsh2_n_ip1
mp_limb_t
mpn_addlsh2_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_limb_t  ret = mpn_addlsh2_n_ip1 (rp, sp, size);
  return ret;
}
#endif

#if HAVE_NATIVE_mpn_addlsh_n_ip1
mp_limb_t
mpn_addlsh_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh)
{
  mp_limb_t  ret = mpn_addlsh_n_ip1 (rp, sp, size, sh);
  return ret;
}
#endif

#if HAVE_NATIVE_mpn_addlsh1_n_ip2
mp_limb_t
mpn_addlsh1_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_limb_t  ret = mpn_addlsh1_n_ip2 (rp, sp, size);
  return ret;
}
#endif

#if HAVE_NATIVE_mpn_addlsh2_n_ip2
mp_limb_t
mpn_addlsh2_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_limb_t  ret = mpn_addlsh2_n_ip2 (rp, sp, size);
  return ret;
}
#endif

#if HAVE_NATIVE_mpn_addlsh_n_ip2
mp_limb_t
mpn_addlsh_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh)
{
  mp_limb_t  ret = mpn_addlsh_n_ip2 (rp, sp, size, sh);
  return ret;
}
#endif

#if HAVE_NATIVE_mpn_sublsh1_n_ip1
mp_limb_t
mpn_sublsh1_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_limb_t  ret = mpn_sublsh1_n_ip1 (rp, sp, size);
  return ret;
}
#endif

#if HAVE_NATIVE_mpn_sublsh2_n_ip1
mp_limb_t
mpn_sublsh2_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_limb_t  ret = mpn_sublsh2_n_ip1 (rp, sp, size);
  return ret;
}
#endif

#if HAVE_NATIVE_mpn_sublsh_n_ip1
mp_limb_t
mpn_sublsh_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh)
{
  mp_limb_t  ret = mpn_sublsh_n_ip1 (rp, sp, size, sh);
  return ret;
}
#endif
1623
1624 mp_limb_t
1625 mpn_modexact_1_odd_fun (mp_srcptr ptr, mp_size_t size, mp_limb_t divisor)
1626 {
1627 return mpn_modexact_1_odd (ptr, size, divisor);
1628 }
1629
1630 void
1631 mpn_toom22_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1632 {
1633 mp_ptr tspace;
1634 TMP_DECL;
1635 TMP_MARK;
1636 tspace = TMP_ALLOC_LIMBS (mpn_toom22_mul_itch (size, size));
1637 mpn_toom22_mul (dst, src1, size, src2, size, tspace);
1638 TMP_FREE;
1639 }
1640 void
1641 mpn_toom2_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1642 {
1643 mp_ptr tspace;
1644 TMP_DECL;
1645 TMP_MARK;
1646 tspace = TMP_ALLOC_LIMBS (mpn_toom2_sqr_itch (size));
1647 mpn_toom2_sqr (dst, src, size, tspace);
1648 TMP_FREE;
1649 }
1650 void
1651 mpn_toom33_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1652 {
1653 mp_ptr tspace;
1654 TMP_DECL;
1655 TMP_MARK;
1656 tspace = TMP_ALLOC_LIMBS (mpn_toom33_mul_itch (size, size));
1657 mpn_toom33_mul (dst, src1, size, src2, size, tspace);
1658 TMP_FREE;
1659 }
1660 void
1661 mpn_toom3_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1662 {
1663 mp_ptr tspace;
1664 TMP_DECL;
1665 TMP_MARK;
1666 tspace = TMP_ALLOC_LIMBS (mpn_toom3_sqr_itch (size));
1667 mpn_toom3_sqr (dst, src, size, tspace);
1668 TMP_FREE;
1669 }
1670 void
1671 mpn_toom44_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1672 {
1673 mp_ptr tspace;
1674 TMP_DECL;
1675 TMP_MARK;
1676 tspace = TMP_ALLOC_LIMBS (mpn_toom44_mul_itch (size, size));
1677 mpn_toom44_mul (dst, src1, size, src2, size, tspace);
1678 TMP_FREE;
1679 }
1680 void
1681 mpn_toom4_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1682 {
1683 mp_ptr tspace;
1684 TMP_DECL;
1685 TMP_MARK;
1686 tspace = TMP_ALLOC_LIMBS (mpn_toom4_sqr_itch (size));
1687 mpn_toom4_sqr (dst, src, size, tspace);
1688 TMP_FREE;
1689 }
1690
1691 void
1692 mpn_toom42_mulmid_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
1693 mp_size_t size)
1694 {
1695 mp_ptr tspace;
1696 mp_size_t n;
1697 TMP_DECL;
1698 TMP_MARK;
1699 tspace = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (size));
1700 mpn_toom42_mulmid (dst, src1, src2, size, tspace);
1701 TMP_FREE;
1702 }
1703
1704 mp_limb_t
1705 umul_ppmm_fun (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2)
1706 {
1707 mp_limb_t high;
1708 umul_ppmm (high, *lowptr, m1, m2);
1709 return high;
1710 }
1711
1712 void
1713 MPN_ZERO_fun (mp_ptr ptr, mp_size_t size)
1714 { MPN_ZERO (ptr, size); }
1715
1716 mp_size_t
1717 mpn_sqrt_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1718 { return mpn_sqrtrem (dst, NULL, src, size); }
1719
1720 struct choice_t {
1721 const char *name;
1722 tryfun_t function;
1723 int type;
1724 mp_size_t minsize;
1725 };
1726
1727 #define TRY(fun) #fun, (tryfun_t) fun
1728 #define TRY_FUNFUN(fun) #fun, (tryfun_t) fun##_fun
1729
1730 const struct choice_t choice_array[] = {
1731 { TRY(mpn_add), TYPE_ADD },
1732 { TRY(mpn_sub), TYPE_SUB },
1733
1734 { TRY(mpn_add_n), TYPE_ADD_N },
1735 { TRY(mpn_sub_n), TYPE_SUB_N },
1736
1737 #if HAVE_NATIVE_mpn_add_nc
1738 { TRY(mpn_add_nc), TYPE_ADD_NC },
1739 #endif
1740 #if HAVE_NATIVE_mpn_sub_nc
1741 { TRY(mpn_sub_nc), TYPE_SUB_NC },
1742 #endif
1743
1744 #if HAVE_NATIVE_mpn_add_n_sub_n
1745 { TRY(mpn_add_n_sub_n), TYPE_ADDSUB_N },
1746 #endif
1747 #if HAVE_NATIVE_mpn_add_n_sub_nc
1748 { TRY(mpn_add_n_sub_nc), TYPE_ADDSUB_NC },
1749 #endif
1750
1751 { TRY(mpn_add_err1_n), TYPE_ADD_ERR1_N },
1752 { TRY(mpn_sub_err1_n), TYPE_SUB_ERR1_N },
1753 { TRY(mpn_add_err2_n), TYPE_ADD_ERR2_N },
1754 { TRY(mpn_sub_err2_n), TYPE_SUB_ERR2_N },
1755 { TRY(mpn_add_err3_n), TYPE_ADD_ERR3_N },
1756 { TRY(mpn_sub_err3_n), TYPE_SUB_ERR3_N },
1757
1758 { TRY(mpn_addmul_1), TYPE_ADDMUL_1 },
1759 { TRY(mpn_submul_1), TYPE_SUBMUL_1 },
1760 #if HAVE_NATIVE_mpn_addmul_1c
1761 { TRY(mpn_addmul_1c), TYPE_ADDMUL_1C },
1762 #endif
1763 #if HAVE_NATIVE_mpn_submul_1c
1764 { TRY(mpn_submul_1c), TYPE_SUBMUL_1C },
1765 #endif
1766
1767 #if HAVE_NATIVE_mpn_addmul_2
1768 { TRY(mpn_addmul_2), TYPE_ADDMUL_2, 2 },
1769 #endif
1770 #if HAVE_NATIVE_mpn_addmul_3
1771 { TRY(mpn_addmul_3), TYPE_ADDMUL_3, 3 },
1772 #endif
1773 #if HAVE_NATIVE_mpn_addmul_4
1774 { TRY(mpn_addmul_4), TYPE_ADDMUL_4, 4 },
1775 #endif
1776 #if HAVE_NATIVE_mpn_addmul_5
1777 { TRY(mpn_addmul_5), TYPE_ADDMUL_5, 5 },
1778 #endif
1779 #if HAVE_NATIVE_mpn_addmul_6
1780 { TRY(mpn_addmul_6), TYPE_ADDMUL_6, 6 },
1781 #endif
1782 #if HAVE_NATIVE_mpn_addmul_7
1783 { TRY(mpn_addmul_7), TYPE_ADDMUL_7, 7 },
1784 #endif
1785 #if HAVE_NATIVE_mpn_addmul_8
1786 { TRY(mpn_addmul_8), TYPE_ADDMUL_8, 8 },
1787 #endif
1788
1789 { TRY_FUNFUN(mpn_com), TYPE_COM },
1790
1791 { TRY_FUNFUN(MPN_COPY), TYPE_COPY },
1792 { TRY_FUNFUN(MPN_COPY_INCR), TYPE_COPYI },
1793 { TRY_FUNFUN(MPN_COPY_DECR), TYPE_COPYD },
1794
1795 { TRY_FUNFUN(__GMPN_COPY), TYPE_COPY },
1796 #ifdef __GMPN_COPY_INCR
1797 { TRY_FUNFUN(__GMPN_COPY_INCR), TYPE_COPYI },
1798 #endif
1799
1800 #if HAVE_NATIVE_mpn_copyi
1801 { TRY(mpn_copyi), TYPE_COPYI },
1802 #endif
1803 #if HAVE_NATIVE_mpn_copyd
1804 { TRY(mpn_copyd), TYPE_COPYD },
1805 #endif
1806
1807 { TRY(mpn_cnd_add_n), TYPE_ADDCND_N },
1808 { TRY(mpn_cnd_sub_n), TYPE_SUBCND_N },
1809 #if HAVE_NATIVE_mpn_addlsh1_n == 1
1810 { TRY(mpn_addlsh1_n), TYPE_ADDLSH1_N },
1811 #endif
1812 #if HAVE_NATIVE_mpn_addlsh2_n == 1
1813 { TRY(mpn_addlsh2_n), TYPE_ADDLSH2_N },
1814 #endif
1815 #if HAVE_NATIVE_mpn_addlsh_n
1816 { TRY(mpn_addlsh_n), TYPE_ADDLSH_N },
1817 #endif
1818 #if HAVE_NATIVE_mpn_addlsh1_n_ip1
1819 { TRY_FUNFUN(mpn_addlsh1_n_ip1), TYPE_ADDLSH1_N_IP1 },
1820 #endif
1821 #if HAVE_NATIVE_mpn_addlsh2_n_ip1
1822 { TRY_FUNFUN(mpn_addlsh2_n_ip1), TYPE_ADDLSH2_N_IP1 },
1823 #endif
1824 #if HAVE_NATIVE_mpn_addlsh_n_ip1
1825 { TRY_FUNFUN(mpn_addlsh_n_ip1), TYPE_ADDLSH_N_IP1 },
1826 #endif
1827 #if HAVE_NATIVE_mpn_addlsh1_n_ip2
1828 { TRY_FUNFUN(mpn_addlsh1_n_ip2), TYPE_ADDLSH1_N_IP2 },
1829 #endif
1830 #if HAVE_NATIVE_mpn_addlsh2_n_ip2
1831 { TRY_FUNFUN(mpn_addlsh2_n_ip2), TYPE_ADDLSH2_N_IP2 },
1832 #endif
1833 #if HAVE_NATIVE_mpn_addlsh_n_ip2
1834 { TRY_FUNFUN(mpn_addlsh_n_ip2), TYPE_ADDLSH_N_IP2 },
1835 #endif
1836 #if HAVE_NATIVE_mpn_sublsh1_n == 1
1837 { TRY(mpn_sublsh1_n), TYPE_SUBLSH1_N },
1838 #endif
1839 #if HAVE_NATIVE_mpn_sublsh2_n == 1
1840 { TRY(mpn_sublsh2_n), TYPE_SUBLSH2_N },
1841 #endif
1842 #if HAVE_NATIVE_mpn_sublsh_n
1843 { TRY(mpn_sublsh_n), TYPE_SUBLSH_N },
1844 #endif
1845 #if HAVE_NATIVE_mpn_sublsh1_n_ip1
1846 { TRY_FUNFUN(mpn_sublsh1_n_ip1), TYPE_SUBLSH1_N_IP1 },
1847 #endif
1848 #if HAVE_NATIVE_mpn_sublsh2_n_ip1
1849 { TRY_FUNFUN(mpn_sublsh2_n_ip1), TYPE_SUBLSH2_N_IP1 },
1850 #endif
1851 #if HAVE_NATIVE_mpn_sublsh_n_ip1
1852 { TRY_FUNFUN(mpn_sublsh_n_ip1), TYPE_SUBLSH_N_IP1 },
1853 #endif
1854 #if HAVE_NATIVE_mpn_rsblsh1_n == 1
1855 { TRY(mpn_rsblsh1_n), TYPE_RSBLSH1_N },
1856 #endif
1857 #if HAVE_NATIVE_mpn_rsblsh2_n == 1
1858 { TRY(mpn_rsblsh2_n), TYPE_RSBLSH2_N },
1859 #endif
1860 #if HAVE_NATIVE_mpn_rsblsh_n
1861 { TRY(mpn_rsblsh_n), TYPE_RSBLSH_N },
1862 #endif
1863 #if HAVE_NATIVE_mpn_rsh1add_n
1864 { TRY(mpn_rsh1add_n), TYPE_RSH1ADD_N },
1865 #endif
1866 #if HAVE_NATIVE_mpn_rsh1sub_n
1867 { TRY(mpn_rsh1sub_n), TYPE_RSH1SUB_N },
1868 #endif
1869
1870 #if HAVE_NATIVE_mpn_addlsh1_nc
1871 { TRY(mpn_addlsh1_nc), TYPE_ADDLSH1_NC },
1872 #endif
1873 #if HAVE_NATIVE_mpn_addlsh2_nc
1874 { TRY(mpn_addlsh2_nc), TYPE_ADDLSH2_NC },
1875 #endif
1876 #if HAVE_NATIVE_mpn_addlsh_nc
1877 { TRY(mpn_addlsh_nc), TYPE_ADDLSH_NC },
1878 #endif
1879 #if HAVE_NATIVE_mpn_sublsh1_nc
1880 { TRY(mpn_sublsh1_nc), TYPE_SUBLSH1_NC },
1881 #endif
1882 #if HAVE_NATIVE_mpn_sublsh2_nc
1883 { TRY(mpn_sublsh2_nc), TYPE_SUBLSH2_NC },
1884 #endif
1885 #if HAVE_NATIVE_mpn_sublsh_nc
1886 { TRY(mpn_sublsh_nc), TYPE_SUBLSH_NC },
1887 #endif
1888 #if HAVE_NATIVE_mpn_rsblsh1_nc
1889 { TRY(mpn_rsblsh1_nc), TYPE_RSBLSH1_NC },
1890 #endif
1891 #if HAVE_NATIVE_mpn_rsblsh2_nc
1892 { TRY(mpn_rsblsh2_nc), TYPE_RSBLSH2_NC },
1893 #endif
1894 #if HAVE_NATIVE_mpn_rsblsh_nc
1895 { TRY(mpn_rsblsh_nc), TYPE_RSBLSH_NC },
1896 #endif
1897
1898 { TRY_FUNFUN(mpn_and_n), TYPE_AND_N },
1899 { TRY_FUNFUN(mpn_andn_n), TYPE_ANDN_N },
1900 { TRY_FUNFUN(mpn_nand_n), TYPE_NAND_N },
1901 { TRY_FUNFUN(mpn_ior_n), TYPE_IOR_N },
1902 { TRY_FUNFUN(mpn_iorn_n), TYPE_IORN_N },
1903 { TRY_FUNFUN(mpn_nior_n), TYPE_NIOR_N },
1904 { TRY_FUNFUN(mpn_xor_n), TYPE_XOR_N },
1905 { TRY_FUNFUN(mpn_xnor_n), TYPE_XNOR_N },
1906
1907 { TRY(mpn_divrem_1), TYPE_DIVREM_1 },
1908 #if USE_PREINV_DIVREM_1
1909 { TRY(mpn_preinv_divrem_1), TYPE_PREINV_DIVREM_1 },
1910 #endif
1911 { TRY(mpn_mod_1), TYPE_MOD_1 },
1912 #if USE_PREINV_MOD_1
1913 { TRY(mpn_preinv_mod_1), TYPE_PREINV_MOD_1 },
1914 #endif
1915 #if HAVE_NATIVE_mpn_divrem_1c
1916 { TRY(mpn_divrem_1c), TYPE_DIVREM_1C },
1917 #endif
1918 #if HAVE_NATIVE_mpn_mod_1c
1919 { TRY(mpn_mod_1c), TYPE_MOD_1C },
1920 #endif
1921 { TRY(mpn_div_qr_1n_pi1), TYPE_DIV_QR_1N_PI1 },
1922 #if GMP_NUMB_BITS % 4 == 0
1923 { TRY(mpn_mod_34lsub1), TYPE_MOD_34LSUB1 },
1924 #endif
1925
1926 { TRY_FUNFUN(udiv_qrnnd), TYPE_UDIV_QRNND, 2 },
1927 #if HAVE_NATIVE_mpn_udiv_qrnnd
1928 { TRY(mpn_udiv_qrnnd), TYPE_UDIV_QRNND, 2 },
1929 #endif
1930 #if HAVE_NATIVE_mpn_udiv_qrnnd_r
1931 { TRY(mpn_udiv_qrnnd_r), TYPE_UDIV_QRNND_R, 2 },
1932 #endif
1933
1934 { TRY(mpn_divexact_1), TYPE_DIVEXACT_1 },
1935 { TRY(mpn_bdiv_q_1), TYPE_BDIV_Q_1 },
1936 { TRY_FUNFUN(mpn_divexact_by3), TYPE_DIVEXACT_BY3 },
1937 { TRY(mpn_divexact_by3c), TYPE_DIVEXACT_BY3C },
1938
1939 { TRY_FUNFUN(mpn_modexact_1_odd), TYPE_MODEXACT_1_ODD },
1940 { TRY(mpn_modexact_1c_odd), TYPE_MODEXACT_1C_ODD },
1941
1942
1943 { TRY(mpn_sbpi1_div_qr), TYPE_SBPI1_DIV_QR, 3},
1944 { TRY(mpn_tdiv_qr), TYPE_TDIV_QR },
1945
1946 { TRY(mpn_mul_1), TYPE_MUL_1 },
1947 #if HAVE_NATIVE_mpn_mul_1c
1948 { TRY(mpn_mul_1c), TYPE_MUL_1C },
1949 #endif
1950 #if HAVE_NATIVE_mpn_mul_2
1951 { TRY(mpn_mul_2), TYPE_MUL_2, 2 },
1952 #endif
1953 #if HAVE_NATIVE_mpn_mul_3
1954 { TRY(mpn_mul_3), TYPE_MUL_3, 3 },
1955 #endif
1956 #if HAVE_NATIVE_mpn_mul_4
1957 { TRY(mpn_mul_4), TYPE_MUL_4, 4 },
1958 #endif
1959 #if HAVE_NATIVE_mpn_mul_5
1960 { TRY(mpn_mul_5), TYPE_MUL_5, 5 },
1961 #endif
1962 #if HAVE_NATIVE_mpn_mul_6
1963 { TRY(mpn_mul_6), TYPE_MUL_6, 6 },
1964 #endif
1965
1966 { TRY(mpn_rshift), TYPE_RSHIFT },
1967 { TRY(mpn_lshift), TYPE_LSHIFT },
1968 { TRY(mpn_lshiftc), TYPE_LSHIFTC },
1969
1970
1971 { TRY(mpn_mul_basecase), TYPE_MUL_MN },
1972 { TRY(mpn_mulmid_basecase), TYPE_MULMID_MN },
1973 { TRY(mpn_mullo_basecase), TYPE_MULLO_N },
1974 { TRY(mpn_sqrlo_basecase), TYPE_SQRLO },
1975 { TRY(mpn_sqrlo), TYPE_SQRLO },
1976 #if SQR_TOOM2_THRESHOLD > 0
1977 { TRY(mpn_sqr_basecase), TYPE_SQR },
1978 #endif
1979
1980 { TRY(mpn_mul), TYPE_MUL_MN },
1981 { TRY(mpn_mul_n), TYPE_MUL_N },
1982 { TRY(mpn_sqr), TYPE_SQR },
1983
1984 { TRY_FUNFUN(umul_ppmm), TYPE_UMUL_PPMM, 2 },
1985 #if HAVE_NATIVE_mpn_umul_ppmm
1986 { TRY(mpn_umul_ppmm), TYPE_UMUL_PPMM, 2 },
1987 #endif
1988 #if HAVE_NATIVE_mpn_umul_ppmm_r
1989 { TRY(mpn_umul_ppmm_r), TYPE_UMUL_PPMM_R, 2 },
1990 #endif
1991
1992 { TRY_FUNFUN(mpn_toom22_mul), TYPE_MUL_N, MPN_TOOM22_MUL_MINSIZE },
1993 { TRY_FUNFUN(mpn_toom2_sqr), TYPE_SQR, MPN_TOOM2_SQR_MINSIZE },
1994 { TRY_FUNFUN(mpn_toom33_mul), TYPE_MUL_N, MPN_TOOM33_MUL_MINSIZE },
1995 { TRY_FUNFUN(mpn_toom3_sqr), TYPE_SQR, MPN_TOOM3_SQR_MINSIZE },
1996 { TRY_FUNFUN(mpn_toom44_mul), TYPE_MUL_N, MPN_TOOM44_MUL_MINSIZE },
1997 { TRY_FUNFUN(mpn_toom4_sqr), TYPE_SQR, MPN_TOOM4_SQR_MINSIZE },
1998
1999 { TRY(mpn_mulmid_n), TYPE_MULMID_N, 1 },
2000 { TRY(mpn_mulmid), TYPE_MULMID_MN, 1 },
2001 { TRY_FUNFUN(mpn_toom42_mulmid), TYPE_MULMID_N,
2002 (2 * MPN_TOOM42_MULMID_MINSIZE - 1) },
2003
2004 { TRY(mpn_gcd_1), TYPE_GCD_1 },
2005 { TRY(mpn_gcd), TYPE_GCD },
2006 { TRY(mpz_legendre), TYPE_MPZ_LEGENDRE },
2007 { TRY(mpz_jacobi), TYPE_MPZ_JACOBI },
2008 { TRY(mpz_kronecker), TYPE_MPZ_KRONECKER },
2009 { TRY(mpz_kronecker_ui), TYPE_MPZ_KRONECKER_UI },
2010 { TRY(mpz_kronecker_si), TYPE_MPZ_KRONECKER_SI },
2011 { TRY(mpz_ui_kronecker), TYPE_MPZ_UI_KRONECKER },
2012 { TRY(mpz_si_kronecker), TYPE_MPZ_SI_KRONECKER },
2013
2014 { TRY(mpn_popcount), TYPE_POPCOUNT },
2015 { TRY(mpn_hamdist), TYPE_HAMDIST },
2016
2017 { TRY(mpn_sqrtrem), TYPE_SQRTREM },
2018 { TRY_FUNFUN(mpn_sqrt), TYPE_SQRT },
2019
2020 { TRY_FUNFUN(MPN_ZERO), TYPE_ZERO },
2021
2022 { TRY(mpn_get_str), TYPE_GET_STR },
2023
2024 { TRY(mpn_binvert), TYPE_BINVERT },
2025 { TRY(mpn_invert), TYPE_INVERT },
2026
2027 #ifdef EXTRA_ROUTINES
2028 EXTRA_ROUTINES
2029 #endif
2030 };
2031
2032 const struct choice_t *choice = NULL;
2033
2034
2035 void
2036 mprotect_maybe (void *addr, size_t len, int prot)
2037 {
2038 if (!option_redzones)
2039 return;
2040
2041 #if HAVE_MPROTECT
2042 if (mprotect (addr, len, prot) != 0)
2043 {
2044 fprintf (stderr, "Cannot mprotect %p 0x%X 0x%X: %s\n",
2045 addr, (unsigned) len, prot, strerror (errno));
2046 exit (1);
2047 }
2048 #else
2049 {
2050 static int warned = 0;
2051 if (!warned)
2052 {
2053 fprintf (stderr,
2054 "mprotect not available, bounds testing not performed\n");
2055 warned = 1;
2056 }
2057 }
2058 #endif
2059 }
2060
/* Round "a" up to a multiple of "m".

   Returns the smallest multiple of m that is >= a, which is a itself when
   a is already a multiple.  m == 0 has no meaningful multiple, so in that
   case a is returned unchanged rather than dividing by zero.

   The remainder is kept in a size_t, the same type as the operands, so
   nothing is truncated on systems where size_t is wider than unsigned
   long.  */
size_t
round_up_multiple (size_t a, size_t m)
{
  size_t  r;

  if (m == 0)
    return a;

  r = a % m;
  if (r == 0)
    return a;
  else
    return a + (m - r);
}
2073
2074
2075 /* On some systems it seems that only an mmap'ed region can be mprotect'ed,
2076 for instance HP-UX 10.
2077
2078 mmap will almost certainly return a pointer already aligned to a page
2079 boundary, but it's easy enough to share the alignment handling with the
2080 malloc case. */
2081
2082 void
2083 malloc_region (struct region_t *r, mp_size_t n)
2084 {
2085 mp_ptr p;
2086 size_t nbytes;
2087
2088 ASSERT ((pagesize % GMP_LIMB_BYTES) == 0);
2089
2090 n = round_up_multiple (n, PAGESIZE_LIMBS);
2091 r->size = n;
2092
2093 nbytes = n*GMP_LIMB_BYTES + 2*REDZONE_BYTES + pagesize;
2094
2095 #if defined (MAP_ANONYMOUS) && ! defined (MAP_ANON)
2096 #define MAP_ANON MAP_ANONYMOUS
2097 #endif
2098
2099 #if HAVE_MMAP && defined (MAP_ANON)
2100 /* note must pass fd=-1 for MAP_ANON on BSD */
2101 p = (mp_ptr) mmap (NULL, nbytes, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
2102 if (p == (void *) -1)
2103 {
2104 fprintf (stderr, "Cannot mmap %#x anon bytes: %s\n",
2105 (unsigned) nbytes, strerror (errno));
2106 exit (1);
2107 }
2108 #else
2109 p = (mp_ptr) malloc (nbytes);
2110 ASSERT_ALWAYS (p != NULL);
2111 #endif
2112
2113 p = (mp_ptr) align_pointer (p, pagesize);
2114
2115 mprotect_maybe (p, REDZONE_BYTES, PROT_NONE);
2116 p += REDZONE_LIMBS;
2117 r->ptr = p;
2118
2119 mprotect_maybe (p + n, REDZONE_BYTES, PROT_NONE);
2120 }
2121
2122 void
2123 mprotect_region (const struct region_t *r, int prot)
2124 {
2125 mprotect_maybe (r->ptr, r->size, prot);
2126 }
2127
2128
2129 /* First four entries must be 0,1,2,3 for the benefit of CARRY_BIT, CARRY_3,
2130 and CARRY_4 */
2131 mp_limb_t carry_array[] = {
2132 0, 1, 2, 3,
2133 4,
2134 CNST_LIMB(1) << 8,
2135 CNST_LIMB(1) << 16,
2136 GMP_NUMB_MAX
2137 };
2138 int carry_index;
2139
2140 #define CARRY_COUNT \
2141 ((tr->carry == CARRY_BIT) ? 2 \
2142 : tr->carry == CARRY_3 ? 3 \
2143 : tr->carry == CARRY_4 ? 4 \
2144 : (tr->carry == CARRY_LIMB || tr->carry == CARRY_DIVISOR) \
2145 ? numberof(carry_array) + CARRY_RANDOMS \
2146 : 1)
2147
2148 #define MPN_RANDOM_ALT(index,dst,size) \
2149 (((index) & 1) ? refmpn_random (dst, size) : refmpn_random2 (dst, size))
2150
2151 /* The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
2152 the same type */
2153 #define CARRY_ITERATION \
2154 for (carry_index = 0; \
2155 (carry_index < numberof (carry_array) \
2156 ? (carry = carry_array[carry_index]) \
2157 : (MPN_RANDOM_ALT (carry_index, &carry, 1), (mp_limb_t) 0)), \
2158 (tr->carry == CARRY_DIVISOR ? carry %= divisor : 0), \
2159 carry_index < CARRY_COUNT; \
2160 carry_index++)
2161
2162
2163 mp_limb_t multiplier_array[] = {
2164 0, 1, 2, 3,
2165 CNST_LIMB(1) << 8,
2166 CNST_LIMB(1) << 16,
2167 GMP_NUMB_MAX - 2,
2168 GMP_NUMB_MAX - 1,
2169 GMP_NUMB_MAX
2170 };
2171 int multiplier_index;
2172
2173 mp_limb_t divisor_array[] = {
2174 1, 2, 3,
2175 CNST_LIMB(1) << 8,
2176 CNST_LIMB(1) << 16,
2177 CNST_LIMB(1) << (GMP_NUMB_BITS/2 - 1),
2178 GMP_NUMB_MAX >> (GMP_NUMB_BITS/2),
2179 GMP_NUMB_HIGHBIT,
2180 GMP_NUMB_HIGHBIT + 1,
2181 GMP_NUMB_MAX - 2,
2182 GMP_NUMB_MAX - 1,
2183 GMP_NUMB_MAX
2184 };
2185
2186 int divisor_index;
2187
/* Loop header stepping "var" through "limit" values, with "index" as the
   counter: the entries of array[] first, then random limbs from
   MPN_RANDOM_ALT once index has passed the end of the array.  The value
   is chosen inside the loop condition, ahead of the bound test.

   The "randoms" and "cond" arguments are accepted but do not appear in
   the expansion.

   The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
   the same type.  */
#define ARRAY_ITERATION(var, index, limit, array, randoms, cond)        \
  for (index = 0;                                                       \
       (index < numberof (array)                                        \
        ? (var = array[index])                                          \
        : (MPN_RANDOM_ALT (index, &var, 1), (mp_limb_t) 0)),            \
         index < limit;                                                 \
       index++)

/* Number of multipliers to try: the whole fixed array plus
   MULTIPLIER_RANDOMS when the routine takes a multiplier, otherwise a
   single pass.  */
#define MULTIPLIER_COUNT                                                \
  (tr->multiplier                                                       \
   ? numberof (multiplier_array) + MULTIPLIER_RANDOMS                   \
   : 1)

#define MULTIPLIER_ITERATION                                            \
  ARRAY_ITERATION(multiplier, multiplier_index, MULTIPLIER_COUNT,       \
                  multiplier_array, MULTIPLIER_RANDOMS, TRY_MULTIPLIER)

/* Number of divisors to try: the whole fixed array plus DIVISOR_RANDOMS
   when the routine takes a divisor, otherwise a single pass.  */
#define DIVISOR_COUNT                                                   \
  (tr->divisor                                                          \
   ? numberof (divisor_array) + DIVISOR_RANDOMS                         \
   : 1)

#define DIVISOR_ITERATION                                               \
  ARRAY_ITERATION(divisor, divisor_index, DIVISOR_COUNT, divisor_array, \
                  DIVISOR_RANDOMS, TRY_DIVISOR)
2215
2216
2217 /* overlap_array[].s[i] is where s[i] should be, 0 or 1 means overlapping
2218 d[0] or d[1] respectively, -1 means a separate (write-protected)
2219 location. */
2220
2221 struct overlap_t {
2222 int s[NUM_SOURCES];
2223 } overlap_array[] = {
2224 { { -1, -1, -1, -1, -1 } },
2225 { { 0, -1, -1, -1, -1 } },
2226 { { -1, 0, -1, -1, -1 } },
2227 { { 0, 0, -1, -1, -1 } },
2228 { { 1, -1, -1, -1, -1 } },
2229 { { -1, 1, -1, -1, -1 } },
2230 { { 1, 1, -1, -1, -1 } },
2231 { { 0, 1, -1, -1, -1 } },
2232 { { 1, 0, -1, -1, -1 } },
2233 };
2234
2235 struct overlap_t *overlap, *overlap_limit;
2236
2237 #define OVERLAP_COUNT \
2238 (tr->overlap & OVERLAP_NONE ? 1 \
2239 : tr->overlap & OVERLAP_NOT_SRCS ? 3 \
2240 : tr->overlap & OVERLAP_NOT_SRC2 ? 2 \
2241 : tr->overlap & OVERLAP_NOT_DST2 ? 4 \
2242 : tr->dst[1] ? 9 \
2243 : tr->src[1] ? 4 \
2244 : tr->dst[0] ? 2 \
2245 : 1)
2246
2247 #define OVERLAP_ITERATION \
2248 for (overlap = &overlap_array[0], \
2249 overlap_limit = &overlap_array[OVERLAP_COUNT]; \
2250 overlap < overlap_limit; \
2251 overlap++)
2252
2253
2254 int base = 10;
2255
2256 #define T_RAND_COUNT 2
2257 int t_rand;
2258
2259 void
2260 t_random (mp_ptr ptr, mp_size_t n)
2261 {
2262 if (n == 0)
2263 return;
2264
2265 switch (option_data) {
2266 case DATA_TRAND:
2267 switch (t_rand) {
2268 case 0: refmpn_random (ptr, n); break;
2269 case 1: refmpn_random2 (ptr, n); break;
2270 default: abort();
2271 }
2272 break;
2273 case DATA_SEQ:
2274 {
2275 static mp_limb_t counter = 0;
2276 mp_size_t i;
2277 for (i = 0; i < n; i++)
2278 ptr[i] = ++counter;
2279 }
2280 break;
2281 case DATA_ZEROS:
2282 refmpn_zero (ptr, n);
2283 break;
2284 case DATA_FFS:
2285 refmpn_fill (ptr, n, GMP_NUMB_MAX);
2286 break;
2287 case DATA_2FD:
2288 /* Special value 0x2FFF...FFFD, which divided by 3 gives 0xFFF...FFF,
2289 inducing the q1_ff special case in the mul-by-inverse part of some
2290 versions of divrem_1 and mod_1. */
2291 refmpn_fill (ptr, n, (mp_limb_t) -1);
2292 ptr[n-1] = 2;
2293 ptr[0] -= 2;
2294 break;
2295
2296 default:
2297 abort();
2298 }
2299 }
2300 #define T_RAND_ITERATION \
2301 for (t_rand = 0; t_rand < T_RAND_COUNT; t_rand++)
2302
2303
2304 void
2305 print_each (const struct each_t *e)
2306 {
2307 int i;
2308
2309 printf ("%s %s\n", e->name, e == &ref ? tr->reference_name : choice->name);
2310 if (tr->retval)
2311 mpn_trace (" retval", &e->retval, 1);
2312
2313 for (i = 0; i < NUM_DESTS; i++)
2314 {
2315 if (tr->dst[i])
2316 {
2317 if (tr->dst_bytes[i])
2318 byte_tracen (" d[%d]", i, e->d[i].p, d[i].size);
2319 else
2320 mpn_tracen (" d[%d]", i, e->d[i].p, d[i].size);
2321 printf (" located %p\n", (void *) (e->d[i].p));
2322 }
2323 }
2324
2325 for (i = 0; i < NUM_SOURCES; i++)
2326 if (tr->src[i])
2327 printf (" s[%d] located %p\n", i, (void *) (e->s[i].p));
2328 }
2329
2330
2331 void
2332 print_all (void)
2333 {
2334 int i;
2335
2336 printf ("\n");
2337 printf ("size %ld\n", (long) size);
2338 if (tr->size2)
2339 printf ("size2 %ld\n", (long) size2);
2340
2341 for (i = 0; i < NUM_DESTS; i++)
2342 if (d[i].size != size)
2343 printf ("d[%d].size %ld\n", i, (long) d[i].size);
2344
2345 if (tr->multiplier)
2346 mpn_trace (" multiplier", &multiplier, 1);
2347 if (tr->divisor)
2348 mpn_trace (" divisor", &divisor, 1);
2349 if (tr->shift)
2350 printf (" shift %lu\n", shift);
2351 if (tr->carry)
2352 mpn_trace (" carry", &carry, 1);
2353 if (tr->msize)
2354 mpn_trace (" multiplier_N", multiplier_N, tr->msize);
2355
2356 for (i = 0; i < NUM_DESTS; i++)
2357 if (tr->dst[i])
2358 printf (" d[%d] %s, align %ld, size %ld\n",
2359 i, d[i].high ? "high" : "low",
2360 (long) d[i].align, (long) d[i].size);
2361
2362 for (i = 0; i < NUM_SOURCES; i++)
2363 {
2364 if (tr->src[i])
2365 {
2366 printf (" s[%d] %s, align %ld, ",
2367 i, s[i].high ? "high" : "low", (long) s[i].align);
2368 switch (overlap->s[i]) {
2369 case -1:
2370 printf ("no overlap\n");
2371 break;
2372 default:
2373 printf ("==d[%d]%s\n",
2374 overlap->s[i],
2375 tr->overlap == OVERLAP_LOW_TO_HIGH ? "+a"
2376 : tr->overlap == OVERLAP_HIGH_TO_LOW ? "-a"
2377 : "");
2378 break;
2379 }
2380 printf (" s[%d]=", i);
2381 if (tr->carry_sign && (carry & (1 << i)))
2382 printf ("-");
2383 mpn_trace (NULL, s[i].p, SRC_SIZE(i));
2384 }
2385 }
2386
2387 if (tr->dst0_from_src1)
2388 mpn_trace (" d[0]", s[1].region.ptr, size);
2389
2390 if (tr->reference)
2391 print_each (&ref);
2392 print_each (&fun);
2393 }
2394
2395 void
2396 compare (void)
2397 {
2398 int error = 0;
2399 int i;
2400
2401 if (tr->retval && ref.retval != fun.retval)
2402 {
2403 gmp_printf ("Different return values (%Mu, %Mu)\n",
2404 ref.retval, fun.retval);
2405 error = 1;
2406 }
2407
2408 for (i = 0; i < NUM_DESTS; i++)
2409 {
2410 switch (tr->dst_size[i]) {
2411 case SIZE_RETVAL:
2412 case SIZE_GET_STR:
2413 d[i].size = ref.retval;
2414 break;
2415 }
2416 }
2417
2418 for (i = 0; i < NUM_DESTS; i++)
2419 {
2420 if (! tr->dst[i])
2421 continue;
2422
2423 if (tr->dst_bytes[i])
2424 {
2425 if (memcmp (ref.d[i].p, fun.d[i].p, d[i].size) != 0)
2426 {
2427 printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
2428 i,
2429 (long) byte_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),
2430 (long) byte_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));
2431 error = 1;
2432 }
2433 }
2434 else
2435 {
2436 if (d[i].size != 0
2437 && ! refmpn_equal_anynail (ref.d[i].p, fun.d[i].p, d[i].size))
2438 {
2439 printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
2440 i,
2441 (long) mpn_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),
2442 (long) mpn_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));
2443 error = 1;
2444 }
2445 }
2446 }
2447
2448 if (error)
2449 {
2450 print_all();
2451 abort();
2452 }
2453 }
2454
2455
2456 /* The functions are cast if the return value should be a long rather than
2457 the default mp_limb_t. This is necessary under _LONG_LONG_LIMB. This
2458 might not be enough if some actual calling conventions checking is
2459 implemented on a long long limb system. */
2460
2461 void
2462 call (struct each_t *e, tryfun_t function)
2463 {
2464 switch (choice->type) {
2465 case TYPE_ADD:
2466 case TYPE_SUB:
2467 e->retval = CALLING_CONVENTIONS (function)
2468 (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
2469 break;
2470
2471 case TYPE_ADD_N:
2472 case TYPE_SUB_N:
2473 case TYPE_ADDLSH1_N:
2474 case TYPE_ADDLSH2_N:
2475 case TYPE_SUBLSH1_N:
2476 case TYPE_SUBLSH2_N:
2477 case TYPE_RSBLSH1_N:
2478 case TYPE_RSBLSH2_N:
2479 case TYPE_RSH1ADD_N:
2480 case TYPE_RSH1SUB_N:
2481 e->retval = CALLING_CONVENTIONS (function)
2482 (e->d[0].p, e->s[0].p, e->s[1].p, size);
2483 break;
2484 case TYPE_ADDLSH_N:
2485 case TYPE_SUBLSH_N:
2486 case TYPE_RSBLSH_N:
2487 e->retval = CALLING_CONVENTIONS (function)
2488 (e->d[0].p, e->s[0].p, e->s[1].p, size, shift);
2489 break;
2490 case TYPE_ADDLSH_NC:
2491 case TYPE_SUBLSH_NC:
2492 case TYPE_RSBLSH_NC:
2493 e->retval = CALLING_CONVENTIONS (function)
2494 (e->d[0].p, e->s[0].p, e->s[1].p, size, shift, carry);
2495 break;
2496 case TYPE_ADDLSH1_NC:
2497 case TYPE_ADDLSH2_NC:
2498 case TYPE_SUBLSH1_NC:
2499 case TYPE_SUBLSH2_NC:
2500 case TYPE_RSBLSH1_NC:
2501 case TYPE_RSBLSH2_NC:
2502 case TYPE_ADD_NC:
2503 case TYPE_SUB_NC:
2504 e->retval = CALLING_CONVENTIONS (function)
2505 (e->d[0].p, e->s[0].p, e->s[1].p, size, carry);
2506 break;
2507 case TYPE_ADDCND_N:
2508 case TYPE_SUBCND_N:
2509 e->retval = CALLING_CONVENTIONS (function)
2510 (carry, e->d[0].p, e->s[0].p, e->s[1].p, size);
2511 break;
2512 case TYPE_ADD_ERR1_N:
2513 case TYPE_SUB_ERR1_N:
2514 e->retval = CALLING_CONVENTIONS (function)
2515 (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, size, carry);
2516 break;
2517 case TYPE_ADD_ERR2_N:
2518 case TYPE_SUB_ERR2_N:
2519 e->retval = CALLING_CONVENTIONS (function)
2520 (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, e->s[3].p, size, carry);
2521 break;
2522 case TYPE_ADD_ERR3_N:
2523 case TYPE_SUB_ERR3_N:
2524 e->retval = CALLING_CONVENTIONS (function)
2525 (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, e->s[3].p, e->s[4].p, size, carry);
2526 break;
2527
2528 case TYPE_MUL_1:
2529 case TYPE_ADDMUL_1:
2530 case TYPE_SUBMUL_1:
2531 e->retval = CALLING_CONVENTIONS (function)
2532 (e->d[0].p, e->s[0].p, size, multiplier);
2533 break;
2534 case TYPE_MUL_1C:
2535 case TYPE_ADDMUL_1C:
2536 case TYPE_SUBMUL_1C:
2537 e->retval = CALLING_CONVENTIONS (function)
2538 (e->d[0].p, e->s[0].p, size, multiplier, carry);
2539 break;
2540
2541 case TYPE_MUL_2:
2542 case TYPE_MUL_3:
2543 case TYPE_MUL_4:
2544 case TYPE_MUL_5:
2545 case TYPE_MUL_6:
2546 if (size == 1)
2547 abort ();
2548 e->retval = CALLING_CONVENTIONS (function)
2549 (e->d[0].p, e->s[0].p, size, multiplier_N);
2550 break;
2551
2552 case TYPE_ADDMUL_2:
2553 case TYPE_ADDMUL_3:
2554 case TYPE_ADDMUL_4:
2555 case TYPE_ADDMUL_5:
2556 case TYPE_ADDMUL_6:
2557 case TYPE_ADDMUL_7:
2558 case TYPE_ADDMUL_8:
2559 if (size == 1)
2560 abort ();
2561 e->retval = CALLING_CONVENTIONS (function)
2562 (e->d[0].p, e->s[0].p, size, multiplier_N);
2563 break;
2564
2565 case TYPE_AND_N:
2566 case TYPE_ANDN_N:
2567 case TYPE_NAND_N:
2568 case TYPE_IOR_N:
2569 case TYPE_IORN_N:
2570 case TYPE_NIOR_N:
2571 case TYPE_XOR_N:
2572 case TYPE_XNOR_N:
2573 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
2574 break;
2575
2576 case TYPE_ADDSUB_N:
2577 e->retval = CALLING_CONVENTIONS (function)
2578 (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size);
2579 break;
2580 case TYPE_ADDSUB_NC:
2581 e->retval = CALLING_CONVENTIONS (function)
2582 (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size, carry);
2583 break;
2584
2585 case TYPE_COPY:
2586 case TYPE_COPYI:
2587 case TYPE_COPYD:
2588 case TYPE_COM:
2589 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2590 break;
2591
2592 case TYPE_ADDLSH1_N_IP1:
2593 case TYPE_ADDLSH2_N_IP1:
2594 case TYPE_ADDLSH1_N_IP2:
2595 case TYPE_ADDLSH2_N_IP2:
2596 case TYPE_SUBLSH1_N_IP1:
2597 case TYPE_SUBLSH2_N_IP1:
2598 case TYPE_DIVEXACT_BY3:
2599 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2600 break;
2601 case TYPE_DIVEXACT_BY3C:
2602 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size,
2603 carry);
2604 break;
2605
2606
2607 case TYPE_DIVMOD_1:
2608 case TYPE_DIVEXACT_1:
2609 case TYPE_BDIV_Q_1:
2610 e->retval = CALLING_CONVENTIONS (function)
2611 (e->d[0].p, e->s[0].p, size, divisor);
2612 break;
2613 case TYPE_DIVMOD_1C:
2614 e->retval = CALLING_CONVENTIONS (function)
2615 (e->d[0].p, e->s[0].p, size, divisor, carry);
2616 break;
2617 case TYPE_DIVREM_1:
2618 e->retval = CALLING_CONVENTIONS (function)
2619 (e->d[0].p, size2, e->s[0].p, size, divisor);
2620 break;
2621 case TYPE_DIVREM_1C:
2622 e->retval = CALLING_CONVENTIONS (function)
2623 (e->d[0].p, size2, e->s[0].p, size, divisor, carry);
2624 break;
2625 case TYPE_PREINV_DIVREM_1:
2626 {
2627 mp_limb_t dinv;
2628 unsigned shift;
2629 shift = refmpn_count_leading_zeros (divisor);
2630 dinv = refmpn_invert_limb (divisor << shift);
2631 e->retval = CALLING_CONVENTIONS (function)
2632 (e->d[0].p, size2, e->s[0].p, size, divisor, dinv, shift);
2633 }
2634 break;
2635 case TYPE_MOD_1:
2636 case TYPE_MODEXACT_1_ODD:
2637 e->retval = CALLING_CONVENTIONS (function)
2638 (e->s[0].p, size, divisor);
2639 break;
2640 case TYPE_MOD_1C:
2641 case TYPE_MODEXACT_1C_ODD:
2642 e->retval = CALLING_CONVENTIONS (function)
2643 (e->s[0].p, size, divisor, carry);
2644 break;
2645 case TYPE_PREINV_MOD_1:
2646 e->retval = CALLING_CONVENTIONS (function)
2647 (e->s[0].p, size, divisor, refmpn_invert_limb (divisor));
2648 break;
2649 case TYPE_DIV_QR_1N_PI1:
2650 {
2651 mp_limb_t dinv = refmpn_invert_limb (divisor);
2652 e->retval = CALLING_CONVENTIONS (function)
2653 (e->d[0].p, e->s[0].p, size, e->s[1].p[0], divisor, dinv);
2654 break;
2655 }
2656
2657 case TYPE_MOD_34LSUB1:
2658 e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size);
2659 break;
2660
2661 case TYPE_UDIV_QRNND:
2662 e->retval = CALLING_CONVENTIONS (function)
2663 (e->d[0].p, e->s[0].p[1], e->s[0].p[0], divisor);
2664 break;
2665 case TYPE_UDIV_QRNND_R:
2666 e->retval = CALLING_CONVENTIONS (function)
2667 (e->s[0].p[1], e->s[0].p[0], divisor, e->d[0].p);
2668 break;
2669
2670 case TYPE_SBPI1_DIV_QR:
2671 {
2672 gmp_pi1_t dinv;
2673 invert_pi1 (dinv, e->s[1].p[size2-1], e->s[1].p[size2-2]); /* FIXME: use refinvert_pi1 */
2674 refmpn_copyi (e->d[1].p, e->s[0].p, size); /* dividend */
2675 refmpn_fill (e->d[0].p, size-size2, 0x98765432); /* quotient */
2676 e->retval = CALLING_CONVENTIONS (function)
2677 (e->d[0].p, e->d[1].p, size, e->s[1].p, size2, dinv.inv32);
2678 refmpn_zero (e->d[1].p+size2, size-size2); /* excess over remainder */
2679 }
2680 break;
2681
2682 case TYPE_TDIV_QR:
2683 CALLING_CONVENTIONS (function) (e->d[0].p, e->d[1].p, 0,
2684 e->s[0].p, size, e->s[1].p, size2);
2685 break;
2686
2687 case TYPE_GCD_1:
2688 /* Must have a non-zero src, but this probably isn't the best way to do
2689 it. */
2690 if (refmpn_zero_p (e->s[0].p, size))
2691 e->retval = 0;
2692 else
2693 e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size, divisor);
2694 break;
2695
2696 case TYPE_GCD:
2697 /* Sources are destroyed, so they're saved and replaced, but a general
2698 approach to this might be better. Note that it's still e->s[0].p and
2699 e->s[1].p that are passed, to get the desired alignments. */
2700 {
2701 mp_ptr s0 = refmpn_malloc_limbs (size);
2702 mp_ptr s1 = refmpn_malloc_limbs (size2);
2703 refmpn_copyi (s0, e->s[0].p, size);
2704 refmpn_copyi (s1, e->s[1].p, size2);
2705
2706 mprotect_region (&s[0].region, PROT_READ|PROT_WRITE);
2707 mprotect_region (&s[1].region, PROT_READ|PROT_WRITE);
2708 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p,
2709 e->s[0].p, size,
2710 e->s[1].p, size2);
2711 refmpn_copyi (e->s[0].p, s0, size);
2712 refmpn_copyi (e->s[1].p, s1, size2);
2713 free (s0);
2714 free (s1);
2715 }
2716 break;
2717
2718 case TYPE_GCD_FINDA:
2719 {
2720 /* FIXME: do this with a flag */
2721 mp_limb_t c[2];
2722 c[0] = e->s[0].p[0];
2723 c[0] += (c[0] == 0);
2724 c[1] = e->s[0].p[0];
2725 c[1] += (c[1] == 0);
2726 e->retval = CALLING_CONVENTIONS (function) (c);
2727 }
2728 break;
2729
2730 case TYPE_MPZ_LEGENDRE:
2731 case TYPE_MPZ_JACOBI:
2732 {
2733 mpz_t a, b;
2734 PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
2735 PTR(b) = e->s[1].p; SIZ(b) = size2;
2736 e->retval = CALLING_CONVENTIONS (function) (a, b);
2737 }
2738 break;
2739 case TYPE_MPZ_KRONECKER:
2740 {
2741 mpz_t a, b;
2742 PTR(a) = e->s[0].p; SIZ(a) = ((carry&1)==0 ? size : -size);
2743 PTR(b) = e->s[1].p; SIZ(b) = ((carry&2)==0 ? size2 : -size2);
2744 e->retval = CALLING_CONVENTIONS (function) (a, b);
2745 }
2746 break;
2747 case TYPE_MPZ_KRONECKER_UI:
2748 {
2749 mpz_t a;
2750 PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
2751 e->retval = CALLING_CONVENTIONS(function) (a, (unsigned long)multiplier);
2752 }
2753 break;
2754 case TYPE_MPZ_KRONECKER_SI:
2755 {
2756 mpz_t a;
2757 PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
2758 e->retval = CALLING_CONVENTIONS (function) (a, (long) multiplier);
2759 }
2760 break;
2761 case TYPE_MPZ_UI_KRONECKER:
2762 {
2763 mpz_t b;
2764 PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
2765 e->retval = CALLING_CONVENTIONS(function) ((unsigned long)multiplier, b);
2766 }
2767 break;
2768 case TYPE_MPZ_SI_KRONECKER:
2769 {
2770 mpz_t b;
2771 PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
2772 e->retval = CALLING_CONVENTIONS (function) ((long) multiplier, b);
2773 }
2774 break;
2775
2776 case TYPE_MUL_MN:
2777 case TYPE_MULMID_MN:
2778 CALLING_CONVENTIONS (function)
2779 (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
2780 break;
2781 case TYPE_MUL_N:
2782 case TYPE_MULLO_N:
2783 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
2784 break;
2785 case TYPE_MULMID_N:
2786 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p,
2787 (size + 1) / 2);
2788 break;
2789 case TYPE_SQR:
2790 case TYPE_SQRLO:
2791 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2792 break;
2793
2794 case TYPE_UMUL_PPMM:
2795 e->retval = CALLING_CONVENTIONS (function)
2796 (e->d[0].p, e->s[0].p[0], e->s[0].p[1]);
2797 break;
2798 case TYPE_UMUL_PPMM_R:
2799 e->retval = CALLING_CONVENTIONS (function)
2800 (e->s[0].p[0], e->s[0].p[1], e->d[0].p);
2801 break;
2802
2803 case TYPE_ADDLSH_N_IP1:
2804 case TYPE_ADDLSH_N_IP2:
2805 case TYPE_SUBLSH_N_IP1:
2806 case TYPE_LSHIFT:
2807 case TYPE_LSHIFTC:
2808 case TYPE_RSHIFT:
2809 e->retval = CALLING_CONVENTIONS (function)
2810 (e->d[0].p, e->s[0].p, size, shift);
2811 break;
2812
2813 case TYPE_POPCOUNT:
2814 e->retval = (* (unsigned long (*)(ANYARGS))
2815 CALLING_CONVENTIONS (function)) (e->s[0].p, size);
2816 break;
2817 case TYPE_HAMDIST:
2818 e->retval = (* (unsigned long (*)(ANYARGS))
2819 CALLING_CONVENTIONS (function)) (e->s[0].p, e->s[1].p, size);
2820 break;
2821
2822 case TYPE_SQRTREM:
2823 e->retval = (* (long (*)(ANYARGS)) CALLING_CONVENTIONS (function))
2824 (e->d[0].p, e->d[1].p, e->s[0].p, size);
2825 break;
2826
2827 case TYPE_SQRT:
2828 e->retval = (* (long (*)(ANYARGS)) CALLING_CONVENTIONS (function))
2829 (e->d[0].p, e->s[0].p, size);
2830 break;
2831
2832 case TYPE_ZERO:
2833 CALLING_CONVENTIONS (function) (e->d[0].p, size);
2834 break;
2835
2836 case TYPE_GET_STR:
2837 {
2838 size_t sizeinbase, fill;
2839 char *dst;
2840 MPN_SIZEINBASE (sizeinbase, e->s[0].p, size, base);
2841 ASSERT_ALWAYS (sizeinbase <= d[0].size);
2842 fill = d[0].size - sizeinbase;
2843 if (d[0].high)
2844 {
2845 memset (e->d[0].p, 0xBA, fill);
2846 dst = (char *) e->d[0].p + fill;
2847 }
2848 else
2849 {
2850 dst = (char *) e->d[0].p;
2851 memset (dst + sizeinbase, 0xBA, fill);
2852 }
2853 if (POW2_P (base))
2854 {
2855 e->retval = CALLING_CONVENTIONS (function) (dst, base,
2856 e->s[0].p, size);
2857 }
2858 else
2859 {
2860 refmpn_copy (e->d[1].p, e->s[0].p, size);
2861 e->retval = CALLING_CONVENTIONS (function) (dst, base,
2862 e->d[1].p, size);
2863 }
2864 refmpn_zero (e->d[1].p, size); /* clobbered or unused */
2865 }
2866 break;
2867
2868 case TYPE_INVERT:
2869 {
2870 mp_ptr scratch;
2871 TMP_DECL;
2872 TMP_MARK;
2873 scratch = TMP_ALLOC_LIMBS (mpn_invert_itch (size));
2874 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch);
2875 TMP_FREE;
2876 }
2877 break;
2878 case TYPE_BINVERT:
2879 {
2880 mp_ptr scratch;
2881 TMP_DECL;
2882 TMP_MARK;
2883 scratch = TMP_ALLOC_LIMBS (mpn_binvert_itch (size));
2884 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch);
2885 TMP_FREE;
2886 }
2887 break;
2888
2889 #ifdef EXTRA_CALL
2890 EXTRA_CALL
2891 #endif
2892
2893 default:
2894 printf ("Unknown routine type %d\n", choice->type);
2895 abort ();
2896 break;
2897 }
2898 }
2899
2900
2901 void
2902 pointer_setup (struct each_t *e)
2903 {
2904 int i, j;
2905
2906 for (i = 0; i < NUM_DESTS; i++)
2907 {
2908 switch (tr->dst_size[i]) {
2909 case 0:
2910 case SIZE_RETVAL: /* will be adjusted later */
2911 d[i].size = size;
2912 break;
2913
2914 case SIZE_1:
2915 d[i].size = 1;
2916 break;
2917 case SIZE_2:
2918 d[i].size = 2;
2919 break;
2920 case SIZE_3:
2921 d[i].size = 3;
2922 break;
2923 case SIZE_4:
2924 d[i].size = 4;
2925 break;
2926 case SIZE_6:
2927 d[i].size = 6;
2928 break;
2929
2930 case SIZE_PLUS_1:
2931 d[i].size = size+1;
2932 break;
2933 case SIZE_PLUS_MSIZE_SUB_1:
2934 d[i].size = size + tr->msize - 1;
2935 break;
2936
2937 case SIZE_SUM:
2938 if (tr->size2)
2939 d[i].size = size + size2;
2940 else
2941 d[i].size = 2*size;
2942 break;
2943
2944 case SIZE_SIZE2:
2945 d[i].size = size2;
2946 break;
2947
2948 case SIZE_DIFF:
2949 d[i].size = size - size2;
2950 break;
2951
2952 case SIZE_DIFF_PLUS_1:
2953 d[i].size = size - size2 + 1;
2954 break;
2955
2956 case SIZE_DIFF_PLUS_3:
2957 d[i].size = size - size2 + 3;
2958 break;
2959
2960 case SIZE_CEIL_HALF:
2961 d[i].size = (size+1)/2;
2962 break;
2963
2964 case SIZE_GET_STR:
2965 {
2966 mp_limb_t ff = GMP_NUMB_MAX;
2967 MPN_SIZEINBASE (d[i].size, &ff - (size-1), size, base);
2968 }
2969 break;
2970
2971 default:
2972 printf ("Unrecognised dst_size type %d\n", tr->dst_size[i]);
2973 abort ();
2974 }
2975 }
2976
2977 /* establish e->d[].p destinations */
2978 for (i = 0; i < NUM_DESTS; i++)
2979 {
2980 mp_size_t offset = 0;
2981
2982 /* possible room for overlapping sources */
2983 for (j = 0; j < numberof (overlap->s); j++)
2984 if (overlap->s[j] == i)
2985 offset = MAX (offset, s[j].align);
2986
2987 if (d[i].high)
2988 {
2989 if (tr->dst_bytes[i])
2990 {
2991 e->d[i].p = (mp_ptr)
2992 ((char *) (e->d[i].region.ptr + e->d[i].region.size)
2993 - d[i].size - d[i].align);
2994 }
2995 else
2996 {
2997 e->d[i].p = e->d[i].region.ptr + e->d[i].region.size
2998 - d[i].size - d[i].align;
2999 if (tr->overlap == OVERLAP_LOW_TO_HIGH)
3000 e->d[i].p -= offset;
3001 }
3002 }
3003 else
3004 {
3005 if (tr->dst_bytes[i])
3006 {
3007 e->d[i].p = (mp_ptr) ((char *) e->d[i].region.ptr + d[i].align);
3008 }
3009 else
3010 {
3011 e->d[i].p = e->d[i].region.ptr + d[i].align;
3012 if (tr->overlap == OVERLAP_HIGH_TO_LOW)
3013 e->d[i].p += offset;
3014 }
3015 }
3016 }
3017
3018 /* establish e->s[].p sources */
3019 for (i = 0; i < NUM_SOURCES; i++)
3020 {
3021 int o = overlap->s[i];
3022 switch (o) {
3023 case -1:
3024 /* no overlap */
3025 e->s[i].p = s[i].p;
3026 break;
3027 case 0:
3028 case 1:
3029 /* overlap with d[o] */
3030 if (tr->overlap == OVERLAP_HIGH_TO_LOW)
3031 e->s[i].p = e->d[o].p - s[i].align;
3032 else if (tr->overlap == OVERLAP_LOW_TO_HIGH)
3033 e->s[i].p = e->d[o].p + s[i].align;
3034 else if (tr->size2 == SIZE_FRACTION)
3035 e->s[i].p = e->d[o].p + size2;
3036 else
3037 e->s[i].p = e->d[o].p;
3038 break;
3039 default:
3040 abort();
3041 break;
3042 }
3043 }
3044 }
3045
3046
3047 void
3048 validate_fail (void)
3049 {
3050 if (tr->reference)
3051 {
3052 trap_location = TRAP_REF;
3053 call (&ref, tr->reference);
3054 trap_location = TRAP_NOWHERE;
3055 }
3056
3057 print_all();
3058 abort();
3059 }
3060
3061
3062 void
3063 try_one (void)
3064 {
3065 int i;
3066
3067 if (option_spinner)
3068 spinner();
3069 spinner_count++;
3070
3071 trap_location = TRAP_SETUPS;
3072
3073 if (tr->divisor == DIVISOR_NORM)
3074 divisor |= GMP_NUMB_HIGHBIT;
3075 if (tr->divisor == DIVISOR_ODD)
3076 divisor |= 1;
3077
3078 for (i = 0; i < NUM_SOURCES; i++)
3079 {
3080 if (s[i].high)
3081 s[i].p = s[i].region.ptr + s[i].region.size - SRC_SIZE(i) - s[i].align;
3082 else
3083 s[i].p = s[i].region.ptr + s[i].align;
3084 }
3085
3086 pointer_setup (&ref);
3087 pointer_setup (&fun);
3088
3089 ref.retval = 0x04152637;
3090 fun.retval = 0x8C9DAEBF;
3091
3092 t_random (multiplier_N, tr->msize);
3093
3094 for (i = 0; i < NUM_SOURCES; i++)
3095 {
3096 if (! tr->src[i])
3097 continue;
3098
3099 mprotect_region (&s[i].region, PROT_READ|PROT_WRITE);
3100 t_random (s[i].p, SRC_SIZE(i));
3101
3102 switch (tr->data) {
3103 case DATA_NON_ZERO:
3104 if (refmpn_zero_p (s[i].p, SRC_SIZE(i)))
3105 s[i].p[0] = 1;
3106 break;
3107
3108 case DATA_MULTIPLE_DIVISOR:
3109 /* same number of low zero bits as divisor */
3110 s[i].p[0] &= ~ LOW_ZEROS_MASK (divisor);
3111 refmpn_sub_1 (s[i].p, s[i].p, size,
3112 refmpn_mod_1 (s[i].p, size, divisor));
3113 break;
3114
3115 case DATA_GCD:
3116 /* s[1] no more bits than s[0] */
3117 if (i == 1 && size2 == size)
3118 s[1].p[size-1] &= refmpn_msbone_mask (s[0].p[size-1]);
3119
3120 /* high limb non-zero */
3121 s[i].p[SRC_SIZE(i)-1] += (s[i].p[SRC_SIZE(i)-1] == 0);
3122
3123 /* odd */
3124 s[i].p[0] |= 1;
3125 break;
3126
3127 case DATA_SRC0_ODD:
3128 if (i == 0)
3129 s[i].p[0] |= 1;
3130 break;
3131
3132 case DATA_SRC1_ODD:
3133 if (i == 1)
3134 s[i].p[0] |= 1;
3135 break;
3136
3137 case DATA_SRC1_ODD_PRIME:
3138 if (i == 1)
3139 {
3140 if (refmpn_zero_p (s[i].p+1, SRC_SIZE(i)-1)
3141 && s[i].p[0] <=3)
3142 s[i].p[0] = 3;
3143 else
3144 {
3145 mpz_t p;
3146 mpz_init (p);
3147 for (;;)
3148 {
3149 _mpz_realloc (p, SRC_SIZE(i));
3150 MPN_COPY (PTR(p), s[i].p, SRC_SIZE(i));
3151 SIZ(p) = SRC_SIZE(i);
3152 MPN_NORMALIZE (PTR(p), SIZ(p));
3153 mpz_nextprime (p, p);
3154 if (mpz_size (p) <= SRC_SIZE(i))
3155 break;
3156
3157 t_random (s[i].p, SRC_SIZE(i));
3158 }
3159 MPN_COPY (s[i].p, PTR(p), SIZ(p));
3160 if (SIZ(p) < SRC_SIZE(i))
3161 MPN_ZERO (s[i].p + SIZ(p), SRC_SIZE(i) - SIZ(p));
3162 mpz_clear (p);
3163 }
3164 }
3165 break;
3166
3167 case DATA_SRC1_HIGHBIT:
3168 if (i == 1)
3169 {
3170 if (tr->size2)
3171 s[i].p[size2-1] |= GMP_NUMB_HIGHBIT;
3172 else
3173 s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
3174 }
3175 break;
3176
3177 case DATA_SRC0_HIGHBIT:
3178 if (i == 0)
3179 {
3180 s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
3181 }
3182 break;
3183
3184 case DATA_UDIV_QRNND:
3185 s[i].p[1] %= divisor;
3186 break;
3187 case DATA_DIV_QR_1:
3188 if (i == 1)
3189 s[i].p[0] %= divisor;
3190 break;
3191 }
3192
3193 mprotect_region (&s[i].region, PROT_READ);
3194 }
3195
3196 for (i = 0; i < NUM_DESTS; i++)
3197 {
3198 if (! tr->dst[i])
3199 continue;
3200
3201 if (tr->dst0_from_src1 && i==0)
3202 {
3203 mp_size_t copy = MIN (d[0].size, SRC_SIZE(1));
3204 mp_size_t fill = MAX (0, d[0].size - copy);
3205 MPN_COPY (fun.d[0].p, s[1].region.ptr, copy);
3206 MPN_COPY (ref.d[0].p, s[1].region.ptr, copy);
3207 refmpn_fill (fun.d[0].p + copy, fill, DEADVAL);
3208 refmpn_fill (ref.d[0].p + copy, fill, DEADVAL);
3209 }
3210 else if (tr->dst_bytes[i])
3211 {
3212 memset (ref.d[i].p, 0xBA, d[i].size);
3213 memset (fun.d[i].p, 0xBA, d[i].size);
3214 }
3215 else
3216 {
3217 refmpn_fill (ref.d[i].p, d[i].size, DEADVAL);
3218 refmpn_fill (fun.d[i].p, d[i].size, DEADVAL);
3219 }
3220 }
3221
3222 for (i = 0; i < NUM_SOURCES; i++)
3223 {
3224 if (! tr->src[i])
3225 continue;
3226
3227 if (ref.s[i].p != s[i].p)
3228 {
3229 refmpn_copyi (ref.s[i].p, s[i].p, SRC_SIZE(i));
3230 refmpn_copyi (fun.s[i].p, s[i].p, SRC_SIZE(i));
3231 }
3232 }
3233
3234 if (option_print)
3235 print_all();
3236
3237 if (tr->validate != NULL)
3238 {
3239 trap_location = TRAP_FUN;
3240 call (&fun, choice->function);
3241 trap_location = TRAP_NOWHERE;
3242
3243 if (! CALLING_CONVENTIONS_CHECK ())
3244 {
3245 print_all();
3246 abort();
3247 }
3248
3249 (*tr->validate) ();
3250 }
3251 else
3252 {
3253 trap_location = TRAP_REF;
3254 call (&ref, tr->reference);
3255 trap_location = TRAP_FUN;
3256 call (&fun, choice->function);
3257 trap_location = TRAP_NOWHERE;
3258
3259 if (! CALLING_CONVENTIONS_CHECK ())
3260 {
3261 print_all();
3262 abort();
3263 }
3264
3265 compare ();
3266 }
3267 }
3268
3269
/* Loop headers for try_many().  Each *_ITERATION expands to a "for" with no
   body, so writing them one after another nests them.  */

/* size runs from the largest of option_firstsize, choice->minsize and 1 (or
   0 under SIZE_ALLOW_ZERO) up to option_lastsize.  Under SIZE_ODD an even
   start is bumped to the next odd value and the step is 2.  */
#define SIZE_ITERATION                                                  \
  for (size = MAX3 (option_firstsize,                                   \
                    choice->minsize,                                    \
                    ((tr->size == SIZE_ALLOW_ZERO) ? 0 : 1)),           \
         size += ((tr->size == SIZE_ODD) && (size & 1) == 0);           \
       size <= option_lastsize;                                         \
       size += ((tr->size == SIZE_ODD) ? 2 : 1))

/* First and last size2 for the current size.  SIZE_2 and SIZE_CEIL_HALF
   give a single value, SIZE_FRACTION runs from option_firstsize2 to
   FRACTION_COUNT-1, any other non-zero tr->size2 runs up to size, and zero
   tr->size2 gives just the value 0.  */
#define SIZE2_FIRST                                                     \
  (tr->size2 == SIZE_2 ? 2                                              \
   : tr->size2 == SIZE_FRACTION ? option_firstsize2                     \
   : tr->size2 == SIZE_CEIL_HALF ? ((size + 1) / 2)                     \
   : tr->size2 ?                                                        \
     MAX (choice->minsize, (option_firstsize2 != 0                      \
                            ? option_firstsize2 : 1))                   \
   : 0)

#define SIZE2_LAST                                                      \
  (tr->size2 == SIZE_2 ? 2                                              \
   : tr->size2 == SIZE_FRACTION ? FRACTION_COUNT-1                      \
   : tr->size2 == SIZE_CEIL_HALF ? ((size + 1) / 2)                     \
   : tr->size2 ? size                                                   \
   : 0)

#define SIZE2_ITERATION \
  for (size2 = SIZE2_FIRST; size2 <= SIZE2_LAST; size2++)

/* w[n].align takes the values 0 to ALIGNMENTS-1 when COND holds, otherwise
   only 0.  */
#define ALIGN_COUNT(cond)  ((cond) ? ALIGNMENTS : 1)
#define ALIGN_ITERATION(w,n,cond) \
  for (w[n].align = 0; w[n].align < ALIGN_COUNT(cond); w[n].align++)

/* w[n].high takes the values 0 and 1 when COND holds, otherwise only 0.  */
#define HIGH_LIMIT(cond)  ((cond) ? 1 : 0)
#define HIGH_COUNT(cond)  (1 + HIGH_LIMIT (cond))
#define HIGH_ITERATION(w,n,cond) \
  for (w[n].high = 0; w[n].high < HIGH_COUNT(cond); w[n].high++)

/* shift runs from 1 to GMP_NUMB_BITS-1 when tr->shift is set, otherwise it
   takes only the value 1.  */
#define SHIFT_LIMIT  \
  ((unsigned long) (tr->shift ? GMP_NUMB_BITS -1 : 1))

#define SHIFT_ITERATION \
  for (shift = 1; shift <= SHIFT_LIMIT; shift++)
3311
3312
3313 void
3314 try_many (void)
3315 {
3316 int i;
3317
3318 {
3319 unsigned long total = 1;
3320
3321 total *= option_repetitions;
3322 total *= option_lastsize;
3323 if (tr->size2 == SIZE_FRACTION) total *= FRACTION_COUNT;
3324 else if (tr->size2) total *= (option_lastsize+1)/2;
3325
3326 total *= SHIFT_LIMIT;
3327 total *= MULTIPLIER_COUNT;
3328 total *= DIVISOR_COUNT;
3329 total *= CARRY_COUNT;
3330 total *= T_RAND_COUNT;
3331
3332 total *= HIGH_COUNT (tr->dst[0]);
3333 total *= HIGH_COUNT (tr->dst[1]);
3334 total *= HIGH_COUNT (tr->src[0]);
3335 total *= HIGH_COUNT (tr->src[1]);
3336
3337 total *= ALIGN_COUNT (tr->dst[0]);
3338 total *= ALIGN_COUNT (tr->dst[1]);
3339 total *= ALIGN_COUNT (tr->src[0]);
3340 total *= ALIGN_COUNT (tr->src[1]);
3341
3342 total *= OVERLAP_COUNT;
3343
3344 printf ("%s %lu\n", choice->name, total);
3345 }
3346
3347 spinner_count = 0;
3348
3349 for (i = 0; i < option_repetitions; i++)
3350 SIZE_ITERATION
3351 SIZE2_ITERATION
3352
3353 SHIFT_ITERATION
3354 MULTIPLIER_ITERATION
3355 DIVISOR_ITERATION
3356 CARRY_ITERATION /* must be after divisor */
3357 T_RAND_ITERATION
3358
3359 HIGH_ITERATION(d,0, tr->dst[0])
3360 HIGH_ITERATION(d,1, tr->dst[1])
3361 HIGH_ITERATION(s,0, tr->src[0])
3362 HIGH_ITERATION(s,1, tr->src[1])
3363
3364 ALIGN_ITERATION(d,0, tr->dst[0])
3365 ALIGN_ITERATION(d,1, tr->dst[1])
3366 ALIGN_ITERATION(s,0, tr->src[0])
3367 ALIGN_ITERATION(s,1, tr->src[1])
3368
3369 OVERLAP_ITERATION
3370 try_one();
3371
3372 printf("\n");
3373 }
3374
3375
3376 /* Usually print_all() doesn't show much, but it might give a hint as to
3377 where the function was up to when it died. */
3378 void
3379 trap (int sig)
3380 {
3381 const char *name = "noname";
3382
3383 switch (sig) {
3384 case SIGILL: name = "SIGILL"; break;
3385 #ifdef SIGBUS
3386 case SIGBUS: name = "SIGBUS"; break;
3387 #endif
3388 case SIGSEGV: name = "SIGSEGV"; break;
3389 case SIGFPE: name = "SIGFPE"; break;
3390 }
3391
3392 printf ("\n\nSIGNAL TRAP: %s\n", name);
3393
3394 switch (trap_location) {
3395 case TRAP_REF:
3396 printf (" in reference function: %s\n", tr->reference_name);
3397 break;
3398 case TRAP_FUN:
3399 printf (" in test function: %s\n", choice->name);
3400 print_all ();
3401 break;
3402 case TRAP_SETUPS:
3403 printf (" in parameter setups\n");
3404 print_all ();
3405 break;
3406 default:
3407 printf (" somewhere unknown\n");
3408 break;
3409 }
3410 exit (1);
3411 }
3412
3413
3414 void
3415 try_init (void)
3416 {
3417 #if HAVE_GETPAGESIZE
3418 /* Prefer getpagesize() over sysconf(), since on SunOS 4 sysconf() doesn't
3419 know _SC_PAGESIZE. */
3420 pagesize = getpagesize ();
3421 #else
3422 #if HAVE_SYSCONF
3423 if ((pagesize = sysconf (_SC_PAGESIZE)) == -1)
3424 {
3425 /* According to the linux man page, sysconf doesn't set errno */
3426 fprintf (stderr, "Cannot get sysconf _SC_PAGESIZE\n");
3427 exit (1);
3428 }
3429 #else
3430 Error, error, cannot get page size
3431 #endif
3432 #endif
3433
3434 printf ("pagesize is 0x%lX bytes\n", pagesize);
3435
3436 signal (SIGILL, trap);
3437 #ifdef SIGBUS
3438 signal (SIGBUS, trap);
3439 #endif
3440 signal (SIGSEGV, trap);
3441 signal (SIGFPE, trap);
3442
3443 {
3444 int i;
3445
3446 for (i = 0; i < NUM_SOURCES; i++)
3447 {
3448 malloc_region (&s[i].region, 2*option_lastsize+ALIGNMENTS-1);
3449 printf ("s[%d] %p to %p (0x%lX bytes)\n",
3450 i, (void *) (s[i].region.ptr),
3451 (void *) (s[i].region.ptr + s[i].region.size),
3452 (long) s[i].region.size * GMP_LIMB_BYTES);
3453 }
3454
3455 #define INIT_EACH(e,es) \
3456 for (i = 0; i < NUM_DESTS; i++) \
3457 { \
3458 malloc_region (&e.d[i].region, 2*option_lastsize+ALIGNMENTS-1); \
3459 printf ("%s d[%d] %p to %p (0x%lX bytes)\n", \
3460 es, i, (void *) (e.d[i].region.ptr), \
3461 (void *) (e.d[i].region.ptr + e.d[i].region.size), \
3462 (long) e.d[i].region.size * GMP_LIMB_BYTES); \
3463 }
3464
3465 INIT_EACH(ref, "ref");
3466 INIT_EACH(fun, "fun");
3467 }
3468 }
3469
/* Return non-zero if STR matches PATTERN, zero if not.

   PATTERN may have a single "*" wildcard, either as its first character
   (the rest of PATTERN must then be a suffix of STR) or as its last
   character (the rest must be a prefix of STR).  A leading "*" takes
   precedence, and any further "*" is treated literally.  Without a
   wildcard the strings must be equal.  */
int
strmatch_wild (const char *pattern, const char *str)
{
  size_t  plen, slen;

  /* wildcard at start */
  if (pattern[0] == '*')
    {
      pattern++;
      plen = strlen (pattern);
      slen = strlen (str);
      return (plen == 0
              || (slen >= plen && memcmp (pattern, str+slen-plen, plen) == 0));
    }

  /* wildcard at end; strncmp stops at the end of STR, so a STR shorter
     than the prefix is simply a mismatch and is never read beyond its
     terminator */
  plen = strlen (pattern);
  if (plen >= 1 && pattern[plen-1] == '*')
    return (strncmp (pattern, str, plen-1) == 0);

  /* no wildcards */
  return (strcmp (pattern, str) == 0);
}
3493
3494 void
3495 try_name (const char *name)
3496 {
3497 int found = 0;
3498 int i;
3499
3500 for (i = 0; i < numberof (choice_array); i++)
3501 {
3502 if (strmatch_wild (name, choice_array[i].name))
3503 {
3504 choice = &choice_array[i];
3505 tr = ¶m[choice->type];
3506 try_many ();
3507 found = 1;
3508 }
3509 }
3510
3511 if (!found)
3512 {
3513 printf ("%s unknown\n", name);
3514 /* exit (1); */
3515 }
3516 }
3517
3518
3519 void
3520 usage (const char *prog)
3521 {
3522 int col = 0;
3523 int i;
3524
3525 printf ("Usage: %s [options] function...\n", prog);
3526 printf (" -1 use limb data 1,2,3,etc\n");
3527 printf (" -9 use limb data all 0xFF..FFs\n");
3528 printf (" -a zeros use limb data all zeros\n");
3529 printf (" -a ffs use limb data all 0xFF..FFs (same as -9)\n");
3530 printf (" -a 2fd use data 0x2FFF...FFFD\n");
3531 printf (" -p print each case tried (try this if seg faulting)\n");
3532 printf (" -R seed random numbers from time()\n");
3533 printf (" -r reps set repetitions (default %d)\n", DEFAULT_REPETITIONS);
3534 printf (" -s size starting size to test\n");
3535 printf (" -S size2 starting size2 to test\n");
3536 printf (" -s s1-s2 range of sizes to test\n");
3537 printf (" -W don't show the spinner (use this in gdb)\n");
3538 printf (" -z disable mprotect() redzones\n");
3539 printf ("Default data is refmpn_random() and refmpn_random2().\n");
3540 printf ("\n");
3541 printf ("Functions that can be tested:\n");
3542
3543 for (i = 0; i < numberof (choice_array); i++)
3544 {
3545 if (col + 1 + strlen (choice_array[i].name) > 79)
3546 {
3547 printf ("\n");
3548 col = 0;
3549 }
3550 printf (" %s", choice_array[i].name);
3551 col += 1 + strlen (choice_array[i].name);
3552 }
3553 printf ("\n");
3554
3555 exit(1);
3556 }
3557
3558
3559 int
3560 main (int argc, char *argv[])
3561 {
3562 int i;
3563
3564 /* unbuffered output */
3565 setbuf (stdout, NULL);
3566 setbuf (stderr, NULL);
3567
3568 /* default trace in hex, and in upper-case so can paste into bc */
3569 mp_trace_base = -16;
3570
3571 param_init ();
3572
3573 {
3574 unsigned long seed = 123;
3575 int opt;
3576
3577 while ((opt = getopt(argc, argv, "19a:b:E:pRr:S:s:Wz")) != EOF)
3578 {
3579 switch (opt) {
3580 case '1':
3581 /* use limb data values 1, 2, 3, ... etc */
3582 option_data = DATA_SEQ;
3583 break;
3584 case '9':
3585 /* use limb data values 0xFFF...FFF always */
3586 option_data = DATA_FFS;
3587 break;
3588 case 'a':
3589 if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS;
3590 else if (strcmp (optarg, "seq") == 0) option_data = DATA_SEQ;
3591 else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS;
3592 else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD;
3593 else
3594 {
3595 fprintf (stderr, "unrecognised data option: %s\n", optarg);
3596 exit (1);
3597 }
3598 break;
3599 case 'b':
3600 mp_trace_base = atoi (optarg);
3601 break;
3602 case 'E':
3603 /* re-seed */
3604 sscanf (optarg, "%lu", &seed);
3605 printf ("Re-seeding with %lu\n", seed);
3606 break;
3607 case 'p':
3608 option_print = 1;
3609 break;
3610 case 'R':
3611 /* randomize */
3612 seed = time (NULL);
3613 printf ("Seeding with %lu, re-run using \"-E %lu\"\n", seed, seed);
3614 break;
3615 case 'r':
3616 option_repetitions = atoi (optarg);
3617 break;
3618 case 's':
3619 {
3620 char *p;
3621 option_firstsize = strtol (optarg, 0, 0);
3622 if ((p = strchr (optarg, '-')) != NULL)
3623 option_lastsize = strtol (p+1, 0, 0);
3624 }
3625 break;
3626 case 'S':
3627 /* -S <size> sets the starting size for the second of a two size
3628 routine (like mpn_mul_basecase) */
3629 option_firstsize2 = strtol (optarg, 0, 0);
3630 break;
3631 case 'W':
3632 /* use this when running in the debugger */
3633 option_spinner = 0;
3634 break;
3635 case 'z':
3636 /* disable redzones */
3637 option_redzones = 0;
3638 break;
3639 case '?':
3640 usage (argv[0]);
3641 break;
3642 }
3643 }
3644
3645 gmp_randinit_default (__gmp_rands);
3646 __gmp_rands_initialized = 1;
3647 gmp_randseed_ui (__gmp_rands, seed);
3648 }
3649
3650 try_init();
3651
3652 if (argc <= optind)
3653 usage (argv[0]);
3654
3655 for (i = optind; i < argc; i++)
3656 try_name (argv[i]);
3657
3658 return 0;
3659 }
3660