try.c revision 1.1.1.1.8.1 1 /* Run some tests on various mpn routines.
2
3 THIS IS A TEST PROGRAM USED ONLY FOR DEVELOPMENT. IT'S ALMOST CERTAIN TO
4 BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE VERSIONS OF GMP.
5
6 Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2009, 2011, 2012
7 Free Software Foundation, Inc.
8
9 This file is part of the GNU MP Library test suite.
10
11 The GNU MP Library test suite is free software; you can redistribute it
12 and/or modify it under the terms of the GNU General Public License as
13 published by the Free Software Foundation; either version 3 of the License,
14 or (at your option) any later version.
15
16 The GNU MP Library test suite is distributed in the hope that it will be
17 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
19 Public License for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
23
24
25 /* Usage: try [options] <function>...
26
27 For example, "./try mpn_add_n" to run tests of that function.
28
29 Combinations of alignments and overlaps are tested, with redzones above
30 or below the destinations, and with the sources write-protected.
31
32 The number of tests performed becomes ridiculously large with all the
33 combinations, and for that reason this can't be a part of a "make check",
34 it's meant only for development. The code isn't very pretty either.
35
36 During development it can help to disable the redzones, since seeing the
37 rest of the destination written can show where the wrong part is, or if
38 the dst pointers are off by 1 or whatever. The magic DEADVAL initial
39 fill (see below) will show locations never written.
40
41 The -s option can be used to test only certain size operands, which is
42 useful if some new code doesn't yet support say sizes less than the
43 unrolling, or whatever.
44
45 When a problem occurs it'll of course be necessary to run the program
46 under gdb to find out quite where, how and why it's going wrong. Disable
47 the spinner with the -W option when doing this, or single stepping won't
48 work. Using the "-1" option to run with simple data can be useful.
49
50 New functions to test can be added in try_array[]. If a new TYPE is
51 required then add it to the existing constants, set up its parameters in
52 param_init(), and add it to the call() function. Extra parameter fields
53 can be added if necessary, or further interpretations given to existing
54 fields.
55
56
57 Portability:
58
59 This program is not designed for use on Cray vector systems under Unicos,
60 it will fail to compile due to missing _SC_PAGE_SIZE. Those systems
61 don't really have pages or mprotect. We could arrange to run the tests
62 without the redzones, but we haven't bothered currently.
63
64
65 Enhancements:
66
67 umul_ppmm support is not very good, lots of source data is generated
68 whereas only two limbs are needed.
69
70 Make a little scheme for interpreting the "SIZE" selections uniformly.
71
72 Make tr->size==SIZE_2 work, for the benefit of find_a which wants just 2
73 source limbs. Possibly increase the default repetitions in that case.
74
75 Automatically detect gdb and disable the spinner (use -W for now).
76
77 Make a way to re-run a failing case in the debugger. Have an option to
78 snapshot each test case before it's run so the data is available if a
79 segv occurs. (This should be more reliable than the current print_all()
80 in the signal handler.)
81
82 When alignment means a dst isn't hard against the redzone, check the
83 space in between remains unchanged.
84
85 When a source overlaps a destination, don't run both s[i].high 0 and 1,
86 as s[i].high has no effect. Maybe encode s[i].high into overlap->s[i].
87
88 When partial overlaps aren't done, don't loop over source alignments
89 during overlaps.
90
91 Try to make the looping code a bit less horrible. Right now it's pretty
92 hard to see what iterations are actually done.
93
94 Perhaps specific setups and loops for each style of function under test
95 would be clearer than a parameterized general loop. There's lots of
96 stuff common to all functions, but the exceptions get messy.
97
98 When there's no overlap, run with both src>dst and src<dst. A subtle
99 calling-conventions violation occurred in a P6 copy which depended on the
100 relative location of src and dst.
101
102 multiplier_N is more or less a third source region for the addmul_N
103 routines, and could be done with the redzoned region scheme.
104
105 */
106
107
108 /* always do assertion checking */
109 #define WANT_ASSERT 1
110
111 #include "config.h"
112
113 #include <errno.h>
114 #include <limits.h>
115 #include <signal.h>
116 #include <stdio.h>
117 #include <stdlib.h>
118 #include <string.h>
119 #include <time.h>
120
121 #if HAVE_UNISTD_H
122 #include <unistd.h>
123 #endif
124
125 #if HAVE_SYS_MMAN_H
126 #include <sys/mman.h>
127 #endif
128
129 #include "gmp.h"
130 #include "gmp-impl.h"
131 #include "longlong.h"
132 #include "tests.h"
133
134
135 #if !HAVE_DECL_OPTARG
136 extern char *optarg;
137 extern int optind, opterr;
138 #endif
139
140 #if ! HAVE_DECL_SYS_NERR
141 extern int sys_nerr;
142 #endif
143
144 #if ! HAVE_DECL_SYS_ERRLIST
145 extern char *sys_errlist[];
146 #endif
147
148 #if ! HAVE_STRERROR
149 char *
150 strerror (int n)
151 {
152 if (n < 0 || n >= sys_nerr)
153 return "errno out of range";
154 else
155 return sys_errlist[n];
156 }
157 #endif
158
159 /* Rumour has it some systems lack a define of PROT_NONE. */
160 #ifndef PROT_NONE
161 #define PROT_NONE 0
162 #endif
163
164 /* Dummy defines for when mprotect doesn't exist. */
165 #ifndef PROT_READ
166 #define PROT_READ 0
167 #endif
168 #ifndef PROT_WRITE
169 #define PROT_WRITE 0
170 #endif
171
172 /* _SC_PAGESIZE is standard, but hpux 9 and possibly other systems have
173 _SC_PAGE_SIZE instead. */
174 #if defined (_SC_PAGE_SIZE) && ! defined (_SC_PAGESIZE)
175 #define _SC_PAGESIZE _SC_PAGE_SIZE
176 #endif
177
178
179 #ifdef EXTRA_PROTOS
180 EXTRA_PROTOS
181 #endif
182 #ifdef EXTRA_PROTOS2
183 EXTRA_PROTOS2
184 #endif
185
186
/* Initial value of option_repetitions. */
187 #define DEFAULT_REPETITIONS 10
188
/* Run-time settings, shown here with their initial values.
   NOTE(review): presumably overridden by the command-line options
   mentioned in the file header (-s for sizes, -W for the spinner);
   the option parsing is not in this part of the file -- confirm.  */
189 int option_repetitions = DEFAULT_REPETITIONS;
190 int option_spinner = 1;
191 int option_redzones = 1;
192 int option_firstsize = 0;
193 int option_lastsize = 500;
194 int option_firstsize2 = 0;
195
/* Counts for the test iteration space.
   NOTE(review): presumably the number of alignments, overlap styles and
   random carry/multiplier/divisor values tried per case; the loops using
   them are not in this part of the file -- confirm.  */
196 #define ALIGNMENTS 4
197 #define OVERLAPS 4
198 #define CARRY_RANDOMS 5
199 #define MULTIPLIER_RANDOMS 5
200 #define DIVISOR_RANDOMS 5
201 #define FRACTION_COUNT 4
202
203 int option_print = 0;
204
/* Values for option_data; DATA_TRAND is the initial setting.
   NOTE(review): presumably these choose how source operands are filled
   (random, zeros, a sequence, all ones, ...) -- confirm.  */
205 #define DATA_TRAND 0
206 #define DATA_ZEROS 1
207 #define DATA_SEQ 2
208 #define DATA_FFS 3
209 #define DATA_2FD 4
210 int option_data = DATA_TRAND;
211
212
/* System page size in bytes; PAGESIZE_LIMBS is the same quantity in limbs.
   NOTE(review): assigned elsewhere, presumably from
   sysconf(_SC_PAGESIZE) -- confirm.  */
213 mp_size_t pagesize;
214 #define PAGESIZE_LIMBS (pagesize / BYTES_PER_MP_LIMB)
215
216 /* Redzone extent: 16 pages.  Must be a multiple of the page size. */
217 #define REDZONE_BYTES (pagesize * 16)
218 #define REDZONE_LIMBS (REDZONE_BYTES / BYTES_PER_MP_LIMB)
219
220
/* MAX applied to three operands.  Arguments may be evaluated more than
   once if MAX is an ordinary macro.  */
221 #define MAX3(x,y,z) (MAX (x, MAX (y, z)))
222
/* Recognisable fill pattern, chosen to fit the limb width.  The file
   header describes it as the initial fill that shows locations never
   written.  */
223 #if GMP_LIMB_BITS == 32
224 #define DEADVAL CNST_LIMB(0xDEADBEEF)
225 #else
226 #define DEADVAL CNST_LIMB(0xDEADBEEFBADDCAFE)
227 #endif
228
229
/* A block of limbs described by its start pointer and a size.
   NOTE(review): size is presumably counted in limbs -- confirm.  */
230 struct region_t {
231 mp_ptr ptr;
232 mp_size_t size;
233 };
234
235
/* Values for trap_location, which starts out as TRAP_NOWHERE.
   NOTE(review): presumably records whether the reference function, the
   function under test or the setup code is running, so a signal handler
   can say where a fault happened -- confirm against the handler.  */
236 #define TRAP_NOWHERE 0
237 #define TRAP_REF 1
238 #define TRAP_FUN 2
239 #define TRAP_SETUPS 3
240 int trap_location = TRAP_NOWHERE;
241
242
/* Dimensions of the source and destination operand arrays.  param_init()
   uses source indexes up to 4 and destination indexes up to 1.  */
243 #define NUM_SOURCES 5
244 #define NUM_DESTS 2
245
/* One source operand.  `p' is the pointer the validate_* routines read the
   operand through (eg. s[0].p).
   NOTE(review): `region' is presumably the allocated block that p points
   into, with `high' and `align' selecting where in that block the operand
   is placed (see the alignment/redzone notes in the file header) --
   confirm.  */
246 struct source_t {
247 struct region_t region;
248 int high;
249 mp_size_t align;
250 mp_ptr p;
251 };
252
/* The source operands of the current test case. */
253 struct source_t s[NUM_SOURCES];
254
/* Settings for one destination operand that are not specific to the
   reference or tested function (those live in struct dest_each_t).
   NOTE(review): `high' and `align' presumably give the placement relative
   to the redzone and `size' the destination length in limbs -- confirm.  */
255 struct dest_t {
256 int high;
257 mp_size_t align;
258 mp_size_t size;
259 };
260
/* The destination settings of the current test case. */
261 struct dest_t d[NUM_DESTS];
262
/* Per-function (see struct each_t) pointer for one source operand.
   NOTE(review): presumably a private copy of the source data so the
   reference and tested functions each get their own -- confirm.  */
263 struct source_each_t {
264 mp_ptr p;
265 };
266
/* Per-function (see struct each_t) destination operand.  `p' is where the
   validate_* routines read the result from (eg. fun.d[0].p).
   NOTE(review): `region' is presumably the allocated block containing p --
   confirm.  */
267 struct dest_each_t {
268 struct region_t region;
269 mp_ptr p;
270 };
271
/* Scalar parameters of the current test case.  The validate_* routines
   read `size', `carry' and `divisor' directly, and SRC_SIZE() selects
   between `size' and `size2'.
   NOTE(review): `shift', `multiplier' and multiplier_N[] are presumably
   passed to functions whose try_t entry requests them; multiplier_N has
   8 entries, matching the largest msize (8) set in param_init() --
   confirm usage in call().  */
272 mp_size_t size;
273 mp_size_t size2;
274 unsigned long shift;
275 mp_limb_t carry;
276 mp_limb_t divisor;
277 mp_limb_t multiplier;
278 mp_limb_t multiplier_N[8];
279
/* State kept separately for each of the two functions being compared: a
   display name, its destination and source operands, and the value the
   function returned.  `name' must stay the first member because ref and
   fun below are initialized positionally.  */
280 struct each_t {
281 const char *name;
282 struct dest_each_t d[NUM_DESTS];
283 struct source_each_t s[NUM_SOURCES];
284 mp_limb_t retval;
285 };
286
/* `ref' is named "Ref" and `fun' is named "Fun"; the validate_* routines
   check fun.retval and fun.d[].p.
   NOTE(review): ref presumably holds the reference function's results and
   fun those of the function under test -- confirm.  */
287 struct each_t ref = { "Ref" };
288 struct each_t fun = { "Fun" };
289
/* Size to use for source operand N: size2 for source 1 when the current
   try_t has a non-zero size2 field, otherwise size.  Relies on the global
   `tr' being set when the macro is expanded.  */
290 #define SRC_SIZE(n) ((n) == 1 && tr->size2 ? size2 : size)
291
/* Called by the validate_* routines after they have printed a mismatch.
   Defined elsewhere in the file.  */
292 void validate_fail (void);
293
294
/* Optional extra code, textually included when HAVE_TRY_NEW_C is set. */
295 #if HAVE_TRY_NEW_C
296 #include "try-new.c"
297 #endif
298
299
/* Generic pointer type used to store reference functions; REFERENCE() in
   param_init() casts every reference routine to it.  */
300 typedef mp_limb_t (*tryfun_t) (ANYARGS);
301
/* Description of how one kind of function is to be tested.  param_init()
   fills in one of these per TYPE_xxx index of param[]; members it does not
   assign keep the zero value of the static array.  */
302 struct try_t {
/* Set to 1 by param_init() for functions such as mpn_add_n.
   NOTE(review): presumably means the return value is compared --
   confirm.  */
303 char retval;
304
/* Flags, set to 1 for each source / destination operand the function
   takes.  */
305 char src[NUM_SOURCES];
306 char dst[NUM_DESTS];
307
/* SIZE_xxx codes, used in the size, size2 and dst_size[] members below.
   NOTE(review): param_init() also stores a plain 1 in size2, which
   coincides with SIZE_YES -- confirm that is intended.  */
308 #define SIZE_YES 1
309 #define SIZE_ALLOW_ZERO 2
310 #define SIZE_1 3 /* 1 limb */
311 #define SIZE_2 4 /* 2 limbs */
312 #define SIZE_3 5 /* 3 limbs */
313 #define SIZE_4 6 /* 4 limbs */
314 #define SIZE_6 7 /* 6 limbs */
315 #define SIZE_FRACTION 8 /* size2 is fraction for divrem etc */
316 #define SIZE_SIZE2 9
317 #define SIZE_PLUS_1 10
318 #define SIZE_SUM 11
319 #define SIZE_DIFF 12
320 #define SIZE_DIFF_PLUS_1 13
321 #define SIZE_DIFF_PLUS_3 14
322 #define SIZE_RETVAL 15
323 #define SIZE_CEIL_HALF 16
324 #define SIZE_GET_STR 17
325 #define SIZE_PLUS_MSIZE_SUB_1 18 /* size+msize-1 */
326 #define SIZE_ODD 19
/* size: constraint on the main operand size.  size2: non-zero when a second
   size is used (tested by SRC_SIZE).  dst_size[]: how each destination's
   length is derived; zero is presumably "same as size" -- confirm.  */
327 char size;
328 char size2;
329 char dst_size[NUM_DESTS];
330
331 /* multiplier_N size in limbs */
332 mp_size_t msize;
333
/* NOTE(review): set for destination 0 of TYPE_GET_STR only; presumably
   marks a destination measured in bytes rather than limbs -- confirm.  */
334 char dst_bytes[NUM_DESTS];
335
/* NOTE(review): set for the addmul/submul and *_ip1/*_ip2 types; presumably
   requests that destination 0 be preloaded from the source-1 data because
   the function reads its destination -- confirm.  */
336 char dst0_from_src1;
337
/* Kinds of carry-in argument; zero in `carry' is presumably "none". */
338 #define CARRY_BIT 1 /* single bit 0 or 1 */
339 #define CARRY_3 2 /* 0, 1, 2 */
340 #define CARRY_4 3 /* 0 to 3 */
341 #define CARRY_LIMB 4 /* any limb value */
342 #define CARRY_DIVISOR 5 /* carry<divisor */
343 char carry;
344
345 /* a fudge to tell the output when to print negatives */
346 char carry_sign;
347
/* Flags, set to 1 when the function takes a single-limb multiplier / a
   shift count (presumably filled from the `multiplier' and `shift'
   globals -- confirm).  */
348 char multiplier;
349 char shift;
350
/* Kinds of single-limb divisor argument. */
351 #define DIVISOR_LIMB 1
352 #define DIVISOR_NORM 2
353 #define DIVISOR_ODD 3
354 char divisor;
355
/* Special requirements on the generated source data. */
356 #define DATA_NON_ZERO 1
357 #define DATA_GCD 2
358 #define DATA_SRC0_ODD 3
359 #define DATA_SRC0_HIGHBIT 4
360 #define DATA_SRC1_ODD 5
361 #define DATA_SRC1_ODD_PRIME 6
362 #define DATA_SRC1_HIGHBIT 7
363 #define DATA_MULTIPLE_DIVISOR 8
364 #define DATA_UDIV_QRNND 9
365 char data;
366
367 /* Default is allow full overlap. */
368 #define OVERLAP_NONE 1
369 #define OVERLAP_LOW_TO_HIGH 2
370 #define OVERLAP_HIGH_TO_LOW 3
371 #define OVERLAP_NOT_SRCS 4
372 #define OVERLAP_NOT_SRC2 8
373 #define OVERLAP_NOT_DST2 16
374 char overlap;
375
/* Reference implementation and its printable name, both assigned by the
   REFERENCE() macro in param_init().  */
376 tryfun_t reference;
377 const char *reference_name;
378
/* Optional custom checking routine and its printable name, both assigned
   by the VALIDATE() macro in param_init().  */
379 void (*validate) (void);
380 const char *validate_name;
381 };
382
/* The try_t currently in effect; read by SRC_SIZE(). */
383 struct try_t *tr;
384
385
386 void
387 validate_mod_34lsub1 (void)
388 {
389 #define CNST_34LSUB1 ((CNST_LIMB(1) << (3 * (GMP_NUMB_BITS / 4))) - 1)
390
391 mp_srcptr ptr = s[0].p;
392 int error = 0;
393 mp_limb_t got, got_mod, want, want_mod;
394
395 ASSERT (size >= 1);
396
397 got = fun.retval;
398 got_mod = got % CNST_34LSUB1;
399
400 want = refmpn_mod_34lsub1 (ptr, size);
401 want_mod = want % CNST_34LSUB1;
402
403 if (got_mod != want_mod)
404 {
405 gmp_printf ("got 0x%MX reduced from 0x%MX\n", got_mod, got);
406 gmp_printf ("want 0x%MX reduced from 0x%MX\n", want_mod, want);
407 error = 1;
408 }
409
410 if (error)
411 validate_fail ();
412 }
413
414 void
415 validate_divexact_1 (void)
416 {
417 mp_srcptr src = s[0].p;
418 mp_srcptr dst = fun.d[0].p;
419 int error = 0;
420
421 ASSERT (size >= 1);
422
423 {
424 mp_ptr tp = refmpn_malloc_limbs (size);
425 mp_limb_t rem;
426
427 rem = refmpn_divrem_1 (tp, 0, src, size, divisor);
428 if (rem != 0)
429 {
430 gmp_printf ("Remainder a%%d == 0x%MX, mpn_divexact_1 undefined\n", rem);
431 error = 1;
432 }
433 if (! refmpn_equal_anynail (tp, dst, size))
434 {
435 printf ("Quotient a/d wrong\n");
436 mpn_trace ("fun ", dst, size);
437 mpn_trace ("want", tp, size);
438 error = 1;
439 }
440 free (tp);
441 }
442
443 if (error)
444 validate_fail ();
445 }
446
447 void
448 validate_bdiv_q_1
449 (void)
450 {
451 mp_srcptr src = s[0].p;
452 mp_srcptr dst = fun.d[0].p;
453 int error = 0;
454
455 ASSERT (size >= 1);
456
457 {
458 mp_ptr tp = refmpn_malloc_limbs (size + 1);
459
460 refmpn_mul_1 (tp, dst, size, divisor);
461 /* Set ignored low bits */
462 tp[0] |= (src[0] & LOW_ZEROS_MASK (divisor));
463 if (! refmpn_equal_anynail (tp, src, size))
464 {
465 printf ("Bdiv wrong: res * divisor != src (mod B^size)\n");
466 mpn_trace ("res ", dst, size);
467 mpn_trace ("src ", src, size);
468 error = 1;
469 }
470 free (tp);
471 }
472
473 if (error)
474 validate_fail ();
475 }
476
477
478 void
479 validate_modexact_1c_odd (void)
480 {
481 mp_srcptr ptr = s[0].p;
482 mp_limb_t r = fun.retval;
483 int error = 0;
484
485 ASSERT (size >= 1);
486 ASSERT (divisor & 1);
487
488 if ((r & GMP_NAIL_MASK) != 0)
489 printf ("r has non-zero nail\n");
490
491 if (carry < divisor)
492 {
493 if (! (r < divisor))
494 {
495 printf ("Don't have r < divisor\n");
496 error = 1;
497 }
498 }
499 else /* carry >= divisor */
500 {
501 if (! (r <= divisor))
502 {
503 printf ("Don't have r <= divisor\n");
504 error = 1;
505 }
506 }
507
508 {
509 mp_limb_t c = carry % divisor;
510 mp_ptr tp = refmpn_malloc_limbs (size+1);
511 mp_size_t k;
512
513 for (k = size-1; k <= size; k++)
514 {
515 /* set {tp,size+1} to r*b^k + a - c */
516 refmpn_copyi (tp, ptr, size);
517 tp[size] = 0;
518 ASSERT_NOCARRY (refmpn_add_1 (tp+k, tp+k, size+1-k, r));
519 if (refmpn_sub_1 (tp, tp, size+1, c))
520 ASSERT_CARRY (mpn_add_1 (tp, tp, size+1, divisor));
521
522 if (refmpn_mod_1 (tp, size+1, divisor) == 0)
523 goto good_remainder;
524 }
525 printf ("Remainder matches neither r*b^(size-1) nor r*b^size\n");
526 error = 1;
527
528 good_remainder:
529 free (tp);
530 }
531
532 if (error)
533 validate_fail ();
534 }
535
536 void
537 validate_modexact_1_odd (void)
538 {
539 carry = 0;
540 validate_modexact_1c_odd ();
541 }
542
543
544 void
545 validate_sqrtrem (void)
546 {
547 mp_srcptr orig_ptr = s[0].p;
548 mp_size_t orig_size = size;
549 mp_size_t root_size = (size+1)/2;
550 mp_srcptr root_ptr = fun.d[0].p;
551 mp_size_t rem_size = fun.retval;
552 mp_srcptr rem_ptr = fun.d[1].p;
553 mp_size_t prod_size = 2*root_size;
554 mp_ptr p;
555 int error = 0;
556
557 if (rem_size < 0 || rem_size > size)
558 {
559 printf ("Bad remainder size retval %ld\n", (long) rem_size);
560 validate_fail ();
561 }
562
563 p = refmpn_malloc_limbs (prod_size);
564
565 p[root_size] = refmpn_lshift (p, root_ptr, root_size, 1);
566 if (refmpn_cmp_twosizes (p,root_size+1, rem_ptr,rem_size) < 0)
567 {
568 printf ("Remainder bigger than 2*root\n");
569 error = 1;
570 }
571
572 refmpn_sqr (p, root_ptr, root_size);
573 if (rem_size != 0)
574 refmpn_add (p, p, prod_size, rem_ptr, rem_size);
575 if (refmpn_cmp_twosizes (p,prod_size, orig_ptr,orig_size) != 0)
576 {
577 printf ("root^2+rem != original\n");
578 mpn_trace ("prod", p, prod_size);
579 error = 1;
580 }
581 free (p);
582
583 if (error)
584 validate_fail ();
585 }
586
587
588 /* These types are indexes into the param[] array and are arbitrary so long
589 as they're all distinct and within the size of param[]. Renumber
590 whenever necessary or desired. */
591
592 enum {
/* Numbering starts at 1, so no TYPE_xxx constant has the value 0. */
593 TYPE_ADD = 1, TYPE_ADD_N, TYPE_ADD_NC, TYPE_SUB, TYPE_SUB_N, TYPE_SUB_NC,
594
595 TYPE_ADD_ERR1_N, TYPE_ADD_ERR2_N, TYPE_ADD_ERR3_N,
596 TYPE_SUB_ERR1_N, TYPE_SUB_ERR2_N, TYPE_SUB_ERR3_N,
597
598 TYPE_MUL_1, TYPE_MUL_1C,
599
600 TYPE_MUL_2, TYPE_MUL_3, TYPE_MUL_4, TYPE_MUL_5, TYPE_MUL_6,
601
602 TYPE_ADDMUL_1, TYPE_ADDMUL_1C, TYPE_SUBMUL_1, TYPE_SUBMUL_1C,
603
604 TYPE_ADDMUL_2, TYPE_ADDMUL_3, TYPE_ADDMUL_4, TYPE_ADDMUL_5, TYPE_ADDMUL_6,
605 TYPE_ADDMUL_7, TYPE_ADDMUL_8,
606
607 TYPE_ADDSUB_N, TYPE_ADDSUB_NC,
608
609 TYPE_RSHIFT, TYPE_LSHIFT, TYPE_LSHIFTC,
610
611 TYPE_COPY, TYPE_COPYI, TYPE_COPYD, TYPE_COM,
612
613 TYPE_ADDLSH1_N, TYPE_ADDLSH2_N, TYPE_ADDLSH_N,
614 TYPE_ADDLSH1_N_IP1, TYPE_ADDLSH2_N_IP1, TYPE_ADDLSH_N_IP1,
615 TYPE_ADDLSH1_N_IP2, TYPE_ADDLSH2_N_IP2, TYPE_ADDLSH_N_IP2,
616 TYPE_SUBLSH1_N, TYPE_SUBLSH2_N, TYPE_SUBLSH_N,
617 TYPE_SUBLSH1_N_IP1, TYPE_SUBLSH2_N_IP1, TYPE_SUBLSH_N_IP1,
618 TYPE_RSBLSH1_N, TYPE_RSBLSH2_N, TYPE_RSBLSH_N,
619 TYPE_RSH1ADD_N, TYPE_RSH1SUB_N,
620
621 TYPE_ADDLSH1_NC, TYPE_ADDLSH2_NC, TYPE_ADDLSH_NC,
622 TYPE_SUBLSH1_NC, TYPE_SUBLSH2_NC, TYPE_SUBLSH_NC,
623 TYPE_RSBLSH1_NC, TYPE_RSBLSH2_NC, TYPE_RSBLSH_NC,
624
625 TYPE_ADDCND_N, TYPE_SUBCND_N,
626
627 TYPE_MOD_1, TYPE_MOD_1C, TYPE_DIVMOD_1, TYPE_DIVMOD_1C, TYPE_DIVREM_1,
628 TYPE_DIVREM_1C, TYPE_PREINV_DIVREM_1, TYPE_DIVREM_2, TYPE_PREINV_MOD_1,
629 TYPE_MOD_34LSUB1, TYPE_UDIV_QRNND, TYPE_UDIV_QRNND_R,
630
631 TYPE_DIVEXACT_1, TYPE_BDIV_Q_1, TYPE_DIVEXACT_BY3, TYPE_DIVEXACT_BY3C,
632 TYPE_MODEXACT_1_ODD, TYPE_MODEXACT_1C_ODD,
633
634 TYPE_INVERT, TYPE_BINVERT,
635
636 TYPE_GCD, TYPE_GCD_1, TYPE_GCD_FINDA, TYPE_MPZ_JACOBI, TYPE_MPZ_KRONECKER,
637 TYPE_MPZ_KRONECKER_UI, TYPE_MPZ_KRONECKER_SI, TYPE_MPZ_UI_KRONECKER,
638 TYPE_MPZ_SI_KRONECKER, TYPE_MPZ_LEGENDRE,
639
640 TYPE_AND_N, TYPE_NAND_N, TYPE_ANDN_N, TYPE_IOR_N, TYPE_IORN_N, TYPE_NIOR_N,
641 TYPE_XOR_N, TYPE_XNOR_N,
642
643 TYPE_MUL_MN, TYPE_MUL_N, TYPE_SQR, TYPE_UMUL_PPMM, TYPE_UMUL_PPMM_R,
644 TYPE_MULLO_N, TYPE_MULMID_MN, TYPE_MULMID_N,
645
646 TYPE_SBPI1_DIV_QR, TYPE_TDIV_QR,
647
648 TYPE_SQRTREM, TYPE_ZERO, TYPE_GET_STR, TYPE_POPCOUNT, TYPE_HAMDIST,
649
/* Must stay last: its value is one more than the highest type above and
   is used as the dimension of param[].  */
650 TYPE_EXTRA
651 };
652
/* Test descriptions indexed by TYPE_xxx, filled in by param_init().  Valid
   indexes are 0 .. TYPE_EXTRA-1.  */
653 struct try_t param[TYPE_EXTRA];
654
655
656 void
657 param_init (void)
658 {
659 struct try_t *p;
660
661 #define COPY(index) memcpy (p, ¶m[index], sizeof (*p))
662
663 #if HAVE_STRINGIZE
664 #define REFERENCE(fun) \
665 p->reference = (tryfun_t) fun; \
666 p->reference_name = #fun
667 #define VALIDATE(fun) \
668 p->validate = fun; \
669 p->validate_name = #fun
670 #else
671 #define REFERENCE(fun) \
672 p->reference = (tryfun_t) fun; \
673 p->reference_name = "fun"
674 #define VALIDATE(fun) \
675 p->validate = fun; \
676 p->validate_name = "fun"
677 #endif
678
679
680 p = ¶m[TYPE_ADD_N];
681 p->retval = 1;
682 p->dst[0] = 1;
683 p->src[0] = 1;
684 p->src[1] = 1;
685 REFERENCE (refmpn_add_n);
686
687 p = ¶m[TYPE_ADD_NC];
688 COPY (TYPE_ADD_N);
689 p->carry = CARRY_BIT;
690 REFERENCE (refmpn_add_nc);
691
692 p = ¶m[TYPE_SUB_N];
693 COPY (TYPE_ADD_N);
694 REFERENCE (refmpn_sub_n);
695
696 p = ¶m[TYPE_SUB_NC];
697 COPY (TYPE_ADD_NC);
698 REFERENCE (refmpn_sub_nc);
699
700 p = ¶m[TYPE_ADD];
701 COPY (TYPE_ADD_N);
702 p->size = SIZE_ALLOW_ZERO;
703 p->size2 = 1;
704 REFERENCE (refmpn_add);
705
706 p = ¶m[TYPE_SUB];
707 COPY (TYPE_ADD);
708 REFERENCE (refmpn_sub);
709
710
711 p = ¶m[TYPE_ADD_ERR1_N];
712 p->retval = 1;
713 p->dst[0] = 1;
714 p->dst[1] = 1;
715 p->src[0] = 1;
716 p->src[1] = 1;
717 p->src[2] = 1;
718 p->dst_size[1] = SIZE_2;
719 p->carry = CARRY_BIT;
720 p->overlap = OVERLAP_NOT_DST2;
721 REFERENCE (refmpn_add_err1_n);
722
723 p = ¶m[TYPE_SUB_ERR1_N];
724 COPY (TYPE_ADD_ERR1_N);
725 REFERENCE (refmpn_sub_err1_n);
726
727 p = ¶m[TYPE_ADD_ERR2_N];
728 COPY (TYPE_ADD_ERR1_N);
729 p->src[3] = 1;
730 p->dst_size[1] = SIZE_4;
731 REFERENCE (refmpn_add_err2_n);
732
733 p = ¶m[TYPE_SUB_ERR2_N];
734 COPY (TYPE_ADD_ERR2_N);
735 REFERENCE (refmpn_sub_err2_n);
736
737 p = ¶m[TYPE_ADD_ERR3_N];
738 COPY (TYPE_ADD_ERR2_N);
739 p->src[4] = 1;
740 p->dst_size[1] = SIZE_6;
741 REFERENCE (refmpn_add_err3_n);
742
743 p = ¶m[TYPE_SUB_ERR3_N];
744 COPY (TYPE_ADD_ERR3_N);
745 REFERENCE (refmpn_sub_err3_n);
746
747 p = ¶m[TYPE_ADDCND_N];
748 COPY (TYPE_ADD_N);
749 p->carry = CARRY_BIT;
750 REFERENCE (refmpn_addcnd_n);
751
752 p = ¶m[TYPE_SUBCND_N];
753 COPY (TYPE_ADD_N);
754 p->carry = CARRY_BIT;
755 REFERENCE (refmpn_subcnd_n);
756
757
758 p = ¶m[TYPE_MUL_1];
759 p->retval = 1;
760 p->dst[0] = 1;
761 p->src[0] = 1;
762 p->multiplier = 1;
763 p->overlap = OVERLAP_LOW_TO_HIGH;
764 REFERENCE (refmpn_mul_1);
765
766 p = ¶m[TYPE_MUL_1C];
767 COPY (TYPE_MUL_1);
768 p->carry = CARRY_LIMB;
769 REFERENCE (refmpn_mul_1c);
770
771
772 p = ¶m[TYPE_MUL_2];
773 p->retval = 1;
774 p->dst[0] = 1;
775 p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1;
776 p->src[0] = 1;
777 p->src[1] = 1;
778 p->msize = 2;
779 p->overlap = OVERLAP_NOT_SRC2;
780 REFERENCE (refmpn_mul_2);
781
782 p = ¶m[TYPE_MUL_3];
783 COPY (TYPE_MUL_2);
784 p->msize = 3;
785 REFERENCE (refmpn_mul_3);
786
787 p = ¶m[TYPE_MUL_4];
788 COPY (TYPE_MUL_2);
789 p->msize = 4;
790 REFERENCE (refmpn_mul_4);
791
792 p = ¶m[TYPE_MUL_5];
793 COPY (TYPE_MUL_2);
794 p->msize = 5;
795 REFERENCE (refmpn_mul_5);
796
797 p = ¶m[TYPE_MUL_6];
798 COPY (TYPE_MUL_2);
799 p->msize = 6;
800 REFERENCE (refmpn_mul_6);
801
802
803 p = ¶m[TYPE_ADDMUL_1];
804 p->retval = 1;
805 p->dst[0] = 1;
806 p->src[0] = 1;
807 p->multiplier = 1;
808 p->dst0_from_src1 = 1;
809 REFERENCE (refmpn_addmul_1);
810
811 p = ¶m[TYPE_ADDMUL_1C];
812 COPY (TYPE_ADDMUL_1);
813 p->carry = CARRY_LIMB;
814 REFERENCE (refmpn_addmul_1c);
815
816 p = ¶m[TYPE_SUBMUL_1];
817 COPY (TYPE_ADDMUL_1);
818 REFERENCE (refmpn_submul_1);
819
820 p = ¶m[TYPE_SUBMUL_1C];
821 COPY (TYPE_ADDMUL_1C);
822 REFERENCE (refmpn_submul_1c);
823
824
825 p = ¶m[TYPE_ADDMUL_2];
826 p->retval = 1;
827 p->dst[0] = 1;
828 p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1;
829 p->src[0] = 1;
830 p->src[1] = 1;
831 p->msize = 2;
832 p->dst0_from_src1 = 1;
833 p->overlap = OVERLAP_NONE;
834 REFERENCE (refmpn_addmul_2);
835
836 p = ¶m[TYPE_ADDMUL_3];
837 COPY (TYPE_ADDMUL_2);
838 p->msize = 3;
839 REFERENCE (refmpn_addmul_3);
840
841 p = ¶m[TYPE_ADDMUL_4];
842 COPY (TYPE_ADDMUL_2);
843 p->msize = 4;
844 REFERENCE (refmpn_addmul_4);
845
846 p = ¶m[TYPE_ADDMUL_5];
847 COPY (TYPE_ADDMUL_2);
848 p->msize = 5;
849 REFERENCE (refmpn_addmul_5);
850
851 p = ¶m[TYPE_ADDMUL_6];
852 COPY (TYPE_ADDMUL_2);
853 p->msize = 6;
854 REFERENCE (refmpn_addmul_6);
855
856 p = ¶m[TYPE_ADDMUL_7];
857 COPY (TYPE_ADDMUL_2);
858 p->msize = 7;
859 REFERENCE (refmpn_addmul_7);
860
861 p = ¶m[TYPE_ADDMUL_8];
862 COPY (TYPE_ADDMUL_2);
863 p->msize = 8;
864 REFERENCE (refmpn_addmul_8);
865
866
867 p = ¶m[TYPE_AND_N];
868 p->dst[0] = 1;
869 p->src[0] = 1;
870 p->src[1] = 1;
871 REFERENCE (refmpn_and_n);
872
873 p = ¶m[TYPE_ANDN_N];
874 COPY (TYPE_AND_N);
875 REFERENCE (refmpn_andn_n);
876
877 p = ¶m[TYPE_NAND_N];
878 COPY (TYPE_AND_N);
879 REFERENCE (refmpn_nand_n);
880
881 p = ¶m[TYPE_IOR_N];
882 COPY (TYPE_AND_N);
883 REFERENCE (refmpn_ior_n);
884
885 p = ¶m[TYPE_IORN_N];
886 COPY (TYPE_AND_N);
887 REFERENCE (refmpn_iorn_n);
888
889 p = ¶m[TYPE_NIOR_N];
890 COPY (TYPE_AND_N);
891 REFERENCE (refmpn_nior_n);
892
893 p = ¶m[TYPE_XOR_N];
894 COPY (TYPE_AND_N);
895 REFERENCE (refmpn_xor_n);
896
897 p = ¶m[TYPE_XNOR_N];
898 COPY (TYPE_AND_N);
899 REFERENCE (refmpn_xnor_n);
900
901
902 p = ¶m[TYPE_ADDSUB_N];
903 p->retval = 1;
904 p->dst[0] = 1;
905 p->dst[1] = 1;
906 p->src[0] = 1;
907 p->src[1] = 1;
908 REFERENCE (refmpn_add_n_sub_n);
909
910 p = ¶m[TYPE_ADDSUB_NC];
911 COPY (TYPE_ADDSUB_N);
912 p->carry = CARRY_4;
913 REFERENCE (refmpn_add_n_sub_nc);
914
915
916 p = ¶m[TYPE_COPY];
917 p->dst[0] = 1;
918 p->src[0] = 1;
919 p->overlap = OVERLAP_NONE;
920 p->size = SIZE_ALLOW_ZERO;
921 REFERENCE (refmpn_copy);
922
923 p = ¶m[TYPE_COPYI];
924 p->dst[0] = 1;
925 p->src[0] = 1;
926 p->overlap = OVERLAP_LOW_TO_HIGH;
927 p->size = SIZE_ALLOW_ZERO;
928 REFERENCE (refmpn_copyi);
929
930 p = ¶m[TYPE_COPYD];
931 p->dst[0] = 1;
932 p->src[0] = 1;
933 p->overlap = OVERLAP_HIGH_TO_LOW;
934 p->size = SIZE_ALLOW_ZERO;
935 REFERENCE (refmpn_copyd);
936
937 p = ¶m[TYPE_COM];
938 p->dst[0] = 1;
939 p->src[0] = 1;
940 REFERENCE (refmpn_com);
941
942
943 p = ¶m[TYPE_ADDLSH1_N];
944 COPY (TYPE_ADD_N);
945 REFERENCE (refmpn_addlsh1_n);
946
947 p = ¶m[TYPE_ADDLSH2_N];
948 COPY (TYPE_ADD_N);
949 REFERENCE (refmpn_addlsh2_n);
950
951 p = ¶m[TYPE_ADDLSH_N];
952 COPY (TYPE_ADD_N);
953 p->shift = 1;
954 REFERENCE (refmpn_addlsh_n);
955
956 p = ¶m[TYPE_ADDLSH1_N_IP1];
957 p->retval = 1;
958 p->dst[0] = 1;
959 p->src[0] = 1;
960 p->dst0_from_src1 = 1;
961 REFERENCE (refmpn_addlsh1_n_ip1);
962
963 p = ¶m[TYPE_ADDLSH2_N_IP1];
964 COPY (TYPE_ADDLSH1_N_IP1);
965 REFERENCE (refmpn_addlsh2_n_ip1);
966
967 p = ¶m[TYPE_ADDLSH_N_IP1];
968 COPY (TYPE_ADDLSH1_N_IP1);
969 p->shift = 1;
970 REFERENCE (refmpn_addlsh_n_ip1);
971
972 p = ¶m[TYPE_ADDLSH1_N_IP2];
973 COPY (TYPE_ADDLSH1_N_IP1);
974 REFERENCE (refmpn_addlsh1_n_ip2);
975
976 p = ¶m[TYPE_ADDLSH2_N_IP2];
977 COPY (TYPE_ADDLSH1_N_IP1);
978 REFERENCE (refmpn_addlsh2_n_ip2);
979
980 p = ¶m[TYPE_ADDLSH_N_IP2];
981 COPY (TYPE_ADDLSH_N_IP1);
982 REFERENCE (refmpn_addlsh_n_ip2);
983
984 p = ¶m[TYPE_SUBLSH1_N];
985 COPY (TYPE_ADD_N);
986 REFERENCE (refmpn_sublsh1_n);
987
988 p = ¶m[TYPE_SUBLSH2_N];
989 COPY (TYPE_ADD_N);
990 REFERENCE (refmpn_sublsh2_n);
991
992 p = ¶m[TYPE_SUBLSH_N];
993 COPY (TYPE_ADDLSH_N);
994 REFERENCE (refmpn_sublsh_n);
995
996 p = ¶m[TYPE_SUBLSH1_N_IP1];
997 COPY (TYPE_ADDLSH1_N_IP1);
998 REFERENCE (refmpn_sublsh1_n_ip1);
999
1000 p = ¶m[TYPE_SUBLSH2_N_IP1];
1001 COPY (TYPE_ADDLSH1_N_IP1);
1002 REFERENCE (refmpn_sublsh2_n_ip1);
1003
1004 p = ¶m[TYPE_SUBLSH_N_IP1];
1005 COPY (TYPE_ADDLSH_N_IP1);
1006 REFERENCE (refmpn_sublsh_n_ip1);
1007
1008 p = ¶m[TYPE_RSBLSH1_N];
1009 COPY (TYPE_ADD_N);
1010 REFERENCE (refmpn_rsblsh1_n);
1011
1012 p = ¶m[TYPE_RSBLSH2_N];
1013 COPY (TYPE_ADD_N);
1014 REFERENCE (refmpn_rsblsh2_n);
1015
1016 p = ¶m[TYPE_RSBLSH_N];
1017 COPY (TYPE_ADDLSH_N);
1018 REFERENCE (refmpn_rsblsh_n);
1019
1020 p = ¶m[TYPE_RSH1ADD_N];
1021 COPY (TYPE_ADD_N);
1022 REFERENCE (refmpn_rsh1add_n);
1023
1024 p = ¶m[TYPE_RSH1SUB_N];
1025 COPY (TYPE_ADD_N);
1026 REFERENCE (refmpn_rsh1sub_n);
1027
1028
1029 p = ¶m[TYPE_ADDLSH1_NC];
1030 COPY (TYPE_ADDLSH1_N);
1031 p->carry = CARRY_3;
1032 REFERENCE (refmpn_addlsh1_nc);
1033
1034 p = ¶m[TYPE_ADDLSH2_NC];
1035 COPY (TYPE_ADDLSH2_N);
1036 p->carry = CARRY_4; /* FIXME */
1037 REFERENCE (refmpn_addlsh2_nc);
1038
1039 p = ¶m[TYPE_ADDLSH_NC];
1040 COPY (TYPE_ADDLSH_N);
1041 p->carry = CARRY_BIT; /* FIXME */
1042 REFERENCE (refmpn_addlsh_nc);
1043
1044 p = ¶m[TYPE_SUBLSH1_NC];
1045 COPY (TYPE_ADDLSH1_NC);
1046 REFERENCE (refmpn_sublsh1_nc);
1047
1048 p = ¶m[TYPE_SUBLSH2_NC];
1049 COPY (TYPE_ADDLSH2_NC);
1050 REFERENCE (refmpn_sublsh2_nc);
1051
1052 p = ¶m[TYPE_SUBLSH_NC];
1053 COPY (TYPE_ADDLSH_NC);
1054 REFERENCE (refmpn_sublsh_nc);
1055
1056 p = ¶m[TYPE_RSBLSH1_NC];
1057 COPY (TYPE_RSBLSH1_N);
1058 p->carry = CARRY_BIT; /* FIXME */
1059 REFERENCE (refmpn_rsblsh1_nc);
1060
1061 p = ¶m[TYPE_RSBLSH2_NC];
1062 COPY (TYPE_RSBLSH2_N);
1063 p->carry = CARRY_4; /* FIXME */
1064 REFERENCE (refmpn_rsblsh2_nc);
1065
1066 p = ¶m[TYPE_RSBLSH_NC];
1067 COPY (TYPE_RSBLSH_N);
1068 p->carry = CARRY_BIT; /* FIXME */
1069 REFERENCE (refmpn_rsblsh_nc);
1070
1071
1072 p = ¶m[TYPE_MOD_1];
1073 p->retval = 1;
1074 p->src[0] = 1;
1075 p->size = SIZE_ALLOW_ZERO;
1076 p->divisor = DIVISOR_LIMB;
1077 REFERENCE (refmpn_mod_1);
1078
1079 p = ¶m[TYPE_MOD_1C];
1080 COPY (TYPE_MOD_1);
1081 p->carry = CARRY_DIVISOR;
1082 REFERENCE (refmpn_mod_1c);
1083
1084 p = ¶m[TYPE_DIVMOD_1];
1085 COPY (TYPE_MOD_1);
1086 p->dst[0] = 1;
1087 REFERENCE (refmpn_divmod_1);
1088
1089 p = ¶m[TYPE_DIVMOD_1C];
1090 COPY (TYPE_DIVMOD_1);
1091 p->carry = CARRY_DIVISOR;
1092 REFERENCE (refmpn_divmod_1c);
1093
1094 p = ¶m[TYPE_DIVREM_1];
1095 COPY (TYPE_DIVMOD_1);
1096 p->size2 = SIZE_FRACTION;
1097 p->dst_size[0] = SIZE_SUM;
1098 REFERENCE (refmpn_divrem_1);
1099
1100 p = ¶m[TYPE_DIVREM_1C];
1101 COPY (TYPE_DIVREM_1);
1102 p->carry = CARRY_DIVISOR;
1103 REFERENCE (refmpn_divrem_1c);
1104
1105 p = ¶m[TYPE_PREINV_DIVREM_1];
1106 COPY (TYPE_DIVREM_1);
1107 p->size = SIZE_YES; /* ie. no size==0 */
1108 REFERENCE (refmpn_preinv_divrem_1);
1109
1110 p = ¶m[TYPE_PREINV_MOD_1];
1111 p->retval = 1;
1112 p->src[0] = 1;
1113 p->divisor = DIVISOR_NORM;
1114 REFERENCE (refmpn_preinv_mod_1);
1115
1116 p = ¶m[TYPE_MOD_34LSUB1];
1117 p->retval = 1;
1118 p->src[0] = 1;
1119 VALIDATE (validate_mod_34lsub1);
1120
1121 p = ¶m[TYPE_UDIV_QRNND];
1122 p->retval = 1;
1123 p->src[0] = 1;
1124 p->dst[0] = 1;
1125 p->dst_size[0] = SIZE_1;
1126 p->divisor = UDIV_NEEDS_NORMALIZATION ? DIVISOR_NORM : DIVISOR_LIMB;
1127 p->data = DATA_UDIV_QRNND;
1128 p->overlap = OVERLAP_NONE;
1129 REFERENCE (refmpn_udiv_qrnnd);
1130
1131 p = ¶m[TYPE_UDIV_QRNND_R];
1132 COPY (TYPE_UDIV_QRNND);
1133 REFERENCE (refmpn_udiv_qrnnd_r);
1134
1135
1136 p = ¶m[TYPE_DIVEXACT_1];
1137 p->dst[0] = 1;
1138 p->src[0] = 1;
1139 p->divisor = DIVISOR_LIMB;
1140 p->data = DATA_MULTIPLE_DIVISOR;
1141 VALIDATE (validate_divexact_1);
1142 REFERENCE (refmpn_divmod_1);
1143
1144 p = ¶m[TYPE_BDIV_Q_1];
1145 p->dst[0] = 1;
1146 p->src[0] = 1;
1147 p->divisor = DIVISOR_LIMB;
1148 VALIDATE (validate_bdiv_q_1);
1149
1150 p = ¶m[TYPE_DIVEXACT_BY3];
1151 p->retval = 1;
1152 p->dst[0] = 1;
1153 p->src[0] = 1;
1154 REFERENCE (refmpn_divexact_by3);
1155
1156 p = ¶m[TYPE_DIVEXACT_BY3C];
1157 COPY (TYPE_DIVEXACT_BY3);
1158 p->carry = CARRY_3;
1159 REFERENCE (refmpn_divexact_by3c);
1160
1161
1162 p = ¶m[TYPE_MODEXACT_1_ODD];
1163 p->retval = 1;
1164 p->src[0] = 1;
1165 p->divisor = DIVISOR_ODD;
1166 VALIDATE (validate_modexact_1_odd);
1167
1168 p = ¶m[TYPE_MODEXACT_1C_ODD];
1169 COPY (TYPE_MODEXACT_1_ODD);
1170 p->carry = CARRY_LIMB;
1171 VALIDATE (validate_modexact_1c_odd);
1172
1173
1174 p = ¶m[TYPE_GCD_1];
1175 p->retval = 1;
1176 p->src[0] = 1;
1177 p->data = DATA_NON_ZERO;
1178 p->divisor = DIVISOR_LIMB;
1179 REFERENCE (refmpn_gcd_1);
1180
1181 p = ¶m[TYPE_GCD];
1182 p->retval = 1;
1183 p->dst[0] = 1;
1184 p->src[0] = 1;
1185 p->src[1] = 1;
1186 p->size2 = 1;
1187 p->dst_size[0] = SIZE_RETVAL;
1188 p->overlap = OVERLAP_NOT_SRCS;
1189 p->data = DATA_GCD;
1190 REFERENCE (refmpn_gcd);
1191
1192
1193 p = ¶m[TYPE_MPZ_LEGENDRE];
1194 p->retval = 1;
1195 p->src[0] = 1;
1196 p->size = SIZE_ALLOW_ZERO;
1197 p->src[1] = 1;
1198 p->data = DATA_SRC1_ODD_PRIME;
1199 p->size2 = 1;
1200 p->carry = CARRY_BIT;
1201 p->carry_sign = 1;
1202 REFERENCE (refmpz_legendre);
1203
1204 p = ¶m[TYPE_MPZ_JACOBI];
1205 p->retval = 1;
1206 p->src[0] = 1;
1207 p->size = SIZE_ALLOW_ZERO;
1208 p->src[1] = 1;
1209 p->data = DATA_SRC1_ODD;
1210 p->size2 = 1;
1211 p->carry = CARRY_BIT;
1212 p->carry_sign = 1;
1213 REFERENCE (refmpz_jacobi);
1214
1215 p = ¶m[TYPE_MPZ_KRONECKER];
1216 p->retval = 1;
1217 p->src[0] = 1;
1218 p->size = SIZE_ALLOW_ZERO;
1219 p->src[1] = 1;
1220 p->data = 0;
1221 p->size2 = 1;
1222 p->carry = CARRY_4;
1223 p->carry_sign = 1;
1224 REFERENCE (refmpz_kronecker);
1225
1226
1227 p = ¶m[TYPE_MPZ_KRONECKER_UI];
1228 p->retval = 1;
1229 p->src[0] = 1;
1230 p->size = SIZE_ALLOW_ZERO;
1231 p->multiplier = 1;
1232 p->carry = CARRY_BIT;
1233 REFERENCE (refmpz_kronecker_ui);
1234
1235 p = ¶m[TYPE_MPZ_KRONECKER_SI];
1236 COPY (TYPE_MPZ_KRONECKER_UI);
1237 REFERENCE (refmpz_kronecker_si);
1238
1239 p = ¶m[TYPE_MPZ_UI_KRONECKER];
1240 COPY (TYPE_MPZ_KRONECKER_UI);
1241 REFERENCE (refmpz_ui_kronecker);
1242
1243 p = ¶m[TYPE_MPZ_SI_KRONECKER];
1244 COPY (TYPE_MPZ_KRONECKER_UI);
1245 REFERENCE (refmpz_si_kronecker);
1246
1247
1248 p = ¶m[TYPE_SQR];
1249 p->dst[0] = 1;
1250 p->src[0] = 1;
1251 p->dst_size[0] = SIZE_SUM;
1252 p->overlap = OVERLAP_NONE;
1253 REFERENCE (refmpn_sqr);
1254
1255 p = ¶m[TYPE_MUL_N];
1256 COPY (TYPE_SQR);
1257 p->src[1] = 1;
1258 REFERENCE (refmpn_mul_n);
1259
1260 p = ¶m[TYPE_MULLO_N];
1261 COPY (TYPE_MUL_N);
1262 p->dst_size[0] = 0;
1263 REFERENCE (refmpn_mullo_n);
1264
1265 p = ¶m[TYPE_MUL_MN];
1266 COPY (TYPE_MUL_N);
1267 p->size2 = 1;
1268 REFERENCE (refmpn_mul_basecase);
1269
1270 p = ¶m[TYPE_MULMID_MN];
1271 COPY (TYPE_MUL_MN);
1272 p->dst_size[0] = SIZE_DIFF_PLUS_3;
1273 REFERENCE (refmpn_mulmid_basecase);
1274
1275 p = ¶m[TYPE_MULMID_N];
1276 COPY (TYPE_MUL_N);
1277 p->size = SIZE_ODD;
1278 p->size2 = SIZE_CEIL_HALF;
1279 p->dst_size[0] = SIZE_DIFF_PLUS_3;
1280 REFERENCE (refmpn_mulmid_n);
1281
1282 p = ¶m[TYPE_UMUL_PPMM];
1283 p->retval = 1;
1284 p->src[0] = 1;
1285 p->dst[0] = 1;
1286 p->dst_size[0] = SIZE_1;
1287 p->overlap = OVERLAP_NONE;
1288 REFERENCE (refmpn_umul_ppmm);
1289
1290 p = ¶m[TYPE_UMUL_PPMM_R];
1291 COPY (TYPE_UMUL_PPMM);
1292 REFERENCE (refmpn_umul_ppmm_r);
1293
1294
1295 p = ¶m[TYPE_RSHIFT];
1296 p->retval = 1;
1297 p->dst[0] = 1;
1298 p->src[0] = 1;
1299 p->shift = 1;
1300 p->overlap = OVERLAP_LOW_TO_HIGH;
1301 REFERENCE (refmpn_rshift);
1302
1303 p = ¶m[TYPE_LSHIFT];
1304 COPY (TYPE_RSHIFT);
1305 p->overlap = OVERLAP_HIGH_TO_LOW;
1306 REFERENCE (refmpn_lshift);
1307
1308 p = ¶m[TYPE_LSHIFTC];
1309 COPY (TYPE_RSHIFT);
1310 p->overlap = OVERLAP_HIGH_TO_LOW;
1311 REFERENCE (refmpn_lshiftc);
1312
1313
1314 p = ¶m[TYPE_POPCOUNT];
1315 p->retval = 1;
1316 p->src[0] = 1;
1317 REFERENCE (refmpn_popcount);
1318
1319 p = ¶m[TYPE_HAMDIST];
1320 COPY (TYPE_POPCOUNT);
1321 p->src[1] = 1;
1322 REFERENCE (refmpn_hamdist);
1323
1324
1325 p = ¶m[TYPE_SBPI1_DIV_QR];
1326 p->retval = 1;
1327 p->dst[0] = 1;
1328 p->dst[1] = 1;
1329 p->src[0] = 1;
1330 p->src[1] = 1;
1331 p->data = DATA_SRC1_HIGHBIT;
1332 p->size2 = 1;
1333 p->dst_size[0] = SIZE_DIFF;
1334 p->overlap = OVERLAP_NONE;
1335 REFERENCE (refmpn_sb_div_qr);
1336
1337 p = ¶m[TYPE_TDIV_QR];
1338 p->dst[0] = 1;
1339 p->dst[1] = 1;
1340 p->src[0] = 1;
1341 p->src[1] = 1;
1342 p->size2 = 1;
1343 p->dst_size[0] = SIZE_DIFF_PLUS_1;
1344 p->dst_size[1] = SIZE_SIZE2;
1345 p->overlap = OVERLAP_NONE;
1346 REFERENCE (refmpn_tdiv_qr);
1347
1348 p = ¶m[TYPE_SQRTREM];
1349 p->retval = 1;
1350 p->dst[0] = 1;
1351 p->dst[1] = 1;
1352 p->src[0] = 1;
1353 p->dst_size[0] = SIZE_CEIL_HALF;
1354 p->dst_size[1] = SIZE_RETVAL;
1355 p->overlap = OVERLAP_NONE;
1356 VALIDATE (validate_sqrtrem);
1357 REFERENCE (refmpn_sqrtrem);
1358
1359 p = ¶m[TYPE_ZERO];
1360 p->dst[0] = 1;
1361 p->size = SIZE_ALLOW_ZERO;
1362 REFERENCE (refmpn_zero);
1363
1364 p = ¶m[TYPE_GET_STR];
1365 p->retval = 1;
1366 p->src[0] = 1;
1367 p->size = SIZE_ALLOW_ZERO;
1368 p->dst[0] = 1;
1369 p->dst[1] = 1;
1370 p->dst_size[0] = SIZE_GET_STR;
1371 p->dst_bytes[0] = 1;
1372 p->overlap = OVERLAP_NONE;
1373 REFERENCE (refmpn_get_str);
1374
1375 p = ¶m[TYPE_BINVERT];
1376 p->dst[0] = 1;
1377 p->src[0] = 1;
1378 p->data = DATA_SRC0_ODD;
1379 p->overlap = OVERLAP_NONE;
1380 REFERENCE (refmpn_binvert);
1381
1382 p = ¶m[TYPE_INVERT];
1383 p->dst[0] = 1;
1384 p->src[0] = 1;
1385 p->data = DATA_SRC0_HIGHBIT;
1386 p->overlap = OVERLAP_NONE;
1387 REFERENCE (refmpn_invert);
1388
1389 #ifdef EXTRA_PARAM_INIT
1390 EXTRA_PARAM_INIT
1391 #endif
1392 }
1393
1394
/* The following are macros if there are no native versions, so wrap them in
   functions that can be placed in choice_array[]. */
1397
1398 void
1399 MPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1400 { MPN_COPY (rp, sp, size); }
1401
1402 void
1403 MPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1404 { MPN_COPY_INCR (rp, sp, size); }
1405
1406 void
1407 MPN_COPY_DECR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1408 { MPN_COPY_DECR (rp, sp, size); }
1409
1410 void
1411 __GMPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1412 { __GMPN_COPY (rp, sp, size); }
1413
1414 #ifdef __GMPN_COPY_INCR
1415 void
1416 __GMPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1417 { __GMPN_COPY_INCR (rp, sp, size); }
1418 #endif
1419
1420 void
1421 mpn_com_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1422 { mpn_com (rp, sp, size); }
1423
1424 void
1425 mpn_and_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1426 { mpn_and_n (rp, s1, s2, size); }
1427
1428 void
1429 mpn_andn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1430 { mpn_andn_n (rp, s1, s2, size); }
1431
1432 void
1433 mpn_nand_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1434 { mpn_nand_n (rp, s1, s2, size); }
1435
1436 void
1437 mpn_ior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1438 { mpn_ior_n (rp, s1, s2, size); }
1439
1440 void
1441 mpn_iorn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1442 { mpn_iorn_n (rp, s1, s2, size); }
1443
1444 void
1445 mpn_nior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1446 { mpn_nior_n (rp, s1, s2, size); }
1447
1448 void
1449 mpn_xor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1450 { mpn_xor_n (rp, s1, s2, size); }
1451
1452 void
1453 mpn_xnor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1454 { mpn_xnor_n (rp, s1, s2, size); }
1455
1456 mp_limb_t
1457 udiv_qrnnd_fun (mp_limb_t *remptr, mp_limb_t n1, mp_limb_t n0, mp_limb_t d)
1458 {
1459 mp_limb_t q;
1460 udiv_qrnnd (q, *remptr, n1, n0, d);
1461 return q;
1462 }
1463
1464 mp_limb_t
1465 mpn_divexact_by3_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1466 {
1467 return mpn_divexact_by3 (rp, sp, size);
1468 }
1469
1470 #if HAVE_NATIVE_mpn_addlsh1_n_ip1
1471 mp_limb_t
1472 mpn_addlsh1_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1473 {
1474 return mpn_addlsh1_n_ip1 (rp, sp, size);
1475 }
1476 #endif
1477 #if HAVE_NATIVE_mpn_addlsh2_n_ip1
1478 mp_limb_t
1479 mpn_addlsh2_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1480 {
1481 return mpn_addlsh2_n_ip1 (rp, sp, size);
1482 }
1483 #endif
1484 #if HAVE_NATIVE_mpn_addlsh_n_ip1
1485 mp_limb_t
1486 mpn_addlsh_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh)
1487 {
1488 return mpn_addlsh_n_ip1 (rp, sp, size, sh);
1489 }
1490 #endif
1491 #if HAVE_NATIVE_mpn_addlsh1_n_ip2
1492 mp_limb_t
1493 mpn_addlsh1_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1494 {
1495 return mpn_addlsh1_n_ip2 (rp, sp, size);
1496 }
1497 #endif
1498 #if HAVE_NATIVE_mpn_addlsh2_n_ip2
1499 mp_limb_t
1500 mpn_addlsh2_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1501 {
1502 return mpn_addlsh2_n_ip2 (rp, sp, size);
1503 }
1504 #endif
1505 #if HAVE_NATIVE_mpn_addlsh_n_ip2
1506 mp_limb_t
1507 mpn_addlsh_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh)
1508 {
1509 return mpn_addlsh_n_ip2 (rp, sp, size, sh);
1510 }
1511 #endif
1512 #if HAVE_NATIVE_mpn_sublsh1_n_ip1
1513 mp_limb_t
1514 mpn_sublsh1_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1515 {
1516 return mpn_sublsh1_n_ip1 (rp, sp, size);
1517 }
1518 #endif
1519 #if HAVE_NATIVE_mpn_sublsh2_n_ip1
1520 mp_limb_t
1521 mpn_sublsh2_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1522 {
1523 return mpn_sublsh2_n_ip1 (rp, sp, size);
1524 }
1525 #endif
1526 #if HAVE_NATIVE_mpn_sublsh_n_ip1
1527 mp_limb_t
1528 mpn_sublsh_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh)
1529 {
1530 return mpn_sublsh_n_ip1 (rp, sp, size, sh);
1531 }
1532 #endif
1533
1534 mp_limb_t
1535 mpn_modexact_1_odd_fun (mp_srcptr ptr, mp_size_t size, mp_limb_t divisor)
1536 {
1537 return mpn_modexact_1_odd (ptr, size, divisor);
1538 }
1539
1540 void
1541 mpn_toom22_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1542 {
1543 mp_ptr tspace;
1544 TMP_DECL;
1545 TMP_MARK;
1546 tspace = TMP_ALLOC_LIMBS (mpn_toom22_mul_itch (size, size));
1547 mpn_toom22_mul (dst, src1, size, src2, size, tspace);
1548 TMP_FREE;
1549 }
1550 void
1551 mpn_toom2_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1552 {
1553 mp_ptr tspace;
1554 TMP_DECL;
1555 TMP_MARK;
1556 tspace = TMP_ALLOC_LIMBS (mpn_toom2_sqr_itch (size));
1557 mpn_toom2_sqr (dst, src, size, tspace);
1558 TMP_FREE;
1559 }
1560 void
1561 mpn_toom33_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1562 {
1563 mp_ptr tspace;
1564 TMP_DECL;
1565 TMP_MARK;
1566 tspace = TMP_ALLOC_LIMBS (mpn_toom33_mul_itch (size, size));
1567 mpn_toom33_mul (dst, src1, size, src2, size, tspace);
1568 TMP_FREE;
1569 }
1570 void
1571 mpn_toom3_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1572 {
1573 mp_ptr tspace;
1574 TMP_DECL;
1575 TMP_MARK;
1576 tspace = TMP_ALLOC_LIMBS (mpn_toom3_sqr_itch (size));
1577 mpn_toom3_sqr (dst, src, size, tspace);
1578 TMP_FREE;
1579 }
1580 void
1581 mpn_toom44_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1582 {
1583 mp_ptr tspace;
1584 TMP_DECL;
1585 TMP_MARK;
1586 tspace = TMP_ALLOC_LIMBS (mpn_toom44_mul_itch (size, size));
1587 mpn_toom44_mul (dst, src1, size, src2, size, tspace);
1588 TMP_FREE;
1589 }
1590 void
1591 mpn_toom4_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1592 {
1593 mp_ptr tspace;
1594 TMP_DECL;
1595 TMP_MARK;
1596 tspace = TMP_ALLOC_LIMBS (mpn_toom4_sqr_itch (size));
1597 mpn_toom4_sqr (dst, src, size, tspace);
1598 TMP_FREE;
1599 }
1600
1601 void
1602 mpn_toom42_mulmid_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
1603 mp_size_t size)
1604 {
1605 mp_ptr tspace;
1606 mp_size_t n;
1607 TMP_DECL;
1608 TMP_MARK;
1609 tspace = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (size));
1610 mpn_toom42_mulmid (dst, src1, src2, size, tspace);
1611 TMP_FREE;
1612 }
1613
1614 mp_limb_t
1615 umul_ppmm_fun (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2)
1616 {
1617 mp_limb_t high;
1618 umul_ppmm (high, *lowptr, m1, m2);
1619 return high;
1620 }
1621
1622 void
1623 MPN_ZERO_fun (mp_ptr ptr, mp_size_t size)
1624 { MPN_ZERO (ptr, size); }
1625
1626
/* One testable routine, as listed in choice_array[].  The members are
   filled positionally by the table initialisers, so their order matters.  */
struct choice_t {
  const char *name;     /* routine name as a string (see TRY / TRY_FUNFUN) */
  tryfun_t function;    /* the routine itself, cast to the generic tryfun_t */
  int type;             /* TYPE_* code; call() switches on it */
  mp_size_t minsize;    /* presumably the smallest operand size to test;
                           0 when omitted from an initialiser -- confirm
                           against the code that reads it */
};
1633
/* TRY(fun) expands to the first two members of a choice_t initialiser: the
   name of FUN as a string and FUN cast to tryfun_t.  TRY_FUNFUN(fun) gives
   the same name but uses the wrapper function fun_fun instead.

   With HAVE_STRINGIZE the ANSI "#" and "##" operators are used.  The
   fallback branch relies on the preprocessor substituting the parameter
   inside a string literal and on an empty comment joining tokens
   (apparently intended for pre-ANSI preprocessors).  */
#if HAVE_STRINGIZE
#define TRY(fun) #fun, (tryfun_t) fun
#define TRY_FUNFUN(fun) #fun, (tryfun_t) fun##_fun
#else
#define TRY(fun) "fun", (tryfun_t) fun
#define TRY_FUNFUN(fun) "fun", (tryfun_t) fun/**/_fun
#endif
1641
/* Table of routines that can be selected for testing.  Each entry gives the
   name and function (through TRY or TRY_FUNFUN), the TYPE_* code, and
   optionally a minsize; entries that omit minsize get 0.  Many entries are
   only present when the corresponding HAVE_NATIVE_* (or similar)
   configuration symbol is set.  EXTRA_ROUTINES, if defined, can append
   further entries.  */
const struct choice_t choice_array[] = {
  /* addition and subtraction */
  { TRY(mpn_add), TYPE_ADD },
  { TRY(mpn_sub), TYPE_SUB },

  { TRY(mpn_add_n), TYPE_ADD_N },
  { TRY(mpn_sub_n), TYPE_SUB_N },

#if HAVE_NATIVE_mpn_add_nc
  { TRY(mpn_add_nc), TYPE_ADD_NC },
#endif
#if HAVE_NATIVE_mpn_sub_nc
  { TRY(mpn_sub_nc), TYPE_SUB_NC },
#endif

#if HAVE_NATIVE_mpn_add_n_sub_n
  { TRY(mpn_add_n_sub_n), TYPE_ADDSUB_N },
#endif
#if HAVE_NATIVE_mpn_add_n_sub_nc
  { TRY(mpn_add_n_sub_nc), TYPE_ADDSUB_NC },
#endif

  { TRY(mpn_add_err1_n), TYPE_ADD_ERR1_N },
  { TRY(mpn_sub_err1_n), TYPE_SUB_ERR1_N },
  { TRY(mpn_add_err2_n), TYPE_ADD_ERR2_N },
  { TRY(mpn_sub_err2_n), TYPE_SUB_ERR2_N },
  { TRY(mpn_add_err3_n), TYPE_ADD_ERR3_N },
  { TRY(mpn_sub_err3_n), TYPE_SUB_ERR3_N },

  /* addmul / submul by one or more limbs */
  { TRY(mpn_addmul_1), TYPE_ADDMUL_1 },
  { TRY(mpn_submul_1), TYPE_SUBMUL_1 },
#if HAVE_NATIVE_mpn_addmul_1c
  { TRY(mpn_addmul_1c), TYPE_ADDMUL_1C },
#endif
#if HAVE_NATIVE_mpn_submul_1c
  { TRY(mpn_submul_1c), TYPE_SUBMUL_1C },
#endif

#if HAVE_NATIVE_mpn_addmul_2
  { TRY(mpn_addmul_2), TYPE_ADDMUL_2, 2 },
#endif
#if HAVE_NATIVE_mpn_addmul_3
  { TRY(mpn_addmul_3), TYPE_ADDMUL_3, 3 },
#endif
#if HAVE_NATIVE_mpn_addmul_4
  { TRY(mpn_addmul_4), TYPE_ADDMUL_4, 4 },
#endif
#if HAVE_NATIVE_mpn_addmul_5
  { TRY(mpn_addmul_5), TYPE_ADDMUL_5, 5 },
#endif
#if HAVE_NATIVE_mpn_addmul_6
  { TRY(mpn_addmul_6), TYPE_ADDMUL_6, 6 },
#endif
#if HAVE_NATIVE_mpn_addmul_7
  { TRY(mpn_addmul_7), TYPE_ADDMUL_7, 7 },
#endif
#if HAVE_NATIVE_mpn_addmul_8
  { TRY(mpn_addmul_8), TYPE_ADDMUL_8, 8 },
#endif

  /* complement and copying */
  { TRY_FUNFUN(mpn_com), TYPE_COM },

  { TRY_FUNFUN(MPN_COPY), TYPE_COPY },
  { TRY_FUNFUN(MPN_COPY_INCR), TYPE_COPYI },
  { TRY_FUNFUN(MPN_COPY_DECR), TYPE_COPYD },

  { TRY_FUNFUN(__GMPN_COPY), TYPE_COPY },
#ifdef __GMPN_COPY_INCR
  { TRY_FUNFUN(__GMPN_COPY_INCR), TYPE_COPYI },
#endif

#if HAVE_NATIVE_mpn_copyi
  { TRY(mpn_copyi), TYPE_COPYI },
#endif
#if HAVE_NATIVE_mpn_copyd
  { TRY(mpn_copyd), TYPE_COPYD },
#endif

  /* conditional and shifted add/sub variants */
  { TRY(mpn_addcnd_n), TYPE_ADDCND_N },
  { TRY(mpn_subcnd_n), TYPE_SUBCND_N },
#if HAVE_NATIVE_mpn_addlsh1_n
  { TRY(mpn_addlsh1_n), TYPE_ADDLSH1_N },
#endif
#if HAVE_NATIVE_mpn_addlsh2_n
  { TRY(mpn_addlsh2_n), TYPE_ADDLSH2_N },
#endif
#if HAVE_NATIVE_mpn_addlsh_n
  { TRY(mpn_addlsh_n), TYPE_ADDLSH_N },
#endif
#if HAVE_NATIVE_mpn_addlsh1_n_ip1
  { TRY_FUNFUN(mpn_addlsh1_n_ip1), TYPE_ADDLSH1_N_IP1 },
#endif
#if HAVE_NATIVE_mpn_addlsh2_n_ip1
  { TRY_FUNFUN(mpn_addlsh2_n_ip1), TYPE_ADDLSH2_N_IP1 },
#endif
#if HAVE_NATIVE_mpn_addlsh_n_ip1
  { TRY_FUNFUN(mpn_addlsh_n_ip1), TYPE_ADDLSH_N_IP1 },
#endif
#if HAVE_NATIVE_mpn_addlsh1_n_ip2
  { TRY_FUNFUN(mpn_addlsh1_n_ip2), TYPE_ADDLSH1_N_IP2 },
#endif
#if HAVE_NATIVE_mpn_addlsh2_n_ip2
  { TRY_FUNFUN(mpn_addlsh2_n_ip2), TYPE_ADDLSH2_N_IP2 },
#endif
#if HAVE_NATIVE_mpn_addlsh_n_ip2
  { TRY_FUNFUN(mpn_addlsh_n_ip2), TYPE_ADDLSH_N_IP2 },
#endif
#if HAVE_NATIVE_mpn_sublsh1_n
  { TRY(mpn_sublsh1_n), TYPE_SUBLSH1_N },
#endif
#if HAVE_NATIVE_mpn_sublsh2_n
  { TRY(mpn_sublsh2_n), TYPE_SUBLSH2_N },
#endif
#if HAVE_NATIVE_mpn_sublsh_n
  { TRY(mpn_sublsh_n), TYPE_SUBLSH_N },
#endif
#if HAVE_NATIVE_mpn_sublsh1_n_ip1
  { TRY_FUNFUN(mpn_sublsh1_n_ip1), TYPE_SUBLSH1_N_IP1 },
#endif
#if HAVE_NATIVE_mpn_sublsh2_n_ip1
  { TRY_FUNFUN(mpn_sublsh2_n_ip1), TYPE_SUBLSH2_N_IP1 },
#endif
#if HAVE_NATIVE_mpn_sublsh_n_ip1
  { TRY_FUNFUN(mpn_sublsh_n_ip1), TYPE_SUBLSH_N_IP1 },
#endif
#if HAVE_NATIVE_mpn_rsblsh1_n
  { TRY(mpn_rsblsh1_n), TYPE_RSBLSH1_N },
#endif
#if HAVE_NATIVE_mpn_rsblsh2_n
  { TRY(mpn_rsblsh2_n), TYPE_RSBLSH2_N },
#endif
#if HAVE_NATIVE_mpn_rsblsh_n
  { TRY(mpn_rsblsh_n), TYPE_RSBLSH_N },
#endif
#if HAVE_NATIVE_mpn_rsh1add_n
  { TRY(mpn_rsh1add_n), TYPE_RSH1ADD_N },
#endif
#if HAVE_NATIVE_mpn_rsh1sub_n
  { TRY(mpn_rsh1sub_n), TYPE_RSH1SUB_N },
#endif

#if HAVE_NATIVE_mpn_addlsh1_nc
  { TRY(mpn_addlsh1_nc), TYPE_ADDLSH1_NC },
#endif
#if HAVE_NATIVE_mpn_addlsh2_nc
  { TRY(mpn_addlsh2_nc), TYPE_ADDLSH2_NC },
#endif
#if HAVE_NATIVE_mpn_addlsh_nc
  { TRY(mpn_addlsh_nc), TYPE_ADDLSH_NC },
#endif
#if HAVE_NATIVE_mpn_sublsh1_nc
  { TRY(mpn_sublsh1_nc), TYPE_SUBLSH1_NC },
#endif
#if HAVE_NATIVE_mpn_sublsh2_nc
  { TRY(mpn_sublsh2_nc), TYPE_SUBLSH2_NC },
#endif
#if HAVE_NATIVE_mpn_sublsh_nc
  { TRY(mpn_sublsh_nc), TYPE_SUBLSH_NC },
#endif
#if HAVE_NATIVE_mpn_rsblsh1_nc
  { TRY(mpn_rsblsh1_nc), TYPE_RSBLSH1_NC },
#endif
#if HAVE_NATIVE_mpn_rsblsh2_nc
  { TRY(mpn_rsblsh2_nc), TYPE_RSBLSH2_NC },
#endif
#if HAVE_NATIVE_mpn_rsblsh_nc
  { TRY(mpn_rsblsh_nc), TYPE_RSBLSH_NC },
#endif

  /* bitwise logical operations */
  { TRY_FUNFUN(mpn_and_n), TYPE_AND_N },
  { TRY_FUNFUN(mpn_andn_n), TYPE_ANDN_N },
  { TRY_FUNFUN(mpn_nand_n), TYPE_NAND_N },
  { TRY_FUNFUN(mpn_ior_n), TYPE_IOR_N },
  { TRY_FUNFUN(mpn_iorn_n), TYPE_IORN_N },
  { TRY_FUNFUN(mpn_nior_n), TYPE_NIOR_N },
  { TRY_FUNFUN(mpn_xor_n), TYPE_XOR_N },
  { TRY_FUNFUN(mpn_xnor_n), TYPE_XNOR_N },

  /* division and remainder by a single limb */
  { TRY(mpn_divrem_1), TYPE_DIVREM_1 },
#if USE_PREINV_DIVREM_1
  { TRY(mpn_preinv_divrem_1), TYPE_PREINV_DIVREM_1 },
#endif
  { TRY(mpn_mod_1), TYPE_MOD_1 },
#if USE_PREINV_MOD_1
  { TRY(mpn_preinv_mod_1), TYPE_PREINV_MOD_1 },
#endif
#if HAVE_NATIVE_mpn_divrem_1c
  { TRY(mpn_divrem_1c), TYPE_DIVREM_1C },
#endif
#if HAVE_NATIVE_mpn_mod_1c
  { TRY(mpn_mod_1c), TYPE_MOD_1C },
#endif
#if GMP_NUMB_BITS % 4 == 0
  { TRY(mpn_mod_34lsub1), TYPE_MOD_34LSUB1 },
#endif

  { TRY_FUNFUN(udiv_qrnnd), TYPE_UDIV_QRNND, 2 },
#if HAVE_NATIVE_mpn_udiv_qrnnd
  { TRY(mpn_udiv_qrnnd), TYPE_UDIV_QRNND, 2 },
#endif
#if HAVE_NATIVE_mpn_udiv_qrnnd_r
  { TRY(mpn_udiv_qrnnd_r), TYPE_UDIV_QRNND_R, 2 },
#endif

  { TRY(mpn_divexact_1), TYPE_DIVEXACT_1 },
  { TRY(mpn_bdiv_q_1), TYPE_BDIV_Q_1 },
  { TRY_FUNFUN(mpn_divexact_by3), TYPE_DIVEXACT_BY3 },
  { TRY(mpn_divexact_by3c), TYPE_DIVEXACT_BY3C },

  { TRY_FUNFUN(mpn_modexact_1_odd), TYPE_MODEXACT_1_ODD },
  { TRY(mpn_modexact_1c_odd), TYPE_MODEXACT_1C_ODD },


  /* multi-limb division */
  { TRY(mpn_sbpi1_div_qr), TYPE_SBPI1_DIV_QR, 3},
  { TRY(mpn_tdiv_qr), TYPE_TDIV_QR },

  /* multiplication by one or more limbs */
  { TRY(mpn_mul_1), TYPE_MUL_1 },
#if HAVE_NATIVE_mpn_mul_1c
  { TRY(mpn_mul_1c), TYPE_MUL_1C },
#endif
#if HAVE_NATIVE_mpn_mul_2
  { TRY(mpn_mul_2), TYPE_MUL_2, 2 },
#endif
#if HAVE_NATIVE_mpn_mul_3
  { TRY(mpn_mul_3), TYPE_MUL_3, 3 },
#endif
#if HAVE_NATIVE_mpn_mul_4
  { TRY(mpn_mul_4), TYPE_MUL_4, 4 },
#endif
#if HAVE_NATIVE_mpn_mul_5
  { TRY(mpn_mul_5), TYPE_MUL_5, 5 },
#endif
#if HAVE_NATIVE_mpn_mul_6
  { TRY(mpn_mul_6), TYPE_MUL_6, 6 },
#endif

  /* shifts */
  { TRY(mpn_rshift), TYPE_RSHIFT },
  { TRY(mpn_lshift), TYPE_LSHIFT },
  { TRY(mpn_lshiftc), TYPE_LSHIFTC },


  /* multiplication and squaring */
  { TRY(mpn_mul_basecase), TYPE_MUL_MN },
  { TRY(mpn_mulmid_basecase), TYPE_MULMID_MN },
  { TRY(mpn_mullo_basecase), TYPE_MULLO_N },
#if SQR_TOOM2_THRESHOLD > 0
  { TRY(mpn_sqr_basecase), TYPE_SQR },
#endif

  { TRY(mpn_mul), TYPE_MUL_MN },
  { TRY(mpn_mul_n), TYPE_MUL_N },
  { TRY(mpn_sqr), TYPE_SQR },

  { TRY_FUNFUN(umul_ppmm), TYPE_UMUL_PPMM, 2 },
#if HAVE_NATIVE_mpn_umul_ppmm
  { TRY(mpn_umul_ppmm), TYPE_UMUL_PPMM, 2 },
#endif
#if HAVE_NATIVE_mpn_umul_ppmm_r
  { TRY(mpn_umul_ppmm_r), TYPE_UMUL_PPMM_R, 2 },
#endif

  { TRY_FUNFUN(mpn_toom22_mul), TYPE_MUL_N, MPN_TOOM22_MUL_MINSIZE },
  { TRY_FUNFUN(mpn_toom2_sqr), TYPE_SQR, MPN_TOOM2_SQR_MINSIZE },
  { TRY_FUNFUN(mpn_toom33_mul), TYPE_MUL_N, MPN_TOOM33_MUL_MINSIZE },
  { TRY_FUNFUN(mpn_toom3_sqr), TYPE_SQR, MPN_TOOM3_SQR_MINSIZE },
  { TRY_FUNFUN(mpn_toom44_mul), TYPE_MUL_N, MPN_TOOM44_MUL_MINSIZE },
  { TRY_FUNFUN(mpn_toom4_sqr), TYPE_SQR, MPN_TOOM4_SQR_MINSIZE },

  { TRY(mpn_mulmid_n), TYPE_MULMID_N, 1 },
  { TRY(mpn_mulmid), TYPE_MULMID_MN, 1 },
  { TRY_FUNFUN(mpn_toom42_mulmid), TYPE_MULMID_N,
    (2 * MPN_TOOM42_MULMID_MINSIZE - 1) },

  /* gcd and the Legendre / Jacobi / Kronecker symbols */
  { TRY(mpn_gcd_1), TYPE_GCD_1 },
  { TRY(mpn_gcd), TYPE_GCD },
  { TRY(mpz_legendre), TYPE_MPZ_LEGENDRE },
  { TRY(mpz_jacobi), TYPE_MPZ_JACOBI },
  { TRY(mpz_kronecker), TYPE_MPZ_KRONECKER },
  { TRY(mpz_kronecker_ui), TYPE_MPZ_KRONECKER_UI },
  { TRY(mpz_kronecker_si), TYPE_MPZ_KRONECKER_SI },
  { TRY(mpz_ui_kronecker), TYPE_MPZ_UI_KRONECKER },
  { TRY(mpz_si_kronecker), TYPE_MPZ_SI_KRONECKER },

  /* miscellaneous */
  { TRY(mpn_popcount), TYPE_POPCOUNT },
  { TRY(mpn_hamdist), TYPE_HAMDIST },

  { TRY(mpn_sqrtrem), TYPE_SQRTREM },

  { TRY_FUNFUN(MPN_ZERO), TYPE_ZERO },

  { TRY(mpn_get_str), TYPE_GET_STR },

  { TRY(mpn_binvert), TYPE_BINVERT },
  { TRY(mpn_invert), TYPE_INVERT },

#ifdef EXTRA_ROUTINES
  EXTRA_ROUTINES
#endif
};
1939
/* The choice_array[] entry describing the routine under test (call()
   switches on choice->type); starts out NULL.  */
const struct choice_t *choice = NULL;
1941
1942
1943 void
1944 mprotect_maybe (void *addr, size_t len, int prot)
1945 {
1946 if (!option_redzones)
1947 return;
1948
1949 #if HAVE_MPROTECT
1950 if (mprotect (addr, len, prot) != 0)
1951 {
1952 fprintf (stderr, "Cannot mprotect %p 0x%X 0x%X: %s\n",
1953 addr, (unsigned) len, prot, strerror (errno));
1954 exit (1);
1955 }
1956 #else
1957 {
1958 static int warned = 0;
1959 if (!warned)
1960 {
1961 fprintf (stderr,
1962 "mprotect not available, bounds testing not performed\n");
1963 warned = 1;
1964 }
1965 }
1966 #endif
1967 }
1968
/* Round "a" up to a multiple of "m" and return the result; "a" is returned
   unchanged when it is already a multiple.  "m" must be non-zero.

   The remainder is held in a size_t so it cannot be truncated on systems
   where unsigned long is narrower than size_t.  */
size_t
round_up_multiple (size_t a, size_t m)
{
  size_t  r;

  r = a % m;
  if (r == 0)
    return a;
  else
    return a + (m - r);
}
1981
1982
1983 /* On some systems it seems that only an mmap'ed region can be mprotect'ed,
1984 for instance HP-UX 10.
1985
1986 mmap will almost certainly return a pointer already aligned to a page
1987 boundary, but it's easy enough to share the alignment handling with the
1988 malloc case. */
1989
1990 void
1991 malloc_region (struct region_t *r, mp_size_t n)
1992 {
1993 mp_ptr p;
1994 size_t nbytes;
1995
1996 ASSERT ((pagesize % BYTES_PER_MP_LIMB) == 0);
1997
1998 n = round_up_multiple (n, PAGESIZE_LIMBS);
1999 r->size = n;
2000
2001 nbytes = n*BYTES_PER_MP_LIMB + 2*REDZONE_BYTES + pagesize;
2002
2003 #if defined (MAP_ANONYMOUS) && ! defined (MAP_ANON)
2004 #define MAP_ANON MAP_ANONYMOUS
2005 #endif
2006
2007 #if HAVE_MMAP && defined (MAP_ANON)
2008 /* note must pass fd=-1 for MAP_ANON on BSD */
2009 p = (mp_ptr) mmap (NULL, nbytes, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
2010 if (p == (void *) -1)
2011 {
2012 fprintf (stderr, "Cannot mmap %#x anon bytes: %s\n",
2013 (unsigned) nbytes, strerror (errno));
2014 exit (1);
2015 }
2016 #else
2017 p = (mp_ptr) malloc (nbytes);
2018 ASSERT_ALWAYS (p != NULL);
2019 #endif
2020
2021 p = (mp_ptr) align_pointer (p, pagesize);
2022
2023 mprotect_maybe (p, REDZONE_BYTES, PROT_NONE);
2024 p += REDZONE_LIMBS;
2025 r->ptr = p;
2026
2027 mprotect_maybe (p + n, REDZONE_BYTES, PROT_NONE);
2028 }
2029
2030 void
2031 mprotect_region (const struct region_t *r, int prot)
2032 {
2033 mprotect_maybe (r->ptr, r->size, prot);
2034 }
2035
2036
2037 /* First four entries must be 0,1,2,3 for the benefit of CARRY_BIT, CARRY_3,
2038 and CARRY_4 */
/* Fixed carry values stepped through by CARRY_ITERATION before it moves on
   to random ones.  CARRY_COUNT limits CARRY_BIT, CARRY_3 and CARRY_4 to the
   first 2, 3 and 4 entries respectively.  */
mp_limb_t carry_array[] = {
  0, 1, 2, 3,
  4,
  CNST_LIMB(1) << 8,
  CNST_LIMB(1) << 16,
  GMP_NUMB_MAX
};
/* current position of CARRY_ITERATION */
int carry_index;
2047
/* Number of carry values to try for the current tr->carry setting: 2 for
   CARRY_BIT, 3 for CARRY_3, 4 for CARRY_4, all of carry_array plus
   CARRY_RANDOMS further values for CARRY_LIMB and CARRY_DIVISOR, and
   otherwise just 1.  */
#define CARRY_COUNT \
  ((tr->carry == CARRY_BIT) ? 2 \
   : tr->carry == CARRY_3 ? 3 \
   : tr->carry == CARRY_4 ? 4 \
   : (tr->carry == CARRY_LIMB || tr->carry == CARRY_DIVISOR) \
   ? numberof(carry_array) + CARRY_RANDOMS \
   : 1)

/* Fill dst,size with random data, using refmpn_random for an odd index and
   refmpn_random2 for an even one.  */
#define MPN_RANDOM_ALT(index,dst,size) \
  (((index) & 1) ? refmpn_random (dst, size) : refmpn_random2 (dst, size))

/* Loop header stepping "carry" through CARRY_COUNT values: first the
   entries of carry_array, then random limbs.  When tr->carry is
   CARRY_DIVISOR each value is reduced modulo "divisor".  All the work is
   done by comma expressions in the loop condition, whose final operand is
   the actual termination test.

   The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
   the same type */
#define CARRY_ITERATION \
  for (carry_index = 0; \
       (carry_index < numberof (carry_array) \
        ? (carry = carry_array[carry_index]) \
        : (MPN_RANDOM_ALT (carry_index, &carry, 1), (mp_limb_t) 0)), \
       (tr->carry == CARRY_DIVISOR ? carry %= divisor : 0), \
       carry_index < CARRY_COUNT; \
       carry_index++)
2069
2070
/* Fixed multiplier values stepped through by MULTIPLIER_ITERATION before it
   moves on to random ones.  */
mp_limb_t multiplier_array[] = {
  0, 1, 2, 3,
  CNST_LIMB(1) << 8,
  CNST_LIMB(1) << 16,
  GMP_NUMB_MAX - 2,
  GMP_NUMB_MAX - 1,
  GMP_NUMB_MAX
};
/* current position of MULTIPLIER_ITERATION */
int multiplier_index;
2080
/* Fixed divisor values stepped through by DIVISOR_ITERATION before it moves
   on to random ones.  Zero is not among them.  */
mp_limb_t divisor_array[] = {
  1, 2, 3,
  CNST_LIMB(1) << 8,
  CNST_LIMB(1) << 16,
  CNST_LIMB(1) << (GMP_NUMB_BITS/2 - 1),
  GMP_NUMB_MAX >> (GMP_NUMB_BITS/2),
  GMP_NUMB_HIGHBIT,
  GMP_NUMB_HIGHBIT + 1,
  GMP_NUMB_MAX - 2,
  GMP_NUMB_MAX - 1,
  GMP_NUMB_MAX
};

/* current position of DIVISOR_ITERATION */
int divisor_index;
2095
/* Loop header stepping "var" through "limit" values using "index": first
   the entries of "array", then random limbs from MPN_RANDOM_ALT.  The
   "randoms" and "cond" parameters are accepted but not used in the
   expansion.

   The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
   the same type */
#define ARRAY_ITERATION(var, index, limit, array, randoms, cond) \
  for (index = 0; \
       (index < numberof (array) \
        ? (var = array[index]) \
        : (MPN_RANDOM_ALT (index, &var, 1), (mp_limb_t) 0)), \
       index < limit; \
       index++)

/* Number of multipliers to try: the whole of multiplier_array plus
   MULTIPLIER_RANDOMS random ones when tr->multiplier is set, else 1.  */
#define MULTIPLIER_COUNT \
  (tr->multiplier \
   ? numberof (multiplier_array) + MULTIPLIER_RANDOMS \
   : 1)

/* Loop header stepping "multiplier" through MULTIPLIER_COUNT values.  */
#define MULTIPLIER_ITERATION \
  ARRAY_ITERATION(multiplier, multiplier_index, MULTIPLIER_COUNT, \
                  multiplier_array, MULTIPLIER_RANDOMS, TRY_MULTIPLIER)

/* Number of divisors to try: the whole of divisor_array plus
   DIVISOR_RANDOMS random ones when tr->divisor is set, else 1.  */
#define DIVISOR_COUNT \
  (tr->divisor \
   ? numberof (divisor_array) + DIVISOR_RANDOMS \
   : 1)

/* Loop header stepping "divisor" through DIVISOR_COUNT values.  */
#define DIVISOR_ITERATION \
  ARRAY_ITERATION(divisor, divisor_index, DIVISOR_COUNT, divisor_array, \
                  DIVISOR_RANDOMS, TRY_DIVISOR)
2123
2124
2125 /* overlap_array[].s[i] is where s[i] should be, 0 or 1 means overlapping
2126 d[0] or d[1] respectively, -1 means a separate (write-protected)
2127 location. */
2128
/* One source/destination placement.  Each row of overlap_array gives five
   values, so NUM_SOURCES is presumably 5 -- confirm against its
   definition.  OVERLAP_COUNT determines how many of the leading rows are
   used, so the order of the rows matters.  */
struct overlap_t {
  int s[NUM_SOURCES];
} overlap_array[] = {
  { { -1, -1, -1, -1, -1 } },  /* no overlaps at all */
  { {  0, -1, -1, -1, -1 } },  /* s[0] on d[0] */
  { { -1,  0, -1, -1, -1 } },  /* s[1] on d[0] */
  { {  0,  0, -1, -1, -1 } },  /* s[0] and s[1] both on d[0] */
  { {  1, -1, -1, -1, -1 } },  /* s[0] on d[1] */
  { { -1,  1, -1, -1, -1 } },  /* s[1] on d[1] */
  { {  1,  1, -1, -1, -1 } },  /* s[0] and s[1] both on d[1] */
  { {  0,  1, -1, -1, -1 } },  /* s[0] on d[0], s[1] on d[1] */
  { {  1,  0, -1, -1, -1 } },  /* s[0] on d[1], s[1] on d[0] */
};

/* current row for OVERLAP_ITERATION, and the end marker for its loop */
struct overlap_t *overlap, *overlap_limit;
2144
/* Number of leading overlap_array rows to use for the current test.  The
   OVERLAP_* flags in tr->overlap are checked first, in the order written;
   when none of them is set the count follows from which of dst[1], src[1]
   and dst[0] the test uses.  */
#define OVERLAP_COUNT \
  (tr->overlap & OVERLAP_NONE ? 1 \
   : tr->overlap & OVERLAP_NOT_SRCS ? 3 \
   : tr->overlap & OVERLAP_NOT_SRC2 ? 2 \
   : tr->overlap & OVERLAP_NOT_DST2 ? 4 \
   : tr->dst[1] ? 9 \
   : tr->src[1] ? 4 \
   : tr->dst[0] ? 2 \
   : 1)

/* Loop header stepping "overlap" through the first OVERLAP_COUNT rows of
   overlap_array.  */
#define OVERLAP_ITERATION \
  for (overlap = &overlap_array[0], \
       overlap_limit = &overlap_array[OVERLAP_COUNT]; \
       overlap < overlap_limit; \
       overlap++)
2160
2161
/* presumably the radix used by the get_str tests -- confirm against the
   code that reads it */
int base = 10;

/* Number of t_rand settings stepped through by T_RAND_ITERATION.  */
#define T_RAND_COUNT 2
/* Selects the generator t_random() uses for DATA_TRAND: 0 for
   refmpn_random, 1 for refmpn_random2.  */
int t_rand;
2166
2167 void
2168 t_random (mp_ptr ptr, mp_size_t n)
2169 {
2170 if (n == 0)
2171 return;
2172
2173 switch (option_data) {
2174 case DATA_TRAND:
2175 switch (t_rand) {
2176 case 0: refmpn_random (ptr, n); break;
2177 case 1: refmpn_random2 (ptr, n); break;
2178 default: abort();
2179 }
2180 break;
2181 case DATA_SEQ:
2182 {
2183 static mp_limb_t counter = 0;
2184 mp_size_t i;
2185 for (i = 0; i < n; i++)
2186 ptr[i] = ++counter;
2187 }
2188 break;
2189 case DATA_ZEROS:
2190 refmpn_zero (ptr, n);
2191 break;
2192 case DATA_FFS:
2193 refmpn_fill (ptr, n, GMP_NUMB_MAX);
2194 break;
2195 case DATA_2FD:
2196 /* Special value 0x2FFF...FFFD, which divided by 3 gives 0xFFF...FFF,
2197 inducing the q1_ff special case in the mul-by-inverse part of some
2198 versions of divrem_1 and mod_1. */
2199 refmpn_fill (ptr, n, (mp_limb_t) -1);
2200 ptr[n-1] = 2;
2201 ptr[0] -= 2;
2202 break;
2203
2204 default:
2205 abort();
2206 }
2207 }
2208 #define T_RAND_ITERATION \
2209 for (t_rand = 0; t_rand < T_RAND_COUNT; t_rand++)
2210
2211
2212 void
2213 print_each (const struct each_t *e)
2214 {
2215 int i;
2216
2217 printf ("%s %s\n", e->name, e == &ref ? tr->reference_name : choice->name);
2218 if (tr->retval)
2219 mpn_trace (" retval", &e->retval, 1);
2220
2221 for (i = 0; i < NUM_DESTS; i++)
2222 {
2223 if (tr->dst[i])
2224 {
2225 if (tr->dst_bytes[i])
2226 byte_tracen (" d[%d]", i, e->d[i].p, d[i].size);
2227 else
2228 mpn_tracen (" d[%d]", i, e->d[i].p, d[i].size);
2229 printf (" located %p\n", (void *) (e->d[i].p));
2230 }
2231 }
2232
2233 for (i = 0; i < NUM_SOURCES; i++)
2234 if (tr->src[i])
2235 printf (" s[%d] located %p\n", i, (void *) (e->s[i].p));
2236 }
2237
2238
2239 void
2240 print_all (void)
2241 {
2242 int i;
2243
2244 printf ("\n");
2245 printf ("size %ld\n", (long) size);
2246 if (tr->size2)
2247 printf ("size2 %ld\n", (long) size2);
2248
2249 for (i = 0; i < NUM_DESTS; i++)
2250 if (d[i].size != size)
2251 printf ("d[%d].size %ld\n", i, (long) d[i].size);
2252
2253 if (tr->multiplier)
2254 mpn_trace (" multiplier", &multiplier, 1);
2255 if (tr->divisor)
2256 mpn_trace (" divisor", &divisor, 1);
2257 if (tr->shift)
2258 printf (" shift %lu\n", shift);
2259 if (tr->carry)
2260 mpn_trace (" carry", &carry, 1);
2261 if (tr->msize)
2262 mpn_trace (" multiplier_N", multiplier_N, tr->msize);
2263
2264 for (i = 0; i < NUM_DESTS; i++)
2265 if (tr->dst[i])
2266 printf (" d[%d] %s, align %ld, size %ld\n",
2267 i, d[i].high ? "high" : "low",
2268 (long) d[i].align, (long) d[i].size);
2269
2270 for (i = 0; i < NUM_SOURCES; i++)
2271 {
2272 if (tr->src[i])
2273 {
2274 printf (" s[%d] %s, align %ld, ",
2275 i, s[i].high ? "high" : "low", (long) s[i].align);
2276 switch (overlap->s[i]) {
2277 case -1:
2278 printf ("no overlap\n");
2279 break;
2280 default:
2281 printf ("==d[%d]%s\n",
2282 overlap->s[i],
2283 tr->overlap == OVERLAP_LOW_TO_HIGH ? "+a"
2284 : tr->overlap == OVERLAP_HIGH_TO_LOW ? "-a"
2285 : "");
2286 break;
2287 }
2288 printf (" s[%d]=", i);
2289 if (tr->carry_sign && (carry & (1 << i)))
2290 printf ("-");
2291 mpn_trace (NULL, s[i].p, SRC_SIZE(i));
2292 }
2293 }
2294
2295 if (tr->dst0_from_src1)
2296 mpn_trace (" d[0]", s[1].region.ptr, size);
2297
2298 if (tr->reference)
2299 print_each (&ref);
2300 print_each (&fun);
2301 }
2302
2303 void
2304 compare (void)
2305 {
2306 int error = 0;
2307 int i;
2308
2309 if (tr->retval && ref.retval != fun.retval)
2310 {
2311 gmp_printf ("Different return values (%Mu, %Mu)\n",
2312 ref.retval, fun.retval);
2313 error = 1;
2314 }
2315
2316 for (i = 0; i < NUM_DESTS; i++)
2317 {
2318 switch (tr->dst_size[i]) {
2319 case SIZE_RETVAL:
2320 case SIZE_GET_STR:
2321 d[i].size = ref.retval;
2322 break;
2323 }
2324 }
2325
2326 for (i = 0; i < NUM_DESTS; i++)
2327 {
2328 if (! tr->dst[i])
2329 continue;
2330
2331 if (tr->dst_bytes[i])
2332 {
2333 if (memcmp (ref.d[i].p, fun.d[i].p, d[i].size) != 0)
2334 {
2335 printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
2336 i,
2337 (long) byte_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),
2338 (long) byte_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));
2339 error = 1;
2340 }
2341 }
2342 else
2343 {
2344 if (d[i].size != 0
2345 && ! refmpn_equal_anynail (ref.d[i].p, fun.d[i].p, d[i].size))
2346 {
2347 printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
2348 i,
2349 (long) mpn_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),
2350 (long) mpn_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));
2351 error = 1;
2352 }
2353 }
2354 }
2355
2356 if (error)
2357 {
2358 print_all();
2359 abort();
2360 }
2361 }
2362
2363
2364 /* The functions are cast if the return value should be a long rather than
2365 the default mp_limb_t. This is necessary under _LONG_LONG_LIMB. This
2366 might not be enough if some actual calling conventions checking is
2367 implemented on a long long limb system. */
2368
2369 void
2370 call (struct each_t *e, tryfun_t function)
2371 {
2372 switch (choice->type) {
2373 case TYPE_ADD:
2374 case TYPE_SUB:
2375 e->retval = CALLING_CONVENTIONS (function)
2376 (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
2377 break;
2378
2379 case TYPE_ADD_N:
2380 case TYPE_SUB_N:
2381 case TYPE_ADDLSH1_N:
2382 case TYPE_ADDLSH2_N:
2383 case TYPE_SUBLSH1_N:
2384 case TYPE_SUBLSH2_N:
2385 case TYPE_RSBLSH1_N:
2386 case TYPE_RSBLSH2_N:
2387 case TYPE_RSH1ADD_N:
2388 case TYPE_RSH1SUB_N:
2389 e->retval = CALLING_CONVENTIONS (function)
2390 (e->d[0].p, e->s[0].p, e->s[1].p, size);
2391 break;
2392 case TYPE_ADDLSH_N:
2393 case TYPE_SUBLSH_N:
2394 case TYPE_RSBLSH_N:
2395 e->retval = CALLING_CONVENTIONS (function)
2396 (e->d[0].p, e->s[0].p, e->s[1].p, size, shift);
2397 break;
2398 case TYPE_ADDLSH_NC:
2399 case TYPE_SUBLSH_NC:
2400 case TYPE_RSBLSH_NC:
2401 e->retval = CALLING_CONVENTIONS (function)
2402 (e->d[0].p, e->s[0].p, e->s[1].p, size, shift, carry);
2403 break;
2404 case TYPE_ADDLSH1_NC:
2405 case TYPE_ADDLSH2_NC:
2406 case TYPE_SUBLSH1_NC:
2407 case TYPE_SUBLSH2_NC:
2408 case TYPE_RSBLSH1_NC:
2409 case TYPE_RSBLSH2_NC:
2410 case TYPE_ADD_NC:
2411 case TYPE_SUB_NC:
2412 case TYPE_ADDCND_N:
2413 case TYPE_SUBCND_N:
2414 e->retval = CALLING_CONVENTIONS (function)
2415 (e->d[0].p, e->s[0].p, e->s[1].p, size, carry);
2416 break;
2417 case TYPE_ADD_ERR1_N:
2418 case TYPE_SUB_ERR1_N:
2419 e->retval = CALLING_CONVENTIONS (function)
2420 (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, size, carry);
2421 break;
2422 case TYPE_ADD_ERR2_N:
2423 case TYPE_SUB_ERR2_N:
2424 e->retval = CALLING_CONVENTIONS (function)
2425 (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, e->s[3].p, size, carry);
2426 break;
2427 case TYPE_ADD_ERR3_N:
2428 case TYPE_SUB_ERR3_N:
2429 e->retval = CALLING_CONVENTIONS (function)
2430 (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, e->s[3].p, e->s[4].p, size, carry);
2431 break;
2432
2433 case TYPE_MUL_1:
2434 case TYPE_ADDMUL_1:
2435 case TYPE_SUBMUL_1:
2436 e->retval = CALLING_CONVENTIONS (function)
2437 (e->d[0].p, e->s[0].p, size, multiplier);
2438 break;
2439 case TYPE_MUL_1C:
2440 case TYPE_ADDMUL_1C:
2441 case TYPE_SUBMUL_1C:
2442 e->retval = CALLING_CONVENTIONS (function)
2443 (e->d[0].p, e->s[0].p, size, multiplier, carry);
2444 break;
2445
2446 case TYPE_MUL_2:
2447 case TYPE_MUL_3:
2448 case TYPE_MUL_4:
2449 case TYPE_MUL_5:
2450 case TYPE_MUL_6:
2451 if (size == 1)
2452 abort ();
2453 e->retval = CALLING_CONVENTIONS (function)
2454 (e->d[0].p, e->s[0].p, size, multiplier_N);
2455 break;
2456
2457 case TYPE_ADDMUL_2:
2458 case TYPE_ADDMUL_3:
2459 case TYPE_ADDMUL_4:
2460 case TYPE_ADDMUL_5:
2461 case TYPE_ADDMUL_6:
2462 case TYPE_ADDMUL_7:
2463 case TYPE_ADDMUL_8:
2464 if (size == 1)
2465 abort ();
2466 e->retval = CALLING_CONVENTIONS (function)
2467 (e->d[0].p, e->s[0].p, size, multiplier_N);
2468 break;
2469
2470 case TYPE_AND_N:
2471 case TYPE_ANDN_N:
2472 case TYPE_NAND_N:
2473 case TYPE_IOR_N:
2474 case TYPE_IORN_N:
2475 case TYPE_NIOR_N:
2476 case TYPE_XOR_N:
2477 case TYPE_XNOR_N:
2478 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
2479 break;
2480
2481 case TYPE_ADDSUB_N:
2482 e->retval = CALLING_CONVENTIONS (function)
2483 (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size);
2484 break;
2485 case TYPE_ADDSUB_NC:
2486 e->retval = CALLING_CONVENTIONS (function)
2487 (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size, carry);
2488 break;
2489
2490 case TYPE_COPY:
2491 case TYPE_COPYI:
2492 case TYPE_COPYD:
2493 case TYPE_COM:
2494 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2495 break;
2496
2497 case TYPE_ADDLSH1_N_IP1:
2498 case TYPE_ADDLSH2_N_IP1:
2499 case TYPE_ADDLSH1_N_IP2:
2500 case TYPE_ADDLSH2_N_IP2:
2501 case TYPE_SUBLSH1_N_IP1:
2502 case TYPE_SUBLSH2_N_IP1:
2503 case TYPE_DIVEXACT_BY3:
2504 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2505 break;
2506 case TYPE_DIVEXACT_BY3C:
2507 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size,
2508 carry);
2509 break;
2510
2511
2512 case TYPE_DIVMOD_1:
2513 case TYPE_DIVEXACT_1:
2514 case TYPE_BDIV_Q_1:
2515 e->retval = CALLING_CONVENTIONS (function)
2516 (e->d[0].p, e->s[0].p, size, divisor);
2517 break;
2518 case TYPE_DIVMOD_1C:
2519 e->retval = CALLING_CONVENTIONS (function)
2520 (e->d[0].p, e->s[0].p, size, divisor, carry);
2521 break;
2522 case TYPE_DIVREM_1:
2523 e->retval = CALLING_CONVENTIONS (function)
2524 (e->d[0].p, size2, e->s[0].p, size, divisor);
2525 break;
2526 case TYPE_DIVREM_1C:
2527 e->retval = CALLING_CONVENTIONS (function)
2528 (e->d[0].p, size2, e->s[0].p, size, divisor, carry);
2529 break;
2530 case TYPE_PREINV_DIVREM_1:
2531 {
2532 mp_limb_t dinv;
2533 unsigned shift;
2534 shift = refmpn_count_leading_zeros (divisor);
2535 dinv = refmpn_invert_limb (divisor << shift);
2536 e->retval = CALLING_CONVENTIONS (function)
2537 (e->d[0].p, size2, e->s[0].p, size, divisor, dinv, shift);
2538 }
2539 break;
2540 case TYPE_MOD_1:
2541 case TYPE_MODEXACT_1_ODD:
2542 e->retval = CALLING_CONVENTIONS (function)
2543 (e->s[0].p, size, divisor);
2544 break;
2545 case TYPE_MOD_1C:
2546 case TYPE_MODEXACT_1C_ODD:
2547 e->retval = CALLING_CONVENTIONS (function)
2548 (e->s[0].p, size, divisor, carry);
2549 break;
2550 case TYPE_PREINV_MOD_1:
2551 e->retval = CALLING_CONVENTIONS (function)
2552 (e->s[0].p, size, divisor, refmpn_invert_limb (divisor));
2553 break;
2554 case TYPE_MOD_34LSUB1:
2555 e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size);
2556 break;
2557
2558 case TYPE_UDIV_QRNND:
2559 e->retval = CALLING_CONVENTIONS (function)
2560 (e->d[0].p, e->s[0].p[1], e->s[0].p[0], divisor);
2561 break;
2562 case TYPE_UDIV_QRNND_R:
2563 e->retval = CALLING_CONVENTIONS (function)
2564 (e->s[0].p[1], e->s[0].p[0], divisor, e->d[0].p);
2565 break;
2566
2567 case TYPE_SBPI1_DIV_QR:
2568 {
2569 gmp_pi1_t dinv;
2570 invert_pi1 (dinv, e->s[1].p[size2-1], e->s[1].p[size2-2]); /* FIXME: use refinvert_pi1 */
2571 refmpn_copyi (e->d[1].p, e->s[0].p, size); /* dividend */
2572 refmpn_fill (e->d[0].p, size-size2, 0x98765432); /* quotient */
2573 e->retval = CALLING_CONVENTIONS (function)
2574 (e->d[0].p, e->d[1].p, size, e->s[1].p, size2, dinv.inv32);
2575 refmpn_zero (e->d[1].p+size2, size-size2); /* excess over remainder */
2576 }
2577 break;
2578
2579 case TYPE_TDIV_QR:
2580 CALLING_CONVENTIONS (function) (e->d[0].p, e->d[1].p, 0,
2581 e->s[0].p, size, e->s[1].p, size2);
2582 break;
2583
2584 case TYPE_GCD_1:
2585 /* Must have a non-zero src, but this probably isn't the best way to do
2586 it. */
2587 if (refmpn_zero_p (e->s[0].p, size))
2588 e->retval = 0;
2589 else
2590 e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size, divisor);
2591 break;
2592
2593 case TYPE_GCD:
2594 /* Sources are destroyed, so they're saved and replaced, but a general
2595 approach to this might be better. Note that it's still e->s[0].p and
2596 e->s[1].p that are passed, to get the desired alignments. */
2597 {
2598 mp_ptr s0 = refmpn_malloc_limbs (size);
2599 mp_ptr s1 = refmpn_malloc_limbs (size2);
2600 refmpn_copyi (s0, e->s[0].p, size);
2601 refmpn_copyi (s1, e->s[1].p, size2);
2602
2603 mprotect_region (&s[0].region, PROT_READ|PROT_WRITE);
2604 mprotect_region (&s[1].region, PROT_READ|PROT_WRITE);
2605 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p,
2606 e->s[0].p, size,
2607 e->s[1].p, size2);
2608 refmpn_copyi (e->s[0].p, s0, size);
2609 refmpn_copyi (e->s[1].p, s1, size2);
2610 free (s0);
2611 free (s1);
2612 }
2613 break;
2614
2615 case TYPE_GCD_FINDA:
2616 {
2617 /* FIXME: do this with a flag */
2618 mp_limb_t c[2];
2619 c[0] = e->s[0].p[0];
2620 c[0] += (c[0] == 0);
2621 c[1] = e->s[0].p[0];
2622 c[1] += (c[1] == 0);
2623 e->retval = CALLING_CONVENTIONS (function) (c);
2624 }
2625 break;
2626
2627 case TYPE_MPZ_LEGENDRE:
2628 case TYPE_MPZ_JACOBI:
2629 {
2630 mpz_t a, b;
2631 PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
2632 PTR(b) = e->s[1].p; SIZ(b) = size2;
2633 e->retval = CALLING_CONVENTIONS (function) (a, b);
2634 }
2635 break;
2636 case TYPE_MPZ_KRONECKER:
2637 {
2638 mpz_t a, b;
2639 PTR(a) = e->s[0].p; SIZ(a) = ((carry&1)==0 ? size : -size);
2640 PTR(b) = e->s[1].p; SIZ(b) = ((carry&2)==0 ? size2 : -size2);
2641 e->retval = CALLING_CONVENTIONS (function) (a, b);
2642 }
2643 break;
2644 case TYPE_MPZ_KRONECKER_UI:
2645 {
2646 mpz_t a;
2647 PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
2648 e->retval = CALLING_CONVENTIONS(function) (a, (unsigned long)multiplier);
2649 }
2650 break;
2651 case TYPE_MPZ_KRONECKER_SI:
2652 {
2653 mpz_t a;
2654 PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
2655 e->retval = CALLING_CONVENTIONS (function) (a, (long) multiplier);
2656 }
2657 break;
2658 case TYPE_MPZ_UI_KRONECKER:
2659 {
2660 mpz_t b;
2661 PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
2662 e->retval = CALLING_CONVENTIONS(function) ((unsigned long)multiplier, b);
2663 }
2664 break;
2665 case TYPE_MPZ_SI_KRONECKER:
2666 {
2667 mpz_t b;
2668 PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
2669 e->retval = CALLING_CONVENTIONS (function) ((long) multiplier, b);
2670 }
2671 break;
2672
2673 case TYPE_MUL_MN:
2674 case TYPE_MULMID_MN:
2675 CALLING_CONVENTIONS (function)
2676 (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
2677 break;
2678 case TYPE_MUL_N:
2679 case TYPE_MULLO_N:
2680 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
2681 break;
2682 case TYPE_MULMID_N:
2683 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p,
2684 (size + 1) / 2);
2685 break;
2686 case TYPE_SQR:
2687 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2688 break;
2689
2690 case TYPE_UMUL_PPMM:
2691 e->retval = CALLING_CONVENTIONS (function)
2692 (e->d[0].p, e->s[0].p[0], e->s[0].p[1]);
2693 break;
2694 case TYPE_UMUL_PPMM_R:
2695 e->retval = CALLING_CONVENTIONS (function)
2696 (e->s[0].p[0], e->s[0].p[1], e->d[0].p);
2697 break;
2698
2699 case TYPE_ADDLSH_N_IP1:
2700 case TYPE_ADDLSH_N_IP2:
2701 case TYPE_SUBLSH_N_IP1:
2702 case TYPE_LSHIFT:
2703 case TYPE_LSHIFTC:
2704 case TYPE_RSHIFT:
2705 e->retval = CALLING_CONVENTIONS (function)
2706 (e->d[0].p, e->s[0].p, size, shift);
2707 break;
2708
2709 case TYPE_POPCOUNT:
2710 e->retval = (* (unsigned long (*)(ANYARGS))
2711 CALLING_CONVENTIONS (function)) (e->s[0].p, size);
2712 break;
2713 case TYPE_HAMDIST:
2714 e->retval = (* (unsigned long (*)(ANYARGS))
2715 CALLING_CONVENTIONS (function)) (e->s[0].p, e->s[1].p, size);
2716 break;
2717
2718 case TYPE_SQRTREM:
2719 e->retval = (* (long (*)(ANYARGS)) CALLING_CONVENTIONS (function))
2720 (e->d[0].p, e->d[1].p, e->s[0].p, size);
2721 break;
2722
2723 case TYPE_ZERO:
2724 CALLING_CONVENTIONS (function) (e->d[0].p, size);
2725 break;
2726
2727 case TYPE_GET_STR:
2728 {
2729 size_t sizeinbase, fill;
2730 char *dst;
2731 MPN_SIZEINBASE (sizeinbase, e->s[0].p, size, base);
2732 ASSERT_ALWAYS (sizeinbase <= d[0].size);
2733 fill = d[0].size - sizeinbase;
2734 if (d[0].high)
2735 {
2736 memset (e->d[0].p, 0xBA, fill);
2737 dst = (char *) e->d[0].p + fill;
2738 }
2739 else
2740 {
2741 dst = (char *) e->d[0].p;
2742 memset (dst + sizeinbase, 0xBA, fill);
2743 }
2744 if (POW2_P (base))
2745 {
2746 e->retval = CALLING_CONVENTIONS (function) (dst, base,
2747 e->s[0].p, size);
2748 }
2749 else
2750 {
2751 refmpn_copy (e->d[1].p, e->s[0].p, size);
2752 e->retval = CALLING_CONVENTIONS (function) (dst, base,
2753 e->d[1].p, size);
2754 }
2755 refmpn_zero (e->d[1].p, size); /* clobbered or unused */
2756 }
2757 break;
2758
2759 case TYPE_INVERT:
2760 {
2761 mp_ptr scratch;
2762 TMP_DECL;
2763 TMP_MARK;
2764 scratch = TMP_ALLOC_LIMBS (mpn_invert_itch (size));
2765 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch);
2766 TMP_FREE;
2767 }
2768 break;
2769 case TYPE_BINVERT:
2770 {
2771 mp_ptr scratch;
2772 TMP_DECL;
2773 TMP_MARK;
2774 scratch = TMP_ALLOC_LIMBS (mpn_binvert_itch (size));
2775 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch);
2776 TMP_FREE;
2777 }
2778 break;
2779
2780 #ifdef EXTRA_CALL
2781 EXTRA_CALL
2782 #endif
2783
2784 default:
2785 printf ("Unknown routine type %d\n", choice->type);
2786 abort ();
2787 break;
2788 }
2789 }
2790
2791
2792 void
2793 pointer_setup (struct each_t *e)
2794 {
2795 int i, j;
2796
2797 for (i = 0; i < NUM_DESTS; i++)
2798 {
2799 switch (tr->dst_size[i]) {
2800 case 0:
2801 case SIZE_RETVAL: /* will be adjusted later */
2802 d[i].size = size;
2803 break;
2804
2805 case SIZE_1:
2806 d[i].size = 1;
2807 break;
2808 case SIZE_2:
2809 d[i].size = 2;
2810 break;
2811 case SIZE_3:
2812 d[i].size = 3;
2813 break;
2814 case SIZE_4:
2815 d[i].size = 4;
2816 break;
2817 case SIZE_6:
2818 d[i].size = 6;
2819 break;
2820
2821 case SIZE_PLUS_1:
2822 d[i].size = size+1;
2823 break;
2824 case SIZE_PLUS_MSIZE_SUB_1:
2825 d[i].size = size + tr->msize - 1;
2826 break;
2827
2828 case SIZE_SUM:
2829 if (tr->size2)
2830 d[i].size = size + size2;
2831 else
2832 d[i].size = 2*size;
2833 break;
2834
2835 case SIZE_SIZE2:
2836 d[i].size = size2;
2837 break;
2838
2839 case SIZE_DIFF:
2840 d[i].size = size - size2;
2841 break;
2842
2843 case SIZE_DIFF_PLUS_1:
2844 d[i].size = size - size2 + 1;
2845 break;
2846
2847 case SIZE_DIFF_PLUS_3:
2848 d[i].size = size - size2 + 3;
2849 break;
2850
2851 case SIZE_CEIL_HALF:
2852 d[i].size = (size+1)/2;
2853 break;
2854
2855 case SIZE_GET_STR:
2856 {
2857 mp_limb_t ff = GMP_NUMB_MAX;
2858 MPN_SIZEINBASE (d[i].size, &ff - (size-1), size, base);
2859 }
2860 break;
2861
2862 default:
2863 printf ("Unrecognised dst_size type %d\n", tr->dst_size[i]);
2864 abort ();
2865 }
2866 }
2867
2868 /* establish e->d[].p destinations */
2869 for (i = 0; i < NUM_DESTS; i++)
2870 {
2871 mp_size_t offset = 0;
2872
2873 /* possible room for overlapping sources */
2874 for (j = 0; j < numberof (overlap->s); j++)
2875 if (overlap->s[j] == i)
2876 offset = MAX (offset, s[j].align);
2877
2878 if (d[i].high)
2879 {
2880 if (tr->dst_bytes[i])
2881 {
2882 e->d[i].p = (mp_ptr)
2883 ((char *) (e->d[i].region.ptr + e->d[i].region.size)
2884 - d[i].size - d[i].align);
2885 }
2886 else
2887 {
2888 e->d[i].p = e->d[i].region.ptr + e->d[i].region.size
2889 - d[i].size - d[i].align;
2890 if (tr->overlap == OVERLAP_LOW_TO_HIGH)
2891 e->d[i].p -= offset;
2892 }
2893 }
2894 else
2895 {
2896 if (tr->dst_bytes[i])
2897 {
2898 e->d[i].p = (mp_ptr) ((char *) e->d[i].region.ptr + d[i].align);
2899 }
2900 else
2901 {
2902 e->d[i].p = e->d[i].region.ptr + d[i].align;
2903 if (tr->overlap == OVERLAP_HIGH_TO_LOW)
2904 e->d[i].p += offset;
2905 }
2906 }
2907 }
2908
2909 /* establish e->s[].p sources */
2910 for (i = 0; i < NUM_SOURCES; i++)
2911 {
2912 int o = overlap->s[i];
2913 switch (o) {
2914 case -1:
2915 /* no overlap */
2916 e->s[i].p = s[i].p;
2917 break;
2918 case 0:
2919 case 1:
2920 /* overlap with d[o] */
2921 if (tr->overlap == OVERLAP_HIGH_TO_LOW)
2922 e->s[i].p = e->d[o].p - s[i].align;
2923 else if (tr->overlap == OVERLAP_LOW_TO_HIGH)
2924 e->s[i].p = e->d[o].p + s[i].align;
2925 else if (tr->size2 == SIZE_FRACTION)
2926 e->s[i].p = e->d[o].p + size2;
2927 else
2928 e->s[i].p = e->d[o].p;
2929 break;
2930 default:
2931 abort();
2932 break;
2933 }
2934 }
2935 }
2936
2937
2938 void
2939 validate_fail (void)
2940 {
2941 if (tr->reference)
2942 {
2943 trap_location = TRAP_REF;
2944 call (&ref, tr->reference);
2945 trap_location = TRAP_NOWHERE;
2946 }
2947
2948 print_all();
2949 abort();
2950 }
2951
2952
2953 void
2954 try_one (void)
2955 {
2956 int i;
2957
2958 if (option_spinner)
2959 spinner();
2960 spinner_count++;
2961
2962 trap_location = TRAP_SETUPS;
2963
2964 if (tr->divisor == DIVISOR_NORM)
2965 divisor |= GMP_NUMB_HIGHBIT;
2966 if (tr->divisor == DIVISOR_ODD)
2967 divisor |= 1;
2968
2969 for (i = 0; i < NUM_SOURCES; i++)
2970 {
2971 if (s[i].high)
2972 s[i].p = s[i].region.ptr + s[i].region.size - SRC_SIZE(i) - s[i].align;
2973 else
2974 s[i].p = s[i].region.ptr + s[i].align;
2975 }
2976
2977 pointer_setup (&ref);
2978 pointer_setup (&fun);
2979
2980 ref.retval = 0x04152637;
2981 fun.retval = 0x8C9DAEBF;
2982
2983 t_random (multiplier_N, tr->msize);
2984
2985 for (i = 0; i < NUM_SOURCES; i++)
2986 {
2987 if (! tr->src[i])
2988 continue;
2989
2990 mprotect_region (&s[i].region, PROT_READ|PROT_WRITE);
2991 t_random (s[i].p, SRC_SIZE(i));
2992
2993 switch (tr->data) {
2994 case DATA_NON_ZERO:
2995 if (refmpn_zero_p (s[i].p, SRC_SIZE(i)))
2996 s[i].p[0] = 1;
2997 break;
2998
2999 case DATA_MULTIPLE_DIVISOR:
3000 /* same number of low zero bits as divisor */
3001 s[i].p[0] &= ~ LOW_ZEROS_MASK (divisor);
3002 refmpn_sub_1 (s[i].p, s[i].p, size,
3003 refmpn_mod_1 (s[i].p, size, divisor));
3004 break;
3005
3006 case DATA_GCD:
3007 /* s[1] no more bits than s[0] */
3008 if (i == 1 && size2 == size)
3009 s[1].p[size-1] &= refmpn_msbone_mask (s[0].p[size-1]);
3010
3011 /* high limb non-zero */
3012 s[i].p[SRC_SIZE(i)-1] += (s[i].p[SRC_SIZE(i)-1] == 0);
3013
3014 /* odd */
3015 s[i].p[0] |= 1;
3016 break;
3017
3018 case DATA_SRC0_ODD:
3019 if (i == 0)
3020 s[i].p[0] |= 1;
3021 break;
3022
3023 case DATA_SRC1_ODD:
3024 if (i == 1)
3025 s[i].p[0] |= 1;
3026 break;
3027
3028 case DATA_SRC1_ODD_PRIME:
3029 if (i == 1)
3030 {
3031 if (refmpn_zero_p (s[i].p+1, SRC_SIZE(i)-1)
3032 && s[i].p[0] <=3)
3033 s[i].p[0] = 3;
3034 else
3035 {
3036 mpz_t p;
3037 mpz_init (p);
3038 for (;;)
3039 {
3040 _mpz_realloc (p, SRC_SIZE(i));
3041 MPN_COPY (PTR(p), s[i].p, SRC_SIZE(i));
3042 SIZ(p) = SRC_SIZE(i);
3043 MPN_NORMALIZE (PTR(p), SIZ(p));
3044 mpz_nextprime (p, p);
3045 if (mpz_size (p) <= SRC_SIZE(i))
3046 break;
3047
3048 t_random (s[i].p, SRC_SIZE(i));
3049 }
3050 MPN_COPY (s[i].p, PTR(p), SIZ(p));
3051 if (SIZ(p) < SRC_SIZE(i))
3052 MPN_ZERO (s[i].p + SIZ(p), SRC_SIZE(i) - SIZ(p));
3053 mpz_clear (p);
3054 }
3055 }
3056 break;
3057
3058 case DATA_SRC1_HIGHBIT:
3059 if (i == 1)
3060 {
3061 if (tr->size2)
3062 s[i].p[size2-1] |= GMP_NUMB_HIGHBIT;
3063 else
3064 s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
3065 }
3066 break;
3067
3068 case DATA_SRC0_HIGHBIT:
3069 if (i == 0)
3070 {
3071 s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
3072 }
3073 break;
3074
3075 case DATA_UDIV_QRNND:
3076 s[i].p[1] %= divisor;
3077 break;
3078 }
3079
3080 mprotect_region (&s[i].region, PROT_READ);
3081 }
3082
3083 for (i = 0; i < NUM_DESTS; i++)
3084 {
3085 if (! tr->dst[i])
3086 continue;
3087
3088 if (tr->dst0_from_src1 && i==0)
3089 {
3090 mp_size_t copy = MIN (d[0].size, SRC_SIZE(1));
3091 mp_size_t fill = MAX (0, d[0].size - copy);
3092 MPN_COPY (fun.d[0].p, s[1].region.ptr, copy);
3093 MPN_COPY (ref.d[0].p, s[1].region.ptr, copy);
3094 refmpn_fill (fun.d[0].p + copy, fill, DEADVAL);
3095 refmpn_fill (ref.d[0].p + copy, fill, DEADVAL);
3096 }
3097 else if (tr->dst_bytes[i])
3098 {
3099 memset (ref.d[i].p, 0xBA, d[i].size);
3100 memset (fun.d[i].p, 0xBA, d[i].size);
3101 }
3102 else
3103 {
3104 refmpn_fill (ref.d[i].p, d[i].size, DEADVAL);
3105 refmpn_fill (fun.d[i].p, d[i].size, DEADVAL);
3106 }
3107 }
3108
3109 for (i = 0; i < NUM_SOURCES; i++)
3110 {
3111 if (! tr->src[i])
3112 continue;
3113
3114 if (ref.s[i].p != s[i].p)
3115 {
3116 refmpn_copyi (ref.s[i].p, s[i].p, SRC_SIZE(i));
3117 refmpn_copyi (fun.s[i].p, s[i].p, SRC_SIZE(i));
3118 }
3119 }
3120
3121 if (option_print)
3122 print_all();
3123
3124 if (tr->validate != NULL)
3125 {
3126 trap_location = TRAP_FUN;
3127 call (&fun, choice->function);
3128 trap_location = TRAP_NOWHERE;
3129
3130 if (! CALLING_CONVENTIONS_CHECK ())
3131 {
3132 print_all();
3133 abort();
3134 }
3135
3136 (*tr->validate) ();
3137 }
3138 else
3139 {
3140 trap_location = TRAP_REF;
3141 call (&ref, tr->reference);
3142 trap_location = TRAP_FUN;
3143 call (&fun, choice->function);
3144 trap_location = TRAP_NOWHERE;
3145
3146 if (! CALLING_CONVENTIONS_CHECK ())
3147 {
3148 print_all();
3149 abort();
3150 }
3151
3152 compare ();
3153 }
3154 }
3155
3156
/* Loop headers used by try_many().  Each expands to a bare "for (...)" so
   they can be stacked one after another ahead of a single statement.  */

/* Step "size" from the largest of option_firstsize, choice->minsize and
   0 or 1 (0 only for SIZE_ALLOW_ZERO) up to option_lastsize inclusive.
   For SIZE_ODD an even starting value is bumped to the next odd one and
   the step is 2.  */
#define SIZE_ITERATION                                          \
  for (size = MAX3 (option_firstsize,                           \
                    choice->minsize,                            \
                    (tr->size == SIZE_ALLOW_ZERO) ? 0 : 1),     \
         size += (tr->size == SIZE_ODD) && !(size & 1);         \
       size <= option_lastsize;                                 \
       size += (tr->size == SIZE_ODD) ? 2 : 1)

/* First size2 to try: fixed at 2 for SIZE_2, option_firstsize2 for
   SIZE_FRACTION, ceil(size/2) for SIZE_CEIL_HALF, otherwise (when a second
   size is in use) at least choice->minsize and at least 1 or
   option_firstsize2.  Zero when there's no second size.  */
#define SIZE2_FIRST                                     \
  (tr->size2 == SIZE_2 ? 2                              \
   : tr->size2 == SIZE_FRACTION ? option_firstsize2     \
   : tr->size2 == SIZE_CEIL_HALF ? ((size + 1) / 2)     \
   : tr->size2 ?                                        \
   MAX (choice->minsize, (option_firstsize2 != 0        \
                          ? option_firstsize2 : 1))     \
   : 0)

/* Last size2 to try (inclusive), matching the cases of SIZE2_FIRST.  */
#define SIZE2_LAST                                      \
  (tr->size2 == SIZE_2 ? 2                              \
   : tr->size2 == SIZE_FRACTION ? FRACTION_COUNT-1      \
   : tr->size2 == SIZE_CEIL_HALF ? ((size + 1) / 2)     \
   : tr->size2 ? size                                   \
   : 0)

#define SIZE2_ITERATION \
  for (size2 = SIZE2_FIRST; size2 <= SIZE2_LAST; size2++)

/* Alignments 0 to ALIGNMENTS-1 when cond is true, otherwise just 0.
   The array argument is parenthesized so any expression can be passed.  */
#define ALIGN_COUNT(cond)  ((cond) ? ALIGNMENTS : 1)
#define ALIGN_ITERATION(w,n,cond) \
  for ((w)[n].align = 0; (w)[n].align < ALIGN_COUNT(cond); (w)[n].align++)

/* Low placement only (high==0) when cond is false, low then high when
   cond is true.  */
#define HIGH_LIMIT(cond)  ((cond) != 0)
#define HIGH_COUNT(cond)  (HIGH_LIMIT (cond) + 1)
#define HIGH_ITERATION(w,n,cond) \
  for ((w)[n].high = 0; (w)[n].high <= HIGH_LIMIT(cond); (w)[n].high++)

/* Shifts 1 to GMP_NUMB_BITS-1 when the routine takes a shift, otherwise a
   single pass with shift==1.  */
#define SHIFT_LIMIT                                     \
  ((unsigned long) (tr->shift ? GMP_NUMB_BITS -1 : 1))

#define SHIFT_ITERATION                                 \
  for (shift = 1; shift <= SHIFT_LIMIT; shift++)
3198
3199
3200 void
3201 try_many (void)
3202 {
3203 int i;
3204
3205 {
3206 unsigned long total = 1;
3207
3208 total *= option_repetitions;
3209 total *= option_lastsize;
3210 if (tr->size2 == SIZE_FRACTION) total *= FRACTION_COUNT;
3211 else if (tr->size2) total *= (option_lastsize+1)/2;
3212
3213 total *= SHIFT_LIMIT;
3214 total *= MULTIPLIER_COUNT;
3215 total *= DIVISOR_COUNT;
3216 total *= CARRY_COUNT;
3217 total *= T_RAND_COUNT;
3218
3219 total *= HIGH_COUNT (tr->dst[0]);
3220 total *= HIGH_COUNT (tr->dst[1]);
3221 total *= HIGH_COUNT (tr->src[0]);
3222 total *= HIGH_COUNT (tr->src[1]);
3223
3224 total *= ALIGN_COUNT (tr->dst[0]);
3225 total *= ALIGN_COUNT (tr->dst[1]);
3226 total *= ALIGN_COUNT (tr->src[0]);
3227 total *= ALIGN_COUNT (tr->src[1]);
3228
3229 total *= OVERLAP_COUNT;
3230
3231 printf ("%s %lu\n", choice->name, total);
3232 }
3233
3234 spinner_count = 0;
3235
3236 for (i = 0; i < option_repetitions; i++)
3237 SIZE_ITERATION
3238 SIZE2_ITERATION
3239
3240 SHIFT_ITERATION
3241 MULTIPLIER_ITERATION
3242 DIVISOR_ITERATION
3243 CARRY_ITERATION /* must be after divisor */
3244 T_RAND_ITERATION
3245
3246 HIGH_ITERATION(d,0, tr->dst[0])
3247 HIGH_ITERATION(d,1, tr->dst[1])
3248 HIGH_ITERATION(s,0, tr->src[0])
3249 HIGH_ITERATION(s,1, tr->src[1])
3250
3251 ALIGN_ITERATION(d,0, tr->dst[0])
3252 ALIGN_ITERATION(d,1, tr->dst[1])
3253 ALIGN_ITERATION(s,0, tr->src[0])
3254 ALIGN_ITERATION(s,1, tr->src[1])
3255
3256 OVERLAP_ITERATION
3257 try_one();
3258
3259 printf("\n");
3260 }
3261
3262
3263 /* Usually print_all() doesn't show much, but it might give a hint as to
3264 where the function was up to when it died. */
3265 void
3266 trap (int sig)
3267 {
3268 const char *name = "noname";
3269
3270 switch (sig) {
3271 case SIGILL: name = "SIGILL"; break;
3272 #ifdef SIGBUS
3273 case SIGBUS: name = "SIGBUS"; break;
3274 #endif
3275 case SIGSEGV: name = "SIGSEGV"; break;
3276 case SIGFPE: name = "SIGFPE"; break;
3277 }
3278
3279 printf ("\n\nSIGNAL TRAP: %s\n", name);
3280
3281 switch (trap_location) {
3282 case TRAP_REF:
3283 printf (" in reference function: %s\n", tr->reference_name);
3284 break;
3285 case TRAP_FUN:
3286 printf (" in test function: %s\n", choice->name);
3287 print_all ();
3288 break;
3289 case TRAP_SETUPS:
3290 printf (" in parameter setups\n");
3291 print_all ();
3292 break;
3293 default:
3294 printf (" somewhere unknown\n");
3295 break;
3296 }
3297 exit (1);
3298 }
3299
3300
3301 void
3302 try_init (void)
3303 {
3304 #if HAVE_GETPAGESIZE
3305 /* Prefer getpagesize() over sysconf(), since on SunOS 4 sysconf() doesn't
3306 know _SC_PAGESIZE. */
3307 pagesize = getpagesize ();
3308 #else
3309 #if HAVE_SYSCONF
3310 if ((pagesize = sysconf (_SC_PAGESIZE)) == -1)
3311 {
3312 /* According to the linux man page, sysconf doesn't set errno */
3313 fprintf (stderr, "Cannot get sysconf _SC_PAGESIZE\n");
3314 exit (1);
3315 }
3316 #else
3317 Error, error, cannot get page size
3318 #endif
3319 #endif
3320
3321 printf ("pagesize is 0x%lX bytes\n", pagesize);
3322
3323 signal (SIGILL, trap);
3324 #ifdef SIGBUS
3325 signal (SIGBUS, trap);
3326 #endif
3327 signal (SIGSEGV, trap);
3328 signal (SIGFPE, trap);
3329
3330 {
3331 int i;
3332
3333 for (i = 0; i < NUM_SOURCES; i++)
3334 {
3335 malloc_region (&s[i].region, 2*option_lastsize+ALIGNMENTS-1);
3336 printf ("s[%d] %p to %p (0x%lX bytes)\n",
3337 i, (void *) (s[i].region.ptr),
3338 (void *) (s[i].region.ptr + s[i].region.size),
3339 (long) s[i].region.size * BYTES_PER_MP_LIMB);
3340 }
3341
3342 #define INIT_EACH(e,es) \
3343 for (i = 0; i < NUM_DESTS; i++) \
3344 { \
3345 malloc_region (&e.d[i].region, 2*option_lastsize+ALIGNMENTS-1); \
3346 printf ("%s d[%d] %p to %p (0x%lX bytes)\n", \
3347 es, i, (void *) (e.d[i].region.ptr), \
3348 (void *) (e.d[i].region.ptr + e.d[i].region.size), \
3349 (long) e.d[i].region.size * BYTES_PER_MP_LIMB); \
3350 }
3351
3352 INIT_EACH(ref, "ref");
3353 INIT_EACH(fun, "fun");
3354 }
3355 }
3356
/* Match str against pattern, where pattern may carry a single "*"
   wildcard at its start or at its end.

   "*suffix" matches any str ending in suffix (a lone "*" matches
   everything), "prefix*" matches any str starting with prefix, and a
   pattern without a wildcard in those positions must equal str exactly.
   When the pattern starts with "*" the rest of it is taken literally, so
   a further "*" at the end is not treated as a wildcard.

   Returns 1 on a match, 0 otherwise.  */
int
strmatch_wild (const char *pattern, const char *str)
{
  size_t  plen, slen;

  /* wildcard at start */
  if (pattern[0] == '*')
    {
      pattern++;
      plen = strlen (pattern);
      slen = strlen (str);
      return (plen == 0
              || (slen >= plen && memcmp (pattern, str+slen-plen, plen) == 0));
    }

  /* wildcard at end; strncmp stops at the end of str, so a str shorter
     than the prefix is never read beyond its terminator */
  plen = strlen (pattern);
  if (plen >= 1 && pattern[plen-1] == '*')
    return (strncmp (pattern, str, plen-1) == 0);

  /* no wildcards */
  return (strcmp (pattern, str) == 0);
}
3380
3381 void
3382 try_name (const char *name)
3383 {
3384 int found = 0;
3385 int i;
3386
3387 for (i = 0; i < numberof (choice_array); i++)
3388 {
3389 if (strmatch_wild (name, choice_array[i].name))
3390 {
3391 choice = &choice_array[i];
3392 tr = ¶m[choice->type];
3393 try_many ();
3394 found = 1;
3395 }
3396 }
3397
3398 if (!found)
3399 {
3400 printf ("%s unknown\n", name);
3401 /* exit (1); */
3402 }
3403 }
3404
3405
3406 void
3407 usage (const char *prog)
3408 {
3409 int col = 0;
3410 int i;
3411
3412 printf ("Usage: %s [options] function...\n", prog);
3413 printf (" -1 use limb data 1,2,3,etc\n");
3414 printf (" -9 use limb data all 0xFF..FFs\n");
3415 printf (" -a zeros use limb data all zeros\n");
3416 printf (" -a ffs use limb data all 0xFF..FFs (same as -9)\n");
3417 printf (" -a 2fd use data 0x2FFF...FFFD\n");
3418 printf (" -p print each case tried (try this if seg faulting)\n");
3419 printf (" -R seed random numbers from time()\n");
3420 printf (" -r reps set repetitions (default %d)\n", DEFAULT_REPETITIONS);
3421 printf (" -s size starting size to test\n");
3422 printf (" -S size2 starting size2 to test\n");
3423 printf (" -s s1-s2 range of sizes to test\n");
3424 printf (" -W don't show the spinner (use this in gdb)\n");
3425 printf (" -z disable mprotect() redzones\n");
3426 printf ("Default data is refmpn_random() and refmpn_random2().\n");
3427 printf ("\n");
3428 printf ("Functions that can be tested:\n");
3429
3430 for (i = 0; i < numberof (choice_array); i++)
3431 {
3432 if (col + 1 + strlen (choice_array[i].name) > 79)
3433 {
3434 printf ("\n");
3435 col = 0;
3436 }
3437 printf (" %s", choice_array[i].name);
3438 col += 1 + strlen (choice_array[i].name);
3439 }
3440 printf ("\n");
3441
3442 exit(1);
3443 }
3444
3445
3446 int
3447 main (int argc, char *argv[])
3448 {
3449 int i;
3450
3451 /* unbuffered output */
3452 setbuf (stdout, NULL);
3453 setbuf (stderr, NULL);
3454
3455 /* default trace in hex, and in upper-case so can paste into bc */
3456 mp_trace_base = -16;
3457
3458 param_init ();
3459
3460 {
3461 unsigned long seed = 123;
3462 int opt;
3463
3464 while ((opt = getopt(argc, argv, "19a:b:E:pRr:S:s:Wz")) != EOF)
3465 {
3466 switch (opt) {
3467 case '1':
3468 /* use limb data values 1, 2, 3, ... etc */
3469 option_data = DATA_SEQ;
3470 break;
3471 case '9':
3472 /* use limb data values 0xFFF...FFF always */
3473 option_data = DATA_FFS;
3474 break;
3475 case 'a':
3476 if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS;
3477 else if (strcmp (optarg, "seq") == 0) option_data = DATA_SEQ;
3478 else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS;
3479 else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD;
3480 else
3481 {
3482 fprintf (stderr, "unrecognised data option: %s\n", optarg);
3483 exit (1);
3484 }
3485 break;
3486 case 'b':
3487 mp_trace_base = atoi (optarg);
3488 break;
3489 case 'E':
3490 /* re-seed */
3491 sscanf (optarg, "%lu", &seed);
3492 printf ("Re-seeding with %lu\n", seed);
3493 break;
3494 case 'p':
3495 option_print = 1;
3496 break;
3497 case 'R':
3498 /* randomize */
3499 seed = time (NULL);
3500 printf ("Seeding with %lu, re-run using \"-E %lu\"\n", seed, seed);
3501 break;
3502 case 'r':
3503 option_repetitions = atoi (optarg);
3504 break;
3505 case 's':
3506 {
3507 char *p;
3508 option_firstsize = strtol (optarg, 0, 0);
3509 if ((p = strchr (optarg, '-')) != NULL)
3510 option_lastsize = strtol (p+1, 0, 0);
3511 }
3512 break;
3513 case 'S':
3514 /* -S <size> sets the starting size for the second of a two size
3515 routine (like mpn_mul_basecase) */
3516 option_firstsize2 = strtol (optarg, 0, 0);
3517 break;
3518 case 'W':
3519 /* use this when running in the debugger */
3520 option_spinner = 0;
3521 break;
3522 case 'z':
3523 /* disable redzones */
3524 option_redzones = 0;
3525 break;
3526 case '?':
3527 usage (argv[0]);
3528 break;
3529 }
3530 }
3531
3532 gmp_randinit_default (__gmp_rands);
3533 __gmp_rands_initialized = 1;
3534 gmp_randseed_ui (__gmp_rands, seed);
3535 }
3536
3537 try_init();
3538
3539 if (argc <= optind)
3540 usage (argv[0]);
3541
3542 for (i = optind; i < argc; i++)
3543 try_name (argv[i]);
3544
3545 return 0;
3546 }
3547