1 1.1 mrg /* Run some tests on various mpn routines.
2 1.1 mrg
3 1.1 mrg THIS IS A TEST PROGRAM USED ONLY FOR DEVELOPMENT. IT'S ALMOST CERTAIN TO
4 1.1 mrg BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE VERSIONS OF GMP.
5 1.1 mrg
6 1.1 mrg Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2009 Free Software
7 1.1 mrg Foundation, Inc.
8 1.1 mrg
9 1.1 mrg This file is part of the GNU MP Library.
10 1.1 mrg
11 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify
12 1.1 mrg it under the terms of the GNU Lesser General Public License as published by
13 1.1 mrg the Free Software Foundation; either version 3 of the License, or (at your
14 1.1 mrg option) any later version.
15 1.1 mrg
16 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but
17 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
19 1.1 mrg License for more details.
20 1.1 mrg
21 1.1 mrg You should have received a copy of the GNU Lesser General Public License
22 1.1 mrg along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
23 1.1 mrg
24 1.1 mrg
25 1.1 mrg /* Usage: try [options] <function>...
26 1.1 mrg
27 1.1 mrg For example, "./try mpn_add_n" to run tests of that function.
28 1.1 mrg
29 1.1 mrg Combinations of alignments and overlaps are tested, with redzones above
30 1.1 mrg or below the destinations, and with the sources write-protected.
31 1.1 mrg
32 1.1 mrg The number of tests performed becomes ridiculously large with all the
33 1.1 mrg combinations, and for that reason this can't be a part of a "make check",
34 1.1 mrg it's meant only for development. The code isn't very pretty either.
35 1.1 mrg
36 1.1 mrg During development it can help to disable the redzones, since seeing the
37 1.1 mrg rest of the destination written can show where the wrong part is, or if
38 1.1 mrg the dst pointers are off by 1 or whatever. The magic DEADVAL initial
39 1.1 mrg fill (see below) will show locations never written.
40 1.1 mrg
41 1.1 mrg The -s option can be used to test only certain size operands, which is
42 1.1 mrg useful if some new code doesn't yet support say sizes less than the
43 1.1 mrg unrolling, or whatever.
44 1.1 mrg
45 1.1 mrg When a problem occurs it'll of course be necessary to run the program
46 1.1 mrg under gdb to find out quite where, how and why it's going wrong. Disable
47 1.1 mrg the spinner with the -W option when doing this, or single stepping won't
48 1.1 mrg work. Using the "-1" option to run with simple data can be useful.
49 1.1 mrg
50 1.1 mrg New functions to test can be added in try_array[]. If a new TYPE is
51 1.1 mrg required then add it to the existing constants, set up its parameters in
52 1.1 mrg param_init(), and add it to the call() function. Extra parameter fields
53 1.1 mrg can be added if necessary, or further interpretations given to existing
54 1.1 mrg fields.
55 1.1 mrg
56 1.1 mrg
57 1.1 mrg Portability:
58 1.1 mrg
59 1.1 mrg This program is not designed for use on Cray vector systems under Unicos,
60 1.1 mrg it will fail to compile due to missing _SC_PAGE_SIZE. Those systems
61 1.1 mrg don't really have pages or mprotect. We could arrange to run the tests
62 1.1 mrg without the redzones, but we haven't bothered currently.
63 1.1 mrg
64 1.1 mrg
65 1.1 mrg Enhancements:
66 1.1 mrg
67 1.1 mrg umul_ppmm support is not very good, lots of source data is generated
68 1.1 mrg whereas only two limbs are needed.
69 1.1 mrg
70 1.1 mrg Make a little scheme for interpreting the "SIZE" selections uniformly.
71 1.1 mrg
72 1.1 mrg Make tr->size==SIZE_2 work, for the benefit of find_a which wants just 2
73 1.1 mrg source limbs. Possibly increase the default repetitions in that case.
74 1.1 mrg
75 1.1 mrg Automatically detect gdb and disable the spinner (use -W for now).
76 1.1 mrg
77 1.1 mrg Make a way to re-run a failing case in the debugger. Have an option to
78 1.1 mrg snapshot each test case before it's run so the data is available if a
79 1.1 mrg segv occurs. (This should be more reliable than the current print_all()
80 1.1 mrg in the signal handler.)
81 1.1 mrg
82 1.1 mrg When alignment means a dst isn't hard against the redzone, check the
83 1.1 mrg space in between remains unchanged.
84 1.1 mrg
85 1.1 mrg When a source overlaps a destination, don't run both s[i].high 0 and 1,
86 1.1 mrg as s[i].high has no effect. Maybe encode s[i].high into overlap->s[i].
87 1.1 mrg
88 1.1 mrg When partial overlaps aren't done, don't loop over source alignments
89 1.1 mrg during overlaps.
90 1.1 mrg
91 1.1 mrg Try to make the looping code a bit less horrible. Right now it's pretty
92 1.1 mrg hard to see what iterations are actually done.
93 1.1 mrg
94 1.1 mrg Perhaps specific setups and loops for each style of function under test
95 1.1 mrg would be clearer than a parameterized general loop. There's lots of
96 1.1 mrg stuff common to all functions, but the exceptions get messy.
97 1.1 mrg
98 1.1 mrg When there's no overlap, run with both src>dst and src<dst. A subtle
99 1.1 mrg calling-conventions violation occurred in a P6 copy which depended on the
100 1.1 mrg relative location of src and dst.
101 1.1 mrg
102 1.1 mrg multiplier_N is more or less a third source region for the addmul_N
103 1.1 mrg routines, and could be done with the redzoned region scheme.
104 1.1 mrg
105 1.1 mrg */
106 1.1 mrg
107 1.1 mrg
108 1.1 mrg /* always do assertion checking */
109 1.1 mrg #define WANT_ASSERT 1
110 1.1 mrg
111 1.1 mrg #include "config.h"
112 1.1 mrg
113 1.1 mrg #include <errno.h>
114 1.1 mrg #include <limits.h>
115 1.1 mrg #include <signal.h>
116 1.1 mrg #include <stdio.h>
117 1.1 mrg #include <stdlib.h>
118 1.1 mrg #include <string.h>
119 1.1 mrg #include <time.h>
120 1.1 mrg
121 1.1 mrg #if HAVE_UNISTD_H
122 1.1 mrg #include <unistd.h>
123 1.1 mrg #endif
124 1.1 mrg
125 1.1 mrg #if HAVE_SYS_MMAN_H
126 1.1 mrg #include <sys/mman.h>
127 1.1 mrg #endif
128 1.1 mrg
129 1.1 mrg #include "gmp.h"
130 1.1 mrg #include "gmp-impl.h"
131 1.1 mrg #include "longlong.h"
132 1.1 mrg #include "tests.h"
133 1.1 mrg
134 1.1 mrg
135 1.1 mrg #if !HAVE_DECL_OPTARG
136 1.1 mrg extern char *optarg;
137 1.1 mrg extern int optind, opterr;
138 1.1 mrg #endif
139 1.1 mrg
140 1.1 mrg #if ! HAVE_DECL_SYS_NERR
141 1.1 mrg extern int sys_nerr;
142 1.1 mrg #endif
143 1.1 mrg
144 1.1 mrg #if ! HAVE_DECL_SYS_ERRLIST
145 1.1 mrg extern char *sys_errlist[];
146 1.1 mrg #endif
147 1.1 mrg
148 1.1 mrg #if ! HAVE_STRERROR
149 1.1 mrg char *
150 1.1 mrg strerror (int n)
151 1.1 mrg {
152 1.1 mrg if (n < 0 || n >= sys_nerr)
153 1.1 mrg return "errno out of range";
154 1.1 mrg else
155 1.1 mrg return sys_errlist[n];
156 1.1 mrg }
157 1.1 mrg #endif
158 1.1 mrg
159 1.1 mrg /* Rumour has it some systems lack a define of PROT_NONE. */
160 1.1 mrg #ifndef PROT_NONE
161 1.1 mrg #define PROT_NONE 0
162 1.1 mrg #endif
163 1.1 mrg
164 1.1 mrg /* Dummy defines for when mprotect doesn't exist. */
165 1.1 mrg #ifndef PROT_READ
166 1.1 mrg #define PROT_READ 0
167 1.1 mrg #endif
168 1.1 mrg #ifndef PROT_WRITE
169 1.1 mrg #define PROT_WRITE 0
170 1.1 mrg #endif
171 1.1 mrg
172 1.1 mrg /* _SC_PAGESIZE is standard, but hpux 9 and possibly other systems have
173 1.1 mrg _SC_PAGE_SIZE instead. */
174 1.1 mrg #if defined (_SC_PAGE_SIZE) && ! defined (_SC_PAGESIZE)
175 1.1 mrg #define _SC_PAGESIZE _SC_PAGE_SIZE
176 1.1 mrg #endif
177 1.1 mrg
178 1.1 mrg
179 1.1 mrg #ifdef EXTRA_PROTOS
180 1.1 mrg EXTRA_PROTOS
181 1.1 mrg #endif
182 1.1 mrg #ifdef EXTRA_PROTOS2
183 1.1 mrg EXTRA_PROTOS2
184 1.1 mrg #endif
185 1.1 mrg
186 1.1 mrg
187 1.1 mrg #define DEFAULT_REPETITIONS 10
188 1.1 mrg
189 1.1 mrg int option_repetitions = DEFAULT_REPETITIONS;
190 1.1 mrg int option_spinner = 1;
191 1.1 mrg int option_redzones = 1;
192 1.1 mrg int option_firstsize = 0;
193 1.1 mrg int option_lastsize = 500;
194 1.1 mrg int option_firstsize2 = 0;
195 1.1 mrg
196 1.1 mrg #define ALIGNMENTS 4
197 1.1 mrg #define OVERLAPS 4
198 1.1 mrg #define CARRY_RANDOMS 5
199 1.1 mrg #define MULTIPLIER_RANDOMS 5
200 1.1 mrg #define DIVISOR_RANDOMS 5
201 1.1 mrg #define FRACTION_COUNT 4
202 1.1 mrg
203 1.1 mrg int option_print = 0;
204 1.1 mrg
205 1.1 mrg #define DATA_TRAND 0
206 1.1 mrg #define DATA_ZEROS 1
207 1.1 mrg #define DATA_SEQ 2
208 1.1 mrg #define DATA_FFS 3
209 1.1 mrg #define DATA_2FD 4
210 1.1 mrg int option_data = DATA_TRAND;
211 1.1 mrg
212 1.1 mrg
213 1.1 mrg mp_size_t pagesize;
214 1.1 mrg #define PAGESIZE_LIMBS (pagesize / BYTES_PER_MP_LIMB)
215 1.1 mrg
216 1.1 mrg /* must be a multiple of the page size */
217 1.1 mrg #define REDZONE_BYTES (pagesize * 16)
218 1.1 mrg #define REDZONE_LIMBS (REDZONE_BYTES / BYTES_PER_MP_LIMB)
219 1.1 mrg
220 1.1 mrg
221 1.1 mrg #define MAX3(x,y,z) (MAX (x, MAX (y, z)))
222 1.1 mrg
223 1.1 mrg #if GMP_LIMB_BITS == 32
224 1.1 mrg #define DEADVAL CNST_LIMB(0xDEADBEEF)
225 1.1 mrg #else
226 1.1 mrg #define DEADVAL CNST_LIMB(0xDEADBEEFBADDCAFE)
227 1.1 mrg #endif
228 1.1 mrg
229 1.1 mrg
230 1.1 mrg struct region_t {
231 1.1 mrg mp_ptr ptr;
232 1.1 mrg mp_size_t size;
233 1.1 mrg };
234 1.1 mrg
235 1.1 mrg
236 1.1 mrg #define TRAP_NOWHERE 0
237 1.1 mrg #define TRAP_REF 1
238 1.1 mrg #define TRAP_FUN 2
239 1.1 mrg #define TRAP_SETUPS 3
240 1.1 mrg int trap_location = TRAP_NOWHERE;
241 1.1 mrg
242 1.1 mrg
243 1.1 mrg #define NUM_SOURCES 2
244 1.1 mrg #define NUM_DESTS 2
245 1.1 mrg
246 1.1 mrg struct source_t {
247 1.1 mrg struct region_t region;
248 1.1 mrg int high;
249 1.1 mrg mp_size_t align;
250 1.1 mrg mp_ptr p;
251 1.1 mrg };
252 1.1 mrg
253 1.1 mrg struct source_t s[NUM_SOURCES];
254 1.1 mrg
255 1.1 mrg struct dest_t {
256 1.1 mrg int high;
257 1.1 mrg mp_size_t align;
258 1.1 mrg mp_size_t size;
259 1.1 mrg };
260 1.1 mrg
261 1.1 mrg struct dest_t d[NUM_DESTS];
262 1.1 mrg
263 1.1 mrg struct source_each_t {
264 1.1 mrg mp_ptr p;
265 1.1 mrg };
266 1.1 mrg
267 1.1 mrg struct dest_each_t {
268 1.1 mrg struct region_t region;
269 1.1 mrg mp_ptr p;
270 1.1 mrg };
271 1.1 mrg
272 1.1 mrg mp_size_t size;
273 1.1 mrg mp_size_t size2;
274 1.1 mrg unsigned long shift;
275 1.1 mrg mp_limb_t carry;
276 1.1 mrg mp_limb_t divisor;
277 1.1 mrg mp_limb_t multiplier;
278 1.1 mrg mp_limb_t multiplier_N[8];
279 1.1 mrg
280 1.1 mrg struct each_t {
281 1.1 mrg const char *name;
282 1.1 mrg struct dest_each_t d[NUM_DESTS];
283 1.1 mrg struct source_each_t s[NUM_SOURCES];
284 1.1 mrg mp_limb_t retval;
285 1.1 mrg };
286 1.1 mrg
287 1.1 mrg struct each_t ref = { "Ref" };
288 1.1 mrg struct each_t fun = { "Fun" };
289 1.1 mrg
290 1.1 mrg #define SRC_SIZE(n) ((n) == 1 && tr->size2 ? size2 : size)
291 1.1 mrg
292 1.1 mrg void validate_fail __GMP_PROTO ((void));
293 1.1 mrg
294 1.1 mrg
295 1.1 mrg #if HAVE_TRY_NEW_C
296 1.1 mrg #include "try-new.c"
297 1.1 mrg #endif
298 1.1 mrg
299 1.1 mrg
300 1.1 mrg typedef mp_limb_t (*tryfun_t) __GMP_PROTO ((ANYARGS));
301 1.1 mrg
302 1.1 mrg struct try_t {
303 1.1 mrg char retval;
304 1.1 mrg
305 1.1 mrg char src[2];
306 1.1 mrg char dst[2];
307 1.1 mrg
308 1.1 mrg #define SIZE_YES 1
309 1.1 mrg #define SIZE_ALLOW_ZERO 2
310 1.1 mrg #define SIZE_1 3 /* 1 limb */
311 1.1 mrg #define SIZE_2 4 /* 2 limbs */
312 1.1 mrg #define SIZE_3 5 /* 3 limbs */
313 1.1 mrg #define SIZE_FRACTION 6 /* size2 is fraction for divrem etc */
314 1.1 mrg #define SIZE_SIZE2 7
315 1.1 mrg #define SIZE_PLUS_1 8
316 1.1 mrg #define SIZE_SUM 9
317 1.1 mrg #define SIZE_DIFF 10
318 1.1 mrg #define SIZE_DIFF_PLUS_1 11
319 1.1 mrg #define SIZE_RETVAL 12
320 1.1 mrg #define SIZE_CEIL_HALF 13
321 1.1 mrg #define SIZE_GET_STR 14
322 1.1 mrg #define SIZE_PLUS_MSIZE_SUB_1 15 /* size+msize-1 */
323 1.1 mrg char size;
324 1.1 mrg char size2;
325 1.1 mrg char dst_size[2];
326 1.1 mrg
327 1.1 mrg /* multiplier_N size in limbs */
328 1.1 mrg mp_size_t msize;
329 1.1 mrg
330 1.1 mrg char dst_bytes[2];
331 1.1 mrg
332 1.1 mrg char dst0_from_src1;
333 1.1 mrg
334 1.1 mrg #define CARRY_BIT 1 /* single bit 0 or 1 */
335 1.1 mrg #define CARRY_3 2 /* 0, 1, 2 */
336 1.1 mrg #define CARRY_4 3 /* 0 to 3 */
337 1.1 mrg #define CARRY_LIMB 4 /* any limb value */
338 1.1 mrg #define CARRY_DIVISOR 5 /* carry<divisor */
339 1.1 mrg char carry;
340 1.1 mrg
341 1.1 mrg /* a fudge to tell the output when to print negatives */
342 1.1 mrg char carry_sign;
343 1.1 mrg
344 1.1 mrg char multiplier;
345 1.1 mrg char shift;
346 1.1 mrg
347 1.1 mrg #define DIVISOR_LIMB 1
348 1.1 mrg #define DIVISOR_NORM 2
349 1.1 mrg #define DIVISOR_ODD 3
350 1.1 mrg char divisor;
351 1.1 mrg
352 1.1 mrg #define DATA_NON_ZERO 1
353 1.1 mrg #define DATA_GCD 2
354 1.1 mrg #define DATA_SRC0_ODD 3
355 1.1 mrg #define DATA_SRC0_HIGHBIT 4
356 1.1 mrg #define DATA_SRC1_ODD 5
357 1.1 mrg #define DATA_SRC1_HIGHBIT 6
358 1.1 mrg #define DATA_MULTIPLE_DIVISOR 7
359 1.1 mrg #define DATA_UDIV_QRNND 8
360 1.1 mrg char data;
361 1.1 mrg
362 1.1 mrg /* Default is allow full overlap. */
363 1.1 mrg #define OVERLAP_NONE 1
364 1.1 mrg #define OVERLAP_LOW_TO_HIGH 2
365 1.1 mrg #define OVERLAP_HIGH_TO_LOW 3
366 1.1 mrg #define OVERLAP_NOT_SRCS 4
367 1.1 mrg #define OVERLAP_NOT_SRC2 8
368 1.1 mrg char overlap;
369 1.1 mrg
370 1.1 mrg tryfun_t reference;
371 1.1 mrg const char *reference_name;
372 1.1 mrg
373 1.1 mrg void (*validate) __GMP_PROTO ((void));
374 1.1 mrg const char *validate_name;
375 1.1 mrg };
376 1.1 mrg
377 1.1 mrg struct try_t *tr;
378 1.1 mrg
379 1.1 mrg
380 1.1 mrg void
381 1.1 mrg validate_mod_34lsub1 (void)
382 1.1 mrg {
383 1.1 mrg #define CNST_34LSUB1 ((CNST_LIMB(1) << (3 * (GMP_NUMB_BITS / 4))) - 1)
384 1.1 mrg
385 1.1 mrg mp_srcptr ptr = s[0].p;
386 1.1 mrg int error = 0;
387 1.1 mrg mp_limb_t got, got_mod, want, want_mod;
388 1.1 mrg
389 1.1 mrg ASSERT (size >= 1);
390 1.1 mrg
391 1.1 mrg got = fun.retval;
392 1.1 mrg got_mod = got % CNST_34LSUB1;
393 1.1 mrg
394 1.1 mrg want = refmpn_mod_34lsub1 (ptr, size);
395 1.1 mrg want_mod = want % CNST_34LSUB1;
396 1.1 mrg
397 1.1 mrg if (got_mod != want_mod)
398 1.1 mrg {
399 1.1 mrg gmp_printf ("got 0x%MX reduced from 0x%MX\n", got_mod, got);
400 1.1 mrg gmp_printf ("want 0x%MX reduced from 0x%MX\n", want_mod, want);
401 1.1 mrg error = 1;
402 1.1 mrg }
403 1.1 mrg
404 1.1 mrg if (error)
405 1.1 mrg validate_fail ();
406 1.1 mrg }
407 1.1 mrg
408 1.1 mrg void
409 1.1 mrg validate_divexact_1 (void)
410 1.1 mrg {
411 1.1 mrg mp_srcptr src = s[0].p;
412 1.1 mrg mp_srcptr dst = fun.d[0].p;
413 1.1 mrg int error = 0;
414 1.1 mrg
415 1.1 mrg ASSERT (size >= 1);
416 1.1 mrg
417 1.1 mrg {
418 1.1 mrg mp_ptr tp = refmpn_malloc_limbs (size);
419 1.1 mrg mp_limb_t rem;
420 1.1 mrg
421 1.1 mrg rem = refmpn_divrem_1 (tp, 0, src, size, divisor);
422 1.1 mrg if (rem != 0)
423 1.1 mrg {
424 1.1 mrg gmp_printf ("Remainder a%%d == 0x%MX, mpn_divexact_1 undefined\n", rem);
425 1.1 mrg error = 1;
426 1.1 mrg }
427 1.1 mrg if (! refmpn_equal_anynail (tp, dst, size))
428 1.1 mrg {
429 1.1 mrg printf ("Quotient a/d wrong\n");
430 1.1 mrg mpn_trace ("fun ", dst, size);
431 1.1 mrg mpn_trace ("want", tp, size);
432 1.1 mrg error = 1;
433 1.1 mrg }
434 1.1 mrg free (tp);
435 1.1 mrg }
436 1.1 mrg
437 1.1 mrg if (error)
438 1.1 mrg validate_fail ();
439 1.1 mrg }
440 1.1 mrg
441 1.1 mrg
442 1.1 mrg void
443 1.1 mrg validate_modexact_1c_odd (void)
444 1.1 mrg {
445 1.1 mrg mp_srcptr ptr = s[0].p;
446 1.1 mrg mp_limb_t r = fun.retval;
447 1.1 mrg int error = 0;
448 1.1 mrg
449 1.1 mrg ASSERT (size >= 1);
450 1.1 mrg ASSERT (divisor & 1);
451 1.1 mrg
452 1.1 mrg if ((r & GMP_NAIL_MASK) != 0)
453 1.1 mrg printf ("r has non-zero nail\n");
454 1.1 mrg
455 1.1 mrg if (carry < divisor)
456 1.1 mrg {
457 1.1 mrg if (! (r < divisor))
458 1.1 mrg {
459 1.1 mrg printf ("Don't have r < divisor\n");
460 1.1 mrg error = 1;
461 1.1 mrg }
462 1.1 mrg }
463 1.1 mrg else /* carry >= divisor */
464 1.1 mrg {
465 1.1 mrg if (! (r <= divisor))
466 1.1 mrg {
467 1.1 mrg printf ("Don't have r <= divisor\n");
468 1.1 mrg error = 1;
469 1.1 mrg }
470 1.1 mrg }
471 1.1 mrg
472 1.1 mrg {
473 1.1 mrg mp_limb_t c = carry % divisor;
474 1.1 mrg mp_ptr tp = refmpn_malloc_limbs (size+1);
475 1.1 mrg mp_size_t k;
476 1.1 mrg
477 1.1 mrg for (k = size-1; k <= size; k++)
478 1.1 mrg {
479 1.1 mrg /* set {tp,size+1} to r*b^k + a - c */
480 1.1 mrg refmpn_copyi (tp, ptr, size);
481 1.1 mrg tp[size] = 0;
482 1.1 mrg ASSERT_NOCARRY (refmpn_add_1 (tp+k, tp+k, size+1-k, r));
483 1.1 mrg if (refmpn_sub_1 (tp, tp, size+1, c))
484 1.1 mrg ASSERT_CARRY (mpn_add_1 (tp, tp, size+1, divisor));
485 1.1 mrg
486 1.1 mrg if (refmpn_mod_1 (tp, size+1, divisor) == 0)
487 1.1 mrg goto good_remainder;
488 1.1 mrg }
489 1.1 mrg printf ("Remainder matches neither r*b^(size-1) nor r*b^size\n");
490 1.1 mrg error = 1;
491 1.1 mrg
492 1.1 mrg good_remainder:
493 1.1 mrg free (tp);
494 1.1 mrg }
495 1.1 mrg
496 1.1 mrg if (error)
497 1.1 mrg validate_fail ();
498 1.1 mrg }
499 1.1 mrg
500 1.1 mrg void
501 1.1 mrg validate_modexact_1_odd (void)
502 1.1 mrg {
503 1.1 mrg carry = 0;
504 1.1 mrg validate_modexact_1c_odd ();
505 1.1 mrg }
506 1.1 mrg
507 1.1 mrg
508 1.1 mrg void
509 1.1 mrg validate_sqrtrem (void)
510 1.1 mrg {
511 1.1 mrg mp_srcptr orig_ptr = s[0].p;
512 1.1 mrg mp_size_t orig_size = size;
513 1.1 mrg mp_size_t root_size = (size+1)/2;
514 1.1 mrg mp_srcptr root_ptr = fun.d[0].p;
515 1.1 mrg mp_size_t rem_size = fun.retval;
516 1.1 mrg mp_srcptr rem_ptr = fun.d[1].p;
517 1.1 mrg mp_size_t prod_size = 2*root_size;
518 1.1 mrg mp_ptr p;
519 1.1 mrg int error = 0;
520 1.1 mrg
521 1.1 mrg if (rem_size < 0 || rem_size > size)
522 1.1 mrg {
523 1.1 mrg printf ("Bad remainder size retval %ld\n", (long) rem_size);
524 1.1 mrg validate_fail ();
525 1.1 mrg }
526 1.1 mrg
527 1.1 mrg p = refmpn_malloc_limbs (prod_size);
528 1.1 mrg
529 1.1 mrg p[root_size] = refmpn_lshift (p, root_ptr, root_size, 1);
530 1.1 mrg if (refmpn_cmp_twosizes (p,root_size+1, rem_ptr,rem_size) < 0)
531 1.1 mrg {
532 1.1 mrg printf ("Remainder bigger than 2*root\n");
533 1.1 mrg error = 1;
534 1.1 mrg }
535 1.1 mrg
536 1.1 mrg refmpn_sqr (p, root_ptr, root_size);
537 1.1 mrg if (rem_size != 0)
538 1.1 mrg refmpn_add (p, p, prod_size, rem_ptr, rem_size);
539 1.1 mrg if (refmpn_cmp_twosizes (p,prod_size, orig_ptr,orig_size) != 0)
540 1.1 mrg {
541 1.1 mrg printf ("root^2+rem != original\n");
542 1.1 mrg mpn_trace ("prod", p, prod_size);
543 1.1 mrg error = 1;
544 1.1 mrg }
545 1.1 mrg free (p);
546 1.1 mrg
547 1.1 mrg if (error)
548 1.1 mrg validate_fail ();
549 1.1 mrg }
550 1.1 mrg
551 1.1 mrg
552 1.1 mrg /* These types are indexes into the param[] array and are arbitrary so long
553 1.1 mrg as they're all distinct and within the size of param[]. Renumber
554 1.1 mrg whenever necessary or desired. */
555 1.1 mrg
556 1.1 mrg #define TYPE_ADD 1
557 1.1 mrg #define TYPE_ADD_N 2
558 1.1 mrg #define TYPE_ADD_NC 3
559 1.1 mrg #define TYPE_SUB 4
560 1.1 mrg #define TYPE_SUB_N 5
561 1.1 mrg #define TYPE_SUB_NC 6
562 1.1 mrg
563 1.1 mrg #define TYPE_MUL_1 7
564 1.1 mrg #define TYPE_MUL_1C 8
565 1.1 mrg
566 1.1 mrg #define TYPE_MUL_2 9
567 1.1 mrg #define TYPE_MUL_3 92
568 1.1 mrg #define TYPE_MUL_4 93
569 1.1 mrg
570 1.1 mrg #define TYPE_ADDMUL_1 10
571 1.1 mrg #define TYPE_ADDMUL_1C 11
572 1.1 mrg #define TYPE_SUBMUL_1 12
573 1.1 mrg #define TYPE_SUBMUL_1C 13
574 1.1 mrg
575 1.1 mrg #define TYPE_ADDMUL_2 14
576 1.1 mrg #define TYPE_ADDMUL_3 15
577 1.1 mrg #define TYPE_ADDMUL_4 16
578 1.1 mrg #define TYPE_ADDMUL_5 17
579 1.1 mrg #define TYPE_ADDMUL_6 18
580 1.1 mrg #define TYPE_ADDMUL_7 19
581 1.1 mrg #define TYPE_ADDMUL_8 20
582 1.1 mrg
583 1.1 mrg #define TYPE_ADDSUB_N 21
584 1.1 mrg #define TYPE_ADDSUB_NC 22
585 1.1 mrg
586 1.1 mrg #define TYPE_RSHIFT 23
587 1.1 mrg #define TYPE_LSHIFT 24
588 1.1 mrg #define TYPE_LSHIFTC 25
589 1.1 mrg
590 1.1 mrg #define TYPE_COPY 26
591 1.1 mrg #define TYPE_COPYI 27
592 1.1 mrg #define TYPE_COPYD 28
593 1.1 mrg #define TYPE_COM 29
594 1.1 mrg
595 1.1 mrg #define TYPE_ADDLSH1_N 30
596 1.1 mrg #define TYPE_ADDLSH2_N 48
597 1.1 mrg #define TYPE_ADDLSH_N 49
598 1.1 mrg #define TYPE_SUBLSH1_N 31
599 1.1 mrg #define TYPE_SUBLSH_N 130
600 1.1 mrg #define TYPE_RSBLSH1_N 34
601 1.1 mrg #define TYPE_RSBLSH2_N 46
602 1.1 mrg #define TYPE_RSBLSH_N 47
603 1.1 mrg #define TYPE_RSH1ADD_N 32
604 1.1 mrg #define TYPE_RSH1SUB_N 33
605 1.1 mrg
606 1.1 mrg #define TYPE_MOD_1 35
607 1.1 mrg #define TYPE_MOD_1C 36
608 1.1 mrg #define TYPE_DIVMOD_1 37
609 1.1 mrg #define TYPE_DIVMOD_1C 38
610 1.1 mrg #define TYPE_DIVREM_1 39
611 1.1 mrg #define TYPE_DIVREM_1C 40
612 1.1 mrg #define TYPE_PREINV_DIVREM_1 41
613 1.1 mrg #define TYPE_PREINV_MOD_1 42
614 1.1 mrg #define TYPE_MOD_34LSUB1 43
615 1.1 mrg #define TYPE_UDIV_QRNND 44
616 1.1 mrg #define TYPE_UDIV_QRNND_R 45
617 1.1 mrg
618 1.1 mrg #define TYPE_DIVEXACT_1 50
619 1.1 mrg #define TYPE_DIVEXACT_BY3 51
620 1.1 mrg #define TYPE_DIVEXACT_BY3C 52
621 1.1 mrg #define TYPE_MODEXACT_1_ODD 53
622 1.1 mrg #define TYPE_MODEXACT_1C_ODD 54
623 1.1 mrg
624 1.1 mrg #define TYPE_INVERT 55
625 1.1 mrg #define TYPE_BINVERT 56
626 1.1 mrg
627 1.1 mrg #define TYPE_GCD 60
628 1.1 mrg #define TYPE_GCD_1 61
629 1.1 mrg #define TYPE_GCD_FINDA 62
630 1.1 mrg #define TYPE_MPZ_JACOBI 63
631 1.1 mrg #define TYPE_MPZ_KRONECKER 64
632 1.1 mrg #define TYPE_MPZ_KRONECKER_UI 65
633 1.1 mrg #define TYPE_MPZ_KRONECKER_SI 66
634 1.1 mrg #define TYPE_MPZ_UI_KRONECKER 67
635 1.1 mrg #define TYPE_MPZ_SI_KRONECKER 68
636 1.1 mrg
637 1.1 mrg #define TYPE_AND_N 70
638 1.1 mrg #define TYPE_NAND_N 71
639 1.1 mrg #define TYPE_ANDN_N 72
640 1.1 mrg #define TYPE_IOR_N 73
641 1.1 mrg #define TYPE_IORN_N 74
642 1.1 mrg #define TYPE_NIOR_N 75
643 1.1 mrg #define TYPE_XOR_N 76
644 1.1 mrg #define TYPE_XNOR_N 77
645 1.1 mrg
646 1.1 mrg #define TYPE_MUL_MN 80
647 1.1 mrg #define TYPE_MUL_N 81
648 1.1 mrg #define TYPE_SQR 82
649 1.1 mrg #define TYPE_UMUL_PPMM 83
650 1.1 mrg #define TYPE_UMUL_PPMM_R 84
651 1.1 mrg #define TYPE_MULLO_N 85
652 1.1 mrg
653 1.1 mrg #define TYPE_SBPI1_DIV_QR 90
654 1.1 mrg #define TYPE_TDIV_QR 91
655 1.1 mrg
656 1.1 mrg #define TYPE_SQRTREM 100
657 1.1 mrg #define TYPE_ZERO 101
658 1.1 mrg #define TYPE_GET_STR 102
659 1.1 mrg #define TYPE_POPCOUNT 103
660 1.1 mrg #define TYPE_HAMDIST 104
661 1.1 mrg
662 1.1 mrg #define TYPE_EXTRA 110
663 1.1 mrg
664 1.1 mrg struct try_t param[150];
665 1.1 mrg
666 1.1 mrg
667 1.1 mrg void
668 1.1 mrg param_init (void)
669 1.1 mrg {
670 1.1 mrg struct try_t *p;
671 1.1 mrg
672 1.1 mrg #define COPY(index) memcpy (p, ¶m[index], sizeof (*p))
673 1.1 mrg
674 1.1 mrg #if HAVE_STRINGIZE
675 1.1 mrg #define REFERENCE(fun) \
676 1.1 mrg p->reference = (tryfun_t) fun; \
677 1.1 mrg p->reference_name = #fun
678 1.1 mrg #define VALIDATE(fun) \
679 1.1 mrg p->validate = fun; \
680 1.1 mrg p->validate_name = #fun
681 1.1 mrg #else
682 1.1 mrg #define REFERENCE(fun) \
683 1.1 mrg p->reference = (tryfun_t) fun; \
684 1.1 mrg p->reference_name = "fun"
685 1.1 mrg #define VALIDATE(fun) \
686 1.1 mrg p->validate = fun; \
687 1.1 mrg p->validate_name = "fun"
688 1.1 mrg #endif
689 1.1 mrg
690 1.1 mrg
691 1.1 mrg p = ¶m[TYPE_ADD_N];
692 1.1 mrg p->retval = 1;
693 1.1 mrg p->dst[0] = 1;
694 1.1 mrg p->src[0] = 1;
695 1.1 mrg p->src[1] = 1;
696 1.1 mrg REFERENCE (refmpn_add_n);
697 1.1 mrg
698 1.1 mrg p = ¶m[TYPE_ADD_NC];
699 1.1 mrg COPY (TYPE_ADD_N);
700 1.1 mrg p->carry = CARRY_BIT;
701 1.1 mrg REFERENCE (refmpn_add_nc);
702 1.1 mrg
703 1.1 mrg p = ¶m[TYPE_SUB_N];
704 1.1 mrg COPY (TYPE_ADD_N);
705 1.1 mrg REFERENCE (refmpn_sub_n);
706 1.1 mrg
707 1.1 mrg p = ¶m[TYPE_SUB_NC];
708 1.1 mrg COPY (TYPE_ADD_NC);
709 1.1 mrg REFERENCE (refmpn_sub_nc);
710 1.1 mrg
711 1.1 mrg p = ¶m[TYPE_ADD];
712 1.1 mrg COPY (TYPE_ADD_N);
713 1.1 mrg p->size = SIZE_ALLOW_ZERO;
714 1.1 mrg p->size2 = 1;
715 1.1 mrg REFERENCE (refmpn_add);
716 1.1 mrg
717 1.1 mrg p = ¶m[TYPE_SUB];
718 1.1 mrg COPY (TYPE_ADD);
719 1.1 mrg REFERENCE (refmpn_sub);
720 1.1 mrg
721 1.1 mrg
722 1.1 mrg p = ¶m[TYPE_MUL_1];
723 1.1 mrg p->retval = 1;
724 1.1 mrg p->dst[0] = 1;
725 1.1 mrg p->src[0] = 1;
726 1.1 mrg p->multiplier = 1;
727 1.1 mrg p->overlap = OVERLAP_LOW_TO_HIGH;
728 1.1 mrg REFERENCE (refmpn_mul_1);
729 1.1 mrg
730 1.1 mrg p = ¶m[TYPE_MUL_1C];
731 1.1 mrg COPY (TYPE_MUL_1);
732 1.1 mrg p->carry = CARRY_LIMB;
733 1.1 mrg REFERENCE (refmpn_mul_1c);
734 1.1 mrg
735 1.1 mrg
736 1.1 mrg p = ¶m[TYPE_MUL_2];
737 1.1 mrg p->retval = 1;
738 1.1 mrg p->dst[0] = 1;
739 1.1 mrg p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1;
740 1.1 mrg p->src[0] = 1;
741 1.1 mrg p->src[1] = 1;
742 1.1 mrg p->msize = 2;
743 1.1 mrg p->overlap = OVERLAP_NOT_SRC2;
744 1.1 mrg REFERENCE (refmpn_mul_2);
745 1.1 mrg
746 1.1 mrg p = ¶m[TYPE_MUL_3];
747 1.1 mrg COPY (TYPE_MUL_2);
748 1.1 mrg p->msize = 3;
749 1.1 mrg REFERENCE (refmpn_mul_3);
750 1.1 mrg
751 1.1 mrg p = ¶m[TYPE_MUL_4];
752 1.1 mrg COPY (TYPE_MUL_2);
753 1.1 mrg p->msize = 4;
754 1.1 mrg REFERENCE (refmpn_mul_4);
755 1.1 mrg
756 1.1 mrg
757 1.1 mrg p = ¶m[TYPE_ADDMUL_1];
758 1.1 mrg p->retval = 1;
759 1.1 mrg p->dst[0] = 1;
760 1.1 mrg p->src[0] = 1;
761 1.1 mrg p->multiplier = 1;
762 1.1 mrg p->dst0_from_src1 = 1;
763 1.1 mrg REFERENCE (refmpn_addmul_1);
764 1.1 mrg
765 1.1 mrg p = ¶m[TYPE_ADDMUL_1C];
766 1.1 mrg COPY (TYPE_ADDMUL_1);
767 1.1 mrg p->carry = CARRY_LIMB;
768 1.1 mrg REFERENCE (refmpn_addmul_1c);
769 1.1 mrg
770 1.1 mrg p = ¶m[TYPE_SUBMUL_1];
771 1.1 mrg COPY (TYPE_ADDMUL_1);
772 1.1 mrg REFERENCE (refmpn_submul_1);
773 1.1 mrg
774 1.1 mrg p = ¶m[TYPE_SUBMUL_1C];
775 1.1 mrg COPY (TYPE_ADDMUL_1C);
776 1.1 mrg REFERENCE (refmpn_submul_1c);
777 1.1 mrg
778 1.1 mrg
779 1.1 mrg p = ¶m[TYPE_ADDMUL_2];
780 1.1 mrg p->retval = 1;
781 1.1 mrg p->dst[0] = 1;
782 1.1 mrg p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1;
783 1.1 mrg p->src[0] = 1;
784 1.1 mrg p->src[1] = 1;
785 1.1 mrg p->msize = 2;
786 1.1 mrg p->dst0_from_src1 = 1;
787 1.1 mrg p->overlap = OVERLAP_NOT_SRC2;
788 1.1 mrg REFERENCE (refmpn_addmul_2);
789 1.1 mrg
790 1.1 mrg p = ¶m[TYPE_ADDMUL_3];
791 1.1 mrg COPY (TYPE_ADDMUL_2);
792 1.1 mrg p->msize = 3;
793 1.1 mrg REFERENCE (refmpn_addmul_3);
794 1.1 mrg
795 1.1 mrg p = ¶m[TYPE_ADDMUL_4];
796 1.1 mrg COPY (TYPE_ADDMUL_2);
797 1.1 mrg p->msize = 4;
798 1.1 mrg REFERENCE (refmpn_addmul_4);
799 1.1 mrg
800 1.1 mrg p = ¶m[TYPE_ADDMUL_5];
801 1.1 mrg COPY (TYPE_ADDMUL_2);
802 1.1 mrg p->msize = 5;
803 1.1 mrg REFERENCE (refmpn_addmul_5);
804 1.1 mrg
805 1.1 mrg p = ¶m[TYPE_ADDMUL_6];
806 1.1 mrg COPY (TYPE_ADDMUL_2);
807 1.1 mrg p->msize = 6;
808 1.1 mrg REFERENCE (refmpn_addmul_6);
809 1.1 mrg
810 1.1 mrg p = ¶m[TYPE_ADDMUL_7];
811 1.1 mrg COPY (TYPE_ADDMUL_2);
812 1.1 mrg p->msize = 7;
813 1.1 mrg REFERENCE (refmpn_addmul_7);
814 1.1 mrg
815 1.1 mrg p = ¶m[TYPE_ADDMUL_8];
816 1.1 mrg COPY (TYPE_ADDMUL_2);
817 1.1 mrg p->msize = 8;
818 1.1 mrg REFERENCE (refmpn_addmul_8);
819 1.1 mrg
820 1.1 mrg
821 1.1 mrg p = ¶m[TYPE_AND_N];
822 1.1 mrg p->dst[0] = 1;
823 1.1 mrg p->src[0] = 1;
824 1.1 mrg p->src[1] = 1;
825 1.1 mrg REFERENCE (refmpn_and_n);
826 1.1 mrg
827 1.1 mrg p = ¶m[TYPE_ANDN_N];
828 1.1 mrg COPY (TYPE_AND_N);
829 1.1 mrg REFERENCE (refmpn_andn_n);
830 1.1 mrg
831 1.1 mrg p = ¶m[TYPE_NAND_N];
832 1.1 mrg COPY (TYPE_AND_N);
833 1.1 mrg REFERENCE (refmpn_nand_n);
834 1.1 mrg
835 1.1 mrg p = ¶m[TYPE_IOR_N];
836 1.1 mrg COPY (TYPE_AND_N);
837 1.1 mrg REFERENCE (refmpn_ior_n);
838 1.1 mrg
839 1.1 mrg p = ¶m[TYPE_IORN_N];
840 1.1 mrg COPY (TYPE_AND_N);
841 1.1 mrg REFERENCE (refmpn_iorn_n);
842 1.1 mrg
843 1.1 mrg p = ¶m[TYPE_NIOR_N];
844 1.1 mrg COPY (TYPE_AND_N);
845 1.1 mrg REFERENCE (refmpn_nior_n);
846 1.1 mrg
847 1.1 mrg p = ¶m[TYPE_XOR_N];
848 1.1 mrg COPY (TYPE_AND_N);
849 1.1 mrg REFERENCE (refmpn_xor_n);
850 1.1 mrg
851 1.1 mrg p = ¶m[TYPE_XNOR_N];
852 1.1 mrg COPY (TYPE_AND_N);
853 1.1 mrg REFERENCE (refmpn_xnor_n);
854 1.1 mrg
855 1.1 mrg
856 1.1 mrg p = ¶m[TYPE_ADDSUB_N];
857 1.1 mrg p->retval = 1;
858 1.1 mrg p->dst[0] = 1;
859 1.1 mrg p->dst[1] = 1;
860 1.1 mrg p->src[0] = 1;
861 1.1 mrg p->src[1] = 1;
862 1.1 mrg REFERENCE (refmpn_add_n_sub_n);
863 1.1 mrg
864 1.1 mrg p = ¶m[TYPE_ADDSUB_NC];
865 1.1 mrg COPY (TYPE_ADDSUB_N);
866 1.1 mrg p->carry = CARRY_4;
867 1.1 mrg REFERENCE (refmpn_add_n_sub_nc);
868 1.1 mrg
869 1.1 mrg
870 1.1 mrg p = ¶m[TYPE_COPY];
871 1.1 mrg p->dst[0] = 1;
872 1.1 mrg p->src[0] = 1;
873 1.1 mrg p->overlap = OVERLAP_NONE;
874 1.1 mrg p->size = SIZE_ALLOW_ZERO;
875 1.1 mrg REFERENCE (refmpn_copy);
876 1.1 mrg
877 1.1 mrg p = ¶m[TYPE_COPYI];
878 1.1 mrg p->dst[0] = 1;
879 1.1 mrg p->src[0] = 1;
880 1.1 mrg p->overlap = OVERLAP_LOW_TO_HIGH;
881 1.1 mrg p->size = SIZE_ALLOW_ZERO;
882 1.1 mrg REFERENCE (refmpn_copyi);
883 1.1 mrg
884 1.1 mrg p = ¶m[TYPE_COPYD];
885 1.1 mrg p->dst[0] = 1;
886 1.1 mrg p->src[0] = 1;
887 1.1 mrg p->overlap = OVERLAP_HIGH_TO_LOW;
888 1.1 mrg p->size = SIZE_ALLOW_ZERO;
889 1.1 mrg REFERENCE (refmpn_copyd);
890 1.1 mrg
891 1.1 mrg p = ¶m[TYPE_COM];
892 1.1 mrg p->dst[0] = 1;
893 1.1 mrg p->src[0] = 1;
894 1.1 mrg REFERENCE (refmpn_com);
895 1.1 mrg
896 1.1 mrg
897 1.1 mrg p = ¶m[TYPE_ADDLSH1_N];
898 1.1 mrg COPY (TYPE_ADD_N);
899 1.1 mrg REFERENCE (refmpn_addlsh1_n);
900 1.1 mrg
901 1.1 mrg p = ¶m[TYPE_ADDLSH2_N];
902 1.1 mrg COPY (TYPE_ADD_N);
903 1.1 mrg REFERENCE (refmpn_addlsh2_n);
904 1.1 mrg
905 1.1 mrg p = ¶m[TYPE_ADDLSH_N];
906 1.1 mrg COPY (TYPE_ADD_N);
907 1.1 mrg p->shift = 1;
908 1.1 mrg REFERENCE (refmpn_addlsh_n);
909 1.1 mrg
910 1.1 mrg p = ¶m[TYPE_SUBLSH1_N];
911 1.1 mrg COPY (TYPE_ADD_N);
912 1.1 mrg REFERENCE (refmpn_sublsh1_n);
913 1.1 mrg
914 1.1 mrg p = ¶m[TYPE_SUBLSH_N];
915 1.1 mrg COPY (TYPE_ADDLSH_N);
916 1.1 mrg REFERENCE (refmpn_sublsh_n);
917 1.1 mrg
918 1.1 mrg p = ¶m[TYPE_RSBLSH1_N];
919 1.1 mrg COPY (TYPE_ADD_N);
920 1.1 mrg REFERENCE (refmpn_rsblsh1_n);
921 1.1 mrg
922 1.1 mrg p = ¶m[TYPE_RSBLSH2_N];
923 1.1 mrg COPY (TYPE_ADD_N);
924 1.1 mrg REFERENCE (refmpn_rsblsh2_n);
925 1.1 mrg
926 1.1 mrg p = ¶m[TYPE_RSBLSH_N];
927 1.1 mrg COPY (TYPE_ADDLSH_N);
928 1.1 mrg REFERENCE (refmpn_rsblsh_n);
929 1.1 mrg
930 1.1 mrg p = ¶m[TYPE_RSH1ADD_N];
931 1.1 mrg COPY (TYPE_ADD_N);
932 1.1 mrg REFERENCE (refmpn_rsh1add_n);
933 1.1 mrg
934 1.1 mrg p = ¶m[TYPE_RSH1SUB_N];
935 1.1 mrg COPY (TYPE_ADD_N);
936 1.1 mrg REFERENCE (refmpn_rsh1sub_n);
937 1.1 mrg
938 1.1 mrg
939 1.1 mrg p = ¶m[TYPE_MOD_1];
940 1.1 mrg p->retval = 1;
941 1.1 mrg p->src[0] = 1;
942 1.1 mrg p->size = SIZE_ALLOW_ZERO;
943 1.1 mrg p->divisor = DIVISOR_LIMB;
944 1.1 mrg REFERENCE (refmpn_mod_1);
945 1.1 mrg
946 1.1 mrg p = ¶m[TYPE_MOD_1C];
947 1.1 mrg COPY (TYPE_MOD_1);
948 1.1 mrg p->carry = CARRY_DIVISOR;
949 1.1 mrg REFERENCE (refmpn_mod_1c);
950 1.1 mrg
951 1.1 mrg p = ¶m[TYPE_DIVMOD_1];
952 1.1 mrg COPY (TYPE_MOD_1);
953 1.1 mrg p->dst[0] = 1;
954 1.1 mrg REFERENCE (refmpn_divmod_1);
955 1.1 mrg
956 1.1 mrg p = ¶m[TYPE_DIVMOD_1C];
957 1.1 mrg COPY (TYPE_DIVMOD_1);
958 1.1 mrg p->carry = CARRY_DIVISOR;
959 1.1 mrg REFERENCE (refmpn_divmod_1c);
960 1.1 mrg
961 1.1 mrg p = ¶m[TYPE_DIVREM_1];
962 1.1 mrg COPY (TYPE_DIVMOD_1);
963 1.1 mrg p->size2 = SIZE_FRACTION;
964 1.1 mrg p->dst_size[0] = SIZE_SUM;
965 1.1 mrg REFERENCE (refmpn_divrem_1);
966 1.1 mrg
967 1.1 mrg p = ¶m[TYPE_DIVREM_1C];
968 1.1 mrg COPY (TYPE_DIVREM_1);
969 1.1 mrg p->carry = CARRY_DIVISOR;
970 1.1 mrg REFERENCE (refmpn_divrem_1c);
971 1.1 mrg
972 1.1 mrg p = ¶m[TYPE_PREINV_DIVREM_1];
973 1.1 mrg COPY (TYPE_DIVREM_1);
974 1.1 mrg p->size = SIZE_YES; /* ie. no size==0 */
975 1.1 mrg REFERENCE (refmpn_preinv_divrem_1);
976 1.1 mrg
977 1.1 mrg p = ¶m[TYPE_PREINV_MOD_1];
978 1.1 mrg p->retval = 1;
979 1.1 mrg p->src[0] = 1;
980 1.1 mrg p->divisor = DIVISOR_NORM;
981 1.1 mrg REFERENCE (refmpn_preinv_mod_1);
982 1.1 mrg
983 1.1 mrg p = ¶m[TYPE_MOD_34LSUB1];
984 1.1 mrg p->retval = 1;
985 1.1 mrg p->src[0] = 1;
986 1.1 mrg VALIDATE (validate_mod_34lsub1);
987 1.1 mrg
988 1.1 mrg p = ¶m[TYPE_UDIV_QRNND];
989 1.1 mrg p->retval = 1;
990 1.1 mrg p->src[0] = 1;
991 1.1 mrg p->dst[0] = 1;
992 1.1 mrg p->dst_size[0] = SIZE_1;
993 1.1 mrg p->divisor = UDIV_NEEDS_NORMALIZATION ? DIVISOR_NORM : DIVISOR_LIMB;
994 1.1 mrg p->data = DATA_UDIV_QRNND;
995 1.1 mrg p->overlap = OVERLAP_NONE;
996 1.1 mrg REFERENCE (refmpn_udiv_qrnnd);
997 1.1 mrg
998 1.1 mrg p = ¶m[TYPE_UDIV_QRNND_R];
999 1.1 mrg COPY (TYPE_UDIV_QRNND);
1000 1.1 mrg REFERENCE (refmpn_udiv_qrnnd_r);
1001 1.1 mrg
1002 1.1 mrg
1003 1.1 mrg p = ¶m[TYPE_DIVEXACT_1];
1004 1.1 mrg p->dst[0] = 1;
1005 1.1 mrg p->src[0] = 1;
1006 1.1 mrg p->divisor = DIVISOR_LIMB;
1007 1.1 mrg p->data = DATA_MULTIPLE_DIVISOR;
1008 1.1 mrg VALIDATE (validate_divexact_1);
1009 1.1 mrg REFERENCE (refmpn_divmod_1);
1010 1.1 mrg
1011 1.1 mrg
1012 1.1 mrg p = ¶m[TYPE_DIVEXACT_BY3];
1013 1.1 mrg p->retval = 1;
1014 1.1 mrg p->dst[0] = 1;
1015 1.1 mrg p->src[0] = 1;
1016 1.1 mrg REFERENCE (refmpn_divexact_by3);
1017 1.1 mrg
1018 1.1 mrg p = ¶m[TYPE_DIVEXACT_BY3C];
1019 1.1 mrg COPY (TYPE_DIVEXACT_BY3);
1020 1.1 mrg p->carry = CARRY_3;
1021 1.1 mrg REFERENCE (refmpn_divexact_by3c);
1022 1.1 mrg
1023 1.1 mrg
1024 1.1 mrg p = ¶m[TYPE_MODEXACT_1_ODD];
1025 1.1 mrg p->retval = 1;
1026 1.1 mrg p->src[0] = 1;
1027 1.1 mrg p->divisor = DIVISOR_ODD;
1028 1.1 mrg VALIDATE (validate_modexact_1_odd);
1029 1.1 mrg
1030 1.1 mrg p = ¶m[TYPE_MODEXACT_1C_ODD];
1031 1.1 mrg COPY (TYPE_MODEXACT_1_ODD);
1032 1.1 mrg p->carry = CARRY_LIMB;
1033 1.1 mrg VALIDATE (validate_modexact_1c_odd);
1034 1.1 mrg
1035 1.1 mrg
1036 1.1 mrg p = ¶m[TYPE_GCD_1];
1037 1.1 mrg p->retval = 1;
1038 1.1 mrg p->src[0] = 1;
1039 1.1 mrg p->data = DATA_NON_ZERO;
1040 1.1 mrg p->divisor = DIVISOR_LIMB;
1041 1.1 mrg REFERENCE (refmpn_gcd_1);
1042 1.1 mrg
1043 1.1 mrg p = ¶m[TYPE_GCD];
1044 1.1 mrg p->retval = 1;
1045 1.1 mrg p->dst[0] = 1;
1046 1.1 mrg p->src[0] = 1;
1047 1.1 mrg p->src[1] = 1;
1048 1.1 mrg p->size2 = 1;
1049 1.1 mrg p->dst_size[0] = SIZE_RETVAL;
1050 1.1 mrg p->overlap = OVERLAP_NOT_SRCS;
1051 1.1 mrg p->data = DATA_GCD;
1052 1.1 mrg REFERENCE (refmpn_gcd);
1053 1.1 mrg
1054 1.1 mrg
1055 1.1 mrg p = ¶m[TYPE_MPZ_JACOBI];
1056 1.1 mrg p->retval = 1;
1057 1.1 mrg p->src[0] = 1;
1058 1.1 mrg p->size = SIZE_ALLOW_ZERO;
1059 1.1 mrg p->src[1] = 1;
1060 1.1 mrg p->data = DATA_SRC1_ODD;
1061 1.1 mrg p->size2 = 1;
1062 1.1 mrg p->carry = CARRY_4;
1063 1.1 mrg p->carry_sign = 1;
1064 1.1 mrg REFERENCE (refmpz_jacobi);
1065 1.1 mrg
1066 1.1 mrg p = ¶m[TYPE_MPZ_KRONECKER];
1067 1.1 mrg COPY (TYPE_MPZ_JACOBI);
1068 1.1 mrg p->data = 0; /* clear inherited DATA_SRC1_ODD */
1069 1.1 mrg REFERENCE (refmpz_kronecker);
1070 1.1 mrg
1071 1.1 mrg
1072 1.1 mrg p = ¶m[TYPE_MPZ_KRONECKER_UI];
1073 1.1 mrg p->retval = 1;
1074 1.1 mrg p->src[0] = 1;
1075 1.1 mrg p->size = SIZE_ALLOW_ZERO;
1076 1.1 mrg p->multiplier = 1;
1077 1.1 mrg p->carry = CARRY_BIT;
1078 1.1 mrg REFERENCE (refmpz_kronecker_ui);
1079 1.1 mrg
1080 1.1 mrg p = ¶m[TYPE_MPZ_KRONECKER_SI];
1081 1.1 mrg COPY (TYPE_MPZ_KRONECKER_UI);
1082 1.1 mrg REFERENCE (refmpz_kronecker_si);
1083 1.1 mrg
1084 1.1 mrg p = ¶m[TYPE_MPZ_UI_KRONECKER];
1085 1.1 mrg COPY (TYPE_MPZ_KRONECKER_UI);
1086 1.1 mrg REFERENCE (refmpz_ui_kronecker);
1087 1.1 mrg
1088 1.1 mrg p = ¶m[TYPE_MPZ_SI_KRONECKER];
1089 1.1 mrg COPY (TYPE_MPZ_KRONECKER_UI);
1090 1.1 mrg REFERENCE (refmpz_si_kronecker);
1091 1.1 mrg
1092 1.1 mrg
1093 1.1 mrg p = ¶m[TYPE_SQR];
1094 1.1 mrg p->dst[0] = 1;
1095 1.1 mrg p->src[0] = 1;
1096 1.1 mrg p->dst_size[0] = SIZE_SUM;
1097 1.1 mrg p->overlap = OVERLAP_NONE;
1098 1.1 mrg REFERENCE (refmpn_sqr);
1099 1.1 mrg
1100 1.1 mrg p = ¶m[TYPE_MUL_N];
1101 1.1 mrg COPY (TYPE_SQR);
1102 1.1 mrg p->src[1] = 1;
1103 1.1 mrg REFERENCE (refmpn_mul_n);
1104 1.1 mrg
1105 1.1 mrg p = ¶m[TYPE_MULLO_N];
1106 1.1 mrg COPY (TYPE_MUL_N);
1107 1.1 mrg p->dst_size[0] = 0;
1108 1.1 mrg REFERENCE (refmpn_mullo_n);
1109 1.1 mrg
1110 1.1 mrg p = ¶m[TYPE_MUL_MN];
1111 1.1 mrg COPY (TYPE_MUL_N);
1112 1.1 mrg p->size2 = 1;
1113 1.1 mrg REFERENCE (refmpn_mul_basecase);
1114 1.1 mrg
1115 1.1 mrg p = ¶m[TYPE_UMUL_PPMM];
1116 1.1 mrg p->retval = 1;
1117 1.1 mrg p->src[0] = 1;
1118 1.1 mrg p->dst[0] = 1;
1119 1.1 mrg p->dst_size[0] = SIZE_1;
1120 1.1 mrg p->overlap = OVERLAP_NONE;
1121 1.1 mrg REFERENCE (refmpn_umul_ppmm);
1122 1.1 mrg
1123 1.1 mrg p = ¶m[TYPE_UMUL_PPMM_R];
1124 1.1 mrg COPY (TYPE_UMUL_PPMM);
1125 1.1 mrg REFERENCE (refmpn_umul_ppmm_r);
1126 1.1 mrg
1127 1.1 mrg
1128 1.1 mrg p = ¶m[TYPE_RSHIFT];
1129 1.1 mrg p->retval = 1;
1130 1.1 mrg p->dst[0] = 1;
1131 1.1 mrg p->src[0] = 1;
1132 1.1 mrg p->shift = 1;
1133 1.1 mrg p->overlap = OVERLAP_LOW_TO_HIGH;
1134 1.1 mrg REFERENCE (refmpn_rshift);
1135 1.1 mrg
1136 1.1 mrg p = ¶m[TYPE_LSHIFT];
1137 1.1 mrg COPY (TYPE_RSHIFT);
1138 1.1 mrg p->overlap = OVERLAP_HIGH_TO_LOW;
1139 1.1 mrg REFERENCE (refmpn_lshift);
1140 1.1 mrg
1141 1.1 mrg p = ¶m[TYPE_LSHIFTC];
1142 1.1 mrg COPY (TYPE_RSHIFT);
1143 1.1 mrg p->overlap = OVERLAP_HIGH_TO_LOW;
1144 1.1 mrg REFERENCE (refmpn_lshiftc);
1145 1.1 mrg
1146 1.1 mrg
1147 1.1 mrg p = ¶m[TYPE_POPCOUNT];
1148 1.1 mrg p->retval = 1;
1149 1.1 mrg p->src[0] = 1;
1150 1.1 mrg REFERENCE (refmpn_popcount);
1151 1.1 mrg
1152 1.1 mrg p = ¶m[TYPE_HAMDIST];
1153 1.1 mrg COPY (TYPE_POPCOUNT);
1154 1.1 mrg p->src[1] = 1;
1155 1.1 mrg REFERENCE (refmpn_hamdist);
1156 1.1 mrg
1157 1.1 mrg
1158 1.1 mrg p = ¶m[TYPE_SBPI1_DIV_QR];
1159 1.1 mrg p->retval = 1;
1160 1.1 mrg p->dst[0] = 1;
1161 1.1 mrg p->dst[1] = 1;
1162 1.1 mrg p->src[0] = 1;
1163 1.1 mrg p->src[1] = 1;
1164 1.1 mrg p->data = DATA_SRC1_HIGHBIT;
1165 1.1 mrg p->size2 = 1;
1166 1.1 mrg p->dst_size[0] = SIZE_DIFF;
1167 1.1 mrg p->overlap = OVERLAP_NONE;
1168 1.1 mrg REFERENCE (refmpn_sb_div_qr);
1169 1.1 mrg
1170 1.1 mrg p = ¶m[TYPE_TDIV_QR];
1171 1.1 mrg p->dst[0] = 1;
1172 1.1 mrg p->dst[1] = 1;
1173 1.1 mrg p->src[0] = 1;
1174 1.1 mrg p->src[1] = 1;
1175 1.1 mrg p->size2 = 1;
1176 1.1 mrg p->dst_size[0] = SIZE_DIFF_PLUS_1;
1177 1.1 mrg p->dst_size[1] = SIZE_SIZE2;
1178 1.1 mrg p->overlap = OVERLAP_NONE;
1179 1.1 mrg REFERENCE (refmpn_tdiv_qr);
1180 1.1 mrg
1181 1.1 mrg p = ¶m[TYPE_SQRTREM];
1182 1.1 mrg p->retval = 1;
1183 1.1 mrg p->dst[0] = 1;
1184 1.1 mrg p->dst[1] = 1;
1185 1.1 mrg p->src[0] = 1;
1186 1.1 mrg p->dst_size[0] = SIZE_CEIL_HALF;
1187 1.1 mrg p->dst_size[1] = SIZE_RETVAL;
1188 1.1 mrg p->overlap = OVERLAP_NONE;
1189 1.1 mrg VALIDATE (validate_sqrtrem);
1190 1.1 mrg REFERENCE (refmpn_sqrtrem);
1191 1.1 mrg
1192 1.1 mrg p = ¶m[TYPE_ZERO];
1193 1.1 mrg p->dst[0] = 1;
1194 1.1 mrg p->size = SIZE_ALLOW_ZERO;
1195 1.1 mrg REFERENCE (refmpn_zero);
1196 1.1 mrg
1197 1.1 mrg p = ¶m[TYPE_GET_STR];
1198 1.1 mrg p->retval = 1;
1199 1.1 mrg p->src[0] = 1;
1200 1.1 mrg p->size = SIZE_ALLOW_ZERO;
1201 1.1 mrg p->dst[0] = 1;
1202 1.1 mrg p->dst[1] = 1;
1203 1.1 mrg p->dst_size[0] = SIZE_GET_STR;
1204 1.1 mrg p->dst_bytes[0] = 1;
1205 1.1 mrg p->overlap = OVERLAP_NONE;
1206 1.1 mrg REFERENCE (refmpn_get_str);
1207 1.1 mrg
1208 1.1 mrg p = ¶m[TYPE_BINVERT];
1209 1.1 mrg p->dst[0] = 1;
1210 1.1 mrg p->src[0] = 1;
1211 1.1 mrg p->data = DATA_SRC0_ODD;
1212 1.1 mrg p->overlap = OVERLAP_NONE;
1213 1.1 mrg REFERENCE (refmpn_binvert);
1214 1.1 mrg
1215 1.1 mrg p = ¶m[TYPE_INVERT];
1216 1.1 mrg p->dst[0] = 1;
1217 1.1 mrg p->src[0] = 1;
1218 1.1 mrg p->data = DATA_SRC0_HIGHBIT;
1219 1.1 mrg p->overlap = OVERLAP_NONE;
1220 1.1 mrg REFERENCE (refmpn_invert);
1221 1.1 mrg
1222 1.1 mrg #ifdef EXTRA_PARAM_INIT
1223 1.1 mrg EXTRA_PARAM_INIT
1224 1.1 mrg #endif
1225 1.1 mrg }
1226 1.1 mrg
1227 1.1 mrg
/* The following are macros if there are no native versions, so wrap them in
   functions that can be in choice_array[].  */
1230 1.1 mrg
1231 1.1 mrg void
1232 1.1 mrg MPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1233 1.1 mrg { MPN_COPY (rp, sp, size); }
1234 1.1 mrg
1235 1.1 mrg void
1236 1.1 mrg MPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1237 1.1 mrg { MPN_COPY_INCR (rp, sp, size); }
1238 1.1 mrg
1239 1.1 mrg void
1240 1.1 mrg MPN_COPY_DECR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1241 1.1 mrg { MPN_COPY_DECR (rp, sp, size); }
1242 1.1 mrg
1243 1.1 mrg void
1244 1.1 mrg __GMPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1245 1.1 mrg { __GMPN_COPY (rp, sp, size); }
1246 1.1 mrg
1247 1.1 mrg #ifdef __GMPN_COPY_INCR
1248 1.1 mrg void
1249 1.1 mrg __GMPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1250 1.1 mrg { __GMPN_COPY_INCR (rp, sp, size); }
1251 1.1 mrg #endif
1252 1.1 mrg
1253 1.1 mrg void
1254 1.1 mrg mpn_com_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1255 1.1 mrg { mpn_com (rp, sp, size); }
1256 1.1 mrg
1257 1.1 mrg void
1258 1.1 mrg mpn_and_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1259 1.1 mrg { mpn_and_n (rp, s1, s2, size); }
1260 1.1 mrg
1261 1.1 mrg void
1262 1.1 mrg mpn_andn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1263 1.1 mrg { mpn_andn_n (rp, s1, s2, size); }
1264 1.1 mrg
1265 1.1 mrg void
1266 1.1 mrg mpn_nand_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1267 1.1 mrg { mpn_nand_n (rp, s1, s2, size); }
1268 1.1 mrg
1269 1.1 mrg void
1270 1.1 mrg mpn_ior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1271 1.1 mrg { mpn_ior_n (rp, s1, s2, size); }
1272 1.1 mrg
1273 1.1 mrg void
1274 1.1 mrg mpn_iorn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1275 1.1 mrg { mpn_iorn_n (rp, s1, s2, size); }
1276 1.1 mrg
1277 1.1 mrg void
1278 1.1 mrg mpn_nior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1279 1.1 mrg { mpn_nior_n (rp, s1, s2, size); }
1280 1.1 mrg
1281 1.1 mrg void
1282 1.1 mrg mpn_xor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1283 1.1 mrg { mpn_xor_n (rp, s1, s2, size); }
1284 1.1 mrg
1285 1.1 mrg void
1286 1.1 mrg mpn_xnor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1287 1.1 mrg { mpn_xnor_n (rp, s1, s2, size); }
1288 1.1 mrg
1289 1.1 mrg mp_limb_t
1290 1.1 mrg udiv_qrnnd_fun (mp_limb_t *remptr, mp_limb_t n1, mp_limb_t n0, mp_limb_t d)
1291 1.1 mrg {
1292 1.1 mrg mp_limb_t q;
1293 1.1 mrg udiv_qrnnd (q, *remptr, n1, n0, d);
1294 1.1 mrg return q;
1295 1.1 mrg }
1296 1.1 mrg
1297 1.1 mrg mp_limb_t
1298 1.1 mrg mpn_divexact_by3_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1299 1.1 mrg {
1300 1.1 mrg return mpn_divexact_by3 (rp, sp, size);
1301 1.1 mrg }
1302 1.1 mrg
1303 1.1 mrg mp_limb_t
1304 1.1 mrg mpn_modexact_1_odd_fun (mp_srcptr ptr, mp_size_t size, mp_limb_t divisor)
1305 1.1 mrg {
1306 1.1 mrg return mpn_modexact_1_odd (ptr, size, divisor);
1307 1.1 mrg }
1308 1.1 mrg
1309 1.1 mrg void
1310 1.1 mrg mpn_toom22_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1311 1.1 mrg {
1312 1.1 mrg mp_ptr tspace;
1313 1.1 mrg TMP_DECL;
1314 1.1 mrg TMP_MARK;
1315 1.1 mrg tspace = TMP_ALLOC_LIMBS (mpn_toom22_mul_itch (size, size));
1316 1.1 mrg mpn_toom22_mul (dst, src1, size, src2, size, tspace);
1317 1.1 mrg TMP_FREE;
1318 1.1 mrg }
1319 1.1 mrg void
1320 1.1 mrg mpn_toom2_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1321 1.1 mrg {
1322 1.1 mrg mp_ptr tspace;
1323 1.1 mrg TMP_DECL;
1324 1.1 mrg TMP_MARK;
1325 1.1 mrg tspace = TMP_ALLOC_LIMBS (mpn_toom2_sqr_itch (size));
1326 1.1 mrg mpn_toom2_sqr (dst, src, size, tspace);
1327 1.1 mrg TMP_FREE;
1328 1.1 mrg }
1329 1.1 mrg void
1330 1.1 mrg mpn_toom33_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1331 1.1 mrg {
1332 1.1 mrg mp_ptr tspace;
1333 1.1 mrg TMP_DECL;
1334 1.1 mrg TMP_MARK;
1335 1.1 mrg tspace = TMP_ALLOC_LIMBS (mpn_toom33_mul_itch (size, size));
1336 1.1 mrg mpn_toom33_mul (dst, src1, size, src2, size, tspace);
1337 1.1 mrg TMP_FREE;
1338 1.1 mrg }
1339 1.1 mrg void
1340 1.1 mrg mpn_toom3_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1341 1.1 mrg {
1342 1.1 mrg mp_ptr tspace;
1343 1.1 mrg TMP_DECL;
1344 1.1 mrg TMP_MARK;
1345 1.1 mrg tspace = TMP_ALLOC_LIMBS (mpn_toom3_sqr_itch (size));
1346 1.1 mrg mpn_toom3_sqr (dst, src, size, tspace);
1347 1.1 mrg TMP_FREE;
1348 1.1 mrg }
1349 1.1 mrg void
1350 1.1 mrg mpn_toom44_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1351 1.1 mrg {
1352 1.1 mrg mp_ptr tspace;
1353 1.1 mrg TMP_DECL;
1354 1.1 mrg TMP_MARK;
1355 1.1 mrg tspace = TMP_ALLOC_LIMBS (mpn_toom44_mul_itch (size, size));
1356 1.1 mrg mpn_toom44_mul (dst, src1, size, src2, size, tspace);
1357 1.1 mrg TMP_FREE;
1358 1.1 mrg }
1359 1.1 mrg void
1360 1.1 mrg mpn_toom4_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1361 1.1 mrg {
1362 1.1 mrg mp_ptr tspace;
1363 1.1 mrg TMP_DECL;
1364 1.1 mrg TMP_MARK;
1365 1.1 mrg tspace = TMP_ALLOC_LIMBS (mpn_toom4_sqr_itch (size));
1366 1.1 mrg mpn_toom4_sqr (dst, src, size, tspace);
1367 1.1 mrg TMP_FREE;
1368 1.1 mrg }
1369 1.1 mrg
1370 1.1 mrg mp_limb_t
1371 1.1 mrg umul_ppmm_fun (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2)
1372 1.1 mrg {
1373 1.1 mrg mp_limb_t high;
1374 1.1 mrg umul_ppmm (high, *lowptr, m1, m2);
1375 1.1 mrg return high;
1376 1.1 mrg }
1377 1.1 mrg
1378 1.1 mrg void
1379 1.1 mrg MPN_ZERO_fun (mp_ptr ptr, mp_size_t size)
1380 1.1 mrg { MPN_ZERO (ptr, size); }
1381 1.1 mrg
1382 1.1 mrg
1383 1.1 mrg struct choice_t {
1384 1.1 mrg const char *name;
1385 1.1 mrg tryfun_t function;
1386 1.1 mrg int type;
1387 1.1 mrg mp_size_t minsize;
1388 1.1 mrg };
1389 1.1 mrg
1390 1.1 mrg #if HAVE_STRINGIZE
1391 1.1 mrg #define TRY(fun) #fun, (tryfun_t) fun
1392 1.1 mrg #define TRY_FUNFUN(fun) #fun, (tryfun_t) fun##_fun
1393 1.1 mrg #else
1394 1.1 mrg #define TRY(fun) "fun", (tryfun_t) fun
1395 1.1 mrg #define TRY_FUNFUN(fun) "fun", (tryfun_t) fun/**/_fun
1396 1.1 mrg #endif
1397 1.1 mrg
1398 1.1 mrg const struct choice_t choice_array[] = {
1399 1.1 mrg { TRY(mpn_add), TYPE_ADD },
1400 1.1 mrg { TRY(mpn_sub), TYPE_SUB },
1401 1.1 mrg
1402 1.1 mrg { TRY(mpn_add_n), TYPE_ADD_N },
1403 1.1 mrg { TRY(mpn_sub_n), TYPE_SUB_N },
1404 1.1 mrg
1405 1.1 mrg #if HAVE_NATIVE_mpn_add_nc
1406 1.1 mrg { TRY(mpn_add_nc), TYPE_ADD_NC },
1407 1.1 mrg #endif
1408 1.1 mrg #if HAVE_NATIVE_mpn_sub_nc
1409 1.1 mrg { TRY(mpn_sub_nc), TYPE_SUB_NC },
1410 1.1 mrg #endif
1411 1.1 mrg
1412 1.1 mrg #if HAVE_NATIVE_mpn_add_n_sub_n
1413 1.1 mrg { TRY(mpn_add_n_sub_n), TYPE_ADDSUB_N },
1414 1.1 mrg #endif
1415 1.1 mrg #if HAVE_NATIVE_mpn_add_n_sub_nc
1416 1.1 mrg { TRY(mpn_add_n_sub_nc), TYPE_ADDSUB_NC },
1417 1.1 mrg #endif
1418 1.1 mrg
1419 1.1 mrg { TRY(mpn_addmul_1), TYPE_ADDMUL_1 },
1420 1.1 mrg { TRY(mpn_submul_1), TYPE_SUBMUL_1 },
1421 1.1 mrg #if HAVE_NATIVE_mpn_addmul_1c
1422 1.1 mrg { TRY(mpn_addmul_1c), TYPE_ADDMUL_1C },
1423 1.1 mrg #endif
1424 1.1 mrg #if HAVE_NATIVE_mpn_submul_1c
1425 1.1 mrg { TRY(mpn_submul_1c), TYPE_SUBMUL_1C },
1426 1.1 mrg #endif
1427 1.1 mrg
1428 1.1 mrg #if HAVE_NATIVE_mpn_addmul_2
1429 1.1 mrg { TRY(mpn_addmul_2), TYPE_ADDMUL_2, 2 },
1430 1.1 mrg #endif
1431 1.1 mrg #if HAVE_NATIVE_mpn_addmul_3
1432 1.1 mrg { TRY(mpn_addmul_3), TYPE_ADDMUL_3, 3 },
1433 1.1 mrg #endif
1434 1.1 mrg #if HAVE_NATIVE_mpn_addmul_4
1435 1.1 mrg { TRY(mpn_addmul_4), TYPE_ADDMUL_4, 4 },
1436 1.1 mrg #endif
1437 1.1 mrg #if HAVE_NATIVE_mpn_addmul_5
1438 1.1 mrg { TRY(mpn_addmul_5), TYPE_ADDMUL_5, 5 },
1439 1.1 mrg #endif
1440 1.1 mrg #if HAVE_NATIVE_mpn_addmul_6
1441 1.1 mrg { TRY(mpn_addmul_6), TYPE_ADDMUL_6, 6 },
1442 1.1 mrg #endif
1443 1.1 mrg #if HAVE_NATIVE_mpn_addmul_7
1444 1.1 mrg { TRY(mpn_addmul_7), TYPE_ADDMUL_7, 7 },
1445 1.1 mrg #endif
1446 1.1 mrg #if HAVE_NATIVE_mpn_addmul_8
1447 1.1 mrg { TRY(mpn_addmul_8), TYPE_ADDMUL_8, 8 },
1448 1.1 mrg #endif
1449 1.1 mrg
1450 1.1 mrg { TRY_FUNFUN(mpn_com), TYPE_COM },
1451 1.1 mrg
1452 1.1 mrg { TRY_FUNFUN(MPN_COPY), TYPE_COPY },
1453 1.1 mrg { TRY_FUNFUN(MPN_COPY_INCR), TYPE_COPYI },
1454 1.1 mrg { TRY_FUNFUN(MPN_COPY_DECR), TYPE_COPYD },
1455 1.1 mrg
1456 1.1 mrg { TRY_FUNFUN(__GMPN_COPY), TYPE_COPY },
1457 1.1 mrg #ifdef __GMPN_COPY_INCR
1458 1.1 mrg { TRY_FUNFUN(__GMPN_COPY_INCR), TYPE_COPYI },
1459 1.1 mrg #endif
1460 1.1 mrg
1461 1.1 mrg #if HAVE_NATIVE_mpn_copyi
1462 1.1 mrg { TRY(mpn_copyi), TYPE_COPYI },
1463 1.1 mrg #endif
1464 1.1 mrg #if HAVE_NATIVE_mpn_copyd
1465 1.1 mrg { TRY(mpn_copyd), TYPE_COPYD },
1466 1.1 mrg #endif
1467 1.1 mrg
1468 1.1 mrg #if HAVE_NATIVE_mpn_addlsh1_n
1469 1.1 mrg { TRY(mpn_addlsh1_n), TYPE_ADDLSH1_N },
1470 1.1 mrg #endif
1471 1.1 mrg #if HAVE_NATIVE_mpn_addlsh2_n
1472 1.1 mrg { TRY(mpn_addlsh2_n), TYPE_ADDLSH2_N },
1473 1.1 mrg #endif
1474 1.1 mrg #if HAVE_NATIVE_mpn_addlsh_n
1475 1.1 mrg { TRY(mpn_addlsh_n), TYPE_ADDLSH_N },
1476 1.1 mrg #endif
1477 1.1 mrg #if HAVE_NATIVE_mpn_sublsh1_n
1478 1.1 mrg { TRY(mpn_sublsh1_n), TYPE_SUBLSH1_N },
1479 1.1 mrg #endif
1480 1.1 mrg #if HAVE_NATIVE_mpn_sublsh_n
1481 1.1 mrg { TRY(mpn_sublsh_n), TYPE_SUBLSH_N },
1482 1.1 mrg #endif
1483 1.1 mrg #if HAVE_NATIVE_mpn_rsblsh1_n
1484 1.1 mrg { TRY(mpn_rsblsh1_n), TYPE_RSBLSH1_N },
1485 1.1 mrg #endif
1486 1.1 mrg #if HAVE_NATIVE_mpn_rsblsh2_n
1487 1.1 mrg { TRY(mpn_rsblsh2_n), TYPE_RSBLSH2_N },
1488 1.1 mrg #endif
1489 1.1 mrg #if HAVE_NATIVE_mpn_rsblsh_n
1490 1.1 mrg { TRY(mpn_rsblsh_n), TYPE_RSBLSH_N },
1491 1.1 mrg #endif
1492 1.1 mrg #if HAVE_NATIVE_mpn_rsh1add_n
1493 1.1 mrg { TRY(mpn_rsh1add_n), TYPE_RSH1ADD_N },
1494 1.1 mrg #endif
1495 1.1 mrg #if HAVE_NATIVE_mpn_rsh1sub_n
1496 1.1 mrg { TRY(mpn_rsh1sub_n), TYPE_RSH1SUB_N },
1497 1.1 mrg #endif
1498 1.1 mrg
1499 1.1 mrg { TRY_FUNFUN(mpn_and_n), TYPE_AND_N },
1500 1.1 mrg { TRY_FUNFUN(mpn_andn_n), TYPE_ANDN_N },
1501 1.1 mrg { TRY_FUNFUN(mpn_nand_n), TYPE_NAND_N },
1502 1.1 mrg { TRY_FUNFUN(mpn_ior_n), TYPE_IOR_N },
1503 1.1 mrg { TRY_FUNFUN(mpn_iorn_n), TYPE_IORN_N },
1504 1.1 mrg { TRY_FUNFUN(mpn_nior_n), TYPE_NIOR_N },
1505 1.1 mrg { TRY_FUNFUN(mpn_xor_n), TYPE_XOR_N },
1506 1.1 mrg { TRY_FUNFUN(mpn_xnor_n), TYPE_XNOR_N },
1507 1.1 mrg
1508 1.1 mrg { TRY(mpn_divrem_1), TYPE_DIVREM_1 },
1509 1.1 mrg #if USE_PREINV_DIVREM_1
1510 1.1 mrg { TRY(mpn_preinv_divrem_1), TYPE_PREINV_DIVREM_1 },
1511 1.1 mrg #endif
1512 1.1 mrg { TRY(mpn_mod_1), TYPE_MOD_1 },
1513 1.1 mrg #if USE_PREINV_MOD_1
1514 1.1 mrg { TRY(mpn_preinv_mod_1), TYPE_PREINV_MOD_1 },
1515 1.1 mrg #endif
1516 1.1 mrg #if HAVE_NATIVE_mpn_divrem_1c
1517 1.1 mrg { TRY(mpn_divrem_1c), TYPE_DIVREM_1C },
1518 1.1 mrg #endif
1519 1.1 mrg #if HAVE_NATIVE_mpn_mod_1c
1520 1.1 mrg { TRY(mpn_mod_1c), TYPE_MOD_1C },
1521 1.1 mrg #endif
1522 1.1 mrg #if GMP_NUMB_BITS % 4 == 0
1523 1.1 mrg { TRY(mpn_mod_34lsub1), TYPE_MOD_34LSUB1 },
1524 1.1 mrg #endif
1525 1.1 mrg
1526 1.1 mrg { TRY_FUNFUN(udiv_qrnnd), TYPE_UDIV_QRNND, 2 },
1527 1.1 mrg #if HAVE_NATIVE_mpn_udiv_qrnnd
1528 1.1 mrg { TRY(mpn_udiv_qrnnd), TYPE_UDIV_QRNND, 2 },
1529 1.1 mrg #endif
1530 1.1 mrg #if HAVE_NATIVE_mpn_udiv_qrnnd_r
1531 1.1 mrg { TRY(mpn_udiv_qrnnd_r), TYPE_UDIV_QRNND_R, 2 },
1532 1.1 mrg #endif
1533 1.1 mrg
1534 1.1 mrg { TRY(mpn_divexact_1), TYPE_DIVEXACT_1 },
1535 1.1 mrg { TRY_FUNFUN(mpn_divexact_by3), TYPE_DIVEXACT_BY3 },
1536 1.1 mrg { TRY(mpn_divexact_by3c), TYPE_DIVEXACT_BY3C },
1537 1.1 mrg
1538 1.1 mrg { TRY_FUNFUN(mpn_modexact_1_odd), TYPE_MODEXACT_1_ODD },
1539 1.1 mrg { TRY(mpn_modexact_1c_odd), TYPE_MODEXACT_1C_ODD },
1540 1.1 mrg
1541 1.1 mrg
1542 1.1 mrg { TRY(mpn_sbpi1_div_qr), TYPE_SBPI1_DIV_QR, 3},
1543 1.1 mrg { TRY(mpn_tdiv_qr), TYPE_TDIV_QR },
1544 1.1 mrg
1545 1.1 mrg { TRY(mpn_mul_1), TYPE_MUL_1 },
1546 1.1 mrg #if HAVE_NATIVE_mpn_mul_1c
1547 1.1 mrg { TRY(mpn_mul_1c), TYPE_MUL_1C },
1548 1.1 mrg #endif
1549 1.1 mrg #if HAVE_NATIVE_mpn_mul_2
1550 1.1 mrg { TRY(mpn_mul_2), TYPE_MUL_2, 2 },
1551 1.1 mrg #endif
1552 1.1 mrg #if HAVE_NATIVE_mpn_mul_3
1553 1.1 mrg { TRY(mpn_mul_3), TYPE_MUL_3, 3 },
1554 1.1 mrg #endif
1555 1.1 mrg #if HAVE_NATIVE_mpn_mul_4
1556 1.1 mrg { TRY(mpn_mul_4), TYPE_MUL_4, 4 },
1557 1.1 mrg #endif
1558 1.1 mrg
1559 1.1 mrg { TRY(mpn_rshift), TYPE_RSHIFT },
1560 1.1 mrg { TRY(mpn_lshift), TYPE_LSHIFT },
1561 1.1 mrg { TRY(mpn_lshiftc), TYPE_LSHIFTC },
1562 1.1 mrg
1563 1.1 mrg
1564 1.1 mrg { TRY(mpn_mul_basecase), TYPE_MUL_MN },
1565 1.1 mrg { TRY(mpn_mullo_basecase), TYPE_MULLO_N },
1566 1.1 mrg #if SQR_TOOM2_THRESHOLD > 0
1567 1.1 mrg { TRY(mpn_sqr_basecase), TYPE_SQR },
1568 1.1 mrg #endif
1569 1.1 mrg
1570 1.1 mrg { TRY(mpn_mul), TYPE_MUL_MN },
1571 1.1 mrg { TRY(mpn_mul_n), TYPE_MUL_N },
1572 1.1 mrg { TRY(mpn_sqr), TYPE_SQR },
1573 1.1 mrg
1574 1.1 mrg { TRY_FUNFUN(umul_ppmm), TYPE_UMUL_PPMM, 2 },
1575 1.1 mrg #if HAVE_NATIVE_mpn_umul_ppmm
1576 1.1 mrg { TRY(mpn_umul_ppmm), TYPE_UMUL_PPMM, 2 },
1577 1.1 mrg #endif
1578 1.1 mrg #if HAVE_NATIVE_mpn_umul_ppmm_r
1579 1.1 mrg { TRY(mpn_umul_ppmm_r), TYPE_UMUL_PPMM_R, 2 },
1580 1.1 mrg #endif
1581 1.1 mrg
1582 1.1 mrg { TRY_FUNFUN(mpn_toom22_mul), TYPE_MUL_N, MPN_TOOM22_MUL_MINSIZE },
1583 1.1 mrg { TRY_FUNFUN(mpn_toom2_sqr), TYPE_SQR, MPN_TOOM2_SQR_MINSIZE },
1584 1.1 mrg { TRY_FUNFUN(mpn_toom33_mul), TYPE_MUL_N, MPN_TOOM33_MUL_MINSIZE },
1585 1.1 mrg { TRY_FUNFUN(mpn_toom3_sqr), TYPE_SQR, MPN_TOOM3_SQR_MINSIZE },
1586 1.1 mrg { TRY_FUNFUN(mpn_toom44_mul), TYPE_MUL_N, MPN_TOOM44_MUL_MINSIZE },
1587 1.1 mrg { TRY_FUNFUN(mpn_toom4_sqr), TYPE_SQR, MPN_TOOM4_SQR_MINSIZE },
1588 1.1 mrg
1589 1.1 mrg { TRY(mpn_gcd_1), TYPE_GCD_1 },
1590 1.1 mrg { TRY(mpn_gcd), TYPE_GCD },
1591 1.1 mrg { TRY(mpz_jacobi), TYPE_MPZ_JACOBI },
1592 1.1 mrg { TRY(mpz_kronecker_ui), TYPE_MPZ_KRONECKER_UI },
1593 1.1 mrg { TRY(mpz_kronecker_si), TYPE_MPZ_KRONECKER_SI },
1594 1.1 mrg { TRY(mpz_ui_kronecker), TYPE_MPZ_UI_KRONECKER },
1595 1.1 mrg { TRY(mpz_si_kronecker), TYPE_MPZ_SI_KRONECKER },
1596 1.1 mrg
1597 1.1 mrg { TRY(mpn_popcount), TYPE_POPCOUNT },
1598 1.1 mrg { TRY(mpn_hamdist), TYPE_HAMDIST },
1599 1.1 mrg
1600 1.1 mrg { TRY(mpn_sqrtrem), TYPE_SQRTREM },
1601 1.1 mrg
1602 1.1 mrg { TRY_FUNFUN(MPN_ZERO), TYPE_ZERO },
1603 1.1 mrg
1604 1.1 mrg { TRY(mpn_get_str), TYPE_GET_STR },
1605 1.1 mrg
1606 1.1 mrg { TRY(mpn_binvert), TYPE_BINVERT },
1607 1.1 mrg { TRY(mpn_invert), TYPE_INVERT },
1608 1.1 mrg
1609 1.1 mrg #ifdef EXTRA_ROUTINES
1610 1.1 mrg EXTRA_ROUTINES
1611 1.1 mrg #endif
1612 1.1 mrg };
1613 1.1 mrg
1614 1.1 mrg const struct choice_t *choice = NULL;
1615 1.1 mrg
1616 1.1 mrg
1617 1.1 mrg void
1618 1.1 mrg mprotect_maybe (void *addr, size_t len, int prot)
1619 1.1 mrg {
1620 1.1 mrg if (!option_redzones)
1621 1.1 mrg return;
1622 1.1 mrg
1623 1.1 mrg #if HAVE_MPROTECT
1624 1.1 mrg if (mprotect (addr, len, prot) != 0)
1625 1.1 mrg {
1626 1.1 mrg fprintf (stderr, "Cannot mprotect %p 0x%X 0x%X: %s\n",
1627 1.1 mrg addr, (unsigned) len, prot, strerror (errno));
1628 1.1 mrg exit (1);
1629 1.1 mrg }
1630 1.1 mrg #else
1631 1.1 mrg {
1632 1.1 mrg static int warned = 0;
1633 1.1 mrg if (!warned)
1634 1.1 mrg {
1635 1.1 mrg fprintf (stderr,
1636 1.1 mrg "mprotect not available, bounds testing not performed\n");
1637 1.1 mrg warned = 1;
1638 1.1 mrg }
1639 1.1 mrg }
1640 1.1 mrg #endif
1641 1.1 mrg }
1642 1.1 mrg
/* Round "a" up to a multiple of "m".  "a" is returned unchanged when it's
   already a multiple.  "m" must be non-zero, since it's used as a divisor.

   The remainder is held in a size_t, the same type as the operands, so it
   can't be truncated on a system where unsigned long is narrower than
   size_t.  */
size_t
round_up_multiple (size_t a, size_t m)
{
  size_t  r;

  r = a % m;
  if (r == 0)
    return a;
  else
    return a + (m - r);
}
1655 1.1 mrg
1656 1.1 mrg
1657 1.1 mrg /* On some systems it seems that only an mmap'ed region can be mprotect'ed,
1658 1.1 mrg for instance HP-UX 10.
1659 1.1 mrg
1660 1.1 mrg mmap will almost certainly return a pointer already aligned to a page
1661 1.1 mrg boundary, but it's easy enough to share the alignment handling with the
1662 1.1 mrg malloc case. */
1663 1.1 mrg
1664 1.1 mrg void
1665 1.1 mrg malloc_region (struct region_t *r, mp_size_t n)
1666 1.1 mrg {
1667 1.1 mrg mp_ptr p;
1668 1.1 mrg size_t nbytes;
1669 1.1 mrg
1670 1.1 mrg ASSERT ((pagesize % BYTES_PER_MP_LIMB) == 0);
1671 1.1 mrg
1672 1.1 mrg n = round_up_multiple (n, PAGESIZE_LIMBS);
1673 1.1 mrg r->size = n;
1674 1.1 mrg
1675 1.1 mrg nbytes = n*BYTES_PER_MP_LIMB + 2*REDZONE_BYTES + pagesize;
1676 1.1 mrg
1677 1.1 mrg #if defined (MAP_ANONYMOUS) && ! defined (MAP_ANON)
1678 1.1 mrg #define MAP_ANON MAP_ANONYMOUS
1679 1.1 mrg #endif
1680 1.1 mrg
1681 1.1 mrg #if HAVE_MMAP && defined (MAP_ANON)
1682 1.1 mrg /* note must pass fd=-1 for MAP_ANON on BSD */
1683 1.1 mrg p = mmap (NULL, nbytes, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
1684 1.1 mrg if (p == (void *) -1)
1685 1.1 mrg {
1686 1.1 mrg fprintf (stderr, "Cannot mmap %#x anon bytes: %s\n",
1687 1.1 mrg (unsigned) nbytes, strerror (errno));
1688 1.1 mrg exit (1);
1689 1.1 mrg }
1690 1.1 mrg #else
1691 1.1 mrg p = (mp_ptr) malloc (nbytes);
1692 1.1 mrg ASSERT_ALWAYS (p != NULL);
1693 1.1 mrg #endif
1694 1.1 mrg
1695 1.1 mrg p = align_pointer (p, pagesize);
1696 1.1 mrg
1697 1.1 mrg mprotect_maybe (p, REDZONE_BYTES, PROT_NONE);
1698 1.1 mrg p += REDZONE_LIMBS;
1699 1.1 mrg r->ptr = p;
1700 1.1 mrg
1701 1.1 mrg mprotect_maybe (p + n, REDZONE_BYTES, PROT_NONE);
1702 1.1 mrg }
1703 1.1 mrg
1704 1.1 mrg void
1705 1.1 mrg mprotect_region (const struct region_t *r, int prot)
1706 1.1 mrg {
1707 1.1 mrg mprotect_maybe (r->ptr, r->size, prot);
1708 1.1 mrg }
1709 1.1 mrg
1710 1.1 mrg
1711 1.1 mrg /* First four entries must be 0,1,2,3 for the benefit of CARRY_BIT, CARRY_3,
1712 1.1 mrg and CARRY_4 */
1713 1.1 mrg mp_limb_t carry_array[] = {
1714 1.1 mrg 0, 1, 2, 3,
1715 1.1 mrg 4,
1716 1.1 mrg CNST_LIMB(1) << 8,
1717 1.1 mrg CNST_LIMB(1) << 16,
1718 1.1 mrg GMP_NUMB_MAX
1719 1.1 mrg };
1720 1.1 mrg int carry_index;
1721 1.1 mrg
/* Number of carry values to run through for the current tr->carry setting:
   2, 3 or 4 for CARRY_BIT, CARRY_3 or CARRY_4, the whole of carry_array
   plus CARRY_RANDOMS more for CARRY_LIMB or CARRY_DIVISOR, and otherwise
   just one.  */
#define CARRY_COUNT                                             \
  (tr->carry == CARRY_BIT ? 2                                   \
   : tr->carry == CARRY_3 ? 3                                   \
   : tr->carry == CARRY_4 ? 4                                   \
   : (tr->carry == CARRY_LIMB || tr->carry == CARRY_DIVISOR)    \
     ? numberof(carry_array) + CARRY_RANDOMS                    \
   : 1)
1729 1.1 mrg
/* Fill dst,size using refmpn_random when "index" is odd and refmpn_random2
   when it's even.  */
#define MPN_RANDOM_ALT(index,dst,size)          \
  (((index) & 1)                                \
   ? refmpn_random (dst, size)                  \
   : refmpn_random2 (dst, size))
1732 1.1 mrg
/* "for" header running carry_index from 0 up to CARRY_COUNT-1.  The loop
   test is a comma expression: first "carry" is loaded, from carry_array
   while carry_index is within the table and from MPN_RANDOM_ALT beyond it;
   then it's reduced modulo "divisor" when tr->carry is CARRY_DIVISOR; and
   lastly carry_index is compared against CARRY_COUNT.

   The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
   the same type.  */
#define CARRY_ITERATION                                                 \
  for (carry_index = 0;                                                 \
       (carry_index < numberof (carry_array)                            \
        ? (carry = carry_array[carry_index])                            \
        : (MPN_RANDOM_ALT (carry_index, &carry, 1), (mp_limb_t) 0)),    \
         (tr->carry == CARRY_DIVISOR ? carry %= divisor : 0),           \
         carry_index < CARRY_COUNT;                                     \
       carry_index++)
1743 1.1 mrg
1744 1.1 mrg
1745 1.1 mrg mp_limb_t multiplier_array[] = {
1746 1.1 mrg 0, 1, 2, 3,
1747 1.1 mrg CNST_LIMB(1) << 8,
1748 1.1 mrg CNST_LIMB(1) << 16,
1749 1.1 mrg GMP_NUMB_MAX - 2,
1750 1.1 mrg GMP_NUMB_MAX - 1,
1751 1.1 mrg GMP_NUMB_MAX
1752 1.1 mrg };
1753 1.1 mrg int multiplier_index;
1754 1.1 mrg
1755 1.1 mrg mp_limb_t divisor_array[] = {
1756 1.1 mrg 1, 2, 3,
1757 1.1 mrg CNST_LIMB(1) << 8,
1758 1.1 mrg CNST_LIMB(1) << 16,
1759 1.1 mrg CNST_LIMB(1) << (GMP_NUMB_BITS/2 - 1),
1760 1.1 mrg GMP_NUMB_MAX >> (GMP_NUMB_BITS/2),
1761 1.1 mrg GMP_NUMB_HIGHBIT,
1762 1.1 mrg GMP_NUMB_HIGHBIT + 1,
1763 1.1 mrg GMP_NUMB_MAX - 2,
1764 1.1 mrg GMP_NUMB_MAX - 1,
1765 1.1 mrg GMP_NUMB_MAX
1766 1.1 mrg };
1767 1.1 mrg
1768 1.1 mrg int divisor_index;
1769 1.1 mrg
/* "for" header running "index" from 0 up to limit-1.  On each loop test
   "var" is first loaded, from "array" while "index" is within it and from
   MPN_RANDOM_ALT beyond it, and then "index" is compared against "limit".
   The "randoms" and "cond" parameters aren't used in the expansion.

   The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
   the same type.  */
#define ARRAY_ITERATION(var, index, limit, array, randoms, cond)        \
  for (index = 0;                                                       \
       (index < numberof (array)                                        \
        ? (var = array[index])                                          \
        : (MPN_RANDOM_ALT (index, &var, 1), (mp_limb_t) 0)),            \
         index < limit;                                                 \
       index++)
1779 1.1 mrg
/* How many multipliers to run: the whole of multiplier_array plus
   MULTIPLIER_RANDOMS more when tr->multiplier is set, otherwise one.  */
#define MULTIPLIER_COUNT                                \
  (tr->multiplier                                       \
   ? numberof (multiplier_array) + MULTIPLIER_RANDOMS   \
   : 1)

/* Loop over those multipliers, setting "multiplier" each time.  */
#define MULTIPLIER_ITERATION                                    \
  ARRAY_ITERATION(multiplier, multiplier_index,                 \
                  MULTIPLIER_COUNT, multiplier_array,           \
                  MULTIPLIER_RANDOMS, TRY_MULTIPLIER)
1788 1.1 mrg
/* How many divisors to run: the whole of divisor_array plus
   DIVISOR_RANDOMS more when tr->divisor is set, otherwise one.  */
#define DIVISOR_COUNT                                   \
  (tr->divisor                                          \
   ? numberof (divisor_array) + DIVISOR_RANDOMS         \
   : 1)

/* Loop over those divisors, setting "divisor" each time.  */
#define DIVISOR_ITERATION                                       \
  ARRAY_ITERATION(divisor, divisor_index,                       \
                  DIVISOR_COUNT, divisor_array,                 \
                  DIVISOR_RANDOMS, TRY_DIVISOR)
1797 1.1 mrg
1798 1.1 mrg
1799 1.1 mrg /* overlap_array[].s[i] is where s[i] should be, 0 or 1 means overlapping
1800 1.1 mrg d[0] or d[1] respectively, -1 means a separate (write-protected)
1801 1.1 mrg location. */
1802 1.1 mrg
1803 1.1 mrg struct overlap_t {
1804 1.1 mrg int s[NUM_SOURCES];
1805 1.1 mrg } overlap_array[] = {
1806 1.1 mrg { { -1, -1 } },
1807 1.1 mrg { { 0, -1 } },
1808 1.1 mrg { { -1, 0 } },
1809 1.1 mrg { { 0, 0 } },
1810 1.1 mrg { { 1, -1 } },
1811 1.1 mrg { { -1, 1 } },
1812 1.1 mrg { { 1, 1 } },
1813 1.1 mrg { { 0, 1 } },
1814 1.1 mrg { { 1, 0 } },
1815 1.1 mrg };
1816 1.1 mrg
1817 1.1 mrg struct overlap_t *overlap, *overlap_limit;
1818 1.1 mrg
/* How many leading overlap_array[] entries apply to the current "tr".  The
   OVERLAP_xxx bits in tr->overlap are checked first, and failing those the
   count depends on which of dst[1], src[1] and dst[0] are in use.  */
#define OVERLAP_COUNT                           \
  (tr->overlap & OVERLAP_NONE       ? 1         \
   : tr->overlap & OVERLAP_NOT_SRCS ? 3         \
   : tr->overlap & OVERLAP_NOT_SRC2 ? 2         \
   : tr->dst[1]                     ? 9         \
   : tr->src[1]                     ? 4         \
   : tr->dst[0]                     ? 2         \
   : 1)

/* Step "overlap" through the first OVERLAP_COUNT entries of
   overlap_array[].  */
#define OVERLAP_ITERATION                                       \
  for (overlap = &overlap_array[0],                             \
         overlap_limit = &overlap_array[OVERLAP_COUNT];         \
       overlap < overlap_limit;                                 \
       overlap++)
1833 1.1 mrg
1834 1.1 mrg
/* NOTE(review): presumably the radix given to the get_str tests; confirm
   against the call site.  */
int base = 10;

/* t_rand selects the random generator used by t_random under DATA_TRAND,
   and takes the values 0 to T_RAND_COUNT-1.  */
#define T_RAND_COUNT  2
int t_rand;
1839 1.1 mrg
1840 1.1 mrg void
1841 1.1 mrg t_random (mp_ptr ptr, mp_size_t n)
1842 1.1 mrg {
1843 1.1 mrg if (n == 0)
1844 1.1 mrg return;
1845 1.1 mrg
1846 1.1 mrg switch (option_data) {
1847 1.1 mrg case DATA_TRAND:
1848 1.1 mrg switch (t_rand) {
1849 1.1 mrg case 0: refmpn_random (ptr, n); break;
1850 1.1 mrg case 1: refmpn_random2 (ptr, n); break;
1851 1.1 mrg default: abort();
1852 1.1 mrg }
1853 1.1 mrg break;
1854 1.1 mrg case DATA_SEQ:
1855 1.1 mrg {
1856 1.1 mrg static mp_limb_t counter = 0;
1857 1.1 mrg mp_size_t i;
1858 1.1 mrg for (i = 0; i < n; i++)
1859 1.1 mrg ptr[i] = ++counter;
1860 1.1 mrg }
1861 1.1 mrg break;
1862 1.1 mrg case DATA_ZEROS:
1863 1.1 mrg refmpn_zero (ptr, n);
1864 1.1 mrg break;
1865 1.1 mrg case DATA_FFS:
1866 1.1 mrg refmpn_fill (ptr, n, GMP_NUMB_MAX);
1867 1.1 mrg break;
1868 1.1 mrg case DATA_2FD:
1869 1.1 mrg /* Special value 0x2FFF...FFFD, which divided by 3 gives 0xFFF...FFF,
1870 1.1 mrg inducing the q1_ff special case in the mul-by-inverse part of some
1871 1.1 mrg versions of divrem_1 and mod_1. */
1872 1.1 mrg refmpn_fill (ptr, n, (mp_limb_t) -1);
1873 1.1 mrg ptr[n-1] = 2;
1874 1.1 mrg ptr[0] -= 2;
1875 1.1 mrg break;
1876 1.1 mrg
1877 1.1 mrg default:
1878 1.1 mrg abort();
1879 1.1 mrg }
1880 1.1 mrg }
/* Loop t_rand over 0 to T_RAND_COUNT-1.  */
#define T_RAND_ITERATION                        \
  for (t_rand = 0;                              \
       t_rand < T_RAND_COUNT;                   \
       t_rand++)
1883 1.1 mrg
1884 1.1 mrg
1885 1.1 mrg void
1886 1.1 mrg print_each (const struct each_t *e)
1887 1.1 mrg {
1888 1.1 mrg int i;
1889 1.1 mrg
1890 1.1 mrg printf ("%s %s\n", e->name, e == &ref ? tr->reference_name : choice->name);
1891 1.1 mrg if (tr->retval)
1892 1.1 mrg mpn_trace (" retval", &e->retval, 1);
1893 1.1 mrg
1894 1.1 mrg for (i = 0; i < NUM_DESTS; i++)
1895 1.1 mrg {
1896 1.1 mrg if (tr->dst[i])
1897 1.1 mrg {
1898 1.1 mrg if (tr->dst_bytes[i])
1899 1.1 mrg byte_tracen (" d[%d]", i, e->d[i].p, d[i].size);
1900 1.1 mrg else
1901 1.1 mrg mpn_tracen (" d[%d]", i, e->d[i].p, d[i].size);
1902 1.1 mrg printf (" located %p\n", (void *) (e->d[i].p));
1903 1.1 mrg }
1904 1.1 mrg }
1905 1.1 mrg
1906 1.1 mrg for (i = 0; i < NUM_SOURCES; i++)
1907 1.1 mrg if (tr->src[i])
1908 1.1 mrg printf (" s[%d] located %p\n", i, (void *) (e->s[i].p));
1909 1.1 mrg }
1910 1.1 mrg
1911 1.1 mrg
1912 1.1 mrg void
1913 1.1 mrg print_all (void)
1914 1.1 mrg {
1915 1.1 mrg int i;
1916 1.1 mrg
1917 1.1 mrg printf ("\n");
1918 1.1 mrg printf ("size %ld\n", (long) size);
1919 1.1 mrg if (tr->size2)
1920 1.1 mrg printf ("size2 %ld\n", (long) size2);
1921 1.1 mrg
1922 1.1 mrg for (i = 0; i < NUM_DESTS; i++)
1923 1.1 mrg if (d[i].size != size)
1924 1.1 mrg printf ("d[%d].size %ld\n", i, (long) d[i].size);
1925 1.1 mrg
1926 1.1 mrg if (tr->multiplier)
1927 1.1 mrg mpn_trace (" multiplier", &multiplier, 1);
1928 1.1 mrg if (tr->divisor)
1929 1.1 mrg mpn_trace (" divisor", &divisor, 1);
1930 1.1 mrg if (tr->shift)
1931 1.1 mrg printf (" shift %lu\n", shift);
1932 1.1 mrg if (tr->carry)
1933 1.1 mrg mpn_trace (" carry", &carry, 1);
1934 1.1 mrg if (tr->msize)
1935 1.1 mrg mpn_trace (" multiplier_N", multiplier_N, tr->msize);
1936 1.1 mrg
1937 1.1 mrg for (i = 0; i < NUM_DESTS; i++)
1938 1.1 mrg if (tr->dst[i])
1939 1.1 mrg printf (" d[%d] %s, align %ld, size %ld\n",
1940 1.1 mrg i, d[i].high ? "high" : "low",
1941 1.1 mrg (long) d[i].align, (long) d[i].size);
1942 1.1 mrg
1943 1.1 mrg for (i = 0; i < NUM_SOURCES; i++)
1944 1.1 mrg {
1945 1.1 mrg if (tr->src[i])
1946 1.1 mrg {
1947 1.1 mrg printf (" s[%d] %s, align %ld, ",
1948 1.1 mrg i, s[i].high ? "high" : "low", (long) s[i].align);
1949 1.1 mrg switch (overlap->s[i]) {
1950 1.1 mrg case -1:
1951 1.1 mrg printf ("no overlap\n");
1952 1.1 mrg break;
1953 1.1 mrg default:
1954 1.1 mrg printf ("==d[%d]%s\n",
1955 1.1 mrg overlap->s[i],
1956 1.1 mrg tr->overlap == OVERLAP_LOW_TO_HIGH ? "+a"
1957 1.1 mrg : tr->overlap == OVERLAP_HIGH_TO_LOW ? "-a"
1958 1.1 mrg : "");
1959 1.1 mrg break;
1960 1.1 mrg }
1961 1.1 mrg printf (" s[%d]=", i);
1962 1.1 mrg if (tr->carry_sign && (carry & (1 << i)))
1963 1.1 mrg printf ("-");
1964 1.1 mrg mpn_trace (NULL, s[i].p, SRC_SIZE(i));
1965 1.1 mrg }
1966 1.1 mrg }
1967 1.1 mrg
1968 1.1 mrg if (tr->dst0_from_src1)
1969 1.1 mrg mpn_trace (" d[0]", s[1].region.ptr, size);
1970 1.1 mrg
1971 1.1 mrg if (tr->reference)
1972 1.1 mrg print_each (&ref);
1973 1.1 mrg print_each (&fun);
1974 1.1 mrg }
1975 1.1 mrg
1976 1.1 mrg void
1977 1.1 mrg compare (void)
1978 1.1 mrg {
1979 1.1 mrg int error = 0;
1980 1.1 mrg int i;
1981 1.1 mrg
1982 1.1 mrg if (tr->retval && ref.retval != fun.retval)
1983 1.1 mrg {
1984 1.1 mrg gmp_printf ("Different return values (%Mu, %Mu)\n",
1985 1.1 mrg ref.retval, fun.retval);
1986 1.1 mrg error = 1;
1987 1.1 mrg }
1988 1.1 mrg
1989 1.1 mrg for (i = 0; i < NUM_DESTS; i++)
1990 1.1 mrg {
1991 1.1 mrg switch (tr->dst_size[i]) {
1992 1.1 mrg case SIZE_RETVAL:
1993 1.1 mrg case SIZE_GET_STR:
1994 1.1 mrg d[i].size = ref.retval;
1995 1.1 mrg break;
1996 1.1 mrg }
1997 1.1 mrg }
1998 1.1 mrg
1999 1.1 mrg for (i = 0; i < NUM_DESTS; i++)
2000 1.1 mrg {
2001 1.1 mrg if (! tr->dst[i])
2002 1.1 mrg continue;
2003 1.1 mrg
2004 1.1 mrg if (tr->dst_bytes[i])
2005 1.1 mrg {
2006 1.1 mrg if (memcmp (ref.d[i].p, fun.d[i].p, d[i].size) != 0)
2007 1.1 mrg {
2008 1.1 mrg printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
2009 1.1 mrg i,
2010 1.1 mrg (long) byte_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),
2011 1.1 mrg (long) byte_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));
2012 1.1 mrg error = 1;
2013 1.1 mrg }
2014 1.1 mrg }
2015 1.1 mrg else
2016 1.1 mrg {
2017 1.1 mrg if (d[i].size != 0
2018 1.1 mrg && ! refmpn_equal_anynail (ref.d[i].p, fun.d[i].p, d[i].size))
2019 1.1 mrg {
2020 1.1 mrg printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
2021 1.1 mrg i,
2022 1.1 mrg (long) mpn_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),
2023 1.1 mrg (long) mpn_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));
2024 1.1 mrg error = 1;
2025 1.1 mrg }
2026 1.1 mrg }
2027 1.1 mrg }
2028 1.1 mrg
2029 1.1 mrg if (error)
2030 1.1 mrg {
2031 1.1 mrg print_all();
2032 1.1 mrg abort();
2033 1.1 mrg }
2034 1.1 mrg }
2035 1.1 mrg
2036 1.1 mrg
2037 1.1 mrg /* The functions are cast if the return value should be a long rather than
2038 1.1 mrg the default mp_limb_t. This is necessary under _LONG_LONG_LIMB. This
2039 1.1 mrg might not be enough if some actual calling conventions checking is
2040 1.1 mrg implemented on a long long limb system. */
2041 1.1 mrg
2042 1.1 mrg void
2043 1.1 mrg call (struct each_t *e, tryfun_t function)
2044 1.1 mrg {
2045 1.1 mrg switch (choice->type) {
2046 1.1 mrg case TYPE_ADD:
2047 1.1 mrg case TYPE_SUB:
2048 1.1 mrg e->retval = CALLING_CONVENTIONS (function)
2049 1.1 mrg (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
2050 1.1 mrg break;
2051 1.1 mrg
2052 1.1 mrg case TYPE_ADD_N:
2053 1.1 mrg case TYPE_SUB_N:
2054 1.1 mrg case TYPE_ADDLSH1_N:
2055 1.1 mrg case TYPE_ADDLSH2_N:
2056 1.1 mrg case TYPE_SUBLSH1_N:
2057 1.1 mrg case TYPE_RSBLSH1_N:
2058 1.1 mrg case TYPE_RSBLSH2_N:
2059 1.1 mrg case TYPE_RSH1ADD_N:
2060 1.1 mrg case TYPE_RSH1SUB_N:
2061 1.1 mrg e->retval = CALLING_CONVENTIONS (function)
2062 1.1 mrg (e->d[0].p, e->s[0].p, e->s[1].p, size);
2063 1.1 mrg break;
2064 1.1 mrg case TYPE_ADDLSH_N:
2065 1.1 mrg case TYPE_SUBLSH_N:
2066 1.1 mrg case TYPE_RSBLSH_N:
2067 1.1 mrg e->retval = CALLING_CONVENTIONS (function)
2068 1.1 mrg (e->d[0].p, e->s[0].p, e->s[1].p, size, shift);
2069 1.1 mrg break;
2070 1.1 mrg case TYPE_ADD_NC:
2071 1.1 mrg case TYPE_SUB_NC:
2072 1.1 mrg e->retval = CALLING_CONVENTIONS (function)
2073 1.1 mrg (e->d[0].p, e->s[0].p, e->s[1].p, size, carry);
2074 1.1 mrg break;
2075 1.1 mrg
2076 1.1 mrg case TYPE_MUL_1:
2077 1.1 mrg case TYPE_ADDMUL_1:
2078 1.1 mrg case TYPE_SUBMUL_1:
2079 1.1 mrg e->retval = CALLING_CONVENTIONS (function)
2080 1.1 mrg (e->d[0].p, e->s[0].p, size, multiplier);
2081 1.1 mrg break;
2082 1.1 mrg case TYPE_MUL_1C:
2083 1.1 mrg case TYPE_ADDMUL_1C:
2084 1.1 mrg case TYPE_SUBMUL_1C:
2085 1.1 mrg e->retval = CALLING_CONVENTIONS (function)
2086 1.1 mrg (e->d[0].p, e->s[0].p, size, multiplier, carry);
2087 1.1 mrg break;
2088 1.1 mrg
2089 1.1 mrg case TYPE_MUL_2:
2090 1.1 mrg case TYPE_MUL_3:
2091 1.1 mrg case TYPE_MUL_4:
2092 1.1 mrg if (size == 1)
2093 1.1 mrg abort ();
2094 1.1 mrg e->retval = CALLING_CONVENTIONS (function)
2095 1.1 mrg (e->d[0].p, e->s[0].p, size, multiplier_N);
2096 1.1 mrg break;
2097 1.1 mrg
2098 1.1 mrg case TYPE_ADDMUL_2:
2099 1.1 mrg case TYPE_ADDMUL_3:
2100 1.1 mrg case TYPE_ADDMUL_4:
2101 1.1 mrg case TYPE_ADDMUL_5:
2102 1.1 mrg case TYPE_ADDMUL_6:
2103 1.1 mrg case TYPE_ADDMUL_7:
2104 1.1 mrg case TYPE_ADDMUL_8:
2105 1.1 mrg if (size == 1)
2106 1.1 mrg abort ();
2107 1.1 mrg e->retval = CALLING_CONVENTIONS (function)
2108 1.1 mrg (e->d[0].p, e->s[0].p, size, multiplier_N);
2109 1.1 mrg break;
2110 1.1 mrg
2111 1.1 mrg case TYPE_AND_N:
2112 1.1 mrg case TYPE_ANDN_N:
2113 1.1 mrg case TYPE_NAND_N:
2114 1.1 mrg case TYPE_IOR_N:
2115 1.1 mrg case TYPE_IORN_N:
2116 1.1 mrg case TYPE_NIOR_N:
2117 1.1 mrg case TYPE_XOR_N:
2118 1.1 mrg case TYPE_XNOR_N:
2119 1.1 mrg CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
2120 1.1 mrg break;
2121 1.1 mrg
2122 1.1 mrg case TYPE_ADDSUB_N:
2123 1.1 mrg e->retval = CALLING_CONVENTIONS (function)
2124 1.1 mrg (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size);
2125 1.1 mrg break;
2126 1.1 mrg case TYPE_ADDSUB_NC:
2127 1.1 mrg e->retval = CALLING_CONVENTIONS (function)
2128 1.1 mrg (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size, carry);
2129 1.1 mrg break;
2130 1.1 mrg
2131 1.1 mrg case TYPE_COPY:
2132 1.1 mrg case TYPE_COPYI:
2133 1.1 mrg case TYPE_COPYD:
2134 1.1 mrg case TYPE_COM:
2135 1.1 mrg CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2136 1.1 mrg break;
2137 1.1 mrg
2138 1.1 mrg
2139 1.1 mrg case TYPE_DIVEXACT_BY3:
2140 1.1 mrg e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2141 1.1 mrg break;
2142 1.1 mrg case TYPE_DIVEXACT_BY3C:
2143 1.1 mrg e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size,
2144 1.1 mrg carry);
2145 1.1 mrg break;
2146 1.1 mrg
2147 1.1 mrg
2148 1.1 mrg case TYPE_DIVMOD_1:
2149 1.1 mrg case TYPE_DIVEXACT_1:
2150 1.1 mrg e->retval = CALLING_CONVENTIONS (function)
2151 1.1 mrg (e->d[0].p, e->s[0].p, size, divisor);
2152 1.1 mrg break;
2153 1.1 mrg case TYPE_DIVMOD_1C:
2154 1.1 mrg e->retval = CALLING_CONVENTIONS (function)
2155 1.1 mrg (e->d[0].p, e->s[0].p, size, divisor, carry);
2156 1.1 mrg break;
2157 1.1 mrg case TYPE_DIVREM_1:
2158 1.1 mrg e->retval = CALLING_CONVENTIONS (function)
2159 1.1 mrg (e->d[0].p, size2, e->s[0].p, size, divisor);
2160 1.1 mrg break;
2161 1.1 mrg case TYPE_DIVREM_1C:
2162 1.1 mrg e->retval = CALLING_CONVENTIONS (function)
2163 1.1 mrg (e->d[0].p, size2, e->s[0].p, size, divisor, carry);
2164 1.1 mrg break;
2165 1.1 mrg case TYPE_PREINV_DIVREM_1:
2166 1.1 mrg {
2167 1.1 mrg mp_limb_t dinv;
2168 1.1 mrg unsigned shift;
2169 1.1 mrg shift = refmpn_count_leading_zeros (divisor);
2170 1.1 mrg dinv = refmpn_invert_limb (divisor << shift);
2171 1.1 mrg e->retval = CALLING_CONVENTIONS (function)
2172 1.1 mrg (e->d[0].p, size2, e->s[0].p, size, divisor, dinv, shift);
2173 1.1 mrg }
2174 1.1 mrg break;
2175 1.1 mrg case TYPE_MOD_1:
2176 1.1 mrg case TYPE_MODEXACT_1_ODD:
2177 1.1 mrg e->retval = CALLING_CONVENTIONS (function)
2178 1.1 mrg (e->s[0].p, size, divisor);
2179 1.1 mrg break;
2180 1.1 mrg case TYPE_MOD_1C:
2181 1.1 mrg case TYPE_MODEXACT_1C_ODD:
2182 1.1 mrg e->retval = CALLING_CONVENTIONS (function)
2183 1.1 mrg (e->s[0].p, size, divisor, carry);
2184 1.1 mrg break;
2185 1.1 mrg case TYPE_PREINV_MOD_1:
2186 1.1 mrg e->retval = CALLING_CONVENTIONS (function)
2187 1.1 mrg (e->s[0].p, size, divisor, refmpn_invert_limb (divisor));
2188 1.1 mrg break;
2189 1.1 mrg case TYPE_MOD_34LSUB1:
2190 1.1 mrg e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size);
2191 1.1 mrg break;
2192 1.1 mrg
2193 1.1 mrg case TYPE_UDIV_QRNND:
2194 1.1 mrg e->retval = CALLING_CONVENTIONS (function)
2195 1.1 mrg (e->d[0].p, e->s[0].p[1], e->s[0].p[0], divisor);
2196 1.1 mrg break;
2197 1.1 mrg case TYPE_UDIV_QRNND_R:
2198 1.1 mrg e->retval = CALLING_CONVENTIONS (function)
2199 1.1 mrg (e->s[0].p[1], e->s[0].p[0], divisor, e->d[0].p);
2200 1.1 mrg break;
2201 1.1 mrg
2202 1.1 mrg case TYPE_SBPI1_DIV_QR:
2203 1.1 mrg {
2204 1.1 mrg gmp_pi1_t dinv;
2205 1.1 mrg invert_pi1 (dinv, e->s[1].p[size2-1], e->s[1].p[size2-2]); /* FIXME: use refinvert_pi1 */
2206 1.1 mrg refmpn_copyi (e->d[1].p, e->s[0].p, size); /* dividend */
2207 1.1 mrg refmpn_fill (e->d[0].p, size-size2, 0x98765432); /* quotient */
2208 1.1 mrg e->retval = CALLING_CONVENTIONS (function)
2209 1.1 mrg (e->d[0].p, e->d[1].p, size, e->s[1].p, size2, dinv.inv32);
2210 1.1 mrg refmpn_zero (e->d[1].p+size2, size-size2); /* excess over remainder */
2211 1.1 mrg }
2212 1.1 mrg break;
2213 1.1 mrg
2214 1.1 mrg case TYPE_TDIV_QR:
2215 1.1 mrg CALLING_CONVENTIONS (function) (e->d[0].p, e->d[1].p, 0,
2216 1.1 mrg e->s[0].p, size, e->s[1].p, size2);
2217 1.1 mrg break;
2218 1.1 mrg
2219 1.1 mrg case TYPE_GCD_1:
2220 1.1 mrg /* Must have a non-zero src, but this probably isn't the best way to do
2221 1.1 mrg it. */
2222 1.1 mrg if (refmpn_zero_p (e->s[0].p, size))
2223 1.1 mrg e->retval = 0;
2224 1.1 mrg else
2225 1.1 mrg e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size, divisor);
2226 1.1 mrg break;
2227 1.1 mrg
2228 1.1 mrg case TYPE_GCD:
2229 1.1 mrg /* Sources are destroyed, so they're saved and replaced, but a general
2230 1.1 mrg approach to this might be better. Note that it's still e->s[0].p and
2231 1.1 mrg e->s[1].p that are passed, to get the desired alignments. */
2232 1.1 mrg {
2233 1.1 mrg mp_ptr s0 = refmpn_malloc_limbs (size);
2234 1.1 mrg mp_ptr s1 = refmpn_malloc_limbs (size2);
2235 1.1 mrg refmpn_copyi (s0, e->s[0].p, size);
2236 1.1 mrg refmpn_copyi (s1, e->s[1].p, size2);
2237 1.1 mrg
2238 1.1 mrg mprotect_region (&s[0].region, PROT_READ|PROT_WRITE);
2239 1.1 mrg mprotect_region (&s[1].region, PROT_READ|PROT_WRITE);
2240 1.1 mrg e->retval = CALLING_CONVENTIONS (function) (e->d[0].p,
2241 1.1 mrg e->s[0].p, size,
2242 1.1 mrg e->s[1].p, size2);
2243 1.1 mrg refmpn_copyi (e->s[0].p, s0, size);
2244 1.1 mrg refmpn_copyi (e->s[1].p, s1, size2);
2245 1.1 mrg free (s0);
2246 1.1 mrg free (s1);
2247 1.1 mrg }
2248 1.1 mrg break;
2249 1.1 mrg
2250 1.1 mrg case TYPE_GCD_FINDA:
2251 1.1 mrg {
2252 1.1 mrg /* FIXME: do this with a flag */
2253 1.1 mrg mp_limb_t c[2];
2254 1.1 mrg c[0] = e->s[0].p[0];
2255 1.1 mrg c[0] += (c[0] == 0);
2256 1.1 mrg c[1] = e->s[0].p[0];
2257 1.1 mrg c[1] += (c[1] == 0);
2258 1.1 mrg e->retval = CALLING_CONVENTIONS (function) (c);
2259 1.1 mrg }
2260 1.1 mrg break;
2261 1.1 mrg
2262 1.1 mrg case TYPE_MPZ_JACOBI:
2263 1.1 mrg case TYPE_MPZ_KRONECKER:
2264 1.1 mrg {
2265 1.1 mrg mpz_t a, b;
2266 1.1 mrg PTR(a) = e->s[0].p; SIZ(a) = ((carry&1)==0 ? size : -size);
2267 1.1 mrg PTR(b) = e->s[1].p; SIZ(b) = ((carry&2)==0 ? size2 : -size2);
2268 1.1 mrg e->retval = CALLING_CONVENTIONS (function) (a, b);
2269 1.1 mrg }
2270 1.1 mrg break;
2271 1.1 mrg case TYPE_MPZ_KRONECKER_UI:
2272 1.1 mrg {
2273 1.1 mrg mpz_t a;
2274 1.1 mrg PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
2275 1.1 mrg e->retval = CALLING_CONVENTIONS(function) (a, (unsigned long)multiplier);
2276 1.1 mrg }
2277 1.1 mrg break;
2278 1.1 mrg case TYPE_MPZ_KRONECKER_SI:
2279 1.1 mrg {
2280 1.1 mrg mpz_t a;
2281 1.1 mrg PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
2282 1.1 mrg e->retval = CALLING_CONVENTIONS (function) (a, (long) multiplier);
2283 1.1 mrg }
2284 1.1 mrg break;
2285 1.1 mrg case TYPE_MPZ_UI_KRONECKER:
2286 1.1 mrg {
2287 1.1 mrg mpz_t b;
2288 1.1 mrg PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
2289 1.1 mrg e->retval = CALLING_CONVENTIONS(function) ((unsigned long)multiplier, b);
2290 1.1 mrg }
2291 1.1 mrg break;
2292 1.1 mrg case TYPE_MPZ_SI_KRONECKER:
2293 1.1 mrg {
2294 1.1 mrg mpz_t b;
2295 1.1 mrg PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
2296 1.1 mrg e->retval = CALLING_CONVENTIONS (function) ((long) multiplier, b);
2297 1.1 mrg }
2298 1.1 mrg break;
2299 1.1 mrg
2300 1.1 mrg case TYPE_MUL_MN:
2301 1.1 mrg CALLING_CONVENTIONS (function)
2302 1.1 mrg (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
2303 1.1 mrg break;
2304 1.1 mrg case TYPE_MUL_N:
2305 1.1 mrg case TYPE_MULLO_N:
2306 1.1 mrg CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
2307 1.1 mrg break;
2308 1.1 mrg case TYPE_SQR:
2309 1.1 mrg CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2310 1.1 mrg break;
2311 1.1 mrg
2312 1.1 mrg case TYPE_UMUL_PPMM:
2313 1.1 mrg e->retval = CALLING_CONVENTIONS (function)
2314 1.1 mrg (e->d[0].p, e->s[0].p[0], e->s[0].p[1]);
2315 1.1 mrg break;
2316 1.1 mrg case TYPE_UMUL_PPMM_R:
2317 1.1 mrg e->retval = CALLING_CONVENTIONS (function)
2318 1.1 mrg (e->s[0].p[0], e->s[0].p[1], e->d[0].p);
2319 1.1 mrg break;
2320 1.1 mrg
2321 1.1 mrg case TYPE_LSHIFT:
2322 1.1 mrg case TYPE_LSHIFTC:
2323 1.1 mrg case TYPE_RSHIFT:
2324 1.1 mrg e->retval = CALLING_CONVENTIONS (function)
2325 1.1 mrg (e->d[0].p, e->s[0].p, size, shift);
2326 1.1 mrg break;
2327 1.1 mrg
2328 1.1 mrg case TYPE_POPCOUNT:
2329 1.1 mrg e->retval = (* (unsigned long (*)(ANYARGS))
2330 1.1 mrg CALLING_CONVENTIONS (function)) (e->s[0].p, size);
2331 1.1 mrg break;
2332 1.1 mrg case TYPE_HAMDIST:
2333 1.1 mrg e->retval = (* (unsigned long (*)(ANYARGS))
2334 1.1 mrg CALLING_CONVENTIONS (function)) (e->s[0].p, e->s[1].p, size);
2335 1.1 mrg break;
2336 1.1 mrg
2337 1.1 mrg case TYPE_SQRTREM:
2338 1.1 mrg e->retval = (* (long (*)(ANYARGS)) CALLING_CONVENTIONS (function))
2339 1.1 mrg (e->d[0].p, e->d[1].p, e->s[0].p, size);
2340 1.1 mrg break;
2341 1.1 mrg
2342 1.1 mrg case TYPE_ZERO:
2343 1.1 mrg CALLING_CONVENTIONS (function) (e->d[0].p, size);
2344 1.1 mrg break;
2345 1.1 mrg
2346 1.1 mrg case TYPE_GET_STR:
2347 1.1 mrg {
2348 1.1 mrg size_t sizeinbase, fill;
2349 1.1 mrg char *dst;
2350 1.1 mrg MPN_SIZEINBASE (sizeinbase, e->s[0].p, size, base);
2351 1.1 mrg ASSERT_ALWAYS (sizeinbase <= d[0].size);
2352 1.1 mrg fill = d[0].size - sizeinbase;
2353 1.1 mrg if (d[0].high)
2354 1.1 mrg {
2355 1.1 mrg memset (e->d[0].p, 0xBA, fill);
2356 1.1 mrg dst = (char *) e->d[0].p + fill;
2357 1.1 mrg }
2358 1.1 mrg else
2359 1.1 mrg {
2360 1.1 mrg dst = (char *) e->d[0].p;
2361 1.1 mrg memset (dst + sizeinbase, 0xBA, fill);
2362 1.1 mrg }
2363 1.1 mrg if (POW2_P (base))
2364 1.1 mrg {
2365 1.1 mrg e->retval = CALLING_CONVENTIONS (function) (dst, base,
2366 1.1 mrg e->s[0].p, size);
2367 1.1 mrg }
2368 1.1 mrg else
2369 1.1 mrg {
2370 1.1 mrg refmpn_copy (e->d[1].p, e->s[0].p, size);
2371 1.1 mrg e->retval = CALLING_CONVENTIONS (function) (dst, base,
2372 1.1 mrg e->d[1].p, size);
2373 1.1 mrg }
2374 1.1 mrg refmpn_zero (e->d[1].p, size); /* clobbered or unused */
2375 1.1 mrg }
2376 1.1 mrg break;
2377 1.1 mrg
2378 1.1 mrg case TYPE_INVERT:
2379 1.1 mrg {
2380 1.1 mrg mp_ptr scratch;
2381 1.1 mrg TMP_DECL;
2382 1.1 mrg TMP_MARK;
2383 1.1 mrg scratch = TMP_ALLOC_LIMBS (mpn_invert_itch (size));
2384 1.1 mrg CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch);
2385 1.1 mrg TMP_FREE;
2386 1.1 mrg }
2387 1.1 mrg break;
2388 1.1 mrg case TYPE_BINVERT:
2389 1.1 mrg {
2390 1.1 mrg mp_ptr scratch;
2391 1.1 mrg TMP_DECL;
2392 1.1 mrg TMP_MARK;
2393 1.1 mrg scratch = TMP_ALLOC_LIMBS (mpn_binvert_itch (size));
2394 1.1 mrg CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch);
2395 1.1 mrg TMP_FREE;
2396 1.1 mrg }
2397 1.1 mrg break;
2398 1.1 mrg
2399 1.1 mrg #ifdef EXTRA_CALL
2400 1.1 mrg EXTRA_CALL
2401 1.1 mrg #endif
2402 1.1 mrg
2403 1.1 mrg default:
2404 1.1 mrg printf ("Unknown routine type %d\n", choice->type);
2405 1.1 mrg abort ();
2406 1.1 mrg break;
2407 1.1 mrg }
2408 1.1 mrg }
2409 1.1 mrg
2410 1.1 mrg
2411 1.1 mrg void
2412 1.1 mrg pointer_setup (struct each_t *e)
2413 1.1 mrg {
2414 1.1 mrg int i, j;
2415 1.1 mrg
2416 1.1 mrg for (i = 0; i < NUM_DESTS; i++)
2417 1.1 mrg {
2418 1.1 mrg switch (tr->dst_size[i]) {
2419 1.1 mrg case 0:
2420 1.1 mrg case SIZE_RETVAL: /* will be adjusted later */
2421 1.1 mrg d[i].size = size;
2422 1.1 mrg break;
2423 1.1 mrg
2424 1.1 mrg case SIZE_1:
2425 1.1 mrg d[i].size = 1;
2426 1.1 mrg break;
2427 1.1 mrg case SIZE_2:
2428 1.1 mrg d[i].size = 2;
2429 1.1 mrg break;
2430 1.1 mrg case SIZE_3:
2431 1.1 mrg d[i].size = 3;
2432 1.1 mrg break;
2433 1.1 mrg
2434 1.1 mrg case SIZE_PLUS_1:
2435 1.1 mrg d[i].size = size+1;
2436 1.1 mrg break;
2437 1.1 mrg case SIZE_PLUS_MSIZE_SUB_1:
2438 1.1 mrg d[i].size = size + tr->msize - 1;
2439 1.1 mrg break;
2440 1.1 mrg
2441 1.1 mrg case SIZE_SUM:
2442 1.1 mrg if (tr->size2)
2443 1.1 mrg d[i].size = size + size2;
2444 1.1 mrg else
2445 1.1 mrg d[i].size = 2*size;
2446 1.1 mrg break;
2447 1.1 mrg
2448 1.1 mrg case SIZE_SIZE2:
2449 1.1 mrg d[i].size = size2;
2450 1.1 mrg break;
2451 1.1 mrg
2452 1.1 mrg case SIZE_DIFF:
2453 1.1 mrg d[i].size = size - size2;
2454 1.1 mrg break;
2455 1.1 mrg
2456 1.1 mrg case SIZE_DIFF_PLUS_1:
2457 1.1 mrg d[i].size = size - size2 + 1;
2458 1.1 mrg break;
2459 1.1 mrg
2460 1.1 mrg case SIZE_CEIL_HALF:
2461 1.1 mrg d[i].size = (size+1)/2;
2462 1.1 mrg break;
2463 1.1 mrg
2464 1.1 mrg case SIZE_GET_STR:
2465 1.1 mrg {
2466 1.1 mrg mp_limb_t ff = GMP_NUMB_MAX;
2467 1.1 mrg MPN_SIZEINBASE (d[i].size, &ff - (size-1), size, base);
2468 1.1 mrg }
2469 1.1 mrg break;
2470 1.1 mrg
2471 1.1 mrg default:
2472 1.1 mrg printf ("Unrecognised dst_size type %d\n", tr->dst_size[i]);
2473 1.1 mrg abort ();
2474 1.1 mrg }
2475 1.1 mrg }
2476 1.1 mrg
2477 1.1 mrg /* establish e->d[].p destinations */
2478 1.1 mrg for (i = 0; i < NUM_DESTS; i++)
2479 1.1 mrg {
2480 1.1 mrg mp_size_t offset = 0;
2481 1.1 mrg
2482 1.1 mrg /* possible room for overlapping sources */
2483 1.1 mrg for (j = 0; j < numberof (overlap->s); j++)
2484 1.1 mrg if (overlap->s[j] == i)
2485 1.1 mrg offset = MAX (offset, s[j].align);
2486 1.1 mrg
2487 1.1 mrg if (d[i].high)
2488 1.1 mrg {
2489 1.1 mrg if (tr->dst_bytes[i])
2490 1.1 mrg {
2491 1.1 mrg e->d[i].p = (mp_ptr)
2492 1.1 mrg ((char *) (e->d[i].region.ptr + e->d[i].region.size)
2493 1.1 mrg - d[i].size - d[i].align);
2494 1.1 mrg }
2495 1.1 mrg else
2496 1.1 mrg {
2497 1.1 mrg e->d[i].p = e->d[i].region.ptr + e->d[i].region.size
2498 1.1 mrg - d[i].size - d[i].align;
2499 1.1 mrg if (tr->overlap == OVERLAP_LOW_TO_HIGH)
2500 1.1 mrg e->d[i].p -= offset;
2501 1.1 mrg }
2502 1.1 mrg }
2503 1.1 mrg else
2504 1.1 mrg {
2505 1.1 mrg if (tr->dst_bytes[i])
2506 1.1 mrg {
2507 1.1 mrg e->d[i].p = (mp_ptr) ((char *) e->d[i].region.ptr + d[i].align);
2508 1.1 mrg }
2509 1.1 mrg else
2510 1.1 mrg {
2511 1.1 mrg e->d[i].p = e->d[i].region.ptr + d[i].align;
2512 1.1 mrg if (tr->overlap == OVERLAP_HIGH_TO_LOW)
2513 1.1 mrg e->d[i].p += offset;
2514 1.1 mrg }
2515 1.1 mrg }
2516 1.1 mrg }
2517 1.1 mrg
2518 1.1 mrg /* establish e->s[].p sources */
2519 1.1 mrg for (i = 0; i < NUM_SOURCES; i++)
2520 1.1 mrg {
2521 1.1 mrg int o = overlap->s[i];
2522 1.1 mrg switch (o) {
2523 1.1 mrg case -1:
2524 1.1 mrg /* no overlap */
2525 1.1 mrg e->s[i].p = s[i].p;
2526 1.1 mrg break;
2527 1.1 mrg case 0:
2528 1.1 mrg case 1:
2529 1.1 mrg /* overlap with d[o] */
2530 1.1 mrg if (tr->overlap == OVERLAP_HIGH_TO_LOW)
2531 1.1 mrg e->s[i].p = e->d[o].p - s[i].align;
2532 1.1 mrg else if (tr->overlap == OVERLAP_LOW_TO_HIGH)
2533 1.1 mrg e->s[i].p = e->d[o].p + s[i].align;
2534 1.1 mrg else if (tr->size2 == SIZE_FRACTION)
2535 1.1 mrg e->s[i].p = e->d[o].p + size2;
2536 1.1 mrg else
2537 1.1 mrg e->s[i].p = e->d[o].p;
2538 1.1 mrg break;
2539 1.1 mrg default:
2540 1.1 mrg abort();
2541 1.1 mrg break;
2542 1.1 mrg }
2543 1.1 mrg }
2544 1.1 mrg }
2545 1.1 mrg
2546 1.1 mrg
2547 1.1 mrg void
2548 1.1 mrg validate_fail (void)
2549 1.1 mrg {
2550 1.1 mrg if (tr->reference)
2551 1.1 mrg {
2552 1.1 mrg trap_location = TRAP_REF;
2553 1.1 mrg call (&ref, tr->reference);
2554 1.1 mrg trap_location = TRAP_NOWHERE;
2555 1.1 mrg }
2556 1.1 mrg
2557 1.1 mrg print_all();
2558 1.1 mrg abort();
2559 1.1 mrg }
2560 1.1 mrg
2561 1.1 mrg
2562 1.1 mrg void
2563 1.1 mrg try_one (void)
2564 1.1 mrg {
2565 1.1 mrg int i;
2566 1.1 mrg
2567 1.1 mrg if (option_spinner)
2568 1.1 mrg spinner();
2569 1.1 mrg spinner_count++;
2570 1.1 mrg
2571 1.1 mrg trap_location = TRAP_SETUPS;
2572 1.1 mrg
2573 1.1 mrg if (tr->divisor == DIVISOR_NORM)
2574 1.1 mrg divisor |= GMP_NUMB_HIGHBIT;
2575 1.1 mrg if (tr->divisor == DIVISOR_ODD)
2576 1.1 mrg divisor |= 1;
2577 1.1 mrg
2578 1.1 mrg for (i = 0; i < NUM_SOURCES; i++)
2579 1.1 mrg {
2580 1.1 mrg if (s[i].high)
2581 1.1 mrg s[i].p = s[i].region.ptr + s[i].region.size - SRC_SIZE(i) - s[i].align;
2582 1.1 mrg else
2583 1.1 mrg s[i].p = s[i].region.ptr + s[i].align;
2584 1.1 mrg }
2585 1.1 mrg
2586 1.1 mrg pointer_setup (&ref);
2587 1.1 mrg pointer_setup (&fun);
2588 1.1 mrg
2589 1.1 mrg ref.retval = 0x04152637;
2590 1.1 mrg fun.retval = 0x8C9DAEBF;
2591 1.1 mrg
2592 1.1 mrg t_random (multiplier_N, tr->msize);
2593 1.1 mrg
2594 1.1 mrg for (i = 0; i < NUM_SOURCES; i++)
2595 1.1 mrg {
2596 1.1 mrg if (! tr->src[i])
2597 1.1 mrg continue;
2598 1.1 mrg
2599 1.1 mrg mprotect_region (&s[i].region, PROT_READ|PROT_WRITE);
2600 1.1 mrg t_random (s[i].p, SRC_SIZE(i));
2601 1.1 mrg
2602 1.1 mrg switch (tr->data) {
2603 1.1 mrg case DATA_NON_ZERO:
2604 1.1 mrg if (refmpn_zero_p (s[i].p, SRC_SIZE(i)))
2605 1.1 mrg s[i].p[0] = 1;
2606 1.1 mrg break;
2607 1.1 mrg
2608 1.1 mrg case DATA_MULTIPLE_DIVISOR:
2609 1.1 mrg /* same number of low zero bits as divisor */
2610 1.1 mrg s[i].p[0] &= ~ LOW_ZEROS_MASK (divisor);
2611 1.1 mrg refmpn_sub_1 (s[i].p, s[i].p, size,
2612 1.1 mrg refmpn_mod_1 (s[i].p, size, divisor));
2613 1.1 mrg break;
2614 1.1 mrg
2615 1.1 mrg case DATA_GCD:
2616 1.1 mrg /* s[1] no more bits than s[0] */
2617 1.1 mrg if (i == 1 && size2 == size)
2618 1.1 mrg s[1].p[size-1] &= refmpn_msbone_mask (s[0].p[size-1]);
2619 1.1 mrg
2620 1.1 mrg /* high limb non-zero */
2621 1.1 mrg s[i].p[SRC_SIZE(i)-1] += (s[i].p[SRC_SIZE(i)-1] == 0);
2622 1.1 mrg
2623 1.1 mrg /* odd */
2624 1.1 mrg s[i].p[0] |= 1;
2625 1.1 mrg break;
2626 1.1 mrg
2627 1.1 mrg case DATA_SRC0_ODD:
2628 1.1 mrg if (i == 0)
2629 1.1 mrg s[i].p[0] |= 1;
2630 1.1 mrg break;
2631 1.1 mrg
2632 1.1 mrg case DATA_SRC1_ODD:
2633 1.1 mrg if (i == 1)
2634 1.1 mrg s[i].p[0] |= 1;
2635 1.1 mrg break;
2636 1.1 mrg
2637 1.1 mrg case DATA_SRC1_HIGHBIT:
2638 1.1 mrg if (i == 1)
2639 1.1 mrg {
2640 1.1 mrg if (tr->size2)
2641 1.1 mrg s[i].p[size2-1] |= GMP_NUMB_HIGHBIT;
2642 1.1 mrg else
2643 1.1 mrg s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
2644 1.1 mrg }
2645 1.1 mrg break;
2646 1.1 mrg
2647 1.1 mrg case DATA_SRC0_HIGHBIT:
2648 1.1 mrg if (i == 0)
2649 1.1 mrg {
2650 1.1 mrg s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
2651 1.1 mrg }
2652 1.1 mrg break;
2653 1.1 mrg
2654 1.1 mrg case DATA_UDIV_QRNND:
2655 1.1 mrg s[i].p[1] %= divisor;
2656 1.1 mrg break;
2657 1.1 mrg }
2658 1.1 mrg
2659 1.1 mrg mprotect_region (&s[i].region, PROT_READ);
2660 1.1 mrg }
2661 1.1 mrg
2662 1.1 mrg for (i = 0; i < NUM_DESTS; i++)
2663 1.1 mrg {
2664 1.1 mrg if (! tr->dst[i])
2665 1.1 mrg continue;
2666 1.1 mrg
2667 1.1 mrg if (tr->dst0_from_src1 && i==0)
2668 1.1 mrg {
2669 1.1 mrg mp_size_t copy = MIN (d[0].size, SRC_SIZE(1));
2670 1.1 mrg mp_size_t fill = MAX (0, d[0].size - copy);
2671 1.1 mrg MPN_COPY (fun.d[0].p, s[1].region.ptr, copy);
2672 1.1 mrg MPN_COPY (ref.d[0].p, s[1].region.ptr, copy);
2673 1.1 mrg refmpn_fill (fun.d[0].p + copy, fill, DEADVAL);
2674 1.1 mrg refmpn_fill (ref.d[0].p + copy, fill, DEADVAL);
2675 1.1 mrg }
2676 1.1 mrg else if (tr->dst_bytes[i])
2677 1.1 mrg {
2678 1.1 mrg memset (ref.d[i].p, 0xBA, d[i].size);
2679 1.1 mrg memset (fun.d[i].p, 0xBA, d[i].size);
2680 1.1 mrg }
2681 1.1 mrg else
2682 1.1 mrg {
2683 1.1 mrg refmpn_fill (ref.d[i].p, d[i].size, DEADVAL);
2684 1.1 mrg refmpn_fill (fun.d[i].p, d[i].size, DEADVAL);
2685 1.1 mrg }
2686 1.1 mrg }
2687 1.1 mrg
2688 1.1 mrg for (i = 0; i < NUM_SOURCES; i++)
2689 1.1 mrg {
2690 1.1 mrg if (! tr->src[i])
2691 1.1 mrg continue;
2692 1.1 mrg
2693 1.1 mrg if (ref.s[i].p != s[i].p)
2694 1.1 mrg {
2695 1.1 mrg refmpn_copyi (ref.s[i].p, s[i].p, SRC_SIZE(i));
2696 1.1 mrg refmpn_copyi (fun.s[i].p, s[i].p, SRC_SIZE(i));
2697 1.1 mrg }
2698 1.1 mrg }
2699 1.1 mrg
2700 1.1 mrg if (option_print)
2701 1.1 mrg print_all();
2702 1.1 mrg
2703 1.1 mrg if (tr->validate != NULL)
2704 1.1 mrg {
2705 1.1 mrg trap_location = TRAP_FUN;
2706 1.1 mrg call (&fun, choice->function);
2707 1.1 mrg trap_location = TRAP_NOWHERE;
2708 1.1 mrg
2709 1.1 mrg if (! CALLING_CONVENTIONS_CHECK ())
2710 1.1 mrg {
2711 1.1 mrg print_all();
2712 1.1 mrg abort();
2713 1.1 mrg }
2714 1.1 mrg
2715 1.1 mrg (*tr->validate) ();
2716 1.1 mrg }
2717 1.1 mrg else
2718 1.1 mrg {
2719 1.1 mrg trap_location = TRAP_REF;
2720 1.1 mrg call (&ref, tr->reference);
2721 1.1 mrg trap_location = TRAP_FUN;
2722 1.1 mrg call (&fun, choice->function);
2723 1.1 mrg trap_location = TRAP_NOWHERE;
2724 1.1 mrg
2725 1.1 mrg if (! CALLING_CONVENTIONS_CHECK ())
2726 1.1 mrg {
2727 1.1 mrg print_all();
2728 1.1 mrg abort();
2729 1.1 mrg }
2730 1.1 mrg
2731 1.1 mrg compare ();
2732 1.1 mrg }
2733 1.1 mrg }
2734 1.1 mrg
2735 1.1 mrg
/* Loop headers used to build the nest of test parameter loops; the
   statement following each macro is its loop body.  */

/* size runs from the largest of option_firstsize, choice->minsize and the
   type's own minimum (0 when zero sizes are allowed, else 1) up to
   option_lastsize inclusive.  */
#define SIZE_ITERATION                                          \
  for (size = MAX3 (option_firstsize,                           \
                    choice->minsize,                            \
                    (tr->size == SIZE_ALLOW_ZERO) ? 0 : 1);     \
       size <= option_lastsize;                                 \
       ++size)

/* First and last values of size2, depending on how the type uses it;
   both are 0 when the type has no size2.  */
#define SIZE2_FIRST                                             \
  (tr->size2 == SIZE_2 ? 2                                      \
   : tr->size2 == SIZE_FRACTION ? option_firstsize2             \
   : tr->size2 ?                                                \
   MAX (choice->minsize, (option_firstsize2 != 0                \
                          ? option_firstsize2 : 1))             \
   : 0)

#define SIZE2_LAST                                              \
  (tr->size2 == SIZE_2 ? 2                                      \
   : tr->size2 == SIZE_FRACTION ? FRACTION_COUNT-1              \
   : tr->size2 ? size                                           \
   : 0)

#define SIZE2_ITERATION \
  for (size2 = SIZE2_FIRST; size2 <= SIZE2_LAST; ++size2)

/* Alignments 0 .. ALIGNMENTS-1 are tried for an operand in use, just
   alignment 0 otherwise.  */
#define ALIGN_COUNT(cond)  ((cond) ? ALIGNMENTS : 1)
#define ALIGN_ITERATION(w,n,cond) \
  for ((w)[n].align = 0; (w)[n].align < ALIGN_COUNT(cond); ++(w)[n].align)

/* Both low (0) and high (1) placement are tried for an operand in use,
   just low otherwise.  */
#define HIGH_LIMIT(cond)  ((cond) != 0)
#define HIGH_COUNT(cond)  (HIGH_LIMIT (cond) + 1)
#define HIGH_ITERATION(w,n,cond) \
  for ((w)[n].high = 0; (w)[n].high <= HIGH_LIMIT(cond); ++(w)[n].high)

/* shift runs 1 .. GMP_NUMB_BITS-1 for types taking a shift, else just 1.  */
#define SHIFT_LIMIT \
  ((unsigned long) (tr->shift ? GMP_NUMB_BITS -1 : 1))

#define SHIFT_ITERATION \
  for (shift = 1; shift <= SHIFT_LIMIT; ++shift)
2774 1.1 mrg
2775 1.1 mrg
2776 1.1 mrg void
2777 1.1 mrg try_many (void)
2778 1.1 mrg {
2779 1.1 mrg int i;
2780 1.1 mrg
2781 1.1 mrg {
2782 1.1 mrg unsigned long total = 1;
2783 1.1 mrg
2784 1.1 mrg total *= option_repetitions;
2785 1.1 mrg total *= option_lastsize;
2786 1.1 mrg if (tr->size2 == SIZE_FRACTION) total *= FRACTION_COUNT;
2787 1.1 mrg else if (tr->size2) total *= (option_lastsize+1)/2;
2788 1.1 mrg
2789 1.1 mrg total *= SHIFT_LIMIT;
2790 1.1 mrg total *= MULTIPLIER_COUNT;
2791 1.1 mrg total *= DIVISOR_COUNT;
2792 1.1 mrg total *= CARRY_COUNT;
2793 1.1 mrg total *= T_RAND_COUNT;
2794 1.1 mrg
2795 1.1 mrg total *= HIGH_COUNT (tr->dst[0]);
2796 1.1 mrg total *= HIGH_COUNT (tr->dst[1]);
2797 1.1 mrg total *= HIGH_COUNT (tr->src[0]);
2798 1.1 mrg total *= HIGH_COUNT (tr->src[1]);
2799 1.1 mrg
2800 1.1 mrg total *= ALIGN_COUNT (tr->dst[0]);
2801 1.1 mrg total *= ALIGN_COUNT (tr->dst[1]);
2802 1.1 mrg total *= ALIGN_COUNT (tr->src[0]);
2803 1.1 mrg total *= ALIGN_COUNT (tr->src[1]);
2804 1.1 mrg
2805 1.1 mrg total *= OVERLAP_COUNT;
2806 1.1 mrg
2807 1.1 mrg printf ("%s %lu\n", choice->name, total);
2808 1.1 mrg }
2809 1.1 mrg
2810 1.1 mrg spinner_count = 0;
2811 1.1 mrg
2812 1.1 mrg for (i = 0; i < option_repetitions; i++)
2813 1.1 mrg SIZE_ITERATION
2814 1.1 mrg SIZE2_ITERATION
2815 1.1 mrg
2816 1.1 mrg SHIFT_ITERATION
2817 1.1 mrg MULTIPLIER_ITERATION
2818 1.1 mrg DIVISOR_ITERATION
2819 1.1 mrg CARRY_ITERATION /* must be after divisor */
2820 1.1 mrg T_RAND_ITERATION
2821 1.1 mrg
2822 1.1 mrg HIGH_ITERATION(d,0, tr->dst[0])
2823 1.1 mrg HIGH_ITERATION(d,1, tr->dst[1])
2824 1.1 mrg HIGH_ITERATION(s,0, tr->src[0])
2825 1.1 mrg HIGH_ITERATION(s,1, tr->src[1])
2826 1.1 mrg
2827 1.1 mrg ALIGN_ITERATION(d,0, tr->dst[0])
2828 1.1 mrg ALIGN_ITERATION(d,1, tr->dst[1])
2829 1.1 mrg ALIGN_ITERATION(s,0, tr->src[0])
2830 1.1 mrg ALIGN_ITERATION(s,1, tr->src[1])
2831 1.1 mrg
2832 1.1 mrg OVERLAP_ITERATION
2833 1.1 mrg try_one();
2834 1.1 mrg
2835 1.1 mrg printf("\n");
2836 1.1 mrg }
2837 1.1 mrg
2838 1.1 mrg
2839 1.1 mrg /* Usually print_all() doesn't show much, but it might give a hint as to
2840 1.1 mrg where the function was up to when it died. */
2841 1.1 mrg void
2842 1.1 mrg trap (int sig)
2843 1.1 mrg {
2844 1.1 mrg const char *name = "noname";
2845 1.1 mrg
2846 1.1 mrg switch (sig) {
2847 1.1 mrg case SIGILL: name = "SIGILL"; break;
2848 1.1 mrg #ifdef SIGBUS
2849 1.1 mrg case SIGBUS: name = "SIGBUS"; break;
2850 1.1 mrg #endif
2851 1.1 mrg case SIGSEGV: name = "SIGSEGV"; break;
2852 1.1 mrg case SIGFPE: name = "SIGFPE"; break;
2853 1.1 mrg }
2854 1.1 mrg
2855 1.1 mrg printf ("\n\nSIGNAL TRAP: %s\n", name);
2856 1.1 mrg
2857 1.1 mrg switch (trap_location) {
2858 1.1 mrg case TRAP_REF:
2859 1.1 mrg printf (" in reference function: %s\n", tr->reference_name);
2860 1.1 mrg break;
2861 1.1 mrg case TRAP_FUN:
2862 1.1 mrg printf (" in test function: %s\n", choice->name);
2863 1.1 mrg print_all ();
2864 1.1 mrg break;
2865 1.1 mrg case TRAP_SETUPS:
2866 1.1 mrg printf (" in parameter setups\n");
2867 1.1 mrg print_all ();
2868 1.1 mrg break;
2869 1.1 mrg default:
2870 1.1 mrg printf (" somewhere unknown\n");
2871 1.1 mrg break;
2872 1.1 mrg }
2873 1.1 mrg exit (1);
2874 1.1 mrg }
2875 1.1 mrg
2876 1.1 mrg
2877 1.1 mrg void
2878 1.1 mrg try_init (void)
2879 1.1 mrg {
2880 1.1 mrg #if HAVE_GETPAGESIZE
2881 1.1 mrg /* Prefer getpagesize() over sysconf(), since on SunOS 4 sysconf() doesn't
2882 1.1 mrg know _SC_PAGESIZE. */
2883 1.1 mrg pagesize = getpagesize ();
2884 1.1 mrg #else
2885 1.1 mrg #if HAVE_SYSCONF
2886 1.1 mrg if ((pagesize = sysconf (_SC_PAGESIZE)) == -1)
2887 1.1 mrg {
2888 1.1 mrg /* According to the linux man page, sysconf doesn't set errno */
2889 1.1 mrg fprintf (stderr, "Cannot get sysconf _SC_PAGESIZE\n");
2890 1.1 mrg exit (1);
2891 1.1 mrg }
2892 1.1 mrg #else
2893 1.1 mrg Error, error, cannot get page size
2894 1.1 mrg #endif
2895 1.1 mrg #endif
2896 1.1 mrg
2897 1.1 mrg printf ("pagesize is 0x%lX bytes\n", pagesize);
2898 1.1 mrg
2899 1.1 mrg signal (SIGILL, trap);
2900 1.1 mrg #ifdef SIGBUS
2901 1.1 mrg signal (SIGBUS, trap);
2902 1.1 mrg #endif
2903 1.1 mrg signal (SIGSEGV, trap);
2904 1.1 mrg signal (SIGFPE, trap);
2905 1.1 mrg
2906 1.1 mrg {
2907 1.1 mrg int i;
2908 1.1 mrg
2909 1.1 mrg for (i = 0; i < NUM_SOURCES; i++)
2910 1.1 mrg {
2911 1.1 mrg malloc_region (&s[i].region, 2*option_lastsize+ALIGNMENTS-1);
2912 1.1 mrg printf ("s[%d] %p to %p (0x%lX bytes)\n",
2913 1.1 mrg i, (void *) (s[i].region.ptr),
2914 1.1 mrg (void *) (s[i].region.ptr + s[i].region.size),
2915 1.1 mrg (long) s[i].region.size * BYTES_PER_MP_LIMB);
2916 1.1 mrg }
2917 1.1 mrg
2918 1.1 mrg #define INIT_EACH(e,es) \
2919 1.1 mrg for (i = 0; i < NUM_DESTS; i++) \
2920 1.1 mrg { \
2921 1.1 mrg malloc_region (&e.d[i].region, 2*option_lastsize+ALIGNMENTS-1); \
2922 1.1 mrg printf ("%s d[%d] %p to %p (0x%lX bytes)\n", \
2923 1.1 mrg es, i, (void *) (e.d[i].region.ptr), \
2924 1.1 mrg (void *) (e.d[i].region.ptr + e.d[i].region.size), \
2925 1.1 mrg (long) e.d[i].region.size * BYTES_PER_MP_LIMB); \
2926 1.1 mrg }
2927 1.1 mrg
2928 1.1 mrg INIT_EACH(ref, "ref");
2929 1.1 mrg INIT_EACH(fun, "fun");
2930 1.1 mrg }
2931 1.1 mrg }
2932 1.1 mrg
/* Return 1 if STR matches PATTERN, 0 if not.

   PATTERN may carry one '*' wildcard, at either end:

     "*tail"   matches any STR ending in "tail"   ("*" alone matches all)
     "head*"   matches any STR starting with "head"
     "exact"   matches only the identical string

   A leading '*' is checked first, so in a pattern such as "*foo*" only the
   first '*' is a wildcard and the rest is compared literally.  A '*' in
   the middle of PATTERN is always literal.  */
int
strmatch_wild (const char *pattern, const char *str)
{
  size_t  plen, slen;

  /* wildcard at start: compare the remainder against the tail of str */
  if (pattern[0] == '*')
    {
      pattern++;
      plen = strlen (pattern);
      slen = strlen (str);
      return (plen == 0
              || (slen >= plen && memcmp (pattern, str+slen-plen, plen) == 0));
    }

  /* wildcard at end: compare everything before the '*' against the start
     of str.  strncmp rather than memcmp, since str may be shorter than
     the prefix and must not be read beyond its terminator.  */
  plen = strlen (pattern);
  if (plen >= 1 && pattern[plen-1] == '*')
    return (strncmp (pattern, str, plen-1) == 0);

  /* no wildcards */
  return (strcmp (pattern, str) == 0);
}
2956 1.1 mrg
2957 1.1 mrg void
2958 1.1 mrg try_name (const char *name)
2959 1.1 mrg {
2960 1.1 mrg int found = 0;
2961 1.1 mrg int i;
2962 1.1 mrg
2963 1.1 mrg for (i = 0; i < numberof (choice_array); i++)
2964 1.1 mrg {
2965 1.1 mrg if (strmatch_wild (name, choice_array[i].name))
2966 1.1 mrg {
2967 1.1 mrg choice = &choice_array[i];
2968 1.1 mrg tr = ¶m[choice->type];
2969 1.1 mrg try_many ();
2970 1.1 mrg found = 1;
2971 1.1 mrg }
2972 1.1 mrg }
2973 1.1 mrg
2974 1.1 mrg if (!found)
2975 1.1 mrg {
2976 1.1 mrg printf ("%s unknown\n", name);
2977 1.1 mrg /* exit (1); */
2978 1.1 mrg }
2979 1.1 mrg }
2980 1.1 mrg
2981 1.1 mrg
2982 1.1 mrg void
2983 1.1 mrg usage (const char *prog)
2984 1.1 mrg {
2985 1.1 mrg int col = 0;
2986 1.1 mrg int i;
2987 1.1 mrg
2988 1.1 mrg printf ("Usage: %s [options] function...\n", prog);
2989 1.1 mrg printf (" -1 use limb data 1,2,3,etc\n");
2990 1.1 mrg printf (" -9 use limb data all 0xFF..FFs\n");
2991 1.1 mrg printf (" -a zeros use limb data all zeros\n");
2992 1.1 mrg printf (" -a ffs use limb data all 0xFF..FFs (same as -9)\n");
2993 1.1 mrg printf (" -a 2fd use data 0x2FFF...FFFD\n");
2994 1.1 mrg printf (" -p print each case tried (try this if seg faulting)\n");
2995 1.1 mrg printf (" -R seed random numbers from time()\n");
2996 1.1 mrg printf (" -r reps set repetitions (default %d)\n", DEFAULT_REPETITIONS);
2997 1.1 mrg printf (" -s size starting size to test\n");
2998 1.1 mrg printf (" -S size2 starting size2 to test\n");
2999 1.1 mrg printf (" -s s1-s2 range of sizes to test\n");
3000 1.1 mrg printf (" -W don't show the spinner (use this in gdb)\n");
3001 1.1 mrg printf (" -z disable mprotect() redzones\n");
3002 1.1 mrg printf ("Default data is refmpn_random() and refmpn_random2().\n");
3003 1.1 mrg printf ("\n");
3004 1.1 mrg printf ("Functions that can be tested:\n");
3005 1.1 mrg
3006 1.1 mrg for (i = 0; i < numberof (choice_array); i++)
3007 1.1 mrg {
3008 1.1 mrg if (col + 1 + strlen (choice_array[i].name) > 79)
3009 1.1 mrg {
3010 1.1 mrg printf ("\n");
3011 1.1 mrg col = 0;
3012 1.1 mrg }
3013 1.1 mrg printf (" %s", choice_array[i].name);
3014 1.1 mrg col += 1 + strlen (choice_array[i].name);
3015 1.1 mrg }
3016 1.1 mrg printf ("\n");
3017 1.1 mrg
3018 1.1 mrg exit(1);
3019 1.1 mrg }
3020 1.1 mrg
3021 1.1 mrg
3022 1.1 mrg int
3023 1.1 mrg main (int argc, char *argv[])
3024 1.1 mrg {
3025 1.1 mrg int i;
3026 1.1 mrg
3027 1.1 mrg /* unbuffered output */
3028 1.1 mrg setbuf (stdout, NULL);
3029 1.1 mrg setbuf (stderr, NULL);
3030 1.1 mrg
3031 1.1 mrg /* default trace in hex, and in upper-case so can paste into bc */
3032 1.1 mrg mp_trace_base = -16;
3033 1.1 mrg
3034 1.1 mrg param_init ();
3035 1.1 mrg
3036 1.1 mrg {
3037 1.1 mrg unsigned long seed = 123;
3038 1.1 mrg int opt;
3039 1.1 mrg
3040 1.1 mrg while ((opt = getopt(argc, argv, "19a:b:E:pRr:S:s:Wz")) != EOF)
3041 1.1 mrg {
3042 1.1 mrg switch (opt) {
3043 1.1 mrg case '1':
3044 1.1 mrg /* use limb data values 1, 2, 3, ... etc */
3045 1.1 mrg option_data = DATA_SEQ;
3046 1.1 mrg break;
3047 1.1 mrg case '9':
3048 1.1 mrg /* use limb data values 0xFFF...FFF always */
3049 1.1 mrg option_data = DATA_FFS;
3050 1.1 mrg break;
3051 1.1 mrg case 'a':
3052 1.1 mrg if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS;
3053 1.1 mrg else if (strcmp (optarg, "seq") == 0) option_data = DATA_SEQ;
3054 1.1 mrg else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS;
3055 1.1 mrg else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD;
3056 1.1 mrg else
3057 1.1 mrg {
3058 1.1 mrg fprintf (stderr, "unrecognised data option: %s\n", optarg);
3059 1.1 mrg exit (1);
3060 1.1 mrg }
3061 1.1 mrg break;
3062 1.1 mrg case 'b':
3063 1.1 mrg mp_trace_base = atoi (optarg);
3064 1.1 mrg break;
3065 1.1 mrg case 'E':
3066 1.1 mrg /* re-seed */
3067 1.1 mrg sscanf (optarg, "%lu", &seed);
3068 1.1 mrg printf ("Re-seeding with %lu\n", seed);
3069 1.1 mrg break;
3070 1.1 mrg case 'p':
3071 1.1 mrg option_print = 1;
3072 1.1 mrg break;
3073 1.1 mrg case 'R':
3074 1.1 mrg /* randomize */
3075 1.1 mrg seed = time (NULL);
3076 1.1 mrg printf ("Seeding with %lu, re-run using \"-E %lu\"\n", seed, seed);
3077 1.1 mrg break;
3078 1.1 mrg case 'r':
3079 1.1 mrg option_repetitions = atoi (optarg);
3080 1.1 mrg break;
3081 1.1 mrg case 's':
3082 1.1 mrg {
3083 1.1 mrg char *p;
3084 1.1 mrg option_firstsize = strtol (optarg, 0, 0);
3085 1.1 mrg if ((p = strchr (optarg, '-')) != NULL)
3086 1.1 mrg option_lastsize = strtol (p+1, 0, 0);
3087 1.1 mrg }
3088 1.1 mrg break;
3089 1.1 mrg case 'S':
3090 1.1 mrg /* -S <size> sets the starting size for the second of a two size
3091 1.1 mrg routine (like mpn_mul_basecase) */
3092 1.1 mrg option_firstsize2 = strtol (optarg, 0, 0);
3093 1.1 mrg break;
3094 1.1 mrg case 'W':
3095 1.1 mrg /* use this when running in the debugger */
3096 1.1 mrg option_spinner = 0;
3097 1.1 mrg break;
3098 1.1 mrg case 'z':
3099 1.1 mrg /* disable redzones */
3100 1.1 mrg option_redzones = 0;
3101 1.1 mrg break;
3102 1.1 mrg case '?':
3103 1.1 mrg usage (argv[0]);
3104 1.1 mrg break;
3105 1.1 mrg }
3106 1.1 mrg }
3107 1.1 mrg
3108 1.1 mrg gmp_randinit_default (__gmp_rands);
3109 1.1 mrg __gmp_rands_initialized = 1;
3110 1.1 mrg gmp_randseed_ui (__gmp_rands, seed);
3111 1.1 mrg }
3112 1.1 mrg
3113 1.1 mrg try_init();
3114 1.1 mrg
3115 1.1 mrg if (argc <= optind)
3116 1.1 mrg usage (argv[0]);
3117 1.1 mrg
3118 1.1 mrg for (i = optind; i < argc; i++)
3119 1.1 mrg try_name (argv[i]);
3120 1.1 mrg
3121 1.1 mrg return 0;
3122 1.1 mrg }
3123