Home | History | Annotate | Line # | Download | only in tune
      1      1.1  mrg /* Speed measuring program.
      2      1.1  mrg 
      3  1.1.1.4  mrg Copyright 1999-2003, 2005, 2006, 2008-2019 Free Software Foundation, Inc.
      4      1.1  mrg 
      5      1.1  mrg This file is part of the GNU MP Library.
      6      1.1  mrg 
      7      1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
      8  1.1.1.3  mrg it under the terms of either:
      9  1.1.1.3  mrg 
     10  1.1.1.3  mrg   * the GNU Lesser General Public License as published by the Free
     11  1.1.1.3  mrg     Software Foundation; either version 3 of the License, or (at your
     12  1.1.1.3  mrg     option) any later version.
     13  1.1.1.3  mrg 
     14  1.1.1.3  mrg or
     15  1.1.1.3  mrg 
     16  1.1.1.3  mrg   * the GNU General Public License as published by the Free Software
     17  1.1.1.3  mrg     Foundation; either version 2 of the License, or (at your option) any
     18  1.1.1.3  mrg     later version.
     19  1.1.1.3  mrg 
     20  1.1.1.3  mrg or both in parallel, as here.
     21      1.1  mrg 
     22      1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
     23      1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     24  1.1.1.3  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     25  1.1.1.3  mrg for more details.
     26      1.1  mrg 
     27  1.1.1.3  mrg You should have received copies of the GNU General Public License and the
     28  1.1.1.3  mrg GNU Lesser General Public License along with the GNU MP Library.  If not,
     29  1.1.1.3  mrg see https://www.gnu.org/licenses/.  */
     30      1.1  mrg 
     31      1.1  mrg /* Usage message is in the code below, run with no arguments to print it.
     32      1.1  mrg    See README for interesting applications.
     33      1.1  mrg 
     34      1.1  mrg    To add a new routine foo(), create a speed_foo() function in the style of
     35      1.1  mrg    the existing ones and add an entry in the routine[] array.  Put FLAG_R if
     36      1.1  mrg    speed_foo() wants an "r" parameter.
     37      1.1  mrg 
     38      1.1  mrg    The routines don't have help messages or descriptions, but most have
     39  1.1.1.5  mrg    suggestive names.  See the source code for full details. */
     40      1.1  mrg 
     41      1.1  mrg #include "config.h"
     42      1.1  mrg 
     43      1.1  mrg #include <limits.h>
     44      1.1  mrg #include <stdio.h>
     45      1.1  mrg #include <stdlib.h>
     46      1.1  mrg #include <string.h>
     47      1.1  mrg 
     48      1.1  mrg #if HAVE_UNISTD_H
     49      1.1  mrg #include <unistd.h>  /* for getpid, R_OK */
     50      1.1  mrg #endif
     51      1.1  mrg 
     52      1.1  mrg #if TIME_WITH_SYS_TIME
     53      1.1  mrg # include <sys/time.h>  /* for struct timeval */
     54      1.1  mrg # include <time.h>
     55      1.1  mrg #else
     56      1.1  mrg # if HAVE_SYS_TIME_H
     57      1.1  mrg #  include <sys/time.h>
     58      1.1  mrg # else
     59      1.1  mrg #  include <time.h>
     60      1.1  mrg # endif
     61      1.1  mrg #endif
     62      1.1  mrg 
     63      1.1  mrg #if HAVE_SYS_RESOURCE_H
     64      1.1  mrg #include <sys/resource.h>  /* for getrusage() */
     65      1.1  mrg #endif
     66      1.1  mrg 
     67      1.1  mrg 
     68      1.1  mrg #include "gmp-impl.h"
     69      1.1  mrg #include "longlong.h"  /* for the benefit of speed-many.c */
     70      1.1  mrg #include "tests.h"
     71      1.1  mrg #include "speed.h"
     72      1.1  mrg 
     73      1.1  mrg 
                      /* Fallback declarations of the getopt globals, compiled only when
                         HAVE_DECL_OPTARG is 0 or undefined.  NOTE(review): presumably that
                         macro is a configure result from config.h meaning the system headers
                         do not declare them -- confirm.  */
     74      1.1  mrg #if !HAVE_DECL_OPTARG
     75      1.1  mrg extern char *optarg;
     76      1.1  mrg extern int optind, opterr;
     77      1.1  mrg #endif
     78      1.1  mrg 
     79      1.1  mrg #if !HAVE_STRTOUL
     80      1.1  mrg #define strtoul(p,e,b)  (unsigned long) strtol(p,e,b)
     81      1.1  mrg #endif
     82      1.1  mrg 
                      /* Optional hooks: if SPEED_EXTRA_PROTOS and/or SPEED_EXTRA_PROTOS2 are
                         defined by this point, their expansions are emitted here at file
                         scope.  Nothing in this part of the file defines them.
                         NOTE(review): presumably a wrapper that #includes this file uses
                         them to supply extra prototypes -- confirm against the build.  */
     83      1.1  mrg #ifdef SPEED_EXTRA_PROTOS
     84      1.1  mrg SPEED_EXTRA_PROTOS
     85      1.1  mrg #endif
     86      1.1  mrg #ifdef SPEED_EXTRA_PROTOS2
     87      1.1  mrg SPEED_EXTRA_PROTOS2
     88      1.1  mrg #endif
     89      1.1  mrg 
     90      1.1  mrg 
                      /* GMP_NUMB_0xAA: a limb constant made of repeated 0xAA bytes, masked
                         with GMP_NUMB_MASK.  It is only defined when GMP_LIMB_BITS is 32 or
                         64; for any other limb size the macro is left undefined here.
                         NOTE(review): presumably the fill value for the DATA_AAS data option
                         -- the use site is outside this section.  */
     91      1.1  mrg #if GMP_LIMB_BITS == 32
     92      1.1  mrg #define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAA) & GMP_NUMB_MASK)
     93      1.1  mrg #endif
     94      1.1  mrg #if GMP_LIMB_BITS == 64
     95      1.1  mrg #define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAAAAAAAAAA) & GMP_NUMB_MASK)
     96      1.1  mrg #endif
     97      1.1  mrg 
     98      1.1  mrg 
                      /* Values for option_cmp, which starts out as CMP_ABSOLUTE.
                         NOTE(review): going by the names these select how the timings of
                         several routines are shown relative to one another; the code that
                         interprets them is outside this section.  */
     99      1.1  mrg #define CMP_ABSOLUTE     1
    100      1.1  mrg #define CMP_RATIO        2
    101      1.1  mrg #define CMP_DIFFERENCE   3
    102      1.1  mrg #define CMP_DIFFPREV     4
    103      1.1  mrg int  option_cmp = CMP_ABSOLUTE;
    104      1.1  mrg 
                      /* Values for option_unit, which starts out as UNIT_SECONDS.
                         NOTE(review): presumably the unit in which results are printed --
                         confirm at the output code, which is outside this section.  */
    105      1.1  mrg #define UNIT_SECONDS        1
    106      1.1  mrg #define UNIT_CYCLES         2
    107      1.1  mrg #define UNIT_CYCLESPERLIMB  3
    108      1.1  mrg int  option_unit = UNIT_SECONDS;
    109      1.1  mrg 
                      /* Values for option_data, which starts out as DATA_RANDOM.
                         NOTE(review): presumably the kind of operand data the measured
                         routines are fed (random, zeros, 0xAA.., 0xFF.., ...) -- the code
                         that fills the operands is outside this section.  */
    110      1.1  mrg #define DATA_RANDOM   1
    111      1.1  mrg #define DATA_RANDOM2  2
    112      1.1  mrg #define DATA_ZEROS    3
    113      1.1  mrg #define DATA_AAS      4
    114      1.1  mrg #define DATA_FFS      5
    115      1.1  mrg #define DATA_2FD      6
    116      1.1  mrg int  option_data = DATA_RANDOM;
    117      1.1  mrg 
                      /* Option settings with their initial values.  NOTE(review): presumably
                         overridden by command line parsing, which is outside this section.  */
    118      1.1  mrg int        option_square = 0;
    119      1.1  mrg double     option_factor = 0.0;
    120      1.1  mrg mp_size_t  option_step = 1;
    121      1.1  mrg int        option_gnuplot = 0;
    122      1.1  mrg char      *option_gnuplot_basename;
                      /* List of {start,end} size pairs.  It starts out empty (NULL pointer,
                         both counters 0).  NOTE(review): going by the names, size_num counts
                         the entries in use and size_allocnum the entries allocated --
                         confirm at the use sites.  */
    123      1.1  mrg struct size_array_t {
    124      1.1  mrg   mp_size_t start, end;
    125      1.1  mrg } *size_array = NULL;
    126      1.1  mrg mp_size_t  size_num = 0;
    127      1.1  mrg mp_size_t  size_allocnum = 0;
    128      1.1  mrg int        option_resource_usage = 0;
    129      1.1  mrg long       option_seed = 123456789;
    130      1.1  mrg 
                      /* File-scope struct speed_params object.  The struct type is not
                         declared in this file; NOTE(review): presumably it comes from
                         speed.h and carries the parameters handed to each speed_* routine.  */
    131      1.1  mrg struct speed_params  sp;
    132      1.1  mrg 
    133      1.1  mrg #define COLUMN_WIDTH  13  /* for the free-form output */
    134      1.1  mrg 
                      /* Bit flags for the flag member of struct routine_t (below); they can
                         be combined with |.  A routine[] entry that omits the third
                         initializer gets flag 0.  NOTE(review): FLAG_RSIZE carries no
                         description and is not used by the routine[] entries visible in this
                         section -- check its users before relying on it.  */
    135      1.1  mrg #define FLAG_R            (1<<0)  /* require ".r" */
    136      1.1  mrg #define FLAG_R_OPTIONAL   (1<<1)  /* optional ".r" */
    137      1.1  mrg #define FLAG_RSIZE        (1<<2)
    138      1.1  mrg #define FLAG_NODATA       (1<<3)  /* don't alloc xp, yp */
    139      1.1  mrg 
    140      1.1  mrg const struct routine_t {
    141      1.1  mrg   /* constants */
    142      1.1  mrg   const char        *name;
    143      1.1  mrg   speed_function_t  fun;
    144      1.1  mrg   int               flag;
    145      1.1  mrg } routine[] = {
    146      1.1  mrg 
    147      1.1  mrg   { "noop",              speed_noop                 },
    148      1.1  mrg   { "noop_wxs",          speed_noop_wxs             },
    149      1.1  mrg   { "noop_wxys",         speed_noop_wxys            },
    150      1.1  mrg 
    151      1.1  mrg   { "mpn_add_n",         speed_mpn_add_n,     FLAG_R_OPTIONAL },
    152      1.1  mrg   { "mpn_sub_n",         speed_mpn_sub_n,     FLAG_R_OPTIONAL },
    153  1.1.1.3  mrg   { "mpn_add_1",         speed_mpn_add_1,     FLAG_R },
    154  1.1.1.3  mrg   { "mpn_add_1_inplace", speed_mpn_add_1_inplace, FLAG_R },
    155  1.1.1.3  mrg   { "mpn_sub_1",         speed_mpn_sub_1,     FLAG_R },
    156  1.1.1.3  mrg   { "mpn_sub_1_inplace", speed_mpn_sub_1_inplace, FLAG_R },
    157      1.1  mrg 
    158  1.1.1.2  mrg   { "mpn_add_err1_n",    speed_mpn_add_err1_n    },
    159  1.1.1.2  mrg   { "mpn_add_err2_n",    speed_mpn_add_err2_n    },
    160  1.1.1.2  mrg   { "mpn_add_err3_n",    speed_mpn_add_err3_n    },
    161  1.1.1.2  mrg   { "mpn_sub_err1_n",    speed_mpn_sub_err1_n    },
    162  1.1.1.2  mrg   { "mpn_sub_err2_n",    speed_mpn_sub_err2_n    },
    163  1.1.1.2  mrg   { "mpn_sub_err3_n",    speed_mpn_sub_err3_n    },
    164  1.1.1.2  mrg 
    165      1.1  mrg #if HAVE_NATIVE_mpn_add_n_sub_n
    166      1.1  mrg   { "mpn_add_n_sub_n",      speed_mpn_add_n_sub_n,     FLAG_R_OPTIONAL },
    167      1.1  mrg #endif
    168      1.1  mrg 
    169      1.1  mrg   { "mpn_addmul_1",      speed_mpn_addmul_1,  FLAG_R },
    170      1.1  mrg   { "mpn_submul_1",      speed_mpn_submul_1,  FLAG_R },
    171      1.1  mrg #if HAVE_NATIVE_mpn_addmul_2
    172      1.1  mrg   { "mpn_addmul_2",      speed_mpn_addmul_2,  FLAG_R_OPTIONAL },
    173      1.1  mrg #endif
    174      1.1  mrg #if HAVE_NATIVE_mpn_addmul_3
    175      1.1  mrg   { "mpn_addmul_3",      speed_mpn_addmul_3,  FLAG_R_OPTIONAL },
    176      1.1  mrg #endif
    177      1.1  mrg #if HAVE_NATIVE_mpn_addmul_4
    178      1.1  mrg   { "mpn_addmul_4",      speed_mpn_addmul_4,  FLAG_R_OPTIONAL },
    179      1.1  mrg #endif
    180      1.1  mrg #if HAVE_NATIVE_mpn_addmul_5
    181      1.1  mrg   { "mpn_addmul_5",      speed_mpn_addmul_5,  FLAG_R_OPTIONAL },
    182      1.1  mrg #endif
    183      1.1  mrg #if HAVE_NATIVE_mpn_addmul_6
    184      1.1  mrg   { "mpn_addmul_6",      speed_mpn_addmul_6,  FLAG_R_OPTIONAL },
    185      1.1  mrg #endif
    186      1.1  mrg #if HAVE_NATIVE_mpn_addmul_7
    187      1.1  mrg   { "mpn_addmul_7",      speed_mpn_addmul_7,  FLAG_R_OPTIONAL },
    188      1.1  mrg #endif
    189      1.1  mrg #if HAVE_NATIVE_mpn_addmul_8
    190      1.1  mrg   { "mpn_addmul_8",      speed_mpn_addmul_8,  FLAG_R_OPTIONAL },
    191      1.1  mrg #endif
    192      1.1  mrg   { "mpn_mul_1",         speed_mpn_mul_1,     FLAG_R },
    193      1.1  mrg   { "mpn_mul_1_inplace", speed_mpn_mul_1_inplace, FLAG_R },
    194      1.1  mrg #if HAVE_NATIVE_mpn_mul_2
    195      1.1  mrg   { "mpn_mul_2",         speed_mpn_mul_2,     FLAG_R_OPTIONAL },
    196      1.1  mrg #endif
    197      1.1  mrg #if HAVE_NATIVE_mpn_mul_3
    198      1.1  mrg   { "mpn_mul_3",         speed_mpn_mul_3,     FLAG_R_OPTIONAL },
    199      1.1  mrg #endif
    200      1.1  mrg #if HAVE_NATIVE_mpn_mul_4
    201      1.1  mrg   { "mpn_mul_4",         speed_mpn_mul_4,     FLAG_R_OPTIONAL },
    202      1.1  mrg #endif
    203  1.1.1.2  mrg #if HAVE_NATIVE_mpn_mul_5
    204  1.1.1.2  mrg   { "mpn_mul_5",         speed_mpn_mul_5,     FLAG_R_OPTIONAL },
    205  1.1.1.2  mrg #endif
    206  1.1.1.2  mrg #if HAVE_NATIVE_mpn_mul_6
    207  1.1.1.2  mrg   { "mpn_mul_6",         speed_mpn_mul_6,     FLAG_R_OPTIONAL },
    208  1.1.1.2  mrg #endif
    209      1.1  mrg 
    210      1.1  mrg   { "mpn_divrem_1",      speed_mpn_divrem_1,  FLAG_R },
    211      1.1  mrg   { "mpn_divrem_1f",     speed_mpn_divrem_1f, FLAG_R },
    212      1.1  mrg #if HAVE_NATIVE_mpn_divrem_1c
    213      1.1  mrg   { "mpn_divrem_1c",     speed_mpn_divrem_1c, FLAG_R },
    214      1.1  mrg   { "mpn_divrem_1cf",    speed_mpn_divrem_1cf,FLAG_R },
    215      1.1  mrg #endif
    216  1.1.1.2  mrg   { "mpn_mod_1",         speed_mpn_mod_1,     FLAG_R },
    217      1.1  mrg #if HAVE_NATIVE_mpn_mod_1c
    218  1.1.1.2  mrg   { "mpn_mod_1c",        speed_mpn_mod_1c,    FLAG_R },
    219      1.1  mrg #endif
    220      1.1  mrg   { "mpn_preinv_divrem_1",  speed_mpn_preinv_divrem_1,  FLAG_R },
    221      1.1  mrg   { "mpn_preinv_divrem_1f", speed_mpn_preinv_divrem_1f, FLAG_R },
    222      1.1  mrg   { "mpn_preinv_mod_1",  speed_mpn_preinv_mod_1, FLAG_R },
    223      1.1  mrg 
    224  1.1.1.2  mrg   { "mpn_mod_1_1",       speed_mpn_mod_1_1,       FLAG_R },
    225  1.1.1.2  mrg   { "mpn_mod_1_1_1",     speed_mpn_mod_1_1_1,     FLAG_R },
    226  1.1.1.2  mrg   { "mpn_mod_1_1_2",     speed_mpn_mod_1_1_2,     FLAG_R },
    227  1.1.1.2  mrg   { "mpn_mod_1s_2",      speed_mpn_mod_1_2,       FLAG_R },
    228  1.1.1.2  mrg   { "mpn_mod_1s_3",      speed_mpn_mod_1_3,       FLAG_R },
    229  1.1.1.2  mrg   { "mpn_mod_1s_4",      speed_mpn_mod_1_4,       FLAG_R },
    230      1.1  mrg 
    231      1.1  mrg   { "mpn_divrem_1_div",  speed_mpn_divrem_1_div,  FLAG_R },
    232      1.1  mrg   { "mpn_divrem_1_inv",  speed_mpn_divrem_1_inv,  FLAG_R },
    233      1.1  mrg   { "mpn_divrem_1f_div", speed_mpn_divrem_1f_div, FLAG_R },
    234      1.1  mrg   { "mpn_divrem_1f_inv", speed_mpn_divrem_1f_inv, FLAG_R },
    235      1.1  mrg   { "mpn_mod_1_div",     speed_mpn_mod_1_div,     FLAG_R },
    236      1.1  mrg   { "mpn_mod_1_inv",     speed_mpn_mod_1_inv,     FLAG_R },
    237      1.1  mrg 
    238      1.1  mrg   { "mpn_divrem_2",      speed_mpn_divrem_2,        },
    239      1.1  mrg   { "mpn_divrem_2_div",  speed_mpn_divrem_2_div,    },
    240      1.1  mrg   { "mpn_divrem_2_inv",  speed_mpn_divrem_2_inv,    },
    241      1.1  mrg 
    242  1.1.1.3  mrg   { "mpn_div_qr_1n_pi1", speed_mpn_div_qr_1n_pi1, FLAG_R  },
    243  1.1.1.3  mrg   { "mpn_div_qr_1n_pi1_1",speed_mpn_div_qr_1n_pi1_1, FLAG_R  },
    244  1.1.1.3  mrg   { "mpn_div_qr_1n_pi1_2",speed_mpn_div_qr_1n_pi1_2, FLAG_R  },
    245  1.1.1.3  mrg   { "mpn_div_qr_1",      speed_mpn_div_qr_1,      FLAG_R },
    246  1.1.1.3  mrg 
    247  1.1.1.2  mrg   { "mpn_div_qr_2n",     speed_mpn_div_qr_2n,       },
    248  1.1.1.2  mrg   { "mpn_div_qr_2u",     speed_mpn_div_qr_2u,       },
    249  1.1.1.2  mrg 
    250      1.1  mrg   { "mpn_divexact_1",    speed_mpn_divexact_1,    FLAG_R },
    251      1.1  mrg   { "mpn_divexact_by3",  speed_mpn_divexact_by3          },
    252      1.1  mrg 
    253  1.1.1.2  mrg   { "mpn_bdiv_q_1",      speed_mpn_bdiv_q_1,      FLAG_R },
    254      1.1  mrg   { "mpn_pi1_bdiv_q_1",  speed_mpn_pi1_bdiv_q_1,  FLAG_R_OPTIONAL },
    255      1.1  mrg   { "mpn_bdiv_dbm1c",    speed_mpn_bdiv_dbm1c,    FLAG_R_OPTIONAL },
    256      1.1  mrg 
    257      1.1  mrg #if HAVE_NATIVE_mpn_modexact_1_odd
    258      1.1  mrg   { "mpn_modexact_1_odd",  speed_mpn_modexact_1_odd,  FLAG_R },
    259      1.1  mrg #endif
    260      1.1  mrg   { "mpn_modexact_1c_odd", speed_mpn_modexact_1c_odd, FLAG_R },
    261      1.1  mrg 
    262      1.1  mrg #if GMP_NUMB_BITS % 4 == 0
    263      1.1  mrg   { "mpn_mod_34lsub1",   speed_mpn_mod_34lsub1 },
    264      1.1  mrg #endif
    265      1.1  mrg 
    266      1.1  mrg   { "mpn_lshift",        speed_mpn_lshift, FLAG_R   },
    267      1.1  mrg   { "mpn_lshiftc",       speed_mpn_lshiftc, FLAG_R   },
    268      1.1  mrg   { "mpn_rshift",        speed_mpn_rshift, FLAG_R   },
    269      1.1  mrg 
    270      1.1  mrg   { "mpn_and_n",         speed_mpn_and_n,  FLAG_R_OPTIONAL },
    271      1.1  mrg   { "mpn_andn_n",        speed_mpn_andn_n, FLAG_R_OPTIONAL },
    272      1.1  mrg   { "mpn_nand_n",        speed_mpn_nand_n, FLAG_R_OPTIONAL },
    273      1.1  mrg   { "mpn_ior_n",         speed_mpn_ior_n,  FLAG_R_OPTIONAL },
    274      1.1  mrg   { "mpn_iorn_n",        speed_mpn_iorn_n, FLAG_R_OPTIONAL },
    275      1.1  mrg   { "mpn_nior_n",        speed_mpn_nior_n, FLAG_R_OPTIONAL },
    276      1.1  mrg   { "mpn_xor_n",         speed_mpn_xor_n,  FLAG_R_OPTIONAL },
    277      1.1  mrg   { "mpn_xnor_n",        speed_mpn_xnor_n, FLAG_R_OPTIONAL },
    278      1.1  mrg   { "mpn_com",           speed_mpn_com              },
    279  1.1.1.3  mrg   { "mpn_neg",           speed_mpn_neg              },
    280      1.1  mrg 
    281      1.1  mrg   { "mpn_popcount",      speed_mpn_popcount         },
    282      1.1  mrg   { "mpn_hamdist",       speed_mpn_hamdist          },
    283      1.1  mrg 
    284      1.1  mrg   { "mpn_matrix22_mul",  speed_mpn_matrix22_mul     },
    285      1.1  mrg 
    286  1.1.1.4  mrg   { "mpn_hgcd2",         speed_mpn_hgcd2, FLAG_NODATA },
    287  1.1.1.4  mrg   { "mpn_hgcd2_1",       speed_mpn_hgcd2_1, FLAG_NODATA },
    288  1.1.1.4  mrg   { "mpn_hgcd2_2",       speed_mpn_hgcd2_2, FLAG_NODATA },
    289  1.1.1.4  mrg   { "mpn_hgcd2_3",       speed_mpn_hgcd2_3, FLAG_NODATA },
    290  1.1.1.4  mrg   { "mpn_hgcd2_4",       speed_mpn_hgcd2_4, FLAG_NODATA },
    291  1.1.1.4  mrg   { "mpn_hgcd2_5",       speed_mpn_hgcd2_5, FLAG_NODATA },
    292      1.1  mrg   { "mpn_hgcd",          speed_mpn_hgcd             },
    293      1.1  mrg   { "mpn_hgcd_lehmer",   speed_mpn_hgcd_lehmer      },
    294  1.1.1.2  mrg   { "mpn_hgcd_appr",     speed_mpn_hgcd_appr        },
    295  1.1.1.2  mrg   { "mpn_hgcd_appr_lehmer", speed_mpn_hgcd_appr_lehmer },
    296  1.1.1.2  mrg 
    297  1.1.1.2  mrg   { "mpn_hgcd_reduce",   speed_mpn_hgcd_reduce      },
    298  1.1.1.2  mrg   { "mpn_hgcd_reduce_1", speed_mpn_hgcd_reduce_1    },
    299  1.1.1.2  mrg   { "mpn_hgcd_reduce_2", speed_mpn_hgcd_reduce_2    },
    300      1.1  mrg 
    301      1.1  mrg   { "mpn_gcd_1",         speed_mpn_gcd_1,  FLAG_R_OPTIONAL },
    302  1.1.1.4  mrg   { "mpn_gcd_11",        speed_mpn_gcd_11, FLAG_R_OPTIONAL },
    303      1.1  mrg   { "mpn_gcd_1N",        speed_mpn_gcd_1N, FLAG_R_OPTIONAL },
    304  1.1.1.4  mrg   { "mpn_gcd_22",        speed_mpn_gcd_22, FLAG_R_OPTIONAL },
    305      1.1  mrg 
    306      1.1  mrg   { "mpn_gcd",           speed_mpn_gcd                    },
    307      1.1  mrg 
    308      1.1  mrg   { "mpn_gcdext",            speed_mpn_gcdext            },
    309      1.1  mrg   { "mpn_gcdext_single",     speed_mpn_gcdext_single     },
    310      1.1  mrg   { "mpn_gcdext_double",     speed_mpn_gcdext_double     },
    311      1.1  mrg   { "mpn_gcdext_one_single", speed_mpn_gcdext_one_single },
    312      1.1  mrg   { "mpn_gcdext_one_double", speed_mpn_gcdext_one_double },
    313      1.1  mrg #if 0
    314      1.1  mrg   { "mpn_gcdext_lehmer",     speed_mpn_gcdext_lehmer     },
    315      1.1  mrg #endif
    316  1.1.1.4  mrg 
    317  1.1.1.4  mrg   { "mpz_nextprime",     speed_mpz_nextprime        },
    318  1.1.1.4  mrg 
    319      1.1  mrg   { "mpz_jacobi",        speed_mpz_jacobi           },
    320      1.1  mrg   { "mpn_jacobi_base",   speed_mpn_jacobi_base      },
    321      1.1  mrg   { "mpn_jacobi_base_1", speed_mpn_jacobi_base_1    },
    322      1.1  mrg   { "mpn_jacobi_base_2", speed_mpn_jacobi_base_2    },
    323      1.1  mrg   { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3    },
    324  1.1.1.2  mrg   { "mpn_jacobi_base_4", speed_mpn_jacobi_base_4    },
    325      1.1  mrg 
    326      1.1  mrg   { "mpn_mul",           speed_mpn_mul,         FLAG_R_OPTIONAL },
    327      1.1  mrg   { "mpn_mul_basecase",  speed_mpn_mul_basecase,FLAG_R_OPTIONAL },
    328      1.1  mrg   { "mpn_sqr_basecase",  speed_mpn_sqr_basecase     },
    329      1.1  mrg #if HAVE_NATIVE_mpn_sqr_diagonal
    330      1.1  mrg   { "mpn_sqr_diagonal",  speed_mpn_sqr_diagonal     },
    331      1.1  mrg #endif
    332  1.1.1.2  mrg #if HAVE_NATIVE_mpn_sqr_diag_addlsh1
    333  1.1.1.2  mrg   { "mpn_sqr_diag_addlsh1", speed_mpn_sqr_diag_addlsh1 },
    334  1.1.1.2  mrg #endif
    335      1.1  mrg 
    336      1.1  mrg   { "mpn_mul_n",         speed_mpn_mul_n            },
    337      1.1  mrg   { "mpn_sqr",           speed_mpn_sqr              },
    338      1.1  mrg 
    339      1.1  mrg   { "mpn_toom2_sqr",     speed_mpn_toom2_sqr        },
    340      1.1  mrg   { "mpn_toom3_sqr",     speed_mpn_toom3_sqr        },
    341      1.1  mrg   { "mpn_toom4_sqr",     speed_mpn_toom4_sqr        },
    342      1.1  mrg   { "mpn_toom6_sqr",     speed_mpn_toom6_sqr        },
    343      1.1  mrg   { "mpn_toom8_sqr",     speed_mpn_toom8_sqr        },
    344      1.1  mrg   { "mpn_toom22_mul",    speed_mpn_toom22_mul       },
    345      1.1  mrg   { "mpn_toom33_mul",    speed_mpn_toom33_mul       },
    346      1.1  mrg   { "mpn_toom44_mul",    speed_mpn_toom44_mul       },
    347      1.1  mrg   { "mpn_toom6h_mul",    speed_mpn_toom6h_mul       },
    348      1.1  mrg   { "mpn_toom8h_mul",    speed_mpn_toom8h_mul       },
    349      1.1  mrg   { "mpn_toom32_mul",    speed_mpn_toom32_mul       },
    350      1.1  mrg   { "mpn_toom42_mul",    speed_mpn_toom42_mul       },
    351      1.1  mrg   { "mpn_toom43_mul",    speed_mpn_toom43_mul       },
    352      1.1  mrg   { "mpn_toom63_mul",    speed_mpn_toom63_mul       },
    353      1.1  mrg   { "mpn_nussbaumer_mul",    speed_mpn_nussbaumer_mul    },
    354      1.1  mrg   { "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr},
    355      1.1  mrg #if WANT_OLD_FFT_FULL
    356      1.1  mrg   { "mpn_mul_fft_full",      speed_mpn_mul_fft_full      },
    357      1.1  mrg   { "mpn_mul_fft_full_sqr",  speed_mpn_mul_fft_full_sqr  },
    358      1.1  mrg #endif
    359      1.1  mrg   { "mpn_mul_fft",       speed_mpn_mul_fft,     FLAG_R_OPTIONAL },
    360      1.1  mrg   { "mpn_mul_fft_sqr",   speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL },
    361      1.1  mrg 
    362  1.1.1.3  mrg   { "mpn_sqrlo",          speed_mpn_sqrlo           },
    363  1.1.1.3  mrg   { "mpn_sqrlo_basecase", speed_mpn_sqrlo_basecase  },
    364      1.1  mrg   { "mpn_mullo_n",        speed_mpn_mullo_n         },
    365      1.1  mrg   { "mpn_mullo_basecase", speed_mpn_mullo_basecase  },
    366      1.1  mrg 
    367  1.1.1.2  mrg   { "mpn_mulmid_basecase",  speed_mpn_mulmid_basecase, FLAG_R_OPTIONAL },
    368  1.1.1.2  mrg   { "mpn_toom42_mulmid",    speed_mpn_toom42_mulmid },
    369  1.1.1.2  mrg   { "mpn_mulmid_n",         speed_mpn_mulmid_n },
    370  1.1.1.2  mrg   { "mpn_mulmid",           speed_mpn_mulmid, FLAG_R_OPTIONAL },
    371  1.1.1.2  mrg 
    372      1.1  mrg   { "mpn_bc_mulmod_bnm1",      speed_mpn_bc_mulmod_bnm1      },
    373      1.1  mrg   { "mpn_mulmod_bnm1",         speed_mpn_mulmod_bnm1         },
    374      1.1  mrg   { "mpn_mulmod_bnm1_rounded", speed_mpn_mulmod_bnm1_rounded },
    375      1.1  mrg   { "mpn_sqrmod_bnm1",         speed_mpn_sqrmod_bnm1         },
    376      1.1  mrg 
    377      1.1  mrg   { "mpn_invert",              speed_mpn_invert              },
    378      1.1  mrg   { "mpn_invertappr",          speed_mpn_invertappr          },
    379      1.1  mrg   { "mpn_ni_invertappr",       speed_mpn_ni_invertappr       },
    380      1.1  mrg   { "mpn_binvert",             speed_mpn_binvert             },
    381  1.1.1.3  mrg   { "mpn_sec_invert",          speed_mpn_sec_invert          },
    382      1.1  mrg 
    383      1.1  mrg   { "mpn_sbpi1_div_qr",        speed_mpn_sbpi1_div_qr,    FLAG_R_OPTIONAL},
    384      1.1  mrg   { "mpn_dcpi1_div_qr",        speed_mpn_dcpi1_div_qr,    FLAG_R_OPTIONAL},
    385      1.1  mrg   { "mpn_mu_div_qr",           speed_mpn_mu_div_qr,       FLAG_R_OPTIONAL},
    386      1.1  mrg   { "mpn_mupi_div_qr",         speed_mpn_mupi_div_qr,     FLAG_R_OPTIONAL},
    387      1.1  mrg   { "mpn_sbpi1_divappr_q",     speed_mpn_sbpi1_divappr_q, FLAG_R_OPTIONAL},
    388      1.1  mrg   { "mpn_dcpi1_divappr_q",     speed_mpn_dcpi1_divappr_q, FLAG_R_OPTIONAL},
    389      1.1  mrg 
    390      1.1  mrg   { "mpn_sbpi1_bdiv_qr",       speed_mpn_sbpi1_bdiv_qr       },
    391      1.1  mrg   { "mpn_dcpi1_bdiv_qr",       speed_mpn_dcpi1_bdiv_qr       },
    392      1.1  mrg   { "mpn_sbpi1_bdiv_q",        speed_mpn_sbpi1_bdiv_q        },
    393      1.1  mrg   { "mpn_dcpi1_bdiv_q",        speed_mpn_dcpi1_bdiv_q        },
    394  1.1.1.4  mrg   { "mpn_sbpi1_bdiv_r",        speed_mpn_sbpi1_bdiv_r        },
    395      1.1  mrg 
    396  1.1.1.2  mrg   { "mpn_broot",               speed_mpn_broot,    FLAG_R },
    397  1.1.1.2  mrg   { "mpn_broot_invm1",         speed_mpn_broot_invm1, FLAG_R },
    398  1.1.1.2  mrg   { "mpn_brootinv",            speed_mpn_brootinv, FLAG_R },
    399  1.1.1.2  mrg 
    400      1.1  mrg   { "mpn_get_str",          speed_mpn_get_str,     FLAG_R_OPTIONAL },
    401      1.1  mrg   { "mpn_set_str",          speed_mpn_set_str,     FLAG_R_OPTIONAL },
    402      1.1  mrg   { "mpn_set_str_basecase", speed_mpn_bc_set_str,  FLAG_R_OPTIONAL },
    403      1.1  mrg 
    404      1.1  mrg   { "mpn_sqrtrem",       speed_mpn_sqrtrem          },
    405      1.1  mrg   { "mpn_rootrem",       speed_mpn_rootrem, FLAG_R  },
    406  1.1.1.3  mrg   { "mpn_sqrt",          speed_mpn_sqrt             },
    407  1.1.1.3  mrg   { "mpn_root",          speed_mpn_root, FLAG_R     },
    408      1.1  mrg 
    409  1.1.1.4  mrg   { "mpn_perfect_power_p",  speed_mpn_perfect_power_p,       },
    410  1.1.1.4  mrg   { "mpn_perfect_square_p", speed_mpn_perfect_square_p,      },
    411  1.1.1.4  mrg 
    412      1.1  mrg   { "mpn_fib2_ui",       speed_mpn_fib2_ui,    FLAG_NODATA },
    413      1.1  mrg   { "mpz_fib_ui",        speed_mpz_fib_ui,     FLAG_NODATA },
    414      1.1  mrg   { "mpz_fib2_ui",       speed_mpz_fib2_ui,    FLAG_NODATA },
    415      1.1  mrg   { "mpz_lucnum_ui",     speed_mpz_lucnum_ui,  FLAG_NODATA },
    416      1.1  mrg   { "mpz_lucnum2_ui",    speed_mpz_lucnum2_ui, FLAG_NODATA },
    417      1.1  mrg 
    418      1.1  mrg   { "mpz_add",           speed_mpz_add              },
    419  1.1.1.4  mrg   { "mpz_invert",        speed_mpz_invert,   FLAG_R_OPTIONAL },
    420      1.1  mrg   { "mpz_bin_uiui",      speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL },
    421  1.1.1.2  mrg   { "mpz_bin_ui",        speed_mpz_bin_ui,   FLAG_NODATA | FLAG_R_OPTIONAL },
    422      1.1  mrg   { "mpz_fac_ui",        speed_mpz_fac_ui,   FLAG_NODATA   },
    423  1.1.1.3  mrg   { "mpz_2fac_ui",       speed_mpz_2fac_ui,  FLAG_NODATA   },
    424  1.1.1.4  mrg   { "mpz_mfac_uiui",     speed_mpz_mfac_uiui,  FLAG_NODATA | FLAG_R_OPTIONAL },
    425  1.1.1.4  mrg   { "mpz_primorial_ui",  speed_mpz_primorial_ui, FLAG_NODATA },
    426  1.1.1.4  mrg   { "mpz_powm",          speed_mpz_powm,     FLAG_R_OPTIONAL },
    427      1.1  mrg   { "mpz_powm_mod",      speed_mpz_powm_mod         },
    428      1.1  mrg   { "mpz_powm_redc",     speed_mpz_powm_redc        },
    429  1.1.1.2  mrg   { "mpz_powm_sec",      speed_mpz_powm_sec        },
    430      1.1  mrg   { "mpz_powm_ui",       speed_mpz_powm_ui,  FLAG_R_OPTIONAL },
    431      1.1  mrg 
    432      1.1  mrg   { "mpz_mod",           speed_mpz_mod              },
    433      1.1  mrg   { "mpn_redc_1",        speed_mpn_redc_1           },
    434      1.1  mrg   { "mpn_redc_2",        speed_mpn_redc_2           },
    435      1.1  mrg   { "mpn_redc_n",        speed_mpn_redc_n           },
    436      1.1  mrg 
    437      1.1  mrg   { "MPN_COPY",          speed_MPN_COPY             },
    438      1.1  mrg   { "MPN_COPY_INCR",     speed_MPN_COPY_INCR        },
    439      1.1  mrg   { "MPN_COPY_DECR",     speed_MPN_COPY_DECR        },
    440      1.1  mrg   { "memcpy",            speed_memcpy               },
    441      1.1  mrg #if HAVE_NATIVE_mpn_copyi
    442      1.1  mrg   { "mpn_copyi",         speed_mpn_copyi            },
    443      1.1  mrg #endif
    444      1.1  mrg #if HAVE_NATIVE_mpn_copyd
    445      1.1  mrg   { "mpn_copyd",         speed_mpn_copyd            },
    446      1.1  mrg #endif
    447  1.1.1.3  mrg   { "mpn_sec_tabselect", speed_mpn_sec_tabselect, FLAG_R_OPTIONAL },
    448  1.1.1.3  mrg #if HAVE_NATIVE_mpn_addlsh1_n == 1
    449  1.1.1.2  mrg   { "mpn_addlsh1_n",     speed_mpn_addlsh1_n, FLAG_R_OPTIONAL },
    450      1.1  mrg #endif
    451  1.1.1.3  mrg #if HAVE_NATIVE_mpn_sublsh1_n == 1
    452  1.1.1.2  mrg   { "mpn_sublsh1_n",     speed_mpn_sublsh1_n, FLAG_R_OPTIONAL },
    453  1.1.1.2  mrg #endif
    454  1.1.1.2  mrg #if HAVE_NATIVE_mpn_addlsh1_n_ip1
    455  1.1.1.2  mrg   { "mpn_addlsh1_n_ip1", speed_mpn_addlsh1_n_ip1    },
    456  1.1.1.2  mrg #endif
    457  1.1.1.2  mrg #if HAVE_NATIVE_mpn_addlsh1_n_ip2
    458  1.1.1.2  mrg   { "mpn_addlsh1_n_ip2", speed_mpn_addlsh1_n_ip2    },
    459  1.1.1.2  mrg #endif
    460  1.1.1.2  mrg #if HAVE_NATIVE_mpn_sublsh1_n_ip1
    461  1.1.1.2  mrg   { "mpn_sublsh1_n_ip1", speed_mpn_sublsh1_n_ip1    },
    462      1.1  mrg #endif
    463  1.1.1.3  mrg #if HAVE_NATIVE_mpn_rsblsh1_n == 1
    464  1.1.1.2  mrg   { "mpn_rsblsh1_n",     speed_mpn_rsblsh1_n, FLAG_R_OPTIONAL },
    465      1.1  mrg #endif
    466  1.1.1.3  mrg #if HAVE_NATIVE_mpn_addlsh2_n == 1
    467  1.1.1.2  mrg   { "mpn_addlsh2_n",     speed_mpn_addlsh2_n, FLAG_R_OPTIONAL },
    468      1.1  mrg #endif
    469  1.1.1.3  mrg #if HAVE_NATIVE_mpn_sublsh2_n == 1
    470  1.1.1.2  mrg   { "mpn_sublsh2_n",     speed_mpn_sublsh2_n, FLAG_R_OPTIONAL },
    471  1.1.1.2  mrg #endif
    472  1.1.1.2  mrg #if HAVE_NATIVE_mpn_addlsh2_n_ip1
    473  1.1.1.2  mrg   { "mpn_addlsh2_n_ip1", speed_mpn_addlsh2_n_ip1    },
    474  1.1.1.2  mrg #endif
    475  1.1.1.2  mrg #if HAVE_NATIVE_mpn_addlsh2_n_ip2
    476  1.1.1.2  mrg   { "mpn_addlsh2_n_ip2", speed_mpn_addlsh2_n_ip2    },
    477  1.1.1.2  mrg #endif
    478  1.1.1.2  mrg #if HAVE_NATIVE_mpn_sublsh2_n_ip1
    479  1.1.1.2  mrg   { "mpn_sublsh2_n_ip1", speed_mpn_sublsh2_n_ip1    },
    480      1.1  mrg #endif
    481  1.1.1.3  mrg #if HAVE_NATIVE_mpn_rsblsh2_n == 1
    482  1.1.1.2  mrg   { "mpn_rsblsh2_n",     speed_mpn_rsblsh2_n, FLAG_R_OPTIONAL },
    483  1.1.1.2  mrg #endif
    484  1.1.1.2  mrg #if HAVE_NATIVE_mpn_addlsh_n
    485  1.1.1.2  mrg   { "mpn_addlsh_n",     speed_mpn_addlsh_n, FLAG_R_OPTIONAL },
    486  1.1.1.2  mrg #endif
    487  1.1.1.2  mrg #if HAVE_NATIVE_mpn_sublsh_n
    488  1.1.1.2  mrg   { "mpn_sublsh_n",     speed_mpn_sublsh_n, FLAG_R_OPTIONAL },
    489  1.1.1.2  mrg #endif
    490  1.1.1.2  mrg #if HAVE_NATIVE_mpn_addlsh_n_ip1
    491  1.1.1.2  mrg   { "mpn_addlsh_n_ip1", speed_mpn_addlsh_n_ip1    },
    492  1.1.1.2  mrg #endif
    493  1.1.1.2  mrg #if HAVE_NATIVE_mpn_addlsh_n_ip2
    494  1.1.1.2  mrg   { "mpn_addlsh_n_ip2", speed_mpn_addlsh_n_ip2    },
    495  1.1.1.2  mrg #endif
    496  1.1.1.2  mrg #if HAVE_NATIVE_mpn_sublsh_n_ip1
    497  1.1.1.2  mrg   { "mpn_sublsh_n_ip1", speed_mpn_sublsh_n_ip1    },
    498  1.1.1.2  mrg #endif
    499  1.1.1.2  mrg #if HAVE_NATIVE_mpn_rsblsh_n
    500  1.1.1.2  mrg   { "mpn_rsblsh_n",     speed_mpn_rsblsh_n, FLAG_R_OPTIONAL },
    501      1.1  mrg #endif
    502      1.1  mrg #if HAVE_NATIVE_mpn_rsh1add_n
    503  1.1.1.2  mrg   { "mpn_rsh1add_n",     speed_mpn_rsh1add_n, FLAG_R_OPTIONAL },
    504      1.1  mrg #endif
    505      1.1  mrg #if HAVE_NATIVE_mpn_rsh1sub_n
    506  1.1.1.2  mrg   { "mpn_rsh1sub_n",     speed_mpn_rsh1sub_n, FLAG_R_OPTIONAL },
    507      1.1  mrg #endif
    508      1.1  mrg 
    509  1.1.1.3  mrg   { "mpn_cnd_add_n",     speed_mpn_cnd_add_n, FLAG_R_OPTIONAL },
    510  1.1.1.3  mrg   { "mpn_cnd_sub_n",     speed_mpn_cnd_sub_n, FLAG_R_OPTIONAL },
    511  1.1.1.2  mrg 
    512      1.1  mrg   { "MPN_ZERO",          speed_MPN_ZERO             },
    513      1.1  mrg 
    514      1.1  mrg   { "binvert_limb",       speed_binvert_limb,       FLAG_NODATA },
    515      1.1  mrg   { "binvert_limb_mul1",  speed_binvert_limb_mul1,  FLAG_NODATA },
    516      1.1  mrg   { "binvert_limb_loop",  speed_binvert_limb_loop,  FLAG_NODATA },
    517      1.1  mrg   { "binvert_limb_cond",  speed_binvert_limb_cond,  FLAG_NODATA },
    518      1.1  mrg   { "binvert_limb_arith", speed_binvert_limb_arith, FLAG_NODATA },
    519      1.1  mrg 
    520      1.1  mrg   { "malloc_free",                  speed_malloc_free                  },
    521      1.1  mrg   { "malloc_realloc_free",          speed_malloc_realloc_free          },
    522      1.1  mrg   { "gmp_allocate_free",            speed_gmp_allocate_free            },
    523      1.1  mrg   { "gmp_allocate_reallocate_free", speed_gmp_allocate_reallocate_free },
    524      1.1  mrg   { "mpz_init_clear",               speed_mpz_init_clear               },
    525      1.1  mrg   { "mpq_init_clear",               speed_mpq_init_clear               },
    526      1.1  mrg   { "mpf_init_clear",               speed_mpf_init_clear               },
    527      1.1  mrg   { "mpz_init_realloc_clear",       speed_mpz_init_realloc_clear       },
    528      1.1  mrg 
    529      1.1  mrg   { "umul_ppmm",         speed_umul_ppmm,     FLAG_R_OPTIONAL },
    530      1.1  mrg #if HAVE_NATIVE_mpn_umul_ppmm
    531      1.1  mrg   { "mpn_umul_ppmm",     speed_mpn_umul_ppmm, FLAG_R_OPTIONAL },
    532      1.1  mrg #endif
    533      1.1  mrg #if HAVE_NATIVE_mpn_umul_ppmm_r
    534      1.1  mrg   { "mpn_umul_ppmm_r",   speed_mpn_umul_ppmm_r, FLAG_R_OPTIONAL },
    535      1.1  mrg #endif
    536      1.1  mrg 
    537      1.1  mrg   { "count_leading_zeros",  speed_count_leading_zeros,  FLAG_NODATA | FLAG_R_OPTIONAL },
    538      1.1  mrg   { "count_trailing_zeros", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL },
    539      1.1  mrg 
    540      1.1  mrg   { "udiv_qrnnd",             speed_udiv_qrnnd,             FLAG_R_OPTIONAL },
    541      1.1  mrg   { "udiv_qrnnd_c",           speed_udiv_qrnnd_c,           FLAG_R_OPTIONAL },
    542      1.1  mrg #if HAVE_NATIVE_mpn_udiv_qrnnd
    543      1.1  mrg   { "mpn_udiv_qrnnd",         speed_mpn_udiv_qrnnd,         FLAG_R_OPTIONAL },
    544      1.1  mrg #endif
    545      1.1  mrg #if HAVE_NATIVE_mpn_udiv_qrnnd_r
    546      1.1  mrg   { "mpn_udiv_qrnnd_r",       speed_mpn_udiv_qrnnd_r,       FLAG_R_OPTIONAL },
    547      1.1  mrg #endif
    548      1.1  mrg   { "invert_limb",            speed_invert_limb,            FLAG_R_OPTIONAL },
    549      1.1  mrg 
    550      1.1  mrg   { "operator_div",           speed_operator_div,           FLAG_R_OPTIONAL },
    551      1.1  mrg   { "operator_mod",           speed_operator_mod,           FLAG_R_OPTIONAL },
    552      1.1  mrg 
    553      1.1  mrg   { "gmp_randseed",    speed_gmp_randseed,    FLAG_R_OPTIONAL               },
    554      1.1  mrg   { "gmp_randseed_ui", speed_gmp_randseed_ui, FLAG_R_OPTIONAL | FLAG_NODATA },
    555      1.1  mrg   { "mpz_urandomb",    speed_mpz_urandomb,    FLAG_R_OPTIONAL | FLAG_NODATA },
    556      1.1  mrg 
    557      1.1  mrg #ifdef SPEED_EXTRA_ROUTINES
    558      1.1  mrg   SPEED_EXTRA_ROUTINES
    559      1.1  mrg #endif
    560      1.1  mrg #ifdef SPEED_EXTRA_ROUTINES2
    561      1.1  mrg   SPEED_EXTRA_ROUTINES2
    562      1.1  mrg #endif
    563      1.1  mrg };
    564      1.1  mrg 
    565      1.1  mrg 
/* One routine selected on the command line, together with its parameters
   and the state of its most recent measurement.  */
struct choice_t {
  const struct routine_t  *p;        /* entry of routine[] picked by routine_find */
  mp_limb_t               r;         /* value of the ".r" parameter, 0 when none was given */
  double                  scale;     /* multiplier from a "scale*name" prefix, 1.0 otherwise */
  double                  time;      /* latest result, after scaling and unit/comparison conversion */
  int                     no_time;   /* non-zero when there is no time to show at this size */
  double                  prev_time; /* scaled time at the previous size (-1.0 if none), for CMP_DIFFPREV */
  const char              *name;     /* the choice string as given; used as the gnuplot title */
};
struct choice_t  *choice;            /* array of selected routines, indexed 0 .. num_choices-1 */
int  num_choices = 0;                /* number of entries in choice[] */
    577      1.1  mrg 
    578      1.1  mrg 
    579      1.1  mrg void
    580      1.1  mrg data_fill (mp_ptr ptr, mp_size_t size)
    581      1.1  mrg {
    582      1.1  mrg   switch (option_data) {
    583      1.1  mrg   case DATA_RANDOM:
    584      1.1  mrg     mpn_random (ptr, size);
    585      1.1  mrg     break;
    586      1.1  mrg   case DATA_RANDOM2:
    587      1.1  mrg     mpn_random2 (ptr, size);
    588      1.1  mrg     break;
    589      1.1  mrg   case DATA_ZEROS:
    590      1.1  mrg     MPN_ZERO (ptr, size);
    591      1.1  mrg     break;
    592      1.1  mrg   case DATA_AAS:
    593      1.1  mrg     MPN_FILL (ptr, size, GMP_NUMB_0xAA);
    594      1.1  mrg     break;
    595      1.1  mrg   case DATA_FFS:
    596      1.1  mrg     MPN_FILL (ptr, size, GMP_NUMB_MAX);
    597      1.1  mrg     break;
    598      1.1  mrg   case DATA_2FD:
    599      1.1  mrg     MPN_FILL (ptr, size, GMP_NUMB_MAX);
    600      1.1  mrg     ptr[0] -= 2;
    601      1.1  mrg     break;
    602      1.1  mrg   default:
    603      1.1  mrg     abort();
    604      1.1  mrg     /*NOTREACHED*/
    605      1.1  mrg   }
    606      1.1  mrg }
    607      1.1  mrg 
    608      1.1  mrg /* The code here handling the various combinations of output options isn't
    609      1.1  mrg    too attractive, but it works and is fairly clean.  */
    610      1.1  mrg 
/* Divisor applied to a size-N time when converting to cycles per limb:
   N*N when option_square is 1, N*(N+1)/2 when it is 2, and plain N for any
   other setting.  */
#define SIZE_TO_DIVISOR(n)                      \
  ((option_square != 1 && option_square != 2)   \
   ? (n)                                        \
   : (option_square == 1)                       \
   ? (n)*(n)                                    \
   : (n)*((n)+1)/2)
    615      1.1  mrg 
    616      1.1  mrg void
    617      1.1  mrg run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size)
    618      1.1  mrg {
    619      1.1  mrg   const char  *first_open_fastest, *first_open_notfastest, *first_close;
    620      1.1  mrg   int         i, fastest, want_data;
    621      1.1  mrg   double      fastest_time;
    622      1.1  mrg   TMP_DECL;
    623      1.1  mrg 
    624      1.1  mrg   TMP_MARK;
    625      1.1  mrg 
    626      1.1  mrg   /* allocate data, unless all routines are NODATA */
    627      1.1  mrg   want_data = 0;
    628      1.1  mrg   for (i = 0; i < num_choices; i++)
    629      1.1  mrg     want_data |= ((choice[i].p->flag & FLAG_NODATA) == 0);
    630      1.1  mrg 
    631      1.1  mrg   if (want_data)
    632      1.1  mrg     {
    633      1.1  mrg       SPEED_TMP_ALLOC_LIMBS (sp.xp, s->size, s->align_xp);
    634      1.1  mrg       SPEED_TMP_ALLOC_LIMBS (sp.yp, s->size, s->align_yp);
    635      1.1  mrg 
    636      1.1  mrg       data_fill (s->xp, s->size);
    637      1.1  mrg       data_fill (s->yp, s->size);
    638      1.1  mrg     }
    639      1.1  mrg   else
    640      1.1  mrg     {
    641      1.1  mrg       sp.xp = NULL;
    642      1.1  mrg       sp.yp = NULL;
    643      1.1  mrg     }
    644      1.1  mrg 
    645      1.1  mrg   if (prev_size == -1 && option_cmp == CMP_DIFFPREV)
    646      1.1  mrg     {
    647      1.1  mrg       first_open_fastest = "(#";
    648      1.1  mrg       first_open_notfastest = " (";
    649      1.1  mrg       first_close = ")";
    650      1.1  mrg     }
    651      1.1  mrg   else
    652      1.1  mrg     {
    653      1.1  mrg       first_open_fastest = "#";
    654      1.1  mrg       first_open_notfastest = " ";
    655      1.1  mrg       first_close = "";
    656      1.1  mrg     }
    657      1.1  mrg 
    658      1.1  mrg   fastest = -1;
    659      1.1  mrg   fastest_time = -1.0;
    660      1.1  mrg   for (i = 0; i < num_choices; i++)
    661      1.1  mrg     {
    662      1.1  mrg       s->r = choice[i].r;
    663      1.1  mrg       choice[i].time = speed_measure (choice[i].p->fun, s);
    664      1.1  mrg       choice[i].no_time = (choice[i].time == -1.0);
    665      1.1  mrg       if (! choice[i].no_time)
    666      1.1  mrg         choice[i].time *= choice[i].scale;
    667      1.1  mrg 
    668      1.1  mrg       /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time
    669      1.1  mrg          is before any differences.  */
    670      1.1  mrg       {
    671      1.1  mrg         double     t;
    672      1.1  mrg         t = choice[i].time;
    673      1.1  mrg         if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1)
    674      1.1  mrg           {
    675      1.1  mrg             if (choice[i].prev_time == -1.0)
    676      1.1  mrg               choice[i].no_time = 1;
    677      1.1  mrg             else
    678      1.1  mrg               choice[i].time = choice[i].time - choice[i].prev_time;
    679      1.1  mrg           }
    680      1.1  mrg         choice[i].prev_time = t;
    681      1.1  mrg       }
    682      1.1  mrg 
    683      1.1  mrg       if (choice[i].no_time)
    684      1.1  mrg         continue;
    685      1.1  mrg 
    686      1.1  mrg       /* Look for the fastest after CMP_DIFFPREV has been applied, but
    687      1.1  mrg          before CMP_RATIO or CMP_DIFFERENCE.  There's only a fastest shown
    688      1.1  mrg          if there's more than one routine.  */
    689      1.1  mrg       if (num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time))
    690      1.1  mrg         {
    691      1.1  mrg           fastest = i;
    692      1.1  mrg           fastest_time = choice[i].time;
    693      1.1  mrg         }
    694      1.1  mrg 
    695      1.1  mrg       if (option_cmp == CMP_DIFFPREV)
    696      1.1  mrg         {
    697      1.1  mrg           /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */
    698      1.1  mrg           if (option_unit == UNIT_CYCLES)
    699      1.1  mrg             choice[i].time /= speed_cycletime;
    700      1.1  mrg           else if (option_unit == UNIT_CYCLESPERLIMB)
    701      1.1  mrg             {
    702      1.1  mrg               if (prev_size == -1)
    703      1.1  mrg                 choice[i].time /= speed_cycletime;
    704      1.1  mrg               else
    705      1.1  mrg                 choice[i].time /=  (speed_cycletime
    706      1.1  mrg                                     * (SIZE_TO_DIVISOR(s->size)
    707      1.1  mrg                                        - SIZE_TO_DIVISOR(prev_size)));
    708      1.1  mrg             }
    709      1.1  mrg         }
    710      1.1  mrg       else
    711      1.1  mrg         {
    712      1.1  mrg           if (option_unit == UNIT_CYCLES)
    713      1.1  mrg             choice[i].time /= speed_cycletime;
    714      1.1  mrg           else if (option_unit == UNIT_CYCLESPERLIMB)
    715      1.1  mrg             choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size));
    716      1.1  mrg 
    717      1.1  mrg           if (option_cmp == CMP_RATIO && i > 0)
    718      1.1  mrg             {
    719      1.1  mrg               /* A ratio isn't affected by the units chosen. */
    720      1.1  mrg               if (choice[0].no_time || choice[0].time == 0.0)
    721      1.1  mrg                 choice[i].no_time = 1;
    722      1.1  mrg               else
    723      1.1  mrg                 choice[i].time /= choice[0].time;
    724      1.1  mrg             }
    725      1.1  mrg           else if (option_cmp == CMP_DIFFERENCE && i > 0)
    726      1.1  mrg             {
    727      1.1  mrg               if (choice[0].no_time)
    728      1.1  mrg                 {
    729      1.1  mrg                   choice[i].no_time = 1;
    730      1.1  mrg                   continue;
    731      1.1  mrg                 }
    732      1.1  mrg               choice[i].time -= choice[0].time;
    733      1.1  mrg             }
    734      1.1  mrg         }
    735      1.1  mrg     }
    736      1.1  mrg 
    737      1.1  mrg   if (option_gnuplot)
    738      1.1  mrg     {
    739      1.1  mrg       /* In CMP_DIFFPREV, don't print anything for the first size, start
    740      1.1  mrg          with the second where an actual difference is available.
    741      1.1  mrg 
    742      1.1  mrg          In CMP_RATIO, print the first column as 1.0.
    743      1.1  mrg 
    744      1.1  mrg          The 9 decimals printed is much more than the expected precision of
    745      1.1  mrg          the measurements actually. */
    746      1.1  mrg 
    747      1.1  mrg       if (! (option_cmp == CMP_DIFFPREV && prev_size == -1))
    748      1.1  mrg         {
    749      1.1  mrg           fprintf (fp, "%-6ld ", s->size);
    750      1.1  mrg           for (i = 0; i < num_choices; i++)
    751      1.1  mrg             fprintf (fp, "  %.9e",
    752      1.1  mrg                      choice[i].no_time ? 0.0
    753      1.1  mrg                      : (option_cmp == CMP_RATIO && i == 0) ? 1.0
    754      1.1  mrg                      : choice[i].time);
    755      1.1  mrg           fprintf (fp, "\n");
    756      1.1  mrg         }
    757      1.1  mrg     }
    758      1.1  mrg   else
    759      1.1  mrg     {
    760      1.1  mrg       fprintf (fp, "%-6ld ", s->size);
    761      1.1  mrg       for (i = 0; i < num_choices; i++)
    762      1.1  mrg         {
    763      1.1  mrg           char  buf[128];
    764      1.1  mrg           int   decimals;
    765      1.1  mrg 
    766      1.1  mrg           if (choice[i].no_time)
    767      1.1  mrg             {
    768      1.1  mrg               fprintf (fp, " %*s", COLUMN_WIDTH, "n/a");
    769      1.1  mrg             }
    770      1.1  mrg           else
    771      1.1  mrg             {if (option_unit == UNIT_CYCLESPERLIMB
    772      1.1  mrg                  || (option_cmp == CMP_RATIO && i > 0))
    773      1.1  mrg                 decimals = 4;
    774      1.1  mrg               else if (option_unit == UNIT_CYCLES)
    775      1.1  mrg                 decimals = 2;
    776      1.1  mrg               else
    777      1.1  mrg                 decimals = 9;
    778      1.1  mrg 
    779      1.1  mrg               sprintf (buf, "%s%.*f%s",
    780      1.1  mrg                        i == fastest ? first_open_fastest : first_open_notfastest,
    781      1.1  mrg                        decimals, choice[i].time, first_close);
    782      1.1  mrg               fprintf (fp, " %*s", COLUMN_WIDTH, buf);
    783      1.1  mrg             }
    784      1.1  mrg         }
    785      1.1  mrg       fprintf (fp, "\n");
    786      1.1  mrg     }
    787      1.1  mrg 
    788      1.1  mrg   TMP_FREE;
    789      1.1  mrg }
    790      1.1  mrg 
    791      1.1  mrg void
    792      1.1  mrg run_all (FILE *fp)
    793      1.1  mrg {
    794      1.1  mrg   mp_size_t  prev_size;
    795      1.1  mrg   int        i;
    796      1.1  mrg   TMP_DECL;
    797      1.1  mrg 
    798      1.1  mrg   TMP_MARK;
    799      1.1  mrg   SPEED_TMP_ALLOC_LIMBS (sp.xp_block, SPEED_BLOCK_SIZE, sp.align_xp);
    800      1.1  mrg   SPEED_TMP_ALLOC_LIMBS (sp.yp_block, SPEED_BLOCK_SIZE, sp.align_yp);
    801      1.1  mrg 
    802      1.1  mrg   data_fill (sp.xp_block, SPEED_BLOCK_SIZE);
    803      1.1  mrg   data_fill (sp.yp_block, SPEED_BLOCK_SIZE);
    804      1.1  mrg 
    805      1.1  mrg   for (i = 0; i < size_num; i++)
    806      1.1  mrg     {
    807      1.1  mrg       sp.size = size_array[i].start;
    808      1.1  mrg       prev_size = -1;
    809      1.1  mrg       for (;;)
    810      1.1  mrg         {
    811      1.1  mrg           mp_size_t  step;
    812      1.1  mrg 
    813      1.1  mrg           if (option_data == DATA_2FD && sp.size >= 2)
    814      1.1  mrg             sp.xp[sp.size-1] = 2;
    815      1.1  mrg 
    816      1.1  mrg           run_one (fp, &sp, prev_size);
    817      1.1  mrg           prev_size = sp.size;
    818      1.1  mrg 
    819      1.1  mrg           if (option_data == DATA_2FD && sp.size >= 2)
    820      1.1  mrg             sp.xp[sp.size-1] = MP_LIMB_T_MAX;
    821      1.1  mrg 
    822      1.1  mrg           if (option_factor != 0.0)
    823      1.1  mrg             {
    824      1.1  mrg               step = (mp_size_t) (sp.size * option_factor - sp.size);
    825      1.1  mrg               if (step < 1)
    826      1.1  mrg                 step = 1;
    827      1.1  mrg             }
    828      1.1  mrg           else
    829      1.1  mrg             step = 1;
    830      1.1  mrg           if (step < option_step)
    831      1.1  mrg             step = option_step;
    832      1.1  mrg 
    833      1.1  mrg           sp.size += step;
    834      1.1  mrg           if (sp.size > size_array[i].end)
    835      1.1  mrg             break;
    836      1.1  mrg         }
    837      1.1  mrg     }
    838      1.1  mrg 
    839      1.1  mrg   TMP_FREE;
    840      1.1  mrg }
    841      1.1  mrg 
    842      1.1  mrg 
/* Open FILENAME for writing and return the stream.  If the file cannot be
   created, print a message to stderr and exit with status 1, so the return
   value is never NULL.  */
FILE *
fopen_for_write (const char *filename)
{
  FILE  *out = fopen (filename, "w");

  if (out != NULL)
    return out;

  fprintf (stderr, "Cannot create %s\n", filename);
  exit(1);
}
    854      1.1  mrg 
/* Close FP, a stream that was written to FILENAME.  If the stream has its
   error indicator set, or closing it fails, print a message to stderr and
   exit with status 1.  */
void
fclose_written (FILE *fp, const char *filename)
{
  /* the error indicator must be read before the stream is closed */
  int  had_error = ferror (fp);
  int  close_rc  = fclose (fp);

  if ((had_error | close_rc) == 0)
    return;

  fprintf (stderr, "Error writing %s\n", filename);
  exit(1);
}
    869      1.1  mrg 
    870      1.1  mrg 
    871      1.1  mrg void
    872      1.1  mrg run_gnuplot (int argc, char *argv[])
    873      1.1  mrg {
    874      1.1  mrg   char  *plot_filename;
    875      1.1  mrg   char  *data_filename;
    876      1.1  mrg   FILE  *fp;
    877      1.1  mrg   int   i;
    878      1.1  mrg 
    879      1.1  mrg   plot_filename = (char *) (*__gmp_allocate_func)
    880      1.1  mrg     (strlen (option_gnuplot_basename) + 20);
    881      1.1  mrg   data_filename = (char *) (*__gmp_allocate_func)
    882      1.1  mrg     (strlen (option_gnuplot_basename) + 20);
    883      1.1  mrg 
    884      1.1  mrg   sprintf (plot_filename, "%s.gnuplot", option_gnuplot_basename);
    885      1.1  mrg   sprintf (data_filename, "%s.data",    option_gnuplot_basename);
    886      1.1  mrg 
    887      1.1  mrg   fp = fopen_for_write (plot_filename);
    888      1.1  mrg 
    889      1.1  mrg   fprintf (fp, "# Generated with:\n");
    890      1.1  mrg   fprintf (fp, "#");
    891      1.1  mrg   for (i = 0; i < argc; i++)
    892      1.1  mrg     fprintf (fp, " %s", argv[i]);
    893      1.1  mrg   fprintf (fp, "\n");
    894      1.1  mrg   fprintf (fp, "\n");
    895      1.1  mrg 
    896      1.1  mrg   fprintf (fp, "reset\n");
    897      1.1  mrg 
    898      1.1  mrg   /* Putting the key at the top left is usually good, and you can change it
    899      1.1  mrg      interactively if it's not. */
    900      1.1  mrg   fprintf (fp, "set key left\n");
    901      1.1  mrg 
    902  1.1.1.4  mrg   /* write underscores, not subscripts */
    903  1.1.1.4  mrg   fprintf (fp, "set termoption noenhanced\n");
    904  1.1.1.4  mrg 
    905      1.1  mrg   /* designed to make it possible to see crossovers easily */
    906  1.1.1.2  mrg   fprintf (fp, "set style data lines\n");
    907      1.1  mrg 
    908      1.1  mrg   fprintf (fp, "plot ");
    909      1.1  mrg   for (i = 0; i < num_choices; i++)
    910      1.1  mrg     {
    911      1.1  mrg       fprintf (fp, " \"%s\" using 1:%d", data_filename, i+2);
    912      1.1  mrg       fprintf (fp, " title \"%s\"", choice[i].name);
    913      1.1  mrg 
    914      1.1  mrg       if (i != num_choices-1)
    915      1.1  mrg         fprintf (fp, ", \\");
    916      1.1  mrg       fprintf (fp, "\n");
    917      1.1  mrg     }
    918      1.1  mrg 
    919      1.1  mrg   fprintf (fp, "load \"-\"\n");
    920      1.1  mrg   fclose_written (fp, plot_filename);
    921      1.1  mrg 
    922      1.1  mrg   fp = fopen_for_write (data_filename);
    923      1.1  mrg 
    924      1.1  mrg   /* Unbuffered so you can see where the program was up to if it crashes or
    925      1.1  mrg      you kill it. */
    926      1.1  mrg   setbuf (fp, NULL);
    927      1.1  mrg 
    928      1.1  mrg   run_all (fp);
    929      1.1  mrg   fclose_written (fp, data_filename);
    930      1.1  mrg }
    931      1.1  mrg 
    932      1.1  mrg 
/* LIMB_ONES(n) is a limb whose n least significant bits are ones and whose
   remaining bits are zeros.  n == 0 and n == GMP_LIMB_BITS are picked out
   first; every other n goes through the shift.  */

#define LIMB_ONES(n)                            \
  ((n) == 0 ? CNST_LIMB(0)                      \
   : (n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX       \
   : (CNST_LIMB(1) << (n)) - 1)
    939      1.1  mrg 
    940      1.1  mrg mp_limb_t
    941      1.1  mrg r_string (const char *s)
    942      1.1  mrg {
    943      1.1  mrg   const char  *s_orig = s;
    944      1.1  mrg   long        n;
    945      1.1  mrg 
    946      1.1  mrg   if (strcmp (s, "aas") == 0)
    947      1.1  mrg     return GMP_NUMB_0xAA;
    948      1.1  mrg 
    949      1.1  mrg   {
    950      1.1  mrg     mpz_t      z;
    951      1.1  mrg     mp_limb_t  l;
    952      1.1  mrg     int        set, siz;
    953      1.1  mrg 
    954      1.1  mrg     mpz_init (z);
    955      1.1  mrg     set = mpz_set_str (z, s, 0);
    956      1.1  mrg     siz = SIZ(z);
    957      1.1  mrg     l = (siz == 0 ? 0 : siz > 0 ? PTR(z)[0] : -PTR(z)[0]);
    958      1.1  mrg     mpz_clear (z);
    959      1.1  mrg     if (set == 0)
    960      1.1  mrg       {
    961      1.1  mrg         if (siz > 1 || siz < -1)
    962      1.1  mrg           printf ("Warning, r parameter %s truncated to %d bits\n",
    963      1.1  mrg                   s_orig, GMP_LIMB_BITS);
    964      1.1  mrg         return l;
    965      1.1  mrg       }
    966      1.1  mrg   }
    967      1.1  mrg 
    968      1.1  mrg   if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
    969      1.1  mrg     n = strtoul (s+2, (char **) &s, 16);
    970      1.1  mrg   else
    971      1.1  mrg     n = strtol (s, (char **) &s, 10);
    972      1.1  mrg 
    973      1.1  mrg   if (strcmp (s, "bits") == 0)
    974      1.1  mrg     {
    975      1.1  mrg       mp_limb_t  l;
    976      1.1  mrg       if (n > GMP_LIMB_BITS)
    977      1.1  mrg         {
    978      1.1  mrg           fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
    979      1.1  mrg                    n, GMP_LIMB_BITS);
    980      1.1  mrg           exit (1);
    981      1.1  mrg         }
    982      1.1  mrg       mpn_random (&l, 1);
    983      1.1  mrg       return (l | (CNST_LIMB(1) << (n-1))) & LIMB_ONES(n);
    984      1.1  mrg     }
    985      1.1  mrg   else  if (strcmp (s, "ones") == 0)
    986      1.1  mrg     {
    987      1.1  mrg       if (n > GMP_LIMB_BITS)
    988      1.1  mrg         {
    989      1.1  mrg           fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
    990      1.1  mrg                    n, GMP_LIMB_BITS);
    991      1.1  mrg           exit (1);
    992      1.1  mrg         }
    993      1.1  mrg       return LIMB_ONES (n);
    994      1.1  mrg     }
    995      1.1  mrg   else if (*s != '\0')
    996      1.1  mrg     {
    997      1.1  mrg       fprintf (stderr, "invalid r parameter: %s\n", s_orig);
    998      1.1  mrg       exit (1);
    999      1.1  mrg     }
   1000      1.1  mrg 
   1001      1.1  mrg   return n;
   1002      1.1  mrg }
   1003      1.1  mrg 
   1004      1.1  mrg 
   1005      1.1  mrg void
   1006      1.1  mrg routine_find (struct choice_t *c, const char *s_orig)
   1007      1.1  mrg {
   1008      1.1  mrg   const char  *s;
   1009      1.1  mrg   int     i;
   1010      1.1  mrg   size_t  nlen;
   1011      1.1  mrg 
   1012      1.1  mrg   c->name = s_orig;
   1013      1.1  mrg   s = strchr (s_orig, '*');
   1014      1.1  mrg   if (s != NULL)
   1015      1.1  mrg     {
   1016      1.1  mrg       c->scale = atof(s_orig);
   1017      1.1  mrg       s++;
   1018      1.1  mrg     }
   1019      1.1  mrg   else
   1020      1.1  mrg     {
   1021      1.1  mrg       c->scale = 1.0;
   1022      1.1  mrg       s = s_orig;
   1023      1.1  mrg     }
   1024      1.1  mrg 
   1025      1.1  mrg   for (i = 0; i < numberof (routine); i++)
   1026      1.1  mrg     {
   1027      1.1  mrg       nlen = strlen (routine[i].name);
   1028      1.1  mrg       if (memcmp (s, routine[i].name, nlen) != 0)
   1029      1.1  mrg         continue;
   1030      1.1  mrg 
   1031      1.1  mrg       if (s[nlen] == '.')
   1032      1.1  mrg         {
   1033      1.1  mrg           /* match, with a .r parameter */
   1034      1.1  mrg 
   1035      1.1  mrg           if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL)))
   1036      1.1  mrg             {
   1037      1.1  mrg               fprintf (stderr,
   1038      1.1  mrg                        "Choice %s bad: doesn't take a \".<r>\" parameter\n",
   1039      1.1  mrg                        s_orig);
   1040      1.1  mrg               exit (1);
   1041      1.1  mrg             }
   1042      1.1  mrg 
   1043      1.1  mrg           c->p = &routine[i];
   1044      1.1  mrg           c->r = r_string (s + nlen + 1);
   1045      1.1  mrg           return;
   1046      1.1  mrg         }
   1047      1.1  mrg 
   1048      1.1  mrg       if (s[nlen] == '\0')
   1049      1.1  mrg         {
   1050      1.1  mrg           /* match, with no parameter */
   1051      1.1  mrg 
   1052      1.1  mrg           if (routine[i].flag & FLAG_R)
   1053      1.1  mrg             {
   1054      1.1  mrg               fprintf (stderr,
   1055      1.1  mrg                        "Choice %s bad: needs a \".<r>\" parameter\n",
   1056      1.1  mrg                        s_orig);
   1057      1.1  mrg               exit (1);
   1058      1.1  mrg             }
   1059      1.1  mrg 
   1060      1.1  mrg           c->p = &routine[i];
   1061      1.1  mrg           c->r = 0;
   1062      1.1  mrg           return;
   1063      1.1  mrg         }
   1064      1.1  mrg     }
   1065      1.1  mrg 
   1066      1.1  mrg   fprintf (stderr, "Choice %s unrecognised\n", s_orig);
   1067      1.1  mrg   exit (1);
   1068      1.1  mrg }
   1069      1.1  mrg 
   1070      1.1  mrg 
   1071      1.1  mrg void
   1072      1.1  mrg usage (void)
   1073      1.1  mrg {
   1074      1.1  mrg   int  i;
   1075      1.1  mrg 
   1076      1.1  mrg   speed_time_init ();
   1077      1.1  mrg 
   1078      1.1  mrg   printf ("Usage: speed [-options] -s size <routine>...\n");
   1079      1.1  mrg   printf ("Measure the speed of some routines.\n");
   1080      1.1  mrg   printf ("Times are in seconds, accuracy is shown.\n");
   1081      1.1  mrg   printf ("\n");
   1082      1.1  mrg   printf ("   -p num     set precision as number of time units each routine must run\n");
   1083      1.1  mrg   printf ("   -s size[-end][,size[-end]]...   sizes to measure\n");
   1084      1.1  mrg   printf ("              single sizes or ranges, sep with comma or use multiple -s\n");
   1085      1.1  mrg   printf ("   -t step    step through sizes by given amount\n");
   1086      1.1  mrg   printf ("   -f factor  step through sizes by given factor (eg. 1.05)\n");
   1087      1.1  mrg   printf ("   -r         show times as ratios of the first routine\n");
   1088      1.1  mrg   printf ("   -d         show times as difference from the first routine\n");
   1089      1.1  mrg   printf ("   -D         show times as difference from previous size shown\n");
   1090      1.1  mrg   printf ("   -c         show times in CPU cycles\n");
   1091      1.1  mrg   printf ("   -C         show times in cycles per limb\n");
   1092      1.1  mrg   printf ("   -u         print resource usage (memory) at end\n");
   1093      1.1  mrg   printf ("   -P name    output plot files \"name.gnuplot\" and \"name.data\"\n");
   1094      1.1  mrg   printf ("   -a <type>  use given data: random(default), random2, zeros, aas, ffs, 2fd\n");
   1095      1.1  mrg   printf ("   -x, -y, -w, -W <align>  specify data alignments, sources and dests\n");
   1096      1.1  mrg   printf ("   -o addrs   print addresses of data blocks\n");
   1097      1.1  mrg   printf ("\n");
   1098      1.1  mrg   printf ("If both -t and -f are used, it means step by the factor or the step, whichever\n");
   1099      1.1  mrg   printf ("is greater.\n");
   1100      1.1  mrg   printf ("If both -C and -D are used, it means cycles per however many limbs between a\n");
   1101      1.1  mrg   printf ("size and the previous size.\n");
   1102      1.1  mrg   printf ("\n");
   1103      1.1  mrg   printf ("After running with -P, plots can be viewed with Gnuplot or Quickplot.\n");
   1104      1.1  mrg   printf ("\"gnuplot name.gnuplot\" (use \"set logscale xy; replot\" at the prompt for\n");
   1105      1.1  mrg   printf ("a log/log plot).\n");
   1106      1.1  mrg   printf ("\"quickplot -s name.data\" (has interactive zooming, and note -s is important\n");
   1107      1.1  mrg   printf ("when viewing more than one routine, it means same axis scales for all data).\n");
   1108      1.1  mrg   printf ("\n");
   1109      1.1  mrg   printf ("The available routines are as follows.\n");
   1110      1.1  mrg   printf ("\n");
   1111      1.1  mrg 
   1112      1.1  mrg   for (i = 0; i < numberof (routine); i++)
   1113      1.1  mrg     {
   1114      1.1  mrg       if (routine[i].flag & FLAG_R)
   1115      1.1  mrg         printf ("\t%s.r\n", routine[i].name);
   1116      1.1  mrg       else if (routine[i].flag & FLAG_R_OPTIONAL)
   1117      1.1  mrg         printf ("\t%s (optional .r)\n", routine[i].name);
   1118      1.1  mrg       else
   1119      1.1  mrg         printf ("\t%s\n", routine[i].name);
   1120      1.1  mrg     }
   1121      1.1  mrg   printf ("\n");
   1122      1.1  mrg   printf ("Routines with a \".r\" need an extra parameter, for example mpn_lshift.6\n");
   1123      1.1  mrg   printf ("r should be in decimal, or use 0xN for hexadecimal.\n");
   1124      1.1  mrg   printf ("\n");
   1125      1.1  mrg   printf ("Special forms for r are \"<N>bits\" for a random N bit number, \"<N>ones\" for\n");
   1126      1.1  mrg   printf ("N one bits, or \"aas\" for 0xAA..AA.\n");
   1127      1.1  mrg   printf ("\n");
   1128      1.1  mrg   printf ("Times for sizes out of the range accepted by a routine are shown as 0.\n");
   1129      1.1  mrg   printf ("The fastest routine at each size is marked with a # (free form output only).\n");
   1130      1.1  mrg   printf ("\n");
   1131      1.1  mrg   printf ("%s", speed_time_string);
   1132      1.1  mrg   printf ("\n");
   1133      1.1  mrg   printf ("Gnuplot home page http://www.gnuplot.info/\n");
   1134      1.1  mrg   printf ("Quickplot home page http://quickplot.sourceforge.net/\n");
   1135      1.1  mrg }
   1136      1.1  mrg 
   1137      1.1  mrg void
   1138      1.1  mrg check_align_option (const char *name, mp_size_t align)
   1139      1.1  mrg {
   1140      1.1  mrg   if (align < 0 || align > SPEED_TMP_ALLOC_ADJUST_MASK)
   1141      1.1  mrg     {
   1142      1.1  mrg       fprintf (stderr, "Alignment request out of range: %s %ld\n",
   1143      1.1  mrg                name, (long) align);
   1144      1.1  mrg       fprintf (stderr, "  should be 0 to %d (limbs), inclusive\n",
   1145      1.1  mrg                SPEED_TMP_ALLOC_ADJUST_MASK);
   1146      1.1  mrg       exit (1);
   1147      1.1  mrg     }
   1148      1.1  mrg }
   1149      1.1  mrg 
   1150      1.1  mrg int
   1151      1.1  mrg main (int argc, char *argv[])
   1152      1.1  mrg {
   1153      1.1  mrg   int  i;
   1154      1.1  mrg   int  opt;
   1155      1.1  mrg 
   1156      1.1  mrg   /* Unbuffered so output goes straight out when directed to a pipe or file
   1157      1.1  mrg      and isn't lost on killing the program half way.  */
   1158      1.1  mrg   setbuf (stdout, NULL);
   1159      1.1  mrg 
   1160      1.1  mrg   for (;;)
   1161      1.1  mrg     {
   1162      1.1  mrg       opt = getopt(argc, argv, "a:CcDdEFf:o:p:P:rRs:t:ux:y:w:W:z");
   1163      1.1  mrg       if (opt == EOF)
   1164      1.1  mrg         break;
   1165      1.1  mrg 
   1166      1.1  mrg       switch (opt) {
   1167      1.1  mrg       case 'a':
   1168      1.1  mrg         if (strcmp (optarg, "random") == 0)       option_data = DATA_RANDOM;
   1169      1.1  mrg         else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2;
   1170      1.1  mrg         else if (strcmp (optarg, "zeros") == 0)   option_data = DATA_ZEROS;
   1171      1.1  mrg         else if (strcmp (optarg, "aas") == 0)     option_data = DATA_AAS;
   1172      1.1  mrg         else if (strcmp (optarg, "ffs") == 0)     option_data = DATA_FFS;
   1173      1.1  mrg         else if (strcmp (optarg, "2fd") == 0)     option_data = DATA_2FD;
   1174      1.1  mrg         else
   1175      1.1  mrg           {
   1176      1.1  mrg             fprintf (stderr, "unrecognised data option: %s\n", optarg);
   1177      1.1  mrg             exit (1);
   1178      1.1  mrg           }
   1179      1.1  mrg         break;
   1180      1.1  mrg       case 'C':
   1181      1.1  mrg         if (option_unit  != UNIT_SECONDS) goto bad_unit;
   1182      1.1  mrg         option_unit = UNIT_CYCLESPERLIMB;
   1183      1.1  mrg         break;
   1184      1.1  mrg       case 'c':
   1185      1.1  mrg         if (option_unit != UNIT_SECONDS)
   1186      1.1  mrg           {
   1187      1.1  mrg           bad_unit:
   1188      1.1  mrg             fprintf (stderr, "cannot use more than one of -c, -C\n");
   1189      1.1  mrg             exit (1);
   1190      1.1  mrg           }
   1191      1.1  mrg         option_unit = UNIT_CYCLES;
   1192      1.1  mrg         break;
   1193      1.1  mrg       case 'D':
   1194      1.1  mrg         if (option_cmp != CMP_ABSOLUTE) goto bad_cmp;
   1195      1.1  mrg         option_cmp = CMP_DIFFPREV;
   1196      1.1  mrg         break;
   1197      1.1  mrg       case 'd':
   1198      1.1  mrg         if (option_cmp != CMP_ABSOLUTE)
   1199      1.1  mrg           {
   1200      1.1  mrg           bad_cmp:
   1201      1.1  mrg             fprintf (stderr, "cannot use more than one of -d, -D, -r\n");
   1202      1.1  mrg             exit (1);
   1203      1.1  mrg           }
   1204      1.1  mrg         option_cmp = CMP_DIFFERENCE;
   1205      1.1  mrg         break;
   1206      1.1  mrg       case 'E':
   1207      1.1  mrg         option_square = 1;
   1208      1.1  mrg         break;
   1209      1.1  mrg       case 'F':
   1210      1.1  mrg         option_square = 2;
   1211      1.1  mrg         break;
   1212      1.1  mrg       case 'f':
   1213      1.1  mrg         option_factor = atof (optarg);
   1214      1.1  mrg         if (option_factor <= 1.0)
   1215      1.1  mrg           {
   1216      1.1  mrg             fprintf (stderr, "-f factor must be > 1.0\n");
   1217      1.1  mrg             exit (1);
   1218      1.1  mrg           }
   1219      1.1  mrg         break;
   1220      1.1  mrg       case 'o':
   1221      1.1  mrg         speed_option_set (optarg);
   1222      1.1  mrg         break;
   1223      1.1  mrg       case 'P':
   1224      1.1  mrg         option_gnuplot = 1;
   1225      1.1  mrg         option_gnuplot_basename = optarg;
   1226      1.1  mrg         break;
   1227      1.1  mrg       case 'p':
   1228      1.1  mrg         speed_precision = atoi (optarg);
   1229      1.1  mrg         break;
   1230      1.1  mrg       case 'R':
   1231      1.1  mrg         option_seed = time (NULL);
   1232      1.1  mrg         break;
   1233      1.1  mrg       case 'r':
   1234      1.1  mrg         if (option_cmp != CMP_ABSOLUTE)
   1235      1.1  mrg           goto bad_cmp;
   1236      1.1  mrg         option_cmp = CMP_RATIO;
   1237      1.1  mrg         break;
   1238      1.1  mrg       case 's':
   1239      1.1  mrg         {
   1240      1.1  mrg           char  *s;
   1241      1.1  mrg           for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ","))
   1242      1.1  mrg             {
   1243      1.1  mrg               if (size_num == size_allocnum)
   1244      1.1  mrg                 {
   1245      1.1  mrg                   size_array = (struct size_array_t *)
   1246      1.1  mrg                     __gmp_allocate_or_reallocate
   1247      1.1  mrg                     (size_array,
   1248      1.1  mrg                      size_allocnum * sizeof(size_array[0]),
   1249      1.1  mrg                      (size_allocnum+10) * sizeof(size_array[0]));
   1250      1.1  mrg                   size_allocnum += 10;
   1251      1.1  mrg                 }
   1252      1.1  mrg               if (sscanf (s, "%ld-%ld",
   1253      1.1  mrg                           &size_array[size_num].start,
   1254      1.1  mrg                           &size_array[size_num].end) != 2)
   1255      1.1  mrg                 {
   1256      1.1  mrg                   size_array[size_num].start = size_array[size_num].end
   1257      1.1  mrg                     = atol (s);
   1258      1.1  mrg                 }
   1259      1.1  mrg 
   1260      1.1  mrg               if (size_array[size_num].start < 0
   1261      1.1  mrg                   || size_array[size_num].end < 0
   1262      1.1  mrg                   || size_array[size_num].start > size_array[size_num].end)
   1263      1.1  mrg                 {
   1264      1.1  mrg                   fprintf (stderr, "invalid size parameter: %s\n", s);
   1265      1.1  mrg                   exit (1);
   1266      1.1  mrg                 }
   1267      1.1  mrg 
   1268      1.1  mrg               size_num++;
   1269      1.1  mrg             }
   1270      1.1  mrg         }
   1271      1.1  mrg         break;
   1272      1.1  mrg       case 't':
   1273      1.1  mrg         option_step = atol (optarg);
   1274      1.1  mrg         if (option_step < 1)
   1275      1.1  mrg           {
   1276      1.1  mrg             fprintf (stderr, "-t step must be >= 1\n");
   1277      1.1  mrg             exit (1);
   1278      1.1  mrg           }
   1279      1.1  mrg         break;
   1280      1.1  mrg       case 'u':
   1281      1.1  mrg         option_resource_usage = 1;
   1282      1.1  mrg         break;
   1283      1.1  mrg       case 'z':
   1284      1.1  mrg         sp.cache = 1;
   1285      1.1  mrg         break;
   1286      1.1  mrg       case 'x':
   1287      1.1  mrg         sp.align_xp = atol (optarg);
   1288      1.1  mrg         check_align_option ("-x", sp.align_xp);
   1289      1.1  mrg         break;
   1290      1.1  mrg       case 'y':
   1291      1.1  mrg         sp.align_yp = atol (optarg);
   1292      1.1  mrg         check_align_option ("-y", sp.align_yp);
   1293      1.1  mrg         break;
   1294      1.1  mrg       case 'w':
   1295      1.1  mrg         sp.align_wp = atol (optarg);
   1296      1.1  mrg         check_align_option ("-w", sp.align_wp);
   1297      1.1  mrg         break;
   1298      1.1  mrg       case 'W':
   1299      1.1  mrg         sp.align_wp2 = atol (optarg);
   1300      1.1  mrg         check_align_option ("-W", sp.align_wp2);
   1301      1.1  mrg         break;
   1302      1.1  mrg       case '?':
   1303      1.1  mrg         exit(1);
   1304      1.1  mrg       }
   1305      1.1  mrg     }
   1306      1.1  mrg 
   1307      1.1  mrg   if (optind >= argc)
   1308      1.1  mrg     {
   1309      1.1  mrg       usage ();
   1310      1.1  mrg       exit (1);
   1311      1.1  mrg     }
   1312      1.1  mrg 
   1313      1.1  mrg   if (size_num == 0)
   1314      1.1  mrg     {
   1315      1.1  mrg       fprintf (stderr, "-s <size> must be specified\n");
   1316      1.1  mrg       exit (1);
   1317      1.1  mrg     }
   1318      1.1  mrg 
   1319      1.1  mrg   gmp_randinit_default (__gmp_rands);
   1320      1.1  mrg   __gmp_rands_initialized = 1;
   1321      1.1  mrg   gmp_randseed_ui (__gmp_rands, option_seed);
   1322      1.1  mrg 
   1323      1.1  mrg   choice = (struct choice_t *) (*__gmp_allocate_func)
   1324      1.1  mrg     ((argc - optind) * sizeof(choice[0]));
   1325      1.1  mrg   for ( ; optind < argc; optind++)
   1326      1.1  mrg     {
   1327      1.1  mrg       struct choice_t  c;
   1328      1.1  mrg       routine_find (&c, argv[optind]);
   1329      1.1  mrg       choice[num_choices] = c;
   1330      1.1  mrg       num_choices++;
   1331      1.1  mrg     }
   1332      1.1  mrg 
   1333      1.1  mrg   if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) &&
   1334      1.1  mrg       num_choices < 2)
   1335      1.1  mrg     {
   1336      1.1  mrg       fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n");
   1337      1.1  mrg     }
   1338      1.1  mrg 
   1339      1.1  mrg   speed_time_init ();
   1340      1.1  mrg   if (option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB)
   1341      1.1  mrg     speed_cycletime_need_cycles ();
   1342      1.1  mrg   else
   1343      1.1  mrg     speed_cycletime_need_seconds ();
   1344      1.1  mrg 
   1345      1.1  mrg   if (option_gnuplot)
   1346      1.1  mrg     {
   1347      1.1  mrg       run_gnuplot (argc, argv);
   1348      1.1  mrg     }
   1349      1.1  mrg   else
   1350      1.1  mrg     {
   1351      1.1  mrg       if (option_unit == UNIT_SECONDS)
   1352      1.1  mrg         printf ("overhead %.9f secs", speed_measure (speed_noop, NULL));
   1353      1.1  mrg       else
   1354      1.1  mrg         printf ("overhead %.2f cycles",
   1355      1.1  mrg                 speed_measure (speed_noop, NULL) / speed_cycletime);
   1356      1.1  mrg       printf (", precision %d units of %.2e secs",
   1357      1.1  mrg               speed_precision, speed_unittime);
   1358      1.1  mrg 
   1359      1.1  mrg       if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
   1360      1.1  mrg         printf (", CPU freq unknown\n");
   1361      1.1  mrg       else
   1362      1.1  mrg         printf (", CPU freq %.2f MHz\n", 1e-6/speed_cycletime);
   1363      1.1  mrg 
   1364      1.1  mrg       printf ("       ");
   1365      1.1  mrg       for (i = 0; i < num_choices; i++)
   1366      1.1  mrg         printf (" %*s", COLUMN_WIDTH, choice[i].name);
   1367      1.1  mrg       printf ("\n");
   1368      1.1  mrg 
   1369      1.1  mrg       run_all (stdout);
   1370      1.1  mrg     }
   1371      1.1  mrg 
   1372      1.1  mrg   if (option_resource_usage)
   1373      1.1  mrg     {
   1374      1.1  mrg #if HAVE_GETRUSAGE
   1375      1.1  mrg       {
   1376      1.1  mrg         /* This doesn't give data sizes on linux 2.0.x, only utime. */
   1377      1.1  mrg         struct rusage  r;
   1378      1.1  mrg         if (getrusage (RUSAGE_SELF, &r) != 0)
   1379      1.1  mrg           perror ("getrusage");
   1380      1.1  mrg         else
   1381      1.1  mrg           printf ("getrusage(): utime %ld.%06ld data %ld stack %ld maxresident %ld\n",
   1382  1.1.1.5  mrg                   (long) r.ru_utime.tv_sec, (long) r.ru_utime.tv_usec,
   1383      1.1  mrg                   r.ru_idrss, r.ru_isrss, r.ru_ixrss);
   1384      1.1  mrg       }
   1385      1.1  mrg #else
   1386      1.1  mrg       printf ("getrusage() not available\n");
   1387      1.1  mrg #endif
   1388      1.1  mrg 
   1389      1.1  mrg       /* Linux kernel. */
   1390      1.1  mrg       {
   1391      1.1  mrg         char  buf[128];
   1392      1.1  mrg         sprintf (buf, "/proc/%d/status", getpid());
   1393      1.1  mrg         if (access (buf, R_OK) == 0)
   1394      1.1  mrg           {
   1395      1.1  mrg             sprintf (buf, "cat /proc/%d/status", getpid());
   1396      1.1  mrg             system (buf);
   1397      1.1  mrg           }
   1398      1.1  mrg 
   1399      1.1  mrg       }
   1400      1.1  mrg     }
   1401      1.1  mrg 
   1402      1.1  mrg   return 0;
   1403      1.1  mrg }
   1404