Home | History | Annotate | Line # | Download | only in tune
many.pl revision 1.1
      1 #! /usr/bin/perl -w
      2 
      3 # Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
      4 #
      5 # This file is part of the GNU MP Library.
      6 #
      7 # The GNU MP Library is free software; you can redistribute it and/or modify
      8 # it under the terms of the GNU Lesser General Public License as published by
      9 # the Free Software Foundation; either version 3 of the License, or (at your
     10 # option) any later version.
     11 #
     12 # The GNU MP Library is distributed in the hope that it will be useful, but
     13 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     14 # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     15 # License for more details.
     16 #
     17 # You should have received a copy of the GNU Lesser General Public License
     18 # along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
     19 
     20 
     21 # Usage:  cd $builddir/tune
     22 #	  perl $srcdir/tune/many.pl [-t] <files/dirs>...
     23 #
     24 # Output: speed-many.c
     25 #         try-many.c
     26 #         Makefile.many
     27 #
     28 # Make alternate versions of various mpn routines available for measuring
     29 # and testing.
     30 #
     31 # The $srcdir and $builddir in the invocation above just means the script
     32 # lives in the tune source directory, but should be run in the tune build
     33 # directory.  When not using a separate object directory this just becomes
     34 #
     35 #	cd tune
     36 #	perl many.pl [-t] <files/dirs>...
     37 #
     38 #
     39 # SINGLE FILES
     40 #
     41 # Suppose $HOME/newcode/mul_1_experiment.asm is a new implementation of
     42 # mpn_mul_1, then
     43 #
     44 #	cd $builddir/tune
     45 #	perl $srcdir/tune/many.pl $HOME/newcode/mul_1_experiment.asm
     46 #
     47 # will produce rules and renaming so that a speed program incorporating it
     48 # can be built,
     49 #
     50 #	make -f Makefile.many speed-many
     51 #
     52 # then for example it can be compared to the standard mul_1,
     53 #
     54 #	./speed-many -s 1-30 mpn_mul_1 mpn_mul_1_experiment
     55 #
     56 # An expanded try program can be used to check correctness,
     57 #
     58 #	make -f Makefile.many try-many
     59 #
     60 # and run
     61 #
     62 #	./try-many mpn_mul_1_experiment
     63 #
     64 # Files can be ".c", ".S" or ".asm".  ".s" files can't be used because they
     65 # don't get any preprocessing so there's no way to do renaming of their
     66 # functions.
     67 #
     68 #
     69 # WHOLE DIRECTORIES
     70 #
     71 # If a directory is given, then all files in it will be made available.
     72 # For example,
     73 #
     74 #	cd $builddir/tune
     75 #	perl $srcdir/tune/many.pl $HOME/newcode
     76 #
     77 # Each file should have a suffix, like "_experiment" above.
     78 #
     79 #
     80 # MPN DIRECTORIES
     81 #
     82 # mpn directories from the GMP source tree can be included, and this is a
     83 # convenient way to compare multiple implementations suiting different chips
     84 # in a CPU family.  For example the following would make all x86 routines
     85 # available,
     86 #
     87 #	cd $builddir/tune
     88 #	perl $srcdir/tune/many.pl `find $srcdir/mpn/x86 -type d`
     89 #
     90 # On a new x86 chip a comparison could then be made to see how existing code
     91 # runs.  For example,
     92 #
     93 #	make -f Makefile.many speed-many
     94 #	./speed-many -s 1-30 -c \
     95 #		mpn_add_n_x86 mpn_add_n_pentium mpn_add_n_k6 mpn_add_n_k7
     96 #
     97 # Files in "mpn" subdirectories don't need the "_experiment" style suffix
     98 # described above, instead a suffix is constructed from the subdirectory.
     99 # For example "mpn/x86/k7/mmx/mod_1.asm" will generate a function
    100 # mpn_mod_1_k7_mmx.  The rule is to take the last directory name after the
    101 # "mpn", or the last two if there's three or more.  (Check the generated
    102 # speed-many.c if in doubt.)
    103 #
    104 #
    105 # GENERIC C
    106 #
    107 # The mpn/generic directory can be included too, just like any processor
    108 # specific directory.  This is a good way to compare assembler and generic C
    109 # implementations.  For example,
    110 #
    111 #	cd $builddir/tune
    112 #	perl $srcdir/tune/many.pl $srcdir/mpn/generic
    113 #
    114 # or if just a few routines are of interest, then for example
    115 #
    116 #	cd $builddir/tune
    117 #	perl $srcdir/tune/many.pl \
    118 #		$srcdir/mpn/generic/lshift.c \
    119 #		$srcdir/mpn/generic/mod_1.c \
    120 #		$srcdir/mpn/generic/aorsmul_1.c
    121 #
    122 # giving mpn_lshift_generic etc.
    123 #
    124 #
    125 # TESTS/DEVEL PROGRAMS
    126 #
    127 # Makefile.many also has rules to build the tests/devel programs with suitable
    128 # renaming, and with some parameters for correctness or speed.  This is less
    129 # convenient than the speed and try programs, but provides an independent
    130 # check.  For example,
    131 #
    132 #	make -f Makefile.many tests_mul_1_experimental
    133 #	./tests_mul_1_experimental
    134 #
    135 # and for speed
    136 #
    137 #	make -f Makefile.many tests_mul_1_experimental_sp
    138 #	./tests_mul_1_experimental_sp
    139 #
    140 # Not all the programs support speed measuring, in which case only the
    141 # correctness test will be useful.
    142 #
    143 # The parameters for repetitions and host clock speed are -D defines.  Some
    144 # defaults are provided at the end of Makefile.many, but probably these will
    145 # want to be overridden.  For example,
    146 #
    147 #	rm tests_mul_1_experimental.o
    148 #	make -f Makefile.many \
    149 #	   CFLAGS_TESTS="-DSIZE=50 -DTIMES=1000 -DRANDOM -DCLOCK=175000000" \
    150 #	   tests_mul_1_experimental
    151 #	./tests_mul_1_experimental
    152 #
    153 #
    154 # OTHER NOTES
    155 #
    156 # The mappings of file names to functions, and the macros to then use for
    157 # speed measuring etc are driven by @table below.  The scheme isn't
    158 # completely general, it's only got as many variations as have been needed
    159 # so far.
    160 #
    161 # Some functions are only made available in speed-many, or others only in
    162 # try-many.  An @table entry speed=>none means no speed measuring is
    163 # available, or try=>none no try program testing.  These can be removed
    164 # if/when the respective programs get the necessary support.
    165 #
    166 # If a file has "1c" or "nc" carry-in entrypoints, they're renamed and made
    167 # available too.  These are recognised from PROLOGUE or MULFUNC_PROLOGUE in
    168 # .S and .asm files, or from a line starting with "mpn_foo_1c" in a .c file
    169 # (possibly via a #define), and on that basis are entirely optional.  This
    170 # entrypoint matching is done for the standard entrypoints too, but it would
    171 # be very unusual to have for instance a mul_1c without a mul_1.
    172 #
    173 # Some mpz files are recognized.  For example an experimental copy of
    174 # mpz/powm.c could be included as powm_new.c and would be called
    175 # mpz_powm_new.  So far only speed measuring is available for these.
    176 #
    177 # For the ".S" and ".asm" files, both PIC and non-PIC objects are built.
    178 # The PIC functions have a "_pic" suffix, for example "mpn_mod_1_k7_mmx_pic".
    179 # This can be ignored for routines that don't differ for PIC, or for CPUs
    180 # where everything is PIC anyway.
    181 #
    182 # K&R compilers are supported via the same ansi2knr mechanism used by
    183 # automake, though it's hard to believe anyone will have much interest in
    184 # measuring a compiler so old that it doesn't even have an ANSI mode.
    185 #
    186 # The "-t" option can be used to print a trace of the files found and what's
    187 # done with them.  A great deal of obscure output is produced, but it can
    188 # indicate where or why some files aren't being recognised etc.  For
    189 # example,
    190 #
    191 #	cd $builddir/tune
    192 #	perl $srcdir/tune/many.pl -t $HOME/newcode/add_n_weird.asm
    193 #
    194 # In general, when including new code, all that's really necessary is that
    195 # it will compile or assemble under the current configuration.  It's fine if
    196 # some code doesn't actually run due to bugs, or to needing a newer CPU or
    197 # whatever, simply don't ask for the offending routines when invoking
    198 # speed-many or try-many, or don't try to run them on sizes they don't yet
    199 # support, or whatever.
    200 #
    201 #
    202 # CPU SPECIFICS
    203 #
    204 # x86 - All the x86 code will assemble on any system, but code for newer
    205 #       chips might not run on older chips.  Expect SIGILLs from new
    206 #       instructions on old chips.
    207 #
    208 #       A few "new" instructions, like cmov for instance, are done as macros
    209 #       and will generate some equivalent plain i386 code when HAVE_HOST_CPU
    210 #       in config.m4 indicates an old CPU.  It won't run fast, but it does
    211 #       make it possible to test correctness.
    212 #
    213 #
    214 # INTERNALS
    215 #
    216 # The nonsense involving $ENV is some hooks used during development to add
    217 # additional functions temporarily.
    218 #
    219 #
    220 # FUTURE
    221 #
    222 # Maybe the C files should be compiled pic and non-pic too.  Wait until
    223 # there's a difference that might be of interest.
    224 #
    225 # Warn if a file provides no functions.
    226 #
    227 # Allow mpz and mpn files of the same name.  Currently the mpn fib2_ui
    228 # matching hides the mpz version of that.  Will need to check the file
    229 # contents to see which it is.  Would be worth allowing an "mpz_" or "mpn_"
    230 # prefix on the filenames to have working versions of both in one directory.
    231 #
    232 #
    233 # LIMITATIONS
    234 #
    235 # Some of the command lines can become very long when a lot of files are
    236 # included.  If this is a problem on a given system the only suggestion is
    237 # to run many.pl for just those that are actually wanted at a particular
    238 # time.
    239 #
    240 # DOS 8.3 or SysV 14 char filesystems won't work, since the long filenames
    241 # generated will almost certainly fail to be unique.
    242 
    243 
    244 use strict;
    245 use File::Basename;
    246 use Getopt::Std;
    247 
    248 my %opt;
    249 getopts('t', \%opt);
    250 
    251 my @DIRECTORIES = @ARGV;
    252 if (defined $ENV{directories}) { push @DIRECTORIES, @{$ENV{directories}} }
    253 
    254 
    255 # regexp - matched against the start of the filename.  If a grouping "(...)"
    256 #          is present then only the first such part is used.
    257 #
    258 # mulfunc - filenames to be generated from a multi-function file.
    259 #
    260 # funs - functions provided by the file, defaulting to the filename with mpn
    261 #          (or mpX).
    262 #
    263 # mpX - prefix like "mpz", defaulting to "mpn".
    264 #
    265 # ret - return value type.
    266 #
    267 # args, args_<fun> - arguments for the given function.  If an args_<fun> is
    268 #          set then it's used, otherwise plain args is used.  "mp_limb_t
    269 #          carry" is appended for carry-in variants.
    270 #
    271 # try - try.c TYPE_ to use, defaulting to TYPE_fun with the function name
    272 #          in upper case.  "C" is appended for carry-in variants.  Can be
    273 #          'none' for no try program entry.
    274 #
    275 # speed - SPEED_ROUTINE_ to use, handled like "try".
    276 #
    277 # speed_flags - SPEED_ROUTINE_ to use, handled like "try".
    278 
    279 
    280 my @table =
    281     (
    282      {
    283        'regexp'=> 'add_n|sub_n|addlsh1_n|sublsh1_n|rsh1add_n|rsh1sub_n',
    284        'ret'   => 'mp_limb_t',
    285        'args'  => 'mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size',
    286        'speed' => 'SPEED_ROUTINE_MPN_BINARY_N',
    287        'speed_flags'=> 'FLAG_R_OPTIONAL',
    288      },
    289      {
    290        'regexp'=> 'aors_n',
    291        'mulfunc'=> ['add_n','sub_n'],
    292        'ret'   => 'mp_limb_t',
    293        'args'  => 'mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size',
    294        'speed' => 'SPEED_ROUTINE_MPN_BINARY_N',
    295        'speed_flags'=> 'FLAG_R_OPTIONAL',
    296      },
    297 
    298      {
    299        'regexp'=> 'addmul_1|submul_1',
    300        'ret'   => 'mp_limb_t',
    301        'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_limb_t mult',
    302        'speed' => 'SPEED_ROUTINE_MPN_UNARY_1',
    303        'speed_flags'=> 'FLAG_R',
    304      },
    305      {
    306        'regexp'=> 'aorsmul_1',
    307        'mulfunc'=> ['addmul_1','submul_1'],
    308        'ret'   => 'mp_limb_t',
    309        'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_limb_t mult',
    310        'speed' => 'SPEED_ROUTINE_MPN_UNARY_1',
    311        'speed_flags'=> 'FLAG_R',
    312      },
    313 
    314      {
    315        'regexp'=> 'addmul_2|submul_2',
    316        'ret'   => 'mp_limb_t',
    317        'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',
    318        'speed' => 'SPEED_ROUTINE_MPN_UNARY_2',
    319        'speed_flags'=> 'FLAG_R_OPTIONAL',
    320        'try-minsize' => 2,
    321      },
    322      {
    323        'regexp'=> 'addmul_3|submul_3',
    324        'ret'   => 'mp_limb_t',
    325        'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',
    326        'speed' => 'SPEED_ROUTINE_MPN_UNARY_3',
    327        'speed_flags'=> 'FLAG_R_OPTIONAL',
    328        'try-minsize' => 3,
    329      },
    330      {
    331        'regexp'=> 'addmul_4|submul_4',
    332        'ret'   => 'mp_limb_t',
    333        'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',
    334        'speed' => 'SPEED_ROUTINE_MPN_UNARY_4',
    335        'speed_flags'=> 'FLAG_R_OPTIONAL',
    336        'try-minsize' => 4,
    337      },
    338      {
    339        'regexp'=> 'addmul_5|submul_5',
    340        'ret'   => 'mp_limb_t',
    341        'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',
    342        'speed' => 'SPEED_ROUTINE_MPN_UNARY_5',
    343        'speed_flags'=> 'FLAG_R_OPTIONAL',
    344        'try-minsize' => 5,
    345      },
    346      {
    347        'regexp'=> 'addmul_6|submul_6',
    348        'ret'   => 'mp_limb_t',
    349        'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',
    350        'speed' => 'SPEED_ROUTINE_MPN_UNARY_6',
    351        'speed_flags'=> 'FLAG_R_OPTIONAL',
    352        'try-minsize' => 6,
    353      },
    354      {
    355        'regexp'=> 'addmul_7|submul_7',
    356        'ret'   => 'mp_limb_t',
    357        'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',
    358        'speed' => 'SPEED_ROUTINE_MPN_UNARY_7',
    359        'speed_flags'=> 'FLAG_R_OPTIONAL',
    360        'try-minsize' => 7,
    361      },
    362      {
    363        'regexp'=> 'addmul_8|submul_8',
    364        'ret'   => 'mp_limb_t',
    365        'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',
    366        'speed' => 'SPEED_ROUTINE_MPN_UNARY_8',
    367        'speed_flags'=> 'FLAG_R_OPTIONAL',
    368        'try-minsize' => 8,
    369      },
    370 
    371      {
    372        'regexp'=> 'add_n_sub_n',
    373        'ret'   => 'mp_limb_t',
    374        'args'  => 'mp_ptr sum, mp_ptr diff, mp_srcptr xp, mp_srcptr yp, mp_size_t size',
    375        'speed_flags'=> 'FLAG_R_OPTIONAL',
    376      },
    377 
    378      {
    379        'regexp'=> 'com|copyi|copyd',
    380        'ret'   => 'void',
    381        'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size',
    382        'speed' => 'SPEED_ROUTINE_MPN_COPY',
    383      },
    384 
    385      {
    386        'regexp'=> 'dive_1',
    387        'funs'  => ['divexact_1'],
    388        'ret'   => 'void',
    389        'args'  => 'mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor',
    390        'speed_flags'=> 'FLAG_R',
    391      },
    392      {
    393        'regexp'=> 'diveby3',
    394        'funs'  => ['divexact_by3c'],
    395        'ret'   => 'mp_limb_t',
    396        'args'  => 'mp_ptr dst, mp_srcptr src, mp_size_t size',
    397        'carrys'=> [''],
    398        'speed' => 'SPEED_ROUTINE_MPN_COPY',
    399      },
    400 
    401      # mpn_preinv_divrem_1 is an optional extra entrypoint
    402      {
    403        'regexp'=> 'divrem_1',
    404        'funs'  => ['divrem_1', 'preinv_divrem_1'],
    405        'ret'   => 'mp_limb_t',
    406        'args_divrem_1' => 'mp_ptr rp, mp_size_t xsize, mp_srcptr sp, mp_size_t size, mp_limb_t divisor',
    407        'args_preinv_divrem_1' => 'mp_ptr rp, mp_size_t xsize, mp_srcptr sp, mp_size_t size, mp_limb_t divisor, mp_limb_t inverse, unsigned shift',
    408        'speed_flags'=> 'FLAG_R',
    409        'speed_suffixes' => ['f'],
    410      },
    411      {
    412        'regexp'=> 'pre_divrem_1',
    413        'funs'  => ['preinv_divrem_1'],
    414        'ret'   => 'mp_limb_t',
    415        'args'  => 'mp_ptr qp, mp_size_t qxn, mp_srcptr ap, mp_size_t asize, mp_limb_t divisor, mp_limb_t inverse, int shift',
    416        'speed_flags' => 'FLAG_R',
    417      },
    418 
    419      {
    420        'regexp'=> 'divrem_2',
    421        'ret'   => 'mp_limb_t',
    422        'args'  => 'mp_ptr qp, mp_size_t qxn, mp_srcptr np, mp_size_t nsize, mp_srcptr dp',
    423        'try'   => 'none',
    424      },
    425 
    426      {
    427        'regexp'=> 'sb_divrem_mn',
    428        'ret'   => 'mp_limb_t',
    429        'args'  => 'mp_ptr qp, mp_ptr np, mp_size_t nsize, mp_srcptr dp, mp_size_t dsize',
    430        'speed' => 'SPEED_ROUTINE_MPN_DC_DIVREM_SB',
    431        'try-minsize' => 3,
    432      },
    433      {
    434        'regexp'=> 'tdiv_qr',
    435        'ret'   => 'void',
    436        'args'  => 'mp_ptr qp, mp_size_t qxn, mp_ptr np, mp_size_t nsize, mp_srcptr dp, mp_size_t dsize',
    437        'speed' => 'none',
    438      },
    439 
    440      {
    441        'regexp'=> 'get_str',
    442        'ret'   => 'size_t',
    443        'args'  => 'unsigned char *str, int base, mp_ptr mptr, mp_size_t msize',
    444        'speed_flags' => 'FLAG_R_OPTIONAL',
    445        'try'   => 'none',
    446      },
    447      {
    448        'regexp'=> 'set_str',
    449        'ret'   => 'mp_size_t',
    450        'args'  => 'mp_ptr xp, const unsigned char *str, size_t str_len, int base',
    451        'speed_flags' => 'FLAG_R_OPTIONAL',
    452        'try'   => 'none',
    453      },
    454 
    455      {
    456        'regexp'=> 'fac_ui',
    457        'mpX'   => 'mpz',
    458        'ret'   => 'void',
    459        'args'  => 'mpz_ptr r, unsigned long n',
    460        'speed_flags' => 'FLAG_NODATA',
    461        'try'   => 'none',
    462      },
    463 
    464      {
    465        'regexp'=> 'fib2_ui',
    466        'ret'   => 'void',
    467        'args'  => 'mp_ptr fp, mp_ptr f1p, unsigned long n',
    468        'rename'=> ['__gmp_fib_table'],
    469        'speed_flags' => 'FLAG_NODATA',
    470        'try'   => 'none',
    471      },
    472      {
    473        'regexp'=> 'fib_ui',
    474        'mpX'   => 'mpz',
    475        'ret'   => 'void',
    476        'args'  => 'mpz_ptr fn, unsigned long n',
    477        'speed_flags' => 'FLAG_NODATA',
    478        'try'   => 'none',
    479      },
    480      {
    481        'regexp'=> 'fib2_ui',
    482        'mpX'   => 'mpz',
    483        'ret'   => 'void',
    484        'args'  => 'mpz_ptr fn, mpz_ptr fnsub1, unsigned long n',
    485        'speed_flags' => 'FLAG_NODATA',
    486        'try'   => 'none',
    487      },
    488 
    489      {
    490        'regexp'=> 'lucnum_ui',
    491        'mpX'   => 'mpz',
    492        'ret'   => 'void',
    493        'args'  => 'mpz_ptr ln, unsigned long n',
    494        'speed_flags' => 'FLAG_NODATA',
    495        'try'   => 'none',
    496      },
    497      {
    498        'regexp'=> 'lucnum2_ui',
    499        'mpX'   => 'mpz',
    500        'ret'   => 'void',
    501        'args'  => 'mpz_ptr ln, mpz_ptr lnsub1, unsigned long n',
    502        'speed_flags' => 'FLAG_NODATA',
    503        'try'   => 'none',
    504      },
    505 
    506      {
    507        'regexp'=> 'gcd_1',
    508        'ret'   => 'mp_limb_t',
    509        'args'  => 'mp_ptr xp, mp_size_t xsize, mp_limb_t y',
    510        'speed_flags'=> 'FLAG_R_OPTIONAL',
    511        'speed_suffixes' => ['N'],
    512      },
    513      {
    514        'regexp'=> '(gcd)(?!(_1|ext|_finda))',
    515        'ret'   => 'mp_size_t',
    516        'args'  => 'mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t vsize',
    517      },
    518      {
    519        'regexp'=> 'gcd_finda',
    520        'ret'   => 'mp_limb_t',
    521        'args'  => 'mp_srcptr cp',
    522      },
    523 
    524 
    525      {
    526        'regexp'=> 'jacobi',
    527        'funs'  => ['jacobi', 'legendre', 'kronecker'],
    528        'mpX'   => 'mpz',
    529        'ret'   => 'int',
    530        'args'  => 'mpz_srcptr a, mpz_srcptr b',
    531        'try-legendre' => 'TYPE_MPZ_JACOBI',
    532      },
    533      {
    534        'regexp'=> 'jacbase',
    535        'funs'  => ['jacobi_base'],
    536        'ret'   => 'mp_limb_t',
    537        'args'  => 'mp_limb_t a, mp_limb_t b, int bit1',
    538        'speed' => 'SPEED_ROUTINE_MPN_JACBASE',
    539        'try'   => 'none',
    540      },
    541 
    542      {
    543        'regexp'=> 'logops_n',
    544        'mulfunc'=> ['and_n','andn_n','nand_n','ior_n','iorn_n','nior_n','xor_n','xnor_n'],
    545        'ret'   => 'void',
    546        'args'  => 'mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size',
    547        'speed' => 'SPEED_ROUTINE_MPN_BINARY_N',
    548      },
    549 
    550      {
    551        'regexp'=> '[lr]shift',
    552        'ret'   => 'mp_limb_t',
    553        'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, unsigned shift',
    554        'speed' => 'SPEED_ROUTINE_MPN_UNARY_1',
    555        'speed_flags'=> 'FLAG_R',
    556      },
    557 
    558      # mpn_preinv_mod_1 is an optional extra entrypoint
    559      {
    560        'regexp'=> '(mod_1)(?!_rs)',
    561        'funs'  => ['mod_1','preinv_mod_1'],
    562        'ret'   => 'mp_limb_t',
    563        'args_mod_1'       => 'mp_srcptr xp, mp_size_t size, mp_limb_t divisor',
    564        'args_preinv_mod_1'=> 'mp_srcptr xp, mp_size_t size, mp_limb_t divisor, mp_limb_t inverse',
    565        'speed_flags'=> 'FLAG_R',
    566      },
    567      {
    568        'regexp'=> 'pre_mod_1',
    569        'funs'  => ['preinv_mod_1'],
    570        'ret'   => 'mp_limb_t',
    571        'args'  => 'mp_srcptr xp, mp_size_t size, mp_limb_t divisor, mp_limb_t inverse',
    572        'speed_flags'=> 'FLAG_R',
    573      },
    574      {
    575        'regexp'=> 'mod_34lsub1',
    576        'ret'   => 'mp_limb_t',
    577        'args'  => 'mp_srcptr src, mp_size_t len',
    578      },
    579      {
    580        'regexp'=> 'invert_limb',
    581        'ret'   => 'mp_limb_t',
    582        'args'  => 'mp_limb_t divisor',
    583        'speed_flags'=> 'FLAG_R_OPTIONAL',
    584        'try'   => 'none',
    585      },
    586 
    587      {
    588        # not for use with hppa reversed argument versions of mpn_umul_ppmm
    589        'regexp'=> 'udiv',
    590        'funs'  => ['udiv_qrnnd','udiv_qrnnd_r'],
    591        'ret'   => 'mp_limb_t',
    592        'args_udiv_qrnnd'   => 'mp_limb_t *, mp_limb_t, mp_limb_t, mp_limb_t',
    593        'args_udiv_qrnnd_r' => 'mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t *',
    594        'speed' => 'none',
    595        'try-minsize' => 2,
    596      },
    597 
    598      {
    599        'regexp'=> 'mode1o',
    600        'funs'  => ['modexact_1_odd'],
    601        'ret'   => 'mp_limb_t',
    602        'args'  => 'mp_srcptr src, mp_size_t size, mp_limb_t divisor',
    603        'speed_flags'=> 'FLAG_R',
    604      },
    605      {
    606        'regexp'=> 'modlinv',
    607        'funs'  => ['modlimb_invert'],
    608        'ret'   => 'mp_limb_t',
    609        'args'  => 'mp_limb_t v',
    610        'carrys'=> [''],
    611        'try'   => 'none',
    612      },
    613 
    614      {
    615        'regexp'=> 'mul_1',
    616        'ret'   => 'mp_limb_t',
    617        'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_limb_t mult',
    618        'speed' => 'SPEED_ROUTINE_MPN_UNARY_1',
    619        'speed_flags'=> 'FLAG_R',
    620      },
    621      {
    622        'regexp'=> 'mul_2',
    623        'ret'   => 'mp_limb_t',
    624        'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr mult',
    625        'speed' => 'SPEED_ROUTINE_MPN_UNARY_2',
    626        'speed_flags'=> 'FLAG_R',
    627      },
    628 
    629      {
    630        'regexp'=> 'mul_basecase',
    631        'ret'   => 'void',
    632        'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize',
    633        'speed_flags' => 'FLAG_R_OPTIONAL | FLAG_RSIZE',
    634      },
    635      {
    636        'regexp'=> '(mul_n)[_.]',
    637        'ret'   => 'void',
    638        'args'  => 'mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size',
    639        'rename'=> ['kara_mul_n','kara_sqr_n','toom3_mul_n','toom3_sqr_n'],
    640      },
    641      {
    642        'regexp'=> 'umul',
    643        'funs'  => ['umul_ppmm','umul_ppmm_r'],
    644        'ret'   => 'mp_limb_t',
    645        'args_umul_ppmm'   => 'mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2',
    646        'args_umul_ppmm_r' => 'mp_limb_t m1, mp_limb_t m2, mp_limb_t *lowptr',
    647        'speed' => 'none',
    648        'try-minsize' => 3,
    649      },
    650 
    651 
    652      {
    653        'regexp'=> 'popham',
    654        'mulfunc'=> ['popcount','hamdist'],
    655        'ret'   => 'unsigned long',
    656        'args_popcount'=> 'mp_srcptr xp, mp_size_t size',
    657        'args_hamdist' => 'mp_srcptr xp, mp_srcptr yp, mp_size_t size',
    658      },
    659      {
    660        'regexp'=> 'popcount',
    661        'ret'   => 'unsigned long',
    662        'args'  => 'mp_srcptr xp, mp_size_t size',
    663      },
    664      {
    665        'regexp'=> 'hamdist',
    666        'ret'   => 'unsigned long',
    667        'args'  => 'mp_srcptr xp, mp_srcptr yp, mp_size_t size',
    668        # extra renaming to support sharing a data table with mpn_popcount
    669        'rename'=> ['popcount'],
    670      },
    671 
    672      {
    673        'regexp'=> 'sqr_basecase',
    674        'ret'   => 'void',
    675        'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size',
    676        'speed' => 'SPEED_ROUTINE_MPN_SQR',
    677        'try'   => 'TYPE_SQR',
    678      },
    679      {
    680        'regexp'=> 'sqr_diagonal',
    681        'ret'   => 'void',
    682        'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size',
    683        'try'   => 'none',
    684      },
    685 
    686      {
    687        'regexp'=> 'sqrtrem',
    688        'ret'   => 'mp_size_t',
    689        'args'  => 'mp_ptr root, mp_ptr rem, mp_srcptr src, mp_size_t size',
    690        'try'   => 'none',
    691      },
    692 
    693      {
    694        'regexp'=> 'cntlz',
    695        'funs'  => ['count_leading_zeros'],
    696        'ret'   => 'unsigned',
    697        'args'  => 'mp_limb_t',
    698        'macro-before' => "#undef COUNT_LEADING_ZEROS_0",
    699        'macro-speed'  =>
    700 '#ifdef COUNT_LEADING_ZEROS_0
    701 #define COUNT_LEADING_ZEROS_0_ALLOWED   1
    702 #else
    703 #define COUNT_LEADING_ZEROS_0_ALLOWED   0
    704 #endif
    705   SPEED_ROUTINE_COUNT_ZEROS_A (1, COUNT_LEADING_ZEROS_0_ALLOWED);
    706   $fun (c, n);
    707   SPEED_ROUTINE_COUNT_ZEROS_B ()',
    708        'speed_flags'=> 'FLAG_R_OPTIONAL',
    709        'try'   => 'none',
    710      },
    711      {
    712        'regexp'=> 'cnttz',
    713        'funs'  => ['count_trailing_zeros'],
    714        'ret'   => 'unsigned',
    715        'args'  => 'mp_limb_t',
    716        'macro-speed' => '
    717   SPEED_ROUTINE_COUNT_ZEROS_A (0, 0);
    718   $fun (c, n);
    719   SPEED_ROUTINE_COUNT_ZEROS_B ()',
    720        'speed_flags' => 'FLAG_R_OPTIONAL',
    721        'try'   => 'none',
    722      },
    723 
    724      {
    725        'regexp'=> 'zero',
    726        'ret'   => 'void',
    727        'args'  => 'mp_ptr ptr, mp_size_t size',
    728      },
    729 
    730      {
    731        'regexp'=> '(powm)(?!_ui)',
    732        'mpX'   => 'mpz',
    733        'ret'   => 'void',
    734        'args'  => 'mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m',
    735        'try'   => 'none',
    736      },
    737      {
    738        'regexp'=> 'powm_ui',
    739        'mpX'   => 'mpz',
    740        'ret'   => 'void',
    741        'args'  => 'mpz_ptr r, mpz_srcptr b, unsigned long e, mpz_srcptr m',
    742        'try'   => 'none',
    743      },
    744 
    745      # special for use during development
    746      {
    747        'regexp'=> 'back',
    748        'funs'  => ['back_to_back'],
    749        'ret'   => 'void',
    750        'args'  => 'void',
    751        'pic'   => 'no',
    752        'try'   => 'none',
    753        'speed_flags'=> 'FLAG_NODATA',
    754      },
    755      );
    756 
    757 if (defined $ENV{table2}) {
    758   my @newtable = @{$ENV{table2}};
    759   push @newtable, @table;
    760   @table = @newtable;
    761 }
    762 
    763 
    764 my %pictable =
    765     (
    766      'yes' => {
    767        'suffix' =>  '_pic',
    768        'asmflags'=> '$(ASMFLAGS_PIC)',
    769        'cflags' =>  '$(CFLAGS_PIC)',
    770      },
    771      'no' => {
    772        'suffix' =>  '',
    773        'asmflags'=> '',
    774        'cflags' =>  '',
    775      },
    776      );
    777 
    778 
    779 my $builddir = $ENV{builddir};
    780 $builddir = "." if (! defined $builddir);
    781 
    782 my $top_builddir = "${builddir}/..";
    783 
    784 
    785 open(MAKEFILE, "<${builddir}/Makefile")
    786   or die "Cannot open ${builddir}/Makefile: $!\n"
    787        . "Is this a tune build directory?";
    788 my ($srcdir, $top_srcdir);
    789 while (<MAKEFILE>) {
    790   if (/^srcdir = (.*)/) {     $srcdir = $1;     }
    791   if (/^top_srcdir = (.*)/) { $top_srcdir = $1; }
    792 }
    793 die "Cannot find \$srcdir in Makefile\n" if (! defined $srcdir);
    794 die "Cannot find \$top_srcdir in Makefile\n" if (! defined $top_srcdir);
    795 print "srcdir $srcdir\n" if $opt{'t'};
    796 print "top_srcdir $top_srcdir\n" if $opt{'t'};
    797 close(MAKEFILE);
    798 
    799 
    800 open(SPEED, ">speed-many.c") or die;
    801 print SPEED
    802 "/* speed-many.c generated by many.pl - DO NOT EDIT, CHANGES WILL BE LOST */
    803 
    804 ";
    805 my $SPEED_EXTRA_ROUTINES = "#define SPEED_EXTRA_ROUTINES \\\n";
    806 my $SPEED_EXTRA_PROTOS = "#define SPEED_EXTRA_PROTOS \\\n";
    807 my $SPEED_CODE = "";
    808 
    809 open(TRY, ">try-many.c") or die;
    810 print TRY
    811     "/* try-many.c generated by many.pl - DO NOT EDIT, CHANGES WILL BE LOST */\n" .
    812     "\n";
    813 my $TRY_EXTRA_ROUTINES = "#define EXTRA_ROUTINES \\\n";
    814 my $TRY_EXTRA_PROTOS = "#define EXTRA_PROTOS \\\n";
    815 
    816 open(FD,"<${top_builddir}/libtool") or die "Cannot open \"${top_builddir}/libtool\": $!\n";
    817 my $pic_flag;
    818 while (<FD>) {
    819   if (/^pic_flag="?([^"]*)"?$/) {
    820     $pic_flag=$1;
    821     last;
    822   }
    823 }
    824 close FD;
    825 if (! defined $pic_flag) {
    826   die "Cannot find pic_flag in ${top_builddir}/libtool";
    827 }
    828 
    829 my $CFLAGS_PIC = $pic_flag;
    830 
    831 my $ASMFLAGS_PIC = "";
    832 foreach (split /[ \t]/, $pic_flag) {
    833   if (/^-D/) {
    834     $ASMFLAGS_PIC .= " " . $_;
    835   }
    836 }
    837 
    838 open(MAKEFILE, ">Makefile.many") or die;
    839 print MAKEFILE
    840     "# Makefile.many generated by many.pl - DO NOT EDIT, CHANGES WILL BE LOST\n" .
    841     "\n" .
    842     "all: speed-many try-many\n" .
    843     "\n" .
    844     "#--------- begin included copy of basic Makefile ----------\n" .
    845     "\n";
    846 open(FD,"<${builddir}/Makefile") or die "Cannot open \"${builddir}/Makefile\": $!\n";
    847 print MAKEFILE <FD>;
    848 close FD;
    849 print MAKEFILE
    850     "\n" .
    851     "#--------- end included copy of basic Makefile ----------\n" .
    852     "\n" .
    853     "CFLAGS_PIC = $CFLAGS_PIC\n" .
    854     "ASMFLAGS_PIC = $ASMFLAGS_PIC\n" .
    855     "\n";
    856 
    857 my $CLEAN="";
    858 my $MANY_OBJS="";
    859 
    860 
    861 sub print_ansi2knr {
    862   my ($base,$file,$includes) = @_;
    863   if (! defined $file)     { $file = "$base.c"; }
    864   if (! defined $includes) { $includes = ""; }
    865 
    866   print MAKEFILE <<EOF;
    867 ${base}_.c: $file \$(ANSI2KNR)
    868 	\$(CPP) \$(DEFS) \$(INCLUDES) $includes \$(AM_CPPFLAGS) \$(CPPFLAGS) $file | sed 's/^# \([0-9]\)/#line \\1/' | \$(ANSI2KNR) >${base}_.c
    869 
    870 EOF
    871 }
    872 
    873 
    874 # Spawning a glob is a touch slow when there's lots of files.
    875 my @files = ();
    876 foreach my $dir (@DIRECTORIES) {
    877   print "dir $dir\n" if $opt{'t'};
    878   if (-f $dir) {
    879     push @files,$dir;
    880   } else {
    881     if (! opendir DD,$dir) {
    882       print "Cannot open $dir: $!\n";
    883     } else {
    884       push @files, map {$_="$dir/$_"} grep /\.(c|asm|S|h)$/, readdir DD;
    885       closedir DD;
    886     }
    887   }
    888 }
    889 @files = sort @files;
    890 print "@files ",join(" ",@files),"\n" if $opt{'t'};
    891 
    892 my $count_files = 0;
    893 my $count_functions = 0;
    894 my %seen_obj;
    895 my %seen_file;
    896 
    897 foreach my $file_full (@files) {
    898   if (! -f $file_full) {
    899     print "Not a file: $file_full\n";
    900     next;
    901   }
    902   if (defined $seen_file{$file_full}) {
    903     print "Skipping duplicate file: $file_full\n";
    904     next;
    905   }
    906   $seen_file{$file_full} = 1;
    907 
    908   my ($FILE,$path,$lang) = fileparse($file_full,"\.[a-zA-Z]+");
    909   $path =~ s/\/$//;
    910   print "file $FILE path $path lang $lang\n" if $opt{'t'};
    911 
    912   my @pic_choices;
    913   if ($lang eq '.asm')  { @pic_choices=('no','yes'); }
    914   elsif ($lang eq '.c') { @pic_choices=('no'); }
    915   elsif ($lang eq '.S') { @pic_choices=('no','yes'); }
    916   elsif ($lang eq '.h') { @pic_choices=('no'); }
    917   else { next };
    918 
    919   my ($t, $file_match);
    920   foreach my $p (@table) {
    921     # print " ",$p->{'regexp'},"\n" if $opt{'t'};
    922     if ($FILE =~ "^($p->{'regexp'})") {
    923       $t = $p;
    924       $file_match = $1;
    925       $file_match = $2 if defined $2;
    926       last;
    927     }
    928   }
    929   next if ! defined $t;
    930   print "match $t->{'regexp'} $FILE ($file_full)\n" if $opt{'t'};
    931 
    932   if (! open FD,"<$file_full") { print "Can't open $file_full: $!\n"; next }
    933   my @file_contents = <FD>;
    934   close FD;
    935 
    936   my $objs;
    937   if (defined $t->{'mulfunc'}) { $objs = $t->{'mulfunc'}; }
    938   else                         { $objs = [$file_match]; }
    939   print "objs @$objs\n" if $opt{'t'};
    940 
    941   my $ret = $t->{'ret'};
    942   if (! defined $ret && $lang eq '.h') { $ret = ''; }
    943   if (! defined $ret) { die "$FILE return type not defined\n" };
    944   print "ret $ret\n" if $opt{'t'};
    945 
    946   my $mpX = $t->{'mpX'};
    947   if (! defined $mpX) { $mpX = ($lang eq '.h' ? '' : 'mpn'); }
    948   $mpX = "${mpX}_" if $mpX ne '';
    949   print "mpX $mpX\n" if $opt{'t'};
    950 
    951   my $carrys;
    952   if (defined $t->{'carrys'}) { $carrys = $t->{'carrys'}; }
    953   else                        { $carrys = ['','c'];       }
    954   print "carrys $carrys @$carrys\n" if $opt{'t'};
    955 
    956   # some restriction functions are implemented, but they're not very useful
    957   my $restriction='';
    958 
    959   my $suffix;
    960   if ($FILE =~ ("${file_match}_(.+)")) {
    961     $suffix = $1;
    962   } elsif ($path =~ /\/mp[zn]\/(.*)$/) {
    963     # derive the suffix from the path
    964     $suffix = $1;
    965     $suffix =~ s/\//_/g;
    966     # use last directory name, or if there's 3 or more then the last two
    967     if ($suffix =~ /([^_]*_)+([^_]+_[^_]+)$/) {
    968       $suffix = $2;
    969     } elsif ($suffix =~ /([^_]*_)*([^_]+)$/) {
    970       $suffix = $2;
    971     }
    972   } else {
    973     die "Can't determine suffix for: $file_full (path $path)\n";
    974   }
    975   print "suffix $suffix\n" if $opt{'t'};
    976 
    977   $count_files++;
    978 
    979   foreach my $obj (@{$objs}) {
    980     print "obj $obj\n" if $opt{'t'};
    981 
    982     my $obj_with_suffix = "${obj}_$suffix";
    983     if (defined $seen_obj{$obj_with_suffix}) {
    984       print "Skipping duplicate object: $obj_with_suffix\n";
    985       print "   first from: $seen_obj{$obj_with_suffix}\n";
    986       print "   now from:   $file_full\n";
    987       next;
    988     }
    989     $seen_obj{$obj_with_suffix} = $file_full;
    990 
    991     my $funs = $t->{'funs'};
    992     $funs = [$obj] if ! defined $funs;
    993     print "funs @$funs\n" if $opt{'t'};
    994 
    995     if (defined $t->{'pic'}) { @pic_choices = ('no'); }
    996 
    997     foreach my $pic (map {$pictable{$_}} @pic_choices) {
    998       print "pic $pic->{'suffix'}\n" if $opt{'t'};
    999 
   1000       my $objbase = "${obj}_$suffix$pic->{'suffix'}";
   1001       print "objbase $objbase\n" if $opt{'t'};
   1002 
   1003       if ($path !~ "." && -f "${objbase}.c") {
   1004 	die "Already have ${objbase}.c";
   1005       }
   1006 
   1007       my $tmp_file = "tmp-$objbase.c";
   1008 
   1009       my $renaming;
   1010       foreach my $fun (@{$funs}) {
   1011         if ($mpX eq 'mpn_' && $lang eq '.c') {
   1012           $renaming .= "\t\t-DHAVE_NATIVE_mpn_$fun=1 \\\n";
   1013         }
   1014 
   1015         # The carry-in variant is with a "c" appended, unless there's a "_1"
   1016         # somewhere, eg. "modexact_1_odd", in which case that becomes "_1c".
   1017 	my $fun_carry = $fun;
   1018 	if (! ($fun_carry =~ s/_1/_1c/)) { $fun_carry = "${fun}c"; }
   1019 
   1020 	$renaming .=
   1021 	    "\t\t-D__g$mpX$fun=$mpX${fun}_$suffix$pic->{'suffix'} \\\n" .
   1022 	    "\t\t-D__g$mpX$fun_carry=$mpX${fun_carry}_$suffix$pic->{'suffix'} \\\n";
   1023       }
   1024       foreach my $r (@{$t->{'rename'}}) {
   1025 	if ($r =~ /^__gmp/) {
   1026 	  $renaming .= "\\\n" .
   1027 	      "\t\t-D$r=${r}_$suffix$pic->{'suffix'}";
   1028 	} else {
   1029 	  $renaming .= "\\\n" .
   1030 	      "\t\t-D__g$mpX$r=$mpX${r}_$suffix$pic->{'suffix'}";
   1031 	}
   1032       }
   1033       print "renaming $renaming\n" if $opt{'t'};
   1034 
   1035       print MAKEFILE "\n";
   1036       if ($lang eq '.asm') {
   1037 	print MAKEFILE
   1038 	    "$objbase.o: $file_full \$(ASM_HEADERS)\n" .
   1039 	    "	\$(M4) \$(M4FLAGS) -DOPERATION_$obj $pic->{'asmflags'} \\\n" .
   1040   	    "$renaming" .
   1041 	    "		$file_full >tmp-$objbase.s\n" .
   1042             "	\$(CCAS) \$(COMPILE_FLAGS) $pic->{'cflags'} tmp-$objbase.s -o $objbase.o\n" .
   1043             "	\$(RM_TMP) tmp-$objbase.s\n";
   1044 	$MANY_OBJS .= " $objbase.o";
   1045 
   1046       } elsif ($lang eq '.c') {
   1047 	print MAKEFILE
   1048 	    "$objbase.o: $file_full\n" .
   1049 	    "	\$(COMPILE) -DOPERATION_$obj $pic->{'cflags'} \\\n" .
   1050   	    "$renaming" .
   1051 	    "		-c $file_full -o $objbase.o\n";
   1052 	print_ansi2knr($objbase,
   1053 		       $file_full,
   1054 		       " -DOPERATION_$obj\\\n$renaming\t\t");
   1055 	$MANY_OBJS .= " $objbase\$U.o";
   1056 
   1057       } elsif ($lang eq '.S') {
   1058 	print MAKEFILE
   1059 	    "$objbase.o: $file_full\n" .
   1060             "	\$(COMPILE) -g $pic->{'asmflags'} \\\n" .
   1061   	    "$renaming" .
   1062             "	-c $file_full -o $objbase.o\n";
   1063 	$MANY_OBJS .= " $objbase.o";
   1064 
   1065       } elsif ($lang eq '.h') {
   1066 	print MAKEFILE
   1067 	    "$objbase.o: tmp-$objbase.c $file_full\n" .
   1068 	    "	\$(COMPILE) -DOPERATION_$obj $pic->{'cflags'} \\\n" .
   1069   	    "$renaming" .
   1070 	    "		-c tmp-$objbase.c -o $objbase.o\n";
   1071 	print_ansi2knr($objbase,
   1072 		       "tmp-$objbase.c",
   1073 		       " -DOPERATION_$obj\\\n$renaming\t\t");
   1074 	$MANY_OBJS .= " $objbase\$U.o";
   1075 
   1076         $CLEAN .= " tmp-$objbase.c";
   1077 	open(TMP_C,">tmp-$objbase.c")
   1078 	    or die "Can't create tmp-$objbase.c: $!\n";
   1079 	print TMP_C
   1080 "/* tmp-$objbase.c generated by many.pl - DO NOT EDIT, CHANGES WILL BE LOST */
   1081 
   1082 #include \"gmp.h\"
   1083 #include \"gmp-impl.h\"
   1084 #include \"longlong.h\"
   1085 #include \"speed.h\"
   1086 
   1087 ";
   1088       }
   1089 
   1090       my $tests_program = "$top_srcdir/tests/devel/$obj.c";
   1091       if (-f $tests_program) {
   1092 	$tests_program = "\$(top_srcdir)/tests/devel/$obj.c";
   1093 	print_ansi2knr("tests_${objbase}",
   1094 		       $tests_program,
   1095 		       "\\\n$renaming\t\t\$(CFLAGS_TESTS_SP)");
   1096 	print_ansi2knr("tests_${objbase}_sp",
   1097 		       $tests_program,
   1098 		       "\\\n$renaming\t\t\$(CFLAGS_TESTS_SP)");
   1099 
   1100 	print MAKEFILE <<EOF;
   1101 tests_$objbase.o: $tests_program
   1102 	\$(COMPILE) \$(CFLAGS_TESTS) \\
   1103 $renaming		-c $tests_program -o tests_$objbase.o
   1104 
   1105 tests_$objbase: $objbase\$U.o tests_$objbase\$U.o ../libgmp.la
   1106 	\$(LINK) tests_$objbase\$U.o $objbase\$U.o ../libgmp.la -o tests_$objbase
   1107 
   1108 tests_${objbase}_sp.o: $tests_program
   1109 	\$(COMPILE) \$(CFLAGS_TESTS_SP) \\
   1110 $renaming		-c $tests_program -o tests_${objbase}_sp.o
   1111 
   1112 tests_${objbase}_sp: $objbase\$U.o tests_${objbase}_sp\$U.o ../libgmp.la
   1113 	\$(LINK) tests_${objbase}_sp\$U.o $objbase\$U.o ../libgmp.la -o tests_${objbase}_sp
   1114 
   1115 EOF
   1116         $CLEAN .= " tests_$objbase tests_${objbase}_sp";
   1117       }
   1118 
   1119       foreach my $fun (@{$funs}) {
   1120 	print "fun $fun\n" if $opt{'t'};
   1121 
   1122 	if ($lang eq '.h') {
   1123           my $macro_before = $t->{'macro_before'};
   1124           $macro_before = "" if ! defined $macro_before;
   1125 	  print TMP_C
   1126 "$macro_before
   1127 #undef $fun
   1128 #include \"$file_full\"
   1129 
   1130 ";
   1131 	}
   1132 
   1133 	my $args = $t->{"args_$fun"};
   1134 	if (! defined $args) { $args = $t->{'args'}; }
   1135 	if (! defined $args) { die "Need args for $fun\n"; }
   1136 	print "args $args\n" if $opt{'t'};
   1137 
   1138 	foreach my $carry (@$carrys) {
   1139 	  print "carry $carry\n" if $opt{'t'};
   1140 
   1141 	  my $fun_carry = $fun;
   1142 	  if (! ($fun_carry =~ s/_1/_1$carry/)) { $fun_carry = "$fun$carry"; }
   1143           print "fun_carry $fun_carry\n" if $opt{'t'};
   1144 
   1145 	  if ($lang =~ /\.(asm|S)/
   1146 	      && ! grep(m"PROLOGUE\((.* )?$mpX$fun_carry[ ,)]",@file_contents)) {
   1147 	    print "no PROLOGUE $mpX$fun_carry\n" if $opt{'t'};
   1148 	    next;
   1149 	  }
   1150 	  if ($lang eq '.c'
   1151 	      && ! grep(m"^(#define FUNCTION\s+)?$mpX$fun_carry\W", @file_contents)) {
   1152 	    print "no mention of $mpX$fun_carry\n" if $opt{'t'};
   1153 	    next;
   1154 	  }
   1155 	  if ($lang eq '.h'
   1156 	      && ! grep(m"^#define $fun_carry\W", @file_contents)) {
   1157 	    print "no mention of #define $fun_carry\n" if $opt{'t'};
   1158 	    next;
   1159 	  }
   1160 
   1161 	  $count_functions++;
   1162 
   1163 	  my $carryarg;
   1164 	  if (defined $t->{'carryarg'}) { $carryarg = $t->{'carryarg'}; }
   1165 	  if ($carry eq '')             { $carryarg = ''; }
   1166 	  else                          { $carryarg = ', mp_limb_t carry'; }
   1167 	  print "carryarg $carryarg\n" if $opt{'t'};
   1168 
   1169 	  my $funfull="$mpX${fun_carry}_$suffix$pic->{'suffix'}";
   1170 	  print "funfull $funfull\n" if $opt{'t'};
   1171 
   1172 	  if ($lang ne '.h') {
   1173 	    my $proto = "$t->{'ret'} $funfull _PROTO (($args$carryarg)); \\\n";
   1174 	    $SPEED_EXTRA_PROTOS .= $proto;
   1175 	    $TRY_EXTRA_PROTOS .= $proto;
   1176 	  }
   1177 
   1178 	  my $try_type = $t->{"try-$fun"};
   1179 	  $try_type = $t->{'try'} if ! defined $try_type;
   1180 	  if (! defined $try_type) {
   1181 	    if ($mpX eq 'mpn_') {
   1182 	      $try_type = "TYPE_\U$fun_carry";
   1183 	    } else {
   1184 	      $try_type = "TYPE_\U$mpX\U$fun_carry";
   1185 	    }
   1186 	  }
   1187 	  print "try_type $try_type\n" if $opt{'t'};
   1188 
   1189 	  my $try_minsize = $t->{'try-minsize'};
   1190 	  if (defined $try_minsize) {
   1191 	    $try_minsize = ", " . $try_minsize;
   1192 	  } else {
   1193 	    $try_minsize = "";
   1194 	  }
   1195 	  print "try_minsize $try_minsize\n" if $opt{'t'};
   1196 
   1197 	  if ($try_type ne 'none') {
   1198 	    $TRY_EXTRA_ROUTINES .=
   1199 		"  { TRY($mpX${fun_carry}_$suffix$pic->{'suffix'}), $try_type$try_minsize }, \\\n";
   1200 	  }
   1201 
   1202 	  my $speed_flags = $t->{'speed_flags'};
   1203 	  $speed_flags = '0' if ! defined $speed_flags;
   1204 	  print "speed_flags $speed_flags\n" if $opt{'t'};
   1205 
   1206 	  my $speed_routine = $t->{'speed'};
   1207 	  $speed_routine = "SPEED_ROUTINE_\U$mpX\U$fun"
   1208 	      if !defined $speed_routine;
   1209 	  if (! ($speed_routine =~ s/_1/_1\U$carry/)) {
   1210 	    $speed_routine = "$speed_routine\U$carry";
   1211 	  }
   1212 	  print "speed_routine $speed_routine\n" if $opt{'t'};
   1213 
   1214 	  my @speed_suffixes = ();
   1215 	  push (@speed_suffixes, '') if $speed_routine ne 'none';
   1216 	  push (@speed_suffixes, @{$t->{'speed_suffixes'}})
   1217 	      if defined $t->{'speed_suffixes'};
   1218 
   1219           my $macro_speed = $t->{'macro-speed'};
   1220           $macro_speed = "$speed_routine ($fun_carry)" if ! defined $macro_speed;
   1221           $macro_speed =~ s/\$fun/$fun_carry/g;
   1222 
   1223 	  foreach my $S (@speed_suffixes) {
   1224 	    my $Sfunfull="$mpX${fun_carry}${S}_$suffix$pic->{'suffix'}";
   1225 
   1226 	    $SPEED_EXTRA_PROTOS .=
   1227 	      "double speed_$Sfunfull _PROTO ((struct speed_params *s)); \\\n";
   1228 	    $SPEED_EXTRA_ROUTINES .=
   1229 	      "  { \"$Sfunfull\", speed_$Sfunfull, $speed_flags }, \\\n";
   1230 	    if ($lang eq '.h') {
   1231               print TMP_C
   1232 "double
   1233 speed_$Sfunfull (struct speed_params *s)
   1234 {
   1235 $macro_speed
   1236 }
   1237 
   1238 ";
   1239             } else {
   1240 	      $SPEED_CODE .=
   1241 	        "double\n" .
   1242 	        "speed_$Sfunfull (struct speed_params *s)\n" .
   1243                 "{\n" .
   1244                 "$restriction" .
   1245 	        "  $speed_routine\U$S\E ($funfull)\n" .
   1246                 "}\n";
   1247             }
   1248 	  }
   1249 	}
   1250       }
   1251     }
   1252   }
   1253 }
   1254 
   1255 
   1256 print SPEED $SPEED_EXTRA_PROTOS . "\n";
   1257 print SPEED $SPEED_EXTRA_ROUTINES . "\n";
   1258 if (defined $ENV{speedinc}) { print SPEED $ENV{speedinc} . "\n"; }
   1259 print SPEED
   1260     "#include \"speed.c\"\n" .
   1261     "\n";
   1262 print SPEED $SPEED_CODE;
   1263 
   1264 print TRY $TRY_EXTRA_ROUTINES . "\n";
   1265 print TRY $TRY_EXTRA_PROTOS . "\n";
   1266 my $tryinc = "";
   1267 if (defined $ENV{tryinc}) {
   1268   $tryinc = $ENV{tryinc};
   1269   print TRY "#include \"$tryinc\"\n";
   1270 }
   1271 print "tryinc $tryinc\n" if $opt{'t'};
   1272 print TRY
   1273     "#include \"try.c\"\n" .
   1274     "\n";
   1275 
   1276 my $extra_libraries = "";
   1277 if (defined $ENV{extra_libraries}) { $extra_libraries = $ENV{extra_libraries};}
   1278 
   1279 my $trydeps = "";
   1280 if (defined $ENV{trydeps}) { $trydeps = $ENV{trydeps}; }
   1281 $trydeps .= " $tryinc";
   1282 print "trydeps $trydeps\n" if $opt{'t'};
   1283 
   1284 print MAKEFILE <<EOF;
   1285 
   1286 MANY_OBJS = $MANY_OBJS
   1287 MANY_CLEAN = \$(MANY_OBJS) \\
   1288 	speed-many.c speed-many\$U.o speed-many\$(EXEEXT) \\
   1289 	try-many.c try-many\$U.o try-many \\
   1290 	$CLEAN
   1291 MANY_DISTCLEAN = Makefile.many
   1292 
   1293 speed-many: \$(MANY_OBJS) speed-many\$U.o libspeed.la $extra_libraries
   1294 	\$(LINK) \$(LDFLAGS) speed-many\$U.o \$(MANY_OBJS) \$(LDADD) \$(LIBS) $extra_libraries
   1295 
   1296 try-many: \$(MANY_OBJS) try-many\$U.o libspeed.la $extra_libraries
   1297 	\$(LINK) \$(LDFLAGS) try-many\$U.o \$(MANY_OBJS)  \$(LDADD) \$(LIBS) $extra_libraries
   1298 
   1299 try-many.o: try-many.c \$(top_srcdir)/tests/devel/try.c $trydeps
   1300 	\$(COMPILE) -I\$(top_srcdir)/tests/devel -c try-many.c
   1301 
   1302 EOF
   1303 
   1304 print_ansi2knr("speed-many");
   1305 print_ansi2knr("try-many",
   1306 	       "\$(top_srcdir)/tests/devel/try.c",
   1307 	       "-I\$(top_srcdir)/tests/devel");
   1308 
   1309 print MAKEFILE <<EOF;
   1310 RM_TMP = rm -f
   1311 CFLAGS_TESTS = -DSIZE=50 -DTIMES=1 -DRANDOM -DCLOCK=333000000
   1312 CFLAGS_TESTS_SP = -DSIZE=1024 -DNOCHECK -DOPS=200000000 -DCLOCK=333000000
   1313 EOF
   1314 
   1315 close MAKEFILE or die;
   1316 
   1317 print "Total $count_files files, $count_functions functions\n";
   1318 
   1319 
   1320 
   1321 # Local variables:
   1322 # perl-indent-level: 2
   1323 # End:
   1324