Home | History | Annotate | Line # | Download | only in tune
tuneup.c revision 1.1.1.1
      1  1.1  mrg /* Tune various threshold of MPFR
      2  1.1  mrg 
      3  1.1  mrg Copyright 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
      4  1.1  mrg Contributed by the AriC and Caramel projects, INRIA.
      5  1.1  mrg 
      6  1.1  mrg This file is part of the GNU MPFR Library.
      7  1.1  mrg 
      8  1.1  mrg The GNU MPFR Library is free software; you can redistribute it and/or modify
      9  1.1  mrg it under the terms of the GNU Lesser General Public License as published by
     10  1.1  mrg the Free Software Foundation; either version 3 of the License, or (at your
     11  1.1  mrg option) any later version.
     12  1.1  mrg 
     13  1.1  mrg The GNU MPFR Library is distributed in the hope that it will be useful, but
     14  1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     15  1.1  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     16  1.1  mrg License for more details.
     17  1.1  mrg 
     18  1.1  mrg You should have received a copy of the GNU Lesser General Public License
     19  1.1  mrg along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
     20  1.1  mrg http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
     21  1.1  mrg 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
     22  1.1  mrg 
     23  1.1  mrg #include <stdlib.h>
     24  1.1  mrg #include <time.h>
     25  1.1  mrg 
     26  1.1  mrg #define MPFR_NEED_LONGLONG_H
     27  1.1  mrg #include "mpfr-impl.h"
     28  1.1  mrg 
     29  1.1  mrg #undef _PROTO
     30  1.1  mrg #define _PROTO __GMP_PROTO
     31  1.1  mrg #include "speed.h"
     32  1.1  mrg 
     33  1.1  mrg int verbose;
     34  1.1  mrg 
     35  1.1  mrg /* template for an unary function */
     36  1.1  mrg /* s->size: precision of both input and output
     37  1.1  mrg    s->xp  : Mantissa of first input
     38  1.1  mrg    s->yp  : mantissa of second input                    */
     39  1.1  mrg #define SPEED_MPFR_FUNC(mean_fun)                       \
     40  1.1  mrg   do                                                    \
     41  1.1  mrg     {                                                   \
     42  1.1  mrg       unsigned  i;                                      \
     43  1.1  mrg       mpfr_limb_ptr wp;                                 \
     44  1.1  mrg       double    t;                                      \
     45  1.1  mrg       mpfr_t    w, x;                                   \
     46  1.1  mrg       mp_size_t size;                                   \
     47  1.1  mrg       MPFR_TMP_DECL (marker);                           \
     48  1.1  mrg                                                         \
     49  1.1  mrg       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
     50  1.1  mrg       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
     51  1.1  mrg       MPFR_TMP_MARK (marker);                           \
     52  1.1  mrg                                                         \
     53  1.1  mrg       size = (s->size-1)/GMP_NUMB_BITS+1;               \
     54  1.1  mrg       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
     55  1.1  mrg       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
     56  1.1  mrg       MPFR_SET_EXP (x, 0);                              \
     57  1.1  mrg                                                         \
     58  1.1  mrg       MPFR_TMP_INIT (wp, w, s->size, size);             \
     59  1.1  mrg                                                         \
     60  1.1  mrg       speed_operand_src (s, s->xp, size);               \
     61  1.1  mrg       speed_operand_dst (s, wp, size);                  \
     62  1.1  mrg       speed_cache_fill (s);                             \
     63  1.1  mrg                                                         \
     64  1.1  mrg       speed_starttime ();                               \
     65  1.1  mrg       i = s->reps;                                      \
     66  1.1  mrg       do                                                \
     67  1.1  mrg         mean_fun (w, x, MPFR_RNDN);                     \
     68  1.1  mrg       while (--i != 0);                                 \
     69  1.1  mrg       t = speed_endtime ();                             \
     70  1.1  mrg                                                         \
     71  1.1  mrg       MPFR_TMP_FREE (marker);                           \
     72  1.1  mrg       return t;                                         \
     73  1.1  mrg     }                                                   \
     74  1.1  mrg   while (0)
     75  1.1  mrg 
     76  1.1  mrg /* same as SPEED_MPFR_FUNC, but for say mpfr_sin_cos (y, z, x, r) */
     77  1.1  mrg #define SPEED_MPFR_FUNC2(mean_fun)                      \
     78  1.1  mrg   do                                                    \
     79  1.1  mrg     {                                                   \
     80  1.1  mrg       unsigned  i;                                      \
     81  1.1  mrg       mpfr_limb_ptr vp, wp;                             \
     82  1.1  mrg       double    t;                                      \
     83  1.1  mrg       mpfr_t    v, w, x;                                \
     84  1.1  mrg       mp_size_t size;                                   \
     85  1.1  mrg       MPFR_TMP_DECL (marker);                           \
     86  1.1  mrg                                                         \
     87  1.1  mrg       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
     88  1.1  mrg       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
     89  1.1  mrg       MPFR_TMP_MARK (marker);                           \
     90  1.1  mrg                                                         \
     91  1.1  mrg       size = (s->size-1)/GMP_NUMB_BITS+1;               \
     92  1.1  mrg       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
     93  1.1  mrg       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
     94  1.1  mrg       MPFR_SET_EXP (x, 0);                              \
     95  1.1  mrg                                                         \
     96  1.1  mrg       MPFR_TMP_INIT (vp, v, s->size, size);             \
     97  1.1  mrg       MPFR_TMP_INIT (wp, w, s->size, size);             \
     98  1.1  mrg                                                         \
     99  1.1  mrg       speed_operand_src (s, s->xp, size);               \
    100  1.1  mrg       speed_operand_dst (s, vp, size);                  \
    101  1.1  mrg       speed_operand_dst (s, wp, size);                  \
    102  1.1  mrg       speed_cache_fill (s);                             \
    103  1.1  mrg                                                         \
    104  1.1  mrg       speed_starttime ();                               \
    105  1.1  mrg       i = s->reps;                                      \
    106  1.1  mrg       do                                                \
    107  1.1  mrg         mean_fun (v, w, x, MPFR_RNDN);                  \
    108  1.1  mrg       while (--i != 0);                                 \
    109  1.1  mrg       t = speed_endtime ();                             \
    110  1.1  mrg                                                         \
    111  1.1  mrg       MPFR_TMP_FREE (marker);                           \
    112  1.1  mrg       return t;                                         \
    113  1.1  mrg     }                                                   \
    114  1.1  mrg   while (0)
    115  1.1  mrg 
    116  1.1  mrg /* template for a function like mpfr_mul */
    117  1.1  mrg #define SPEED_MPFR_OP(mean_fun)                         \
    118  1.1  mrg   do                                                    \
    119  1.1  mrg     {                                                   \
    120  1.1  mrg       unsigned  i;                                      \
    121  1.1  mrg       mpfr_limb_ptr wp;                                 \
    122  1.1  mrg       double    t;                                      \
    123  1.1  mrg       mpfr_t    w, x, y;                                \
    124  1.1  mrg       mp_size_t size;                                   \
    125  1.1  mrg       MPFR_TMP_DECL (marker);                           \
    126  1.1  mrg                                                         \
    127  1.1  mrg       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
    128  1.1  mrg       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
    129  1.1  mrg       MPFR_TMP_MARK (marker);                           \
    130  1.1  mrg                                                         \
    131  1.1  mrg       size = (s->size-1)/GMP_NUMB_BITS+1;               \
    132  1.1  mrg       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
    133  1.1  mrg       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
    134  1.1  mrg       MPFR_SET_EXP (x, 0);                              \
    135  1.1  mrg       s->yp[size-1] |= MPFR_LIMB_HIGHBIT;               \
    136  1.1  mrg       MPFR_TMP_INIT1 (s->yp, y, s->size);               \
    137  1.1  mrg       MPFR_SET_EXP (y, 0);                              \
    138  1.1  mrg                                                         \
    139  1.1  mrg       MPFR_TMP_INIT (wp, w, s->size, size);             \
    140  1.1  mrg                                                         \
    141  1.1  mrg       speed_operand_src (s, s->xp, size);               \
    142  1.1  mrg       speed_operand_src (s, s->yp, size);               \
    143  1.1  mrg       speed_operand_dst (s, wp, size);                  \
    144  1.1  mrg       speed_cache_fill (s);                             \
    145  1.1  mrg                                                         \
    146  1.1  mrg       speed_starttime ();                               \
    147  1.1  mrg       i = s->reps;                                      \
    148  1.1  mrg       do                                                \
    149  1.1  mrg         mean_fun (w, x, y, MPFR_RNDN);                  \
    150  1.1  mrg       while (--i != 0);                                 \
    151  1.1  mrg       t = speed_endtime ();                             \
    152  1.1  mrg                                                         \
    153  1.1  mrg       MPFR_TMP_FREE (marker);                           \
    154  1.1  mrg       return t;                                         \
    155  1.1  mrg     }                                                   \
    156  1.1  mrg   while (0)
    157  1.1  mrg 
    158  1.1  mrg /* special template for mpfr_mul(a,b,b) */
    159  1.1  mrg #define SPEED_MPFR_SQR(mean_fun)                        \
    160  1.1  mrg   do                                                    \
    161  1.1  mrg     {                                                   \
    162  1.1  mrg       unsigned  i;                                      \
    163  1.1  mrg       mpfr_limb_ptr wp;                                 \
    164  1.1  mrg       double    t;                                      \
    165  1.1  mrg       mpfr_t    w, x;                                   \
    166  1.1  mrg       mp_size_t size;                                   \
    167  1.1  mrg       MPFR_TMP_DECL (marker);                           \
    168  1.1  mrg                                                         \
    169  1.1  mrg       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
    170  1.1  mrg       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
    171  1.1  mrg       MPFR_TMP_MARK (marker);                           \
    172  1.1  mrg                                                         \
    173  1.1  mrg       size = (s->size-1)/GMP_NUMB_BITS+1;               \
    174  1.1  mrg       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
    175  1.1  mrg       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
    176  1.1  mrg       MPFR_SET_EXP (x, 0);                              \
    177  1.1  mrg                                                         \
    178  1.1  mrg       MPFR_TMP_INIT (wp, w, s->size, size);             \
    179  1.1  mrg                                                         \
    180  1.1  mrg       speed_operand_src (s, s->xp, size);               \
    181  1.1  mrg       speed_operand_dst (s, wp, size);                  \
    182  1.1  mrg       speed_cache_fill (s);                             \
    183  1.1  mrg                                                         \
    184  1.1  mrg       speed_starttime ();                               \
    185  1.1  mrg       i = s->reps;                                      \
    186  1.1  mrg       do                                                \
    187  1.1  mrg         mean_fun (w, x, x, MPFR_RNDN);                  \
    188  1.1  mrg       while (--i != 0);                                 \
    189  1.1  mrg       t = speed_endtime ();                             \
    190  1.1  mrg                                                         \
    191  1.1  mrg       MPFR_TMP_FREE (marker);                           \
    192  1.1  mrg       return t;                                         \
    193  1.1  mrg     }                                                   \
    194  1.1  mrg   while (0)
    195  1.1  mrg 
    196  1.1  mrg /* s->size: precision of both input and output
    197  1.1  mrg    s->xp  : Mantissa of first input
    198  1.1  mrg    s->r   : exponent
    199  1.1  mrg    s->align_xp : sign (1 means positive, 2 means negative)
    200  1.1  mrg */
    201  1.1  mrg #define SPEED_MPFR_FUNC_WITH_EXPONENT(mean_fun)         \
    202  1.1  mrg   do                                                    \
    203  1.1  mrg     {                                                   \
    204  1.1  mrg       unsigned  i;                                      \
    205  1.1  mrg       mpfr_limb_ptr wp;                                 \
    206  1.1  mrg       double    t;                                      \
    207  1.1  mrg       mpfr_t    w, x;                                   \
    208  1.1  mrg       mp_size_t size;                                   \
    209  1.1  mrg       MPFR_TMP_DECL (marker);                           \
    210  1.1  mrg                                                         \
    211  1.1  mrg       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
    212  1.1  mrg       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
    213  1.1  mrg       MPFR_TMP_MARK (marker);                           \
    214  1.1  mrg                                                         \
    215  1.1  mrg       size = (s->size-1)/GMP_NUMB_BITS+1;               \
    216  1.1  mrg       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
    217  1.1  mrg       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
    218  1.1  mrg       MPFR_SET_EXP (x, s->r);                           \
    219  1.1  mrg       if (s->align_xp == 2) MPFR_SET_NEG (x);           \
    220  1.1  mrg                                                         \
    221  1.1  mrg       MPFR_TMP_INIT (wp, w, s->size, size);             \
    222  1.1  mrg                                                         \
    223  1.1  mrg       speed_operand_src (s, s->xp, size);               \
    224  1.1  mrg       speed_operand_dst (s, wp, size);                  \
    225  1.1  mrg       speed_cache_fill (s);                             \
    226  1.1  mrg                                                         \
    227  1.1  mrg       speed_starttime ();                               \
    228  1.1  mrg       i = s->reps;                                      \
    229  1.1  mrg       do                                                \
    230  1.1  mrg         mean_fun (w, x, MPFR_RNDN);                     \
    231  1.1  mrg       while (--i != 0);                                 \
    232  1.1  mrg       t = speed_endtime ();                             \
    233  1.1  mrg                                                         \
    234  1.1  mrg       MPFR_TMP_FREE (marker);                           \
    235  1.1  mrg       return t;                                         \
    236  1.1  mrg     }                                                   \
    237  1.1  mrg   while (0)
    238  1.1  mrg 
    239  1.1  mrg /* First we include all the functions we want to tune inside this program.
    240  1.1  mrg    We can't use the GNU MPFR library since the thresholds are fixed macros. */
    241  1.1  mrg 
    242  1.1  mrg /* Setup mpfr_exp_2 */
    243  1.1  mrg mpfr_prec_t mpfr_exp_2_threshold;
    244  1.1  mrg #undef  MPFR_EXP_2_THRESHOLD
    245  1.1  mrg #define MPFR_EXP_2_THRESHOLD mpfr_exp_2_threshold
    246  1.1  mrg #include "exp_2.c"
    247  1.1  mrg static double
    248  1.1  mrg speed_mpfr_exp_2 (struct speed_params *s)
    249  1.1  mrg {
    250  1.1  mrg   SPEED_MPFR_FUNC (mpfr_exp_2);
    251  1.1  mrg }
    252  1.1  mrg 
    253  1.1  mrg /* Setup mpfr_exp */
    254  1.1  mrg mpfr_prec_t mpfr_exp_threshold;
    255  1.1  mrg #undef  MPFR_EXP_THRESHOLD
    256  1.1  mrg #define MPFR_EXP_THRESHOLD mpfr_exp_threshold
    257  1.1  mrg #include "exp.c"
    258  1.1  mrg static double
    259  1.1  mrg speed_mpfr_exp (struct speed_params *s)
    260  1.1  mrg {
    261  1.1  mrg   SPEED_MPFR_FUNC (mpfr_exp);
    262  1.1  mrg }
    263  1.1  mrg 
    264  1.1  mrg /* Setup mpfr_sin_cos */
    265  1.1  mrg mpfr_prec_t mpfr_sincos_threshold;
    266  1.1  mrg #undef MPFR_SINCOS_THRESHOLD
    267  1.1  mrg #define MPFR_SINCOS_THRESHOLD mpfr_sincos_threshold
    268  1.1  mrg #include "sin_cos.c"
    269  1.1  mrg #include "cos.c"
    270  1.1  mrg static double
    271  1.1  mrg speed_mpfr_sincos (struct speed_params *s)
    272  1.1  mrg {
    273  1.1  mrg   SPEED_MPFR_FUNC2 (mpfr_sin_cos);
    274  1.1  mrg }
    275  1.1  mrg 
    276  1.1  mrg /* Setup mpfr_mul, mpfr_sqr and mpfr_div */
    277  1.1  mrg mpfr_prec_t mpfr_mul_threshold;
    278  1.1  mrg mpfr_prec_t mpfr_sqr_threshold;
    279  1.1  mrg mpfr_prec_t mpfr_div_threshold;
    280  1.1  mrg #undef  MPFR_MUL_THRESHOLD
    281  1.1  mrg #define MPFR_MUL_THRESHOLD mpfr_mul_threshold
    282  1.1  mrg #undef  MPFR_SQR_THRESHOLD
    283  1.1  mrg #define MPFR_SQR_THRESHOLD mpfr_sqr_threshold
    284  1.1  mrg #undef  MPFR_DIV_THRESHOLD
    285  1.1  mrg #define MPFR_DIV_THRESHOLD mpfr_div_threshold
    286  1.1  mrg #include "mul.c"
    287  1.1  mrg #include "div.c"
    288  1.1  mrg static double
    289  1.1  mrg speed_mpfr_mul (struct speed_params *s)
    290  1.1  mrg {
    291  1.1  mrg   SPEED_MPFR_OP (mpfr_mul);
    292  1.1  mrg }
    293  1.1  mrg static double
    294  1.1  mrg speed_mpfr_sqr (struct speed_params *s)
    295  1.1  mrg {
    296  1.1  mrg   SPEED_MPFR_SQR (mpfr_mul);
    297  1.1  mrg }
    298  1.1  mrg static double
    299  1.1  mrg speed_mpfr_div (struct speed_params *s)
    300  1.1  mrg {
    301  1.1  mrg   SPEED_MPFR_OP (mpfr_div);
    302  1.1  mrg }
    303  1.1  mrg 
    304  1.1  mrg /************************************************
    305  1.1  mrg  * Common functions (inspired by GMP function)  *
    306  1.1  mrg  ************************************************/
    307  1.1  mrg static int
    308  1.1  mrg analyze_data (double *dat, int ndat)
    309  1.1  mrg {
    310  1.1  mrg   double  x, min_x;
    311  1.1  mrg   int     j, min_j;
    312  1.1  mrg 
    313  1.1  mrg   x = 0.0;
    314  1.1  mrg   for (j = 0; j < ndat; j++)
    315  1.1  mrg     if (dat[j] > 0.0)
    316  1.1  mrg       x += dat[j];
    317  1.1  mrg 
    318  1.1  mrg   min_x = x;
    319  1.1  mrg   min_j = 0;
    320  1.1  mrg 
    321  1.1  mrg   for (j = 0; j < ndat; x -= dat[j], j++)
    322  1.1  mrg     {
    323  1.1  mrg       if (x < min_x)
    324  1.1  mrg         {
    325  1.1  mrg           min_x = x;
    326  1.1  mrg           min_j = j;
    327  1.1  mrg         }
    328  1.1  mrg     }
    329  1.1  mrg   return min_j;
    330  1.1  mrg }
    331  1.1  mrg 
    332  1.1  mrg static double
    333  1.1  mrg mpfr_speed_measure (speed_function_t fun, struct speed_params *s, char *m)
    334  1.1  mrg {
    335  1.1  mrg   double t = -1.0;
    336  1.1  mrg   int i;
    337  1.1  mrg   int number_of_iterations = 30;
    338  1.1  mrg   for (i = 1; i <= number_of_iterations && t == -1.0; i++)
    339  1.1  mrg     {
    340  1.1  mrg       t = speed_measure (fun, s);
    341  1.1  mrg       if ( (t == -1.0) && (i+1 <= number_of_iterations) )
    342  1.1  mrg         printf("speed_measure failed for size %lu. Trying again... (%d/%d)\n",
    343  1.1  mrg                s->size, i+1, number_of_iterations);
    344  1.1  mrg     }
    345  1.1  mrg   if (t == -1.0)
    346  1.1  mrg     {
    347  1.1  mrg       fprintf (stderr, "Failed to measure %s!\n", m);
    348  1.1  mrg       fprintf (stderr, "If CPU frequency scaling is enabled, please disable it:\n");
    349  1.1  mrg       fprintf (stderr, "   under Linux: cpufreq-selector -g performance\n");
    350  1.1  mrg       fprintf (stderr, "On a multi-core processor, you might also try to load all the cores\n");
    351  1.1  mrg       abort ();
    352  1.1  mrg     }
    353  1.1  mrg   return t;
    354  1.1  mrg }
    355  1.1  mrg 
    356  1.1  mrg #define THRESHOLD_WINDOW 16
    357  1.1  mrg #define THRESHOLD_FINAL_WINDOW 128
    358  1.1  mrg static double
    359  1.1  mrg domeasure (mpfr_prec_t *threshold,
    360  1.1  mrg            double (*func) (struct speed_params *),
    361  1.1  mrg            mpfr_prec_t p)
    362  1.1  mrg {
    363  1.1  mrg   struct speed_params s;
    364  1.1  mrg   mp_size_t size;
    365  1.1  mrg   double t1, t2, d;
    366  1.1  mrg 
    367  1.1  mrg   s.align_xp = s.align_yp = s.align_wp = 64;
    368  1.1  mrg   s.size = p;
    369  1.1  mrg   size = (p - 1)/GMP_NUMB_BITS+1;
    370  1.1  mrg   s.xp = malloc (2*size*sizeof (mp_limb_t));
    371  1.1  mrg   if (s.xp == NULL)
    372  1.1  mrg     {
    373  1.1  mrg       fprintf (stderr, "Can't allocate memory.\n");
    374  1.1  mrg       abort ();
    375  1.1  mrg     }
    376  1.1  mrg   mpn_random (s.xp, size);
    377  1.1  mrg   s.yp = s.xp + size;
    378  1.1  mrg   mpn_random (s.yp, size);
    379  1.1  mrg   *threshold = MPFR_PREC_MAX;
    380  1.1  mrg   t1 = mpfr_speed_measure (func, &s, "function 1");
    381  1.1  mrg   *threshold = 1;
    382  1.1  mrg   t2 = mpfr_speed_measure (func, &s, "function 2");
    383  1.1  mrg   free (s.xp);
    384  1.1  mrg   /* t1 is the time of the first algo (used for low prec) */
    385  1.1  mrg   if (t2 >= t1)
    386  1.1  mrg     d = (t2 - t1) / t2;
    387  1.1  mrg   else
    388  1.1  mrg     d = (t2 - t1) / t1;
    389  1.1  mrg   /* d > 0 if we have to use algo 1.
    390  1.1  mrg      d < 0 if we have to use algo 2 */
    391  1.1  mrg   return d;
    392  1.1  mrg }
    393  1.1  mrg 
    394  1.1  mrg /* Performs measures when both the precision and the point of evaluation
    395  1.1  mrg    shall vary. s.yp is ignored and not initialized.
    396  1.1  mrg    It assumes that func depends on three thresholds with a boundary of the
    397  1.1  mrg    form threshold1*x + threshold2*p = some scaling factor, if x<0,
    398  1.1  mrg    and  threshold3*x + threshold2*p = some scaling factor, if x>=0.
    399  1.1  mrg */
    400  1.1  mrg static double
    401  1.1  mrg domeasure2 (long int *threshold1, long int *threshold2, long int *threshold3,
    402  1.1  mrg             double (*func) (struct speed_params *),
    403  1.1  mrg             mpfr_prec_t p,
    404  1.1  mrg             mpfr_t x)
    405  1.1  mrg {
    406  1.1  mrg   struct speed_params s;
    407  1.1  mrg   mp_size_t size;
    408  1.1  mrg   double t1, t2, d;
    409  1.1  mrg   mpfr_t xtmp;
    410  1.1  mrg 
    411  1.1  mrg   if (MPFR_IS_SINGULAR (x))
    412  1.1  mrg     {
    413  1.1  mrg       mpfr_fprintf (stderr, "x=%RNf is not a regular number.\n");
    414  1.1  mrg       abort ();
    415  1.1  mrg     }
    416  1.1  mrg   if (MPFR_IS_NEG (x))
    417  1.1  mrg     s.align_xp = 2;
    418  1.1  mrg   else
    419  1.1  mrg     s.align_xp = 1;
    420  1.1  mrg 
    421  1.1  mrg   s.align_yp = s.align_wp = 64;
    422  1.1  mrg   s.size = p;
    423  1.1  mrg   size = (p - 1)/GMP_NUMB_BITS+1;
    424  1.1  mrg 
    425  1.1  mrg   mpfr_init2 (xtmp, p);
    426  1.1  mrg   mpn_random (xtmp->_mpfr_d, size);
    427  1.1  mrg   xtmp->_mpfr_d[size-1] |= MPFR_LIMB_HIGHBIT;
    428  1.1  mrg   MPFR_SET_EXP (xtmp, -53);
    429  1.1  mrg   mpfr_add_ui (xtmp, xtmp, 1, MPFR_RNDN);
    430  1.1  mrg   mpfr_mul (xtmp, xtmp, x, MPFR_RNDN); /* xtmp = x*(1+perturb)       */
    431  1.1  mrg                                       /* where perturb ~ 2^(-53) is */
    432  1.1  mrg                                       /* randomly chosen.           */
    433  1.1  mrg   s.xp = xtmp->_mpfr_d;
    434  1.1  mrg   s.r = MPFR_GET_EXP (xtmp);
    435  1.1  mrg 
    436  1.1  mrg   *threshold1 = 0;
    437  1.1  mrg   *threshold2 = 0;
    438  1.1  mrg   *threshold3 = 0;
    439  1.1  mrg   t1 = mpfr_speed_measure (func, &s, "function 1");
    440  1.1  mrg 
    441  1.1  mrg   if (MPFR_IS_NEG (x))
    442  1.1  mrg     *threshold1 = INT_MIN;
    443  1.1  mrg   else
    444  1.1  mrg     *threshold3 = INT_MAX;
    445  1.1  mrg   *threshold2 = INT_MAX;
    446  1.1  mrg   t2 = mpfr_speed_measure (func, &s, "function 2");
    447  1.1  mrg 
    448  1.1  mrg   /* t1 is the time of the first algo (used for low prec) */
    449  1.1  mrg   if (t2 >= t1)
    450  1.1  mrg     d = (t2 - t1) / t2;
    451  1.1  mrg   else
    452  1.1  mrg     d = (t2 - t1) / t1;
    453  1.1  mrg   /* d > 0 if we have to use algo 1.
    454  1.1  mrg      d < 0 if we have to use algo 2 */
    455  1.1  mrg   mpfr_clear (xtmp);
    456  1.1  mrg   return d;
    457  1.1  mrg }
    458  1.1  mrg 
    459  1.1  mrg /* Tune a function with a simple THRESHOLD
    460  1.1  mrg    The function doesn't depend on another threshold.
    461  1.1  mrg    It assumes that it uses algo1 if p < THRESHOLD
    462  1.1  mrg    and algo2 otherwise.
    463  1.1  mrg    if algo2 is better for low prec, and algo1 better for high prec,
    464  1.1  mrg    the behaviour of this function is undefined. */
    465  1.1  mrg static void
    466  1.1  mrg tune_simple_func (mpfr_prec_t *threshold,
    467  1.1  mrg                   double (*func) (struct speed_params *),
    468  1.1  mrg                   mpfr_prec_t pstart)
    469  1.1  mrg {
    470  1.1  mrg   double measure[THRESHOLD_FINAL_WINDOW+1];
    471  1.1  mrg   double d;
    472  1.1  mrg   mpfr_prec_t pstep;
    473  1.1  mrg   int i, numpos, numneg, try;
    474  1.1  mrg   mpfr_prec_t pmin, pmax, p;
    475  1.1  mrg 
    476  1.1  mrg   /* first look for a lower bound within 10% */
    477  1.1  mrg   pmin = p = pstart;
    478  1.1  mrg   d = domeasure (threshold, func, pmin);
    479  1.1  mrg   if (d < 0.0)
    480  1.1  mrg     {
    481  1.1  mrg       if (verbose)
    482  1.1  mrg         printf ("Oops: even for %lu, algo 2 seems to be faster!\n",
    483  1.1  mrg                 (unsigned long) pmin);
    484  1.1  mrg       *threshold = MPFR_PREC_MIN;
    485  1.1  mrg       return;
    486  1.1  mrg     }
    487  1.1  mrg   if (d >= 1.00)
    488  1.1  mrg     for (;;)
    489  1.1  mrg       {
    490  1.1  mrg         d = domeasure (threshold, func, pmin);
    491  1.1  mrg         if (d < 1.00)
    492  1.1  mrg           break;
    493  1.1  mrg         p = pmin;
    494  1.1  mrg         pmin += pmin/2;
    495  1.1  mrg       }
    496  1.1  mrg   pmin = p;
    497  1.1  mrg   for (;;)
    498  1.1  mrg     {
    499  1.1  mrg       d = domeasure (threshold, func, pmin);
    500  1.1  mrg       if (d < 0.10)
    501  1.1  mrg         break;
    502  1.1  mrg       pmin += GMP_NUMB_BITS;
    503  1.1  mrg     }
    504  1.1  mrg 
    505  1.1  mrg   /* then look for an upper bound within 20% */
    506  1.1  mrg   pmax = pmin * 2;
    507  1.1  mrg   for (;;)
    508  1.1  mrg     {
    509  1.1  mrg       d = domeasure (threshold, func, pmax);
    510  1.1  mrg       if (d < -0.20)
    511  1.1  mrg         break;
    512  1.1  mrg       pmax += pmin / 2; /* don't increase too rapidly */
    513  1.1  mrg     }
    514  1.1  mrg 
    515  1.1  mrg   /* The threshold is between pmin and pmax. Affine them */
    516  1.1  mrg   try = 0;
    517  1.1  mrg   while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW)
    518  1.1  mrg     {
    519  1.1  mrg       pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1);
    520  1.1  mrg       if (verbose)
    521  1.1  mrg         printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep);
    522  1.1  mrg       p = (pmin + pmax) / 2;
    523  1.1  mrg       for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++)
    524  1.1  mrg         {
    525  1.1  mrg           measure[i] = domeasure (threshold, func,
    526  1.1  mrg                                   p+(i-THRESHOLD_WINDOW/2)*pstep);
    527  1.1  mrg           if (measure[i] > 0)
    528  1.1  mrg             numpos ++;
    529  1.1  mrg           else if (measure[i] < 0)
    530  1.1  mrg             numneg ++;
    531  1.1  mrg         }
    532  1.1  mrg       if (numpos > numneg)
    533  1.1  mrg         /* We use more often algo 1 than algo 2 */
    534  1.1  mrg         pmin = p - THRESHOLD_WINDOW/2*pstep;
    535  1.1  mrg       else if (numpos < numneg)
    536  1.1  mrg         pmax = p + THRESHOLD_WINDOW/2*pstep;
    537  1.1  mrg       else
    538  1.1  mrg         /* numpos == numneg ... */
    539  1.1  mrg         if (++ try > 2)
    540  1.1  mrg           {
    541  1.1  mrg             *threshold = p;
    542  1.1  mrg             if (verbose)
    543  1.1  mrg               printf ("Quick find: %lu\n", *threshold);
    544  1.1  mrg             return ;
    545  1.1  mrg           }
    546  1.1  mrg     }
    547  1.1  mrg 
    548  1.1  mrg   /* Final tune... */
    549  1.1  mrg   if (verbose)
    550  1.1  mrg     printf ("Finalizing in [%lu, %lu]... ", pmin, pmax);
    551  1.1  mrg   for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++)
    552  1.1  mrg     measure[i] = domeasure (threshold, func, pmin+i);
    553  1.1  mrg   i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1);
    554  1.1  mrg   *threshold = pmin + i;
    555  1.1  mrg   if (verbose)
    556  1.1  mrg     printf ("%lu\n", *threshold);
    557  1.1  mrg   return;
    558  1.1  mrg }
    559  1.1  mrg 
    560  1.1  mrg /* Tune a function which behavior depends on both p and x,
    561  1.1  mrg    in a given direction.
    562  1.1  mrg    It assumes that for (x,p) close to zero, algo1 is used
    563  1.1  mrg    and algo2 is used when (x,p) is far from zero.
    564  1.1  mrg    If algo2 is better for low prec, and algo1 better for high prec,
    565  1.1  mrg    the behaviour of this function is undefined.
    566  1.1  mrg    This tuning function tries couples (x,p) of the form (ell*dirx, ell*dirp)
    567  1.1  mrg    until it finds a point on the boundary. It returns ell.
    568  1.1  mrg  */
    569  1.1  mrg static void
    570  1.1  mrg tune_simple_func_in_some_direction (long int *threshold1,
    571  1.1  mrg                                     long int *threshold2,
    572  1.1  mrg                                     long int *threshold3,
    573  1.1  mrg                                     double (*func) (struct speed_params *),
    574  1.1  mrg                                     mpfr_prec_t pstart,
    575  1.1  mrg                                     int dirx, int dirp,
    576  1.1  mrg                                     mpfr_t xres, mpfr_prec_t *pres)
    577  1.1  mrg {
    578  1.1  mrg   double measure[THRESHOLD_FINAL_WINDOW+1];
    579  1.1  mrg   double d;
    580  1.1  mrg   mpfr_prec_t pstep;
    581  1.1  mrg   int i, numpos, numneg, try;
    582  1.1  mrg   mpfr_prec_t pmin, pmax, p;
    583  1.1  mrg   mpfr_t xmin, xmax, x;
    584  1.1  mrg   mpfr_t ratio;
    585  1.1  mrg 
    586  1.1  mrg   mpfr_init2 (ratio, MPFR_SMALL_PRECISION);
    587  1.1  mrg   mpfr_set_si (ratio, dirx, MPFR_RNDN);
    588  1.1  mrg   mpfr_div_si (ratio, ratio, dirp, MPFR_RNDN);
    589  1.1  mrg 
    590  1.1  mrg   mpfr_init2 (xmin, MPFR_SMALL_PRECISION);
    591  1.1  mrg   mpfr_init2 (xmax, MPFR_SMALL_PRECISION);
    592  1.1  mrg   mpfr_init2 (x, MPFR_SMALL_PRECISION);
    593  1.1  mrg 
    594  1.1  mrg   /* first look for a lower bound within 10% */
    595  1.1  mrg   pmin = p = pstart;
    596  1.1  mrg   mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
    597  1.1  mrg   mpfr_set (x, xmin, MPFR_RNDN);
    598  1.1  mrg 
    599  1.1  mrg   d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
    600  1.1  mrg   if (d < 0.0)
    601  1.1  mrg     {
    602  1.1  mrg       if (verbose)
    603  1.1  mrg         printf ("Oops: even for %lu, algo 2 seems to be faster!\n",
    604  1.1  mrg                 (unsigned long) pmin);
    605  1.1  mrg       *pres = MPFR_PREC_MIN;
    606  1.1  mrg       mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
    607  1.1  mrg       mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
    608  1.1  mrg       return;
    609  1.1  mrg     }
    610  1.1  mrg   if (d >= 1.00)
    611  1.1  mrg     for (;;)
    612  1.1  mrg       {
    613  1.1  mrg         d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
    614  1.1  mrg         if (d < 1.00)
    615  1.1  mrg           break;
    616  1.1  mrg         p = pmin;
    617  1.1  mrg         mpfr_set (x, xmin, MPFR_RNDN);
    618  1.1  mrg         pmin += pmin/2;
    619  1.1  mrg         mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
    620  1.1  mrg       }
    621  1.1  mrg   pmin = p;
    622  1.1  mrg   mpfr_set (xmin, x, MPFR_RNDN);
    623  1.1  mrg   for (;;)
    624  1.1  mrg     {
    625  1.1  mrg       d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
    626  1.1  mrg       if (d < 0.10)
    627  1.1  mrg         break;
    628  1.1  mrg       pmin += GMP_NUMB_BITS;
    629  1.1  mrg       mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
    630  1.1  mrg     }
    631  1.1  mrg 
    632  1.1  mrg   /* then look for an upper bound within 20% */
    633  1.1  mrg   pmax = pmin * 2;
    634  1.1  mrg   mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
    635  1.1  mrg   for (;;)
    636  1.1  mrg     {
    637  1.1  mrg       d = domeasure2 (threshold1, threshold2, threshold3, func, pmax, xmax);
    638  1.1  mrg       if (d < -0.20)
    639  1.1  mrg         break;
    640  1.1  mrg       pmax += pmin / 2; /* don't increase too rapidly */
    641  1.1  mrg       mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
    642  1.1  mrg     }
    643  1.1  mrg 
    644  1.1  mrg   /* The threshold is between pmin and pmax. Affine them */
    645  1.1  mrg   try = 0;
    646  1.1  mrg   while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW)
    647  1.1  mrg     {
    648  1.1  mrg       pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1);
    649  1.1  mrg       if (verbose)
    650  1.1  mrg         printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep);
    651  1.1  mrg       p = (pmin + pmax) / 2;
    652  1.1  mrg       mpfr_mul_ui (x, ratio, (unsigned int)p, MPFR_RNDN);
    653  1.1  mrg       for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++)
    654  1.1  mrg         {
    655  1.1  mrg           *pres = p+(i-THRESHOLD_WINDOW/2)*pstep;
    656  1.1  mrg           mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
    657  1.1  mrg           measure[i] = domeasure2 (threshold1, threshold2, threshold3,
    658  1.1  mrg                                    func, *pres, xres);
    659  1.1  mrg           if (measure[i] > 0)
    660  1.1  mrg             numpos ++;
    661  1.1  mrg           else if (measure[i] < 0)
    662  1.1  mrg             numneg ++;
    663  1.1  mrg         }
    664  1.1  mrg       if (numpos > numneg)
    665  1.1  mrg         {
    666  1.1  mrg           /* We use more often algo 1 than algo 2 */
    667  1.1  mrg           pmin = p - THRESHOLD_WINDOW/2*pstep;
    668  1.1  mrg           mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
    669  1.1  mrg         }
    670  1.1  mrg       else if (numpos < numneg)
    671  1.1  mrg         {
    672  1.1  mrg           pmax = p + THRESHOLD_WINDOW/2*pstep;
    673  1.1  mrg           mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
    674  1.1  mrg         }
    675  1.1  mrg       else
    676  1.1  mrg         /* numpos == numneg ... */
    677  1.1  mrg         if (++ try > 2)
    678  1.1  mrg           {
    679  1.1  mrg             *pres = p;
    680  1.1  mrg             mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
    681  1.1  mrg             if (verbose)
    682  1.1  mrg               printf ("Quick find: %lu\n", *pres);
    683  1.1  mrg             mpfr_clear (ratio);
    684  1.1  mrg             mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
    685  1.1  mrg             return ;
    686  1.1  mrg           }
    687  1.1  mrg     }
    688  1.1  mrg 
    689  1.1  mrg   /* Final tune... */
    690  1.1  mrg   if (verbose)
    691  1.1  mrg     printf ("Finalizing in [%lu, %lu]... ", pmin, pmax);
    692  1.1  mrg   for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++)
    693  1.1  mrg     {
    694  1.1  mrg       *pres = pmin+i;
    695  1.1  mrg       mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
    696  1.1  mrg       measure[i] = domeasure2 (threshold1, threshold2, threshold3,
    697  1.1  mrg                                func, *pres, xres);
    698  1.1  mrg     }
    699  1.1  mrg   i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1);
    700  1.1  mrg   *pres = pmin + i;
    701  1.1  mrg   mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
    702  1.1  mrg   if (verbose)
    703  1.1  mrg     printf ("%lu\n", *pres);
    704  1.1  mrg   mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
    705  1.1  mrg   return;
    706  1.1  mrg }
    707  1.1  mrg 
    708  1.1  mrg /************************************
    709  1.1  mrg  * Tune Mulders' mulhigh function   *
    710  1.1  mrg  ************************************/
    711  1.1  mrg #define TOLERANCE 1.00
    712  1.1  mrg #define MULDERS_TABLE_SIZE 1024
    713  1.1  mrg #ifndef MPFR_MULHIGH_SIZE
    714  1.1  mrg # define MPFR_MULHIGH_SIZE MULDERS_TABLE_SIZE
    715  1.1  mrg #endif
    716  1.1  mrg #ifndef MPFR_SQRHIGH_SIZE
    717  1.1  mrg # define MPFR_SQRHIGH_SIZE MULDERS_TABLE_SIZE
    718  1.1  mrg #endif
    719  1.1  mrg #ifndef MPFR_DIVHIGH_SIZE
    720  1.1  mrg # define MPFR_DIVHIGH_SIZE MULDERS_TABLE_SIZE
    721  1.1  mrg #endif
    722  1.1  mrg #define MPFR_MULHIGH_TAB_SIZE MPFR_MULHIGH_SIZE
    723  1.1  mrg #define MPFR_SQRHIGH_TAB_SIZE MPFR_SQRHIGH_SIZE
    724  1.1  mrg #define MPFR_DIVHIGH_TAB_SIZE MPFR_DIVHIGH_SIZE
    725  1.1  mrg #include "mulders.c"
    726  1.1  mrg 
    727  1.1  mrg static double
    728  1.1  mrg speed_mpfr_mulhigh (struct speed_params *s)
    729  1.1  mrg {
    730  1.1  mrg   SPEED_ROUTINE_MPN_MUL_N (mpfr_mulhigh_n);
    731  1.1  mrg }
    732  1.1  mrg 
    733  1.1  mrg static double
    734  1.1  mrg speed_mpfr_sqrhigh (struct speed_params *s)
    735  1.1  mrg {
    736  1.1  mrg   SPEED_ROUTINE_MPN_SQR (mpfr_sqrhigh_n);
    737  1.1  mrg }
    738  1.1  mrg 
    739  1.1  mrg static double
    740  1.1  mrg speed_mpfr_divhigh (struct speed_params *s)
    741  1.1  mrg {
    742  1.1  mrg   SPEED_ROUTINE_MPN_DC_DIVREM_CALL (mpfr_divhigh_n (q, a, d, s->size));
    743  1.1  mrg }
    744  1.1  mrg 
    745  1.1  mrg #define MAX_STEPS 513 /* maximum number of values of k tried for a given n */
    746  1.1  mrg 
    747  1.1  mrg /* Tune mpfr_mulhigh_n for size n */
    748  1.1  mrg static mp_size_t
    749  1.1  mrg tune_mul_mulders_upto (mp_size_t n)
    750  1.1  mrg {
    751  1.1  mrg   struct speed_params s;
    752  1.1  mrg   mp_size_t k, kbest, step;
    753  1.1  mrg   double t, tbest;
    754  1.1  mrg   MPFR_TMP_DECL (marker);
    755  1.1  mrg 
    756  1.1  mrg   if (n == 0)
    757  1.1  mrg     return -1;
    758  1.1  mrg 
    759  1.1  mrg   MPFR_TMP_MARK (marker);
    760  1.1  mrg   s.align_xp = s.align_yp = s.align_wp = 64;
    761  1.1  mrg   s.size = n;
    762  1.1  mrg   s.xp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
    763  1.1  mrg   s.yp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
    764  1.1  mrg   mpn_random (s.xp, n);
    765  1.1  mrg   mpn_random (s.yp, n);
    766  1.1  mrg 
    767  1.1  mrg   /* Check k == -1, mpn_mul_basecase */
    768  1.1  mrg   mulhigh_ktab[n] = -1;
    769  1.1  mrg   kbest = -1;
    770  1.1  mrg   tbest = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
    771  1.1  mrg 
    772  1.1  mrg   /* Check k == 0, mpn_mulhigh_n_basecase */
    773  1.1  mrg   mulhigh_ktab[n] = 0;
    774  1.1  mrg   t = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
    775  1.1  mrg   if (t * TOLERANCE < tbest)
    776  1.1  mrg     kbest = 0, tbest = t;
    777  1.1  mrg 
    778  1.1  mrg   /* Check Mulders with cutoff point k */
    779  1.1  mrg   step = 1 + n / (2 * MAX_STEPS);
    780  1.1  mrg   /* we need k >= (n+3)/2, which translates into k >= (n+4)/2 in C */
    781  1.1  mrg   for (k = (n + 4) / 2 ; k < n ; k += step)
    782  1.1  mrg     {
    783  1.1  mrg       mulhigh_ktab[n] = k;
    784  1.1  mrg       t = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
    785  1.1  mrg       if (t * TOLERANCE < tbest)
    786  1.1  mrg         kbest = k, tbest = t;
    787  1.1  mrg     }
    788  1.1  mrg 
    789  1.1  mrg   mulhigh_ktab[n] = kbest;
    790  1.1  mrg 
    791  1.1  mrg   MPFR_TMP_FREE (marker);
    792  1.1  mrg   return kbest;
    793  1.1  mrg }
    794  1.1  mrg 
    795  1.1  mrg /* Tune mpfr_sqrhigh_n for size n */
    796  1.1  mrg static mp_size_t
    797  1.1  mrg tune_sqr_mulders_upto (mp_size_t n)
    798  1.1  mrg {
    799  1.1  mrg   struct speed_params s;
    800  1.1  mrg   mp_size_t k, kbest, step;
    801  1.1  mrg   double t, tbest;
    802  1.1  mrg   MPFR_TMP_DECL (marker);
    803  1.1  mrg 
    804  1.1  mrg   if (n == 0)
    805  1.1  mrg     return -1;
    806  1.1  mrg 
    807  1.1  mrg   MPFR_TMP_MARK (marker);
    808  1.1  mrg   s.align_xp = s.align_wp = 64;
    809  1.1  mrg   s.size = n;
    810  1.1  mrg   s.xp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
    811  1.1  mrg   mpn_random (s.xp, n);
    812  1.1  mrg 
    813  1.1  mrg   /* Check k == -1, mpn_sqr_basecase */
    814  1.1  mrg   sqrhigh_ktab[n] = -1;
    815  1.1  mrg   kbest = -1;
    816  1.1  mrg   tbest = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
    817  1.1  mrg 
    818  1.1  mrg   /* Check k == 0, mpfr_mulhigh_n_basecase */
    819  1.1  mrg   sqrhigh_ktab[n] = 0;
    820  1.1  mrg   t = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
    821  1.1  mrg   if (t * TOLERANCE < tbest)
    822  1.1  mrg     kbest = 0, tbest = t;
    823  1.1  mrg 
    824  1.1  mrg   /* Check Mulders */
    825  1.1  mrg   step = 1 + n / (2 * MAX_STEPS);
    826  1.1  mrg   /* we need k >= (n+3)/2, which translates into k >= (n+4)/2 in C */
    827  1.1  mrg   for (k = (n + 4) / 2 ; k < n ; k += step)
    828  1.1  mrg     {
    829  1.1  mrg       sqrhigh_ktab[n] = k;
    830  1.1  mrg       t = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
    831  1.1  mrg       if (t * TOLERANCE < tbest)
    832  1.1  mrg         kbest = k, tbest = t;
    833  1.1  mrg     }
    834  1.1  mrg 
    835  1.1  mrg   sqrhigh_ktab[n] = kbest;
    836  1.1  mrg 
    837  1.1  mrg   MPFR_TMP_FREE (marker);
    838  1.1  mrg   return kbest;
    839  1.1  mrg }
    840  1.1  mrg 
    841  1.1  mrg /* Tune mpfr_divhigh_n for size n */
    842  1.1  mrg static mp_size_t
    843  1.1  mrg tune_div_mulders_upto (mp_size_t n)
    844  1.1  mrg {
    845  1.1  mrg   struct speed_params s;
    846  1.1  mrg   mp_size_t k, kbest, step;
    847  1.1  mrg   double t, tbest;
    848  1.1  mrg   MPFR_TMP_DECL (marker);
    849  1.1  mrg 
    850  1.1  mrg   if (n == 0)
    851  1.1  mrg     return 0;
    852  1.1  mrg 
    853  1.1  mrg   MPFR_TMP_MARK (marker);
    854  1.1  mrg   s.align_xp = s.align_yp = s.align_wp = s.align_wp2 = 64;
    855  1.1  mrg   s.size = n;
    856  1.1  mrg   s.xp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
    857  1.1  mrg   s.yp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
    858  1.1  mrg   mpn_random (s.xp, n);
    859  1.1  mrg   mpn_random (s.yp, n);
    860  1.1  mrg 
    861  1.1  mrg   /* Check k == n, i.e., mpn_divrem */
    862  1.1  mrg   divhigh_ktab[n] = n;
    863  1.1  mrg   kbest = n;
    864  1.1  mrg   tbest = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
    865  1.1  mrg 
    866  1.1  mrg   /* Check k == 0, i.e., mpfr_divhigh_n_basecase */
    867  1.1  mrg #if defined(WANT_GMP_INTERNALS) && defined(HAVE___GMPN_SBPI1_DIVAPPR_Q)
    868  1.1  mrg   if (n > 2) /* mpn_sbpi1_divappr_q requires dn > 2 */
    869  1.1  mrg #endif
    870  1.1  mrg     {
    871  1.1  mrg       divhigh_ktab[n] = 0;
    872  1.1  mrg       t = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
    873  1.1  mrg       if (t * TOLERANCE < tbest)
    874  1.1  mrg         kbest = 0, tbest = t;
    875  1.1  mrg     }
    876  1.1  mrg 
    877  1.1  mrg   /* Check Mulders */
    878  1.1  mrg   step = 1 + n / (2 * MAX_STEPS);
    879  1.1  mrg   /* we should have (n+3)/2 <= k < n, which translates into
    880  1.1  mrg      (n+4)/2 <= k < n in C */
    881  1.1  mrg   for (k = (n + 4) / 2 ; k < n ; k += step)
    882  1.1  mrg     {
    883  1.1  mrg       divhigh_ktab[n] = k;
    884  1.1  mrg       t = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
    885  1.1  mrg       if (t * TOLERANCE < tbest)
    886  1.1  mrg         kbest = k, tbest = t;
    887  1.1  mrg     }
    888  1.1  mrg 
    889  1.1  mrg   divhigh_ktab[n] = kbest;
    890  1.1  mrg 
    891  1.1  mrg   MPFR_TMP_FREE (marker);
    892  1.1  mrg 
    893  1.1  mrg   return kbest;
    894  1.1  mrg }
    895  1.1  mrg 
    896  1.1  mrg static void
    897  1.1  mrg tune_mul_mulders (FILE *f)
    898  1.1  mrg {
    899  1.1  mrg   mp_size_t k;
    900  1.1  mrg 
    901  1.1  mrg   if (verbose)
    902  1.1  mrg     printf ("Tuning mpfr_mulhigh_n[%d]", (int) MPFR_MULHIGH_TAB_SIZE);
    903  1.1  mrg   fprintf (f, "#define MPFR_MULHIGH_TAB  \\\n ");
    904  1.1  mrg   for (k = 0 ; k < MPFR_MULHIGH_TAB_SIZE ; k++)
    905  1.1  mrg     {
    906  1.1  mrg       fprintf (f, "%d", (int) tune_mul_mulders_upto (k));
    907  1.1  mrg       if (k != MPFR_MULHIGH_TAB_SIZE-1)
    908  1.1  mrg         fputc (',', f);
    909  1.1  mrg       if ((k+1) % 16 == 0)
    910  1.1  mrg         fprintf (f, " \\\n ");
    911  1.1  mrg       if (verbose)
    912  1.1  mrg         putchar ('.');
    913  1.1  mrg     }
    914  1.1  mrg   fprintf (f, " \n");
    915  1.1  mrg   if (verbose)
    916  1.1  mrg     putchar ('\n');
    917  1.1  mrg }
    918  1.1  mrg 
    919  1.1  mrg static void
    920  1.1  mrg tune_sqr_mulders (FILE *f)
    921  1.1  mrg {
    922  1.1  mrg   mp_size_t k;
    923  1.1  mrg 
    924  1.1  mrg   if (verbose)
    925  1.1  mrg     printf ("Tuning mpfr_sqrhigh_n[%d]", (int) MPFR_SQRHIGH_TAB_SIZE);
    926  1.1  mrg   fprintf (f, "#define MPFR_SQRHIGH_TAB  \\\n ");
    927  1.1  mrg   for (k = 0 ; k < MPFR_SQRHIGH_TAB_SIZE ; k++)
    928  1.1  mrg     {
    929  1.1  mrg       fprintf (f, "%d", (int) tune_sqr_mulders_upto (k));
    930  1.1  mrg       if (k != MPFR_SQRHIGH_TAB_SIZE-1)
    931  1.1  mrg         fputc (',', f);
    932  1.1  mrg       if ((k+1) % 16 == 0)
    933  1.1  mrg         fprintf (f, " \\\n ");
    934  1.1  mrg       if (verbose)
    935  1.1  mrg         putchar ('.');
    936  1.1  mrg     }
    937  1.1  mrg   fprintf (f, " \n");
    938  1.1  mrg   if (verbose)
    939  1.1  mrg     putchar ('\n');
    940  1.1  mrg }
    941  1.1  mrg 
    942  1.1  mrg static void
    943  1.1  mrg tune_div_mulders (FILE *f)
    944  1.1  mrg {
    945  1.1  mrg   mp_size_t k;
    946  1.1  mrg 
    947  1.1  mrg   if (verbose)
    948  1.1  mrg     printf ("Tuning mpfr_divhigh_n[%d]", (int) MPFR_DIVHIGH_TAB_SIZE);
    949  1.1  mrg   fprintf (f, "#define MPFR_DIVHIGH_TAB  \\\n ");
    950  1.1  mrg   for (k = 0 ; k < MPFR_DIVHIGH_TAB_SIZE ; k++)
    951  1.1  mrg     {
    952  1.1  mrg       fprintf (f, "%d", (int) tune_div_mulders_upto (k));
    953  1.1  mrg       if (k != MPFR_DIVHIGH_TAB_SIZE - 1)
    954  1.1  mrg         fputc (',', f);
    955  1.1  mrg       if ((k+1) % 16 == 0)
    956  1.1  mrg         fprintf (f, " /*%zu-%zu*/ \\\n ", k - 15, k);
    957  1.1  mrg       if (verbose)
    958  1.1  mrg         putchar ('.');
    959  1.1  mrg     }
    960  1.1  mrg   fprintf (f, " \n");
    961  1.1  mrg   if (verbose)
    962  1.1  mrg     putchar ('\n');
    963  1.1  mrg }
    964  1.1  mrg 
    965  1.1  mrg /*******************************************************
    966  1.1  mrg  *            Tuning functions for mpfr_ai             *
    967  1.1  mrg  *******************************************************/
    968  1.1  mrg 
    969  1.1  mrg long int mpfr_ai_threshold1;
    970  1.1  mrg long int mpfr_ai_threshold2;
    971  1.1  mrg long int mpfr_ai_threshold3;
    972  1.1  mrg #undef  MPFR_AI_THRESHOLD1
    973  1.1  mrg #define MPFR_AI_THRESHOLD1 mpfr_ai_threshold1
    974  1.1  mrg #undef  MPFR_AI_THRESHOLD2
    975  1.1  mrg #define MPFR_AI_THRESHOLD2 mpfr_ai_threshold2
    976  1.1  mrg #undef  MPFR_AI_THRESHOLD3
    977  1.1  mrg #define MPFR_AI_THRESHOLD3 mpfr_ai_threshold3
    978  1.1  mrg 
    979  1.1  mrg #include "ai.c"
    980  1.1  mrg 
    981  1.1  mrg static double
    982  1.1  mrg speed_mpfr_ai (struct speed_params *s)
    983  1.1  mrg {
    984  1.1  mrg   SPEED_MPFR_FUNC_WITH_EXPONENT (mpfr_ai);
    985  1.1  mrg }
    986  1.1  mrg 
    987  1.1  mrg 
    988  1.1  mrg /*******************************************************
    989  1.1  mrg  *            Tune all the threshold of MPFR           *
    990  1.1  mrg  * Warning: tune the function in their dependent order!*
    991  1.1  mrg  *******************************************************/
    992  1.1  mrg static void
    993  1.1  mrg all (const char *filename)
    994  1.1  mrg {
    995  1.1  mrg   FILE *f;
    996  1.1  mrg   time_t  start_time, end_time;
    997  1.1  mrg   struct tm  *tp;
    998  1.1  mrg   mpfr_t x1, x2, x3, tmp1, tmp2;
    999  1.1  mrg   mpfr_prec_t p1, p2, p3;
   1000  1.1  mrg 
   1001  1.1  mrg   f = fopen (filename, "w");
   1002  1.1  mrg   if (f == NULL)
   1003  1.1  mrg     {
   1004  1.1  mrg       fprintf (stderr, "Can't open file '%s' for writing.\n", filename);
   1005  1.1  mrg       abort ();
   1006  1.1  mrg     }
   1007  1.1  mrg 
   1008  1.1  mrg   speed_time_init ();
   1009  1.1  mrg   if (verbose)
   1010  1.1  mrg     {
   1011  1.1  mrg       printf ("Using: %s\n", speed_time_string);
   1012  1.1  mrg       printf ("speed_precision %d", speed_precision);
   1013  1.1  mrg       if (speed_unittime == 1.0)
   1014  1.1  mrg         printf (", speed_unittime 1 cycle");
   1015  1.1  mrg       else
   1016  1.1  mrg         printf (", speed_unittime %.2e secs", speed_unittime);
   1017  1.1  mrg       if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
   1018  1.1  mrg         printf (", CPU freq unknown\n");
   1019  1.1  mrg       else
   1020  1.1  mrg         printf (", CPU freq %.2f MHz\n\n", 1e-6/speed_cycletime);
   1021  1.1  mrg     }
   1022  1.1  mrg 
   1023  1.1  mrg   time (&start_time);
   1024  1.1  mrg   tp = localtime (&start_time);
   1025  1.1  mrg   fprintf (f, "/* Generated by MPFR's tuneup.c, %d-%02d-%02d, ",
   1026  1.1  mrg           tp->tm_year+1900, tp->tm_mon+1, tp->tm_mday);
   1027  1.1  mrg 
   1028  1.1  mrg #ifdef __ICC
   1029  1.1  mrg   fprintf (f, "icc %d.%d.%d */\n", __ICC / 100, __ICC / 10 % 10, __ICC % 10);
   1030  1.1  mrg #elif defined(__GNUC__)
   1031  1.1  mrg #ifdef __GNUC_PATCHLEVEL__
   1032  1.1  mrg   fprintf (f, "gcc %d.%d.%d */\n", __GNUC__, __GNUC_MINOR__,
   1033  1.1  mrg            __GNUC_PATCHLEVEL__);
   1034  1.1  mrg #else
   1035  1.1  mrg   fprintf (f, "gcc %d.%d */\n", __GNUC__, __GNUC_MINOR__);
   1036  1.1  mrg #endif
   1037  1.1  mrg #elif defined (__SUNPRO_C)
   1038  1.1  mrg   fprintf (f, "Sun C %d.%d */\n", __SUNPRO_C / 0x100, __SUNPRO_C % 0x100);
   1039  1.1  mrg #elif defined (__sgi) && defined (_COMPILER_VERSION)
   1040  1.1  mrg   fprintf (f, "MIPSpro C %d.%d.%d */\n",
   1041  1.1  mrg            _COMPILER_VERSION / 100,
   1042  1.1  mrg            _COMPILER_VERSION / 10 % 10,
   1043  1.1  mrg            _COMPILER_VERSION % 10);
   1044  1.1  mrg #elif defined (__DECC) && defined (__DECC_VER)
   1045  1.1  mrg   fprintf (f, "DEC C %d */\n", __DECC_VER);
   1046  1.1  mrg #else
   1047  1.1  mrg   fprintf (f, "system compiler */\n");
   1048  1.1  mrg #endif
   1049  1.1  mrg   fprintf (f, "\n\n");
   1050  1.1  mrg   fprintf (f, "#ifndef MPFR_TUNE_CASE\n");
   1051  1.1  mrg   fprintf (f, "#define MPFR_TUNE_CASE \"src/mparam.h\"\n");
   1052  1.1  mrg   fprintf (f, "#endif\n\n");
   1053  1.1  mrg 
   1054  1.1  mrg   /* Tune mulhigh */
   1055  1.1  mrg   tune_mul_mulders (f);
   1056  1.1  mrg 
   1057  1.1  mrg   /* Tune sqrhigh */
   1058  1.1  mrg   tune_sqr_mulders (f);
   1059  1.1  mrg 
   1060  1.1  mrg   /* Tune divhigh */
   1061  1.1  mrg   tune_div_mulders (f);
   1062  1.1  mrg   fflush (f);
   1063  1.1  mrg 
   1064  1.1  mrg   /* Tune mpfr_mul (threshold is in limbs, but it doesn't matter too much) */
   1065  1.1  mrg   if (verbose)
   1066  1.1  mrg     printf ("Tuning mpfr_mul...\n");
   1067  1.1  mrg   tune_simple_func (&mpfr_mul_threshold, speed_mpfr_mul,
   1068  1.1  mrg                     2*GMP_NUMB_BITS+1);
   1069  1.1  mrg   fprintf (f, "#define MPFR_MUL_THRESHOLD %lu /* limbs */\n",
   1070  1.1  mrg            (unsigned long) (mpfr_mul_threshold - 1) / GMP_NUMB_BITS + 1);
   1071  1.1  mrg 
   1072  1.1  mrg   /* Tune mpfr_sqr (threshold is in limbs, but it doesn't matter too much) */
   1073  1.1  mrg   if (verbose)
   1074  1.1  mrg     printf ("Tuning mpfr_sqr...\n");
   1075  1.1  mrg   tune_simple_func (&mpfr_sqr_threshold, speed_mpfr_sqr,
   1076  1.1  mrg                     2*GMP_NUMB_BITS+1);
   1077  1.1  mrg   fprintf (f, "#define MPFR_SQR_THRESHOLD %lu /* limbs */\n",
   1078  1.1  mrg            (unsigned long) (mpfr_sqr_threshold - 1) / GMP_NUMB_BITS + 1);
   1079  1.1  mrg 
   1080  1.1  mrg   /* Tune mpfr_div (threshold is in limbs, but it doesn't matter too much) */
   1081  1.1  mrg   if (verbose)
   1082  1.1  mrg     printf ("Tuning mpfr_div...\n");
   1083  1.1  mrg   tune_simple_func (&mpfr_div_threshold, speed_mpfr_div,
   1084  1.1  mrg                     2*GMP_NUMB_BITS+1);
   1085  1.1  mrg   fprintf (f, "#define MPFR_DIV_THRESHOLD %lu /* limbs */\n",
   1086  1.1  mrg            (unsigned long) (mpfr_div_threshold - 1) / GMP_NUMB_BITS + 1);
   1087  1.1  mrg 
   1088  1.1  mrg   /* Tune mpfr_exp_2 */
   1089  1.1  mrg   if (verbose)
   1090  1.1  mrg     printf ("Tuning mpfr_exp_2...\n");
   1091  1.1  mrg   tune_simple_func (&mpfr_exp_2_threshold, speed_mpfr_exp_2, GMP_NUMB_BITS);
   1092  1.1  mrg   fprintf (f, "#define MPFR_EXP_2_THRESHOLD %lu /* bits */\n",
   1093  1.1  mrg            (unsigned long) mpfr_exp_2_threshold);
   1094  1.1  mrg 
   1095  1.1  mrg   /* Tune mpfr_exp */
   1096  1.1  mrg   if (verbose)
   1097  1.1  mrg     printf ("Tuning mpfr_exp...\n");
   1098  1.1  mrg   tune_simple_func (&mpfr_exp_threshold, speed_mpfr_exp,
   1099  1.1  mrg                     MPFR_PREC_MIN+3*GMP_NUMB_BITS);
   1100  1.1  mrg   fprintf (f, "#define MPFR_EXP_THRESHOLD %lu /* bits */\n",
   1101  1.1  mrg            (unsigned long) mpfr_exp_threshold);
   1102  1.1  mrg 
   1103  1.1  mrg   /* Tune mpfr_sin_cos */
   1104  1.1  mrg   if (verbose)
   1105  1.1  mrg     printf ("Tuning mpfr_sin_cos...\n");
   1106  1.1  mrg   tune_simple_func (&mpfr_sincos_threshold, speed_mpfr_sincos,
   1107  1.1  mrg                     MPFR_PREC_MIN+3*GMP_NUMB_BITS);
   1108  1.1  mrg   fprintf (f, "#define MPFR_SINCOS_THRESHOLD %lu /* bits */\n",
   1109  1.1  mrg            (unsigned long) mpfr_sincos_threshold);
   1110  1.1  mrg 
   1111  1.1  mrg   /* Tune mpfr_ai */
   1112  1.1  mrg   if (verbose)
   1113  1.1  mrg     printf ("Tuning mpfr_ai...\n");
   1114  1.1  mrg   mpfr_init2 (x1, MPFR_SMALL_PRECISION);
   1115  1.1  mrg   mpfr_init2 (x2, MPFR_SMALL_PRECISION);
   1116  1.1  mrg   mpfr_init2 (x3, MPFR_SMALL_PRECISION);
   1117  1.1  mrg   mpfr_init2 (tmp1, MPFR_SMALL_PRECISION);
   1118  1.1  mrg   mpfr_init2 (tmp2, MPFR_SMALL_PRECISION);
   1119  1.1  mrg 
   1120  1.1  mrg   tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
   1121  1.1  mrg                                       &mpfr_ai_threshold3, speed_mpfr_ai,
   1122  1.1  mrg                                       MPFR_PREC_MIN+GMP_NUMB_BITS,
   1123  1.1  mrg                                       -60, 200, x1, &p1);
   1124  1.1  mrg   tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
   1125  1.1  mrg                                       &mpfr_ai_threshold3, speed_mpfr_ai,
   1126  1.1  mrg                                       MPFR_PREC_MIN+GMP_NUMB_BITS,
   1127  1.1  mrg                                       -20, 500, x2, &p2);
   1128  1.1  mrg   tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
   1129  1.1  mrg                                       &mpfr_ai_threshold3, speed_mpfr_ai,
   1130  1.1  mrg                                       MPFR_PREC_MIN+GMP_NUMB_BITS,
   1131  1.1  mrg                                       40, 200, x3, &p3);
   1132  1.1  mrg 
   1133  1.1  mrg   mpfr_mul_ui (tmp1, x2, (unsigned long)p1, MPFR_RNDN);
   1134  1.1  mrg   mpfr_mul_ui (tmp2, x1, (unsigned long)p2, MPFR_RNDN);
   1135  1.1  mrg   mpfr_sub (tmp1, tmp1, tmp2, MPFR_RNDN);
   1136  1.1  mrg   mpfr_div_ui (tmp1, tmp1, MPFR_AI_SCALE, MPFR_RNDN);
   1137  1.1  mrg 
   1138  1.1  mrg   mpfr_set_ui (tmp2, (unsigned long)p1, MPFR_RNDN);
   1139  1.1  mrg   mpfr_sub_ui (tmp2, tmp2, (unsigned long)p2, MPFR_RNDN);
   1140  1.1  mrg   mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN);
   1141  1.1  mrg   mpfr_ai_threshold1 = mpfr_get_si (tmp2, MPFR_RNDN);
   1142  1.1  mrg 
   1143  1.1  mrg   mpfr_sub (tmp2, x2, x1, MPFR_RNDN);
   1144  1.1  mrg   mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN);
   1145  1.1  mrg   mpfr_ai_threshold2 = mpfr_get_si (tmp2, MPFR_RNDN);
   1146  1.1  mrg 
   1147  1.1  mrg   mpfr_set_ui (tmp1, (unsigned long)p3, MPFR_RNDN);
   1148  1.1  mrg   mpfr_mul_si (tmp1, tmp1, mpfr_ai_threshold2, MPFR_RNDN);
   1149  1.1  mrg   mpfr_ui_sub (tmp1, MPFR_AI_SCALE, tmp1, MPFR_RNDN);
   1150  1.1  mrg   mpfr_div (tmp1, tmp1, x3, MPFR_RNDN);
   1151  1.1  mrg   mpfr_ai_threshold3 = mpfr_get_si (tmp1, MPFR_RNDN);
   1152  1.1  mrg 
   1153  1.1  mrg   fprintf (f, "#define MPFR_AI_THRESHOLD1 %ld /* threshold for negative input of mpfr_ai */\n", mpfr_ai_threshold1);
   1154  1.1  mrg   fprintf (f, "#define MPFR_AI_THRESHOLD2 %ld\n", mpfr_ai_threshold2);
   1155  1.1  mrg   fprintf (f, "#define MPFR_AI_THRESHOLD3 %ld\n", mpfr_ai_threshold3);
   1156  1.1  mrg 
   1157  1.1  mrg   mpfr_clear (x1); mpfr_clear (x2); mpfr_clear (x3);
   1158  1.1  mrg   mpfr_clear (tmp1); mpfr_clear (tmp2);
   1159  1.1  mrg 
   1160  1.1  mrg   /* End of tuning */
   1161  1.1  mrg   time (&end_time);
   1162  1.1  mrg   fprintf (f, "/* Tuneup completed successfully, took %ld seconds */\n",
   1163  1.1  mrg            (long) (end_time - start_time));
   1164  1.1  mrg   if (verbose)
   1165  1.1  mrg     printf ("Complete (took %ld seconds).\n", (long) (end_time - start_time));
   1166  1.1  mrg 
   1167  1.1  mrg   fclose (f);
   1168  1.1  mrg }
   1169  1.1  mrg 
   1170  1.1  mrg 
   1171  1.1  mrg /* Main function */
   1172  1.1  mrg int main (int argc, char *argv[])
   1173  1.1  mrg {
   1174  1.1  mrg   /* Unbuffered so if output is redirected to a file it isn't lost if the
   1175  1.1  mrg      program is killed part way through.  */
   1176  1.1  mrg   setbuf (stdout, NULL);
   1177  1.1  mrg   setbuf (stderr, NULL);
   1178  1.1  mrg 
   1179  1.1  mrg   verbose = argc > 1;
   1180  1.1  mrg 
   1181  1.1  mrg   if (verbose)
   1182  1.1  mrg     printf ("Tuning MPFR (Coffee time?)...\n");
   1183  1.1  mrg 
   1184  1.1  mrg   all ("mparam.h");
   1185  1.1  mrg 
   1186  1.1  mrg   return 0;
   1187  1.1  mrg }
   1188