Home | History | Annotate | Line # | Download | only in tune
tuneup.c revision 1.1.1.1
      1 /* Tune various threshold of MPFR
      2 
      3 Copyright 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
      4 Contributed by the AriC and Caramel projects, INRIA.
      5 
      6 This file is part of the GNU MPFR Library.
      7 
      8 The GNU MPFR Library is free software; you can redistribute it and/or modify
      9 it under the terms of the GNU Lesser General Public License as published by
     10 the Free Software Foundation; either version 3 of the License, or (at your
     11 option) any later version.
     12 
     13 The GNU MPFR Library is distributed in the hope that it will be useful, but
     14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     16 License for more details.
     17 
     18 You should have received a copy of the GNU Lesser General Public License
     19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
     20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
     21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
     22 
     23 #include <stdlib.h>
     24 #include <time.h>
     25 
     26 #define MPFR_NEED_LONGLONG_H
     27 #include "mpfr-impl.h"
     28 
     29 #undef _PROTO
     30 #define _PROTO __GMP_PROTO
     31 #include "speed.h"
     32 
     33 int verbose;
     34 
     35 /* template for an unary function */
     36 /* s->size: precision of both input and output
     37    s->xp  : Mantissa of first input
     38    s->yp  : mantissa of second input                    */
     39 #define SPEED_MPFR_FUNC(mean_fun)                       \
     40   do                                                    \
     41     {                                                   \
     42       unsigned  i;                                      \
     43       mpfr_limb_ptr wp;                                 \
     44       double    t;                                      \
     45       mpfr_t    w, x;                                   \
     46       mp_size_t size;                                   \
     47       MPFR_TMP_DECL (marker);                           \
     48                                                         \
     49       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
     50       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
     51       MPFR_TMP_MARK (marker);                           \
     52                                                         \
     53       size = (s->size-1)/GMP_NUMB_BITS+1;               \
     54       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
     55       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
     56       MPFR_SET_EXP (x, 0);                              \
     57                                                         \
     58       MPFR_TMP_INIT (wp, w, s->size, size);             \
     59                                                         \
     60       speed_operand_src (s, s->xp, size);               \
     61       speed_operand_dst (s, wp, size);                  \
     62       speed_cache_fill (s);                             \
     63                                                         \
     64       speed_starttime ();                               \
     65       i = s->reps;                                      \
     66       do                                                \
     67         mean_fun (w, x, MPFR_RNDN);                     \
     68       while (--i != 0);                                 \
     69       t = speed_endtime ();                             \
     70                                                         \
     71       MPFR_TMP_FREE (marker);                           \
     72       return t;                                         \
     73     }                                                   \
     74   while (0)
     75 
     76 /* same as SPEED_MPFR_FUNC, but for say mpfr_sin_cos (y, z, x, r) */
     77 #define SPEED_MPFR_FUNC2(mean_fun)                      \
     78   do                                                    \
     79     {                                                   \
     80       unsigned  i;                                      \
     81       mpfr_limb_ptr vp, wp;                             \
     82       double    t;                                      \
     83       mpfr_t    v, w, x;                                \
     84       mp_size_t size;                                   \
     85       MPFR_TMP_DECL (marker);                           \
     86                                                         \
     87       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
     88       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
     89       MPFR_TMP_MARK (marker);                           \
     90                                                         \
     91       size = (s->size-1)/GMP_NUMB_BITS+1;               \
     92       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
     93       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
     94       MPFR_SET_EXP (x, 0);                              \
     95                                                         \
     96       MPFR_TMP_INIT (vp, v, s->size, size);             \
     97       MPFR_TMP_INIT (wp, w, s->size, size);             \
     98                                                         \
     99       speed_operand_src (s, s->xp, size);               \
    100       speed_operand_dst (s, vp, size);                  \
    101       speed_operand_dst (s, wp, size);                  \
    102       speed_cache_fill (s);                             \
    103                                                         \
    104       speed_starttime ();                               \
    105       i = s->reps;                                      \
    106       do                                                \
    107         mean_fun (v, w, x, MPFR_RNDN);                  \
    108       while (--i != 0);                                 \
    109       t = speed_endtime ();                             \
    110                                                         \
    111       MPFR_TMP_FREE (marker);                           \
    112       return t;                                         \
    113     }                                                   \
    114   while (0)
    115 
    116 /* template for a function like mpfr_mul */
    117 #define SPEED_MPFR_OP(mean_fun)                         \
    118   do                                                    \
    119     {                                                   \
    120       unsigned  i;                                      \
    121       mpfr_limb_ptr wp;                                 \
    122       double    t;                                      \
    123       mpfr_t    w, x, y;                                \
    124       mp_size_t size;                                   \
    125       MPFR_TMP_DECL (marker);                           \
    126                                                         \
    127       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
    128       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
    129       MPFR_TMP_MARK (marker);                           \
    130                                                         \
    131       size = (s->size-1)/GMP_NUMB_BITS+1;               \
    132       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
    133       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
    134       MPFR_SET_EXP (x, 0);                              \
    135       s->yp[size-1] |= MPFR_LIMB_HIGHBIT;               \
    136       MPFR_TMP_INIT1 (s->yp, y, s->size);               \
    137       MPFR_SET_EXP (y, 0);                              \
    138                                                         \
    139       MPFR_TMP_INIT (wp, w, s->size, size);             \
    140                                                         \
    141       speed_operand_src (s, s->xp, size);               \
    142       speed_operand_src (s, s->yp, size);               \
    143       speed_operand_dst (s, wp, size);                  \
    144       speed_cache_fill (s);                             \
    145                                                         \
    146       speed_starttime ();                               \
    147       i = s->reps;                                      \
    148       do                                                \
    149         mean_fun (w, x, y, MPFR_RNDN);                  \
    150       while (--i != 0);                                 \
    151       t = speed_endtime ();                             \
    152                                                         \
    153       MPFR_TMP_FREE (marker);                           \
    154       return t;                                         \
    155     }                                                   \
    156   while (0)
    157 
    158 /* special template for mpfr_mul(a,b,b) */
    159 #define SPEED_MPFR_SQR(mean_fun)                        \
    160   do                                                    \
    161     {                                                   \
    162       unsigned  i;                                      \
    163       mpfr_limb_ptr wp;                                 \
    164       double    t;                                      \
    165       mpfr_t    w, x;                                   \
    166       mp_size_t size;                                   \
    167       MPFR_TMP_DECL (marker);                           \
    168                                                         \
    169       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
    170       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
    171       MPFR_TMP_MARK (marker);                           \
    172                                                         \
    173       size = (s->size-1)/GMP_NUMB_BITS+1;               \
    174       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
    175       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
    176       MPFR_SET_EXP (x, 0);                              \
    177                                                         \
    178       MPFR_TMP_INIT (wp, w, s->size, size);             \
    179                                                         \
    180       speed_operand_src (s, s->xp, size);               \
    181       speed_operand_dst (s, wp, size);                  \
    182       speed_cache_fill (s);                             \
    183                                                         \
    184       speed_starttime ();                               \
    185       i = s->reps;                                      \
    186       do                                                \
    187         mean_fun (w, x, x, MPFR_RNDN);                  \
    188       while (--i != 0);                                 \
    189       t = speed_endtime ();                             \
    190                                                         \
    191       MPFR_TMP_FREE (marker);                           \
    192       return t;                                         \
    193     }                                                   \
    194   while (0)
    195 
    196 /* s->size: precision of both input and output
    197    s->xp  : Mantissa of first input
    198    s->r   : exponent
    199    s->align_xp : sign (1 means positive, 2 means negative)
    200 */
    201 #define SPEED_MPFR_FUNC_WITH_EXPONENT(mean_fun)         \
    202   do                                                    \
    203     {                                                   \
    204       unsigned  i;                                      \
    205       mpfr_limb_ptr wp;                                 \
    206       double    t;                                      \
    207       mpfr_t    w, x;                                   \
    208       mp_size_t size;                                   \
    209       MPFR_TMP_DECL (marker);                           \
    210                                                         \
    211       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
    212       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
    213       MPFR_TMP_MARK (marker);                           \
    214                                                         \
    215       size = (s->size-1)/GMP_NUMB_BITS+1;               \
    216       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
    217       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
    218       MPFR_SET_EXP (x, s->r);                           \
    219       if (s->align_xp == 2) MPFR_SET_NEG (x);           \
    220                                                         \
    221       MPFR_TMP_INIT (wp, w, s->size, size);             \
    222                                                         \
    223       speed_operand_src (s, s->xp, size);               \
    224       speed_operand_dst (s, wp, size);                  \
    225       speed_cache_fill (s);                             \
    226                                                         \
    227       speed_starttime ();                               \
    228       i = s->reps;                                      \
    229       do                                                \
    230         mean_fun (w, x, MPFR_RNDN);                     \
    231       while (--i != 0);                                 \
    232       t = speed_endtime ();                             \
    233                                                         \
    234       MPFR_TMP_FREE (marker);                           \
    235       return t;                                         \
    236     }                                                   \
    237   while (0)
    238 
    239 /* First we include all the functions we want to tune inside this program.
    240    We can't use the GNU MPFR library since the thresholds are fixed macros. */
    241 
    242 /* Setup mpfr_exp_2 */
    243 mpfr_prec_t mpfr_exp_2_threshold;
    244 #undef  MPFR_EXP_2_THRESHOLD
    245 #define MPFR_EXP_2_THRESHOLD mpfr_exp_2_threshold
    246 #include "exp_2.c"
    247 static double
    248 speed_mpfr_exp_2 (struct speed_params *s)
    249 {
    250   SPEED_MPFR_FUNC (mpfr_exp_2);
    251 }
    252 
    253 /* Setup mpfr_exp */
    254 mpfr_prec_t mpfr_exp_threshold;
    255 #undef  MPFR_EXP_THRESHOLD
    256 #define MPFR_EXP_THRESHOLD mpfr_exp_threshold
    257 #include "exp.c"
    258 static double
    259 speed_mpfr_exp (struct speed_params *s)
    260 {
    261   SPEED_MPFR_FUNC (mpfr_exp);
    262 }
    263 
    264 /* Setup mpfr_sin_cos */
    265 mpfr_prec_t mpfr_sincos_threshold;
    266 #undef MPFR_SINCOS_THRESHOLD
    267 #define MPFR_SINCOS_THRESHOLD mpfr_sincos_threshold
    268 #include "sin_cos.c"
    269 #include "cos.c"
    270 static double
    271 speed_mpfr_sincos (struct speed_params *s)
    272 {
    273   SPEED_MPFR_FUNC2 (mpfr_sin_cos);
    274 }
    275 
    276 /* Setup mpfr_mul, mpfr_sqr and mpfr_div */
    277 mpfr_prec_t mpfr_mul_threshold;
    278 mpfr_prec_t mpfr_sqr_threshold;
    279 mpfr_prec_t mpfr_div_threshold;
    280 #undef  MPFR_MUL_THRESHOLD
    281 #define MPFR_MUL_THRESHOLD mpfr_mul_threshold
    282 #undef  MPFR_SQR_THRESHOLD
    283 #define MPFR_SQR_THRESHOLD mpfr_sqr_threshold
    284 #undef  MPFR_DIV_THRESHOLD
    285 #define MPFR_DIV_THRESHOLD mpfr_div_threshold
    286 #include "mul.c"
    287 #include "div.c"
    288 static double
    289 speed_mpfr_mul (struct speed_params *s)
    290 {
    291   SPEED_MPFR_OP (mpfr_mul);
    292 }
    293 static double
    294 speed_mpfr_sqr (struct speed_params *s)
    295 {
    296   SPEED_MPFR_SQR (mpfr_mul);
    297 }
    298 static double
    299 speed_mpfr_div (struct speed_params *s)
    300 {
    301   SPEED_MPFR_OP (mpfr_div);
    302 }
    303 
    304 /************************************************
    305  * Common functions (inspired by GMP function)  *
    306  ************************************************/
    307 static int
    308 analyze_data (double *dat, int ndat)
    309 {
    310   double  x, min_x;
    311   int     j, min_j;
    312 
    313   x = 0.0;
    314   for (j = 0; j < ndat; j++)
    315     if (dat[j] > 0.0)
    316       x += dat[j];
    317 
    318   min_x = x;
    319   min_j = 0;
    320 
    321   for (j = 0; j < ndat; x -= dat[j], j++)
    322     {
    323       if (x < min_x)
    324         {
    325           min_x = x;
    326           min_j = j;
    327         }
    328     }
    329   return min_j;
    330 }
    331 
    332 static double
    333 mpfr_speed_measure (speed_function_t fun, struct speed_params *s, char *m)
    334 {
    335   double t = -1.0;
    336   int i;
    337   int number_of_iterations = 30;
    338   for (i = 1; i <= number_of_iterations && t == -1.0; i++)
    339     {
    340       t = speed_measure (fun, s);
    341       if ( (t == -1.0) && (i+1 <= number_of_iterations) )
    342         printf("speed_measure failed for size %lu. Trying again... (%d/%d)\n",
    343                s->size, i+1, number_of_iterations);
    344     }
    345   if (t == -1.0)
    346     {
    347       fprintf (stderr, "Failed to measure %s!\n", m);
    348       fprintf (stderr, "If CPU frequency scaling is enabled, please disable it:\n");
    349       fprintf (stderr, "   under Linux: cpufreq-selector -g performance\n");
    350       fprintf (stderr, "On a multi-core processor, you might also try to load all the cores\n");
    351       abort ();
    352     }
    353   return t;
    354 }
    355 
    356 #define THRESHOLD_WINDOW 16
    357 #define THRESHOLD_FINAL_WINDOW 128
    358 static double
    359 domeasure (mpfr_prec_t *threshold,
    360            double (*func) (struct speed_params *),
    361            mpfr_prec_t p)
    362 {
    363   struct speed_params s;
    364   mp_size_t size;
    365   double t1, t2, d;
    366 
    367   s.align_xp = s.align_yp = s.align_wp = 64;
    368   s.size = p;
    369   size = (p - 1)/GMP_NUMB_BITS+1;
    370   s.xp = malloc (2*size*sizeof (mp_limb_t));
    371   if (s.xp == NULL)
    372     {
    373       fprintf (stderr, "Can't allocate memory.\n");
    374       abort ();
    375     }
    376   mpn_random (s.xp, size);
    377   s.yp = s.xp + size;
    378   mpn_random (s.yp, size);
    379   *threshold = MPFR_PREC_MAX;
    380   t1 = mpfr_speed_measure (func, &s, "function 1");
    381   *threshold = 1;
    382   t2 = mpfr_speed_measure (func, &s, "function 2");
    383   free (s.xp);
    384   /* t1 is the time of the first algo (used for low prec) */
    385   if (t2 >= t1)
    386     d = (t2 - t1) / t2;
    387   else
    388     d = (t2 - t1) / t1;
    389   /* d > 0 if we have to use algo 1.
    390      d < 0 if we have to use algo 2 */
    391   return d;
    392 }
    393 
    394 /* Performs measures when both the precision and the point of evaluation
    395    shall vary. s.yp is ignored and not initialized.
    396    It assumes that func depends on three thresholds with a boundary of the
    397    form threshold1*x + threshold2*p = some scaling factor, if x<0,
    398    and  threshold3*x + threshold2*p = some scaling factor, if x>=0.
    399 */
    400 static double
    401 domeasure2 (long int *threshold1, long int *threshold2, long int *threshold3,
    402             double (*func) (struct speed_params *),
    403             mpfr_prec_t p,
    404             mpfr_t x)
    405 {
    406   struct speed_params s;
    407   mp_size_t size;
    408   double t1, t2, d;
    409   mpfr_t xtmp;
    410 
    411   if (MPFR_IS_SINGULAR (x))
    412     {
    413       mpfr_fprintf (stderr, "x=%RNf is not a regular number.\n");
    414       abort ();
    415     }
    416   if (MPFR_IS_NEG (x))
    417     s.align_xp = 2;
    418   else
    419     s.align_xp = 1;
    420 
    421   s.align_yp = s.align_wp = 64;
    422   s.size = p;
    423   size = (p - 1)/GMP_NUMB_BITS+1;
    424 
    425   mpfr_init2 (xtmp, p);
    426   mpn_random (xtmp->_mpfr_d, size);
    427   xtmp->_mpfr_d[size-1] |= MPFR_LIMB_HIGHBIT;
    428   MPFR_SET_EXP (xtmp, -53);
    429   mpfr_add_ui (xtmp, xtmp, 1, MPFR_RNDN);
    430   mpfr_mul (xtmp, xtmp, x, MPFR_RNDN); /* xtmp = x*(1+perturb)       */
    431                                       /* where perturb ~ 2^(-53) is */
    432                                       /* randomly chosen.           */
    433   s.xp = xtmp->_mpfr_d;
    434   s.r = MPFR_GET_EXP (xtmp);
    435 
    436   *threshold1 = 0;
    437   *threshold2 = 0;
    438   *threshold3 = 0;
    439   t1 = mpfr_speed_measure (func, &s, "function 1");
    440 
    441   if (MPFR_IS_NEG (x))
    442     *threshold1 = INT_MIN;
    443   else
    444     *threshold3 = INT_MAX;
    445   *threshold2 = INT_MAX;
    446   t2 = mpfr_speed_measure (func, &s, "function 2");
    447 
    448   /* t1 is the time of the first algo (used for low prec) */
    449   if (t2 >= t1)
    450     d = (t2 - t1) / t2;
    451   else
    452     d = (t2 - t1) / t1;
    453   /* d > 0 if we have to use algo 1.
    454      d < 0 if we have to use algo 2 */
    455   mpfr_clear (xtmp);
    456   return d;
    457 }
    458 
    459 /* Tune a function with a simple THRESHOLD
    460    The function doesn't depend on another threshold.
    461    It assumes that it uses algo1 if p < THRESHOLD
    462    and algo2 otherwise.
    463    if algo2 is better for low prec, and algo1 better for high prec,
    464    the behaviour of this function is undefined. */
    465 static void
    466 tune_simple_func (mpfr_prec_t *threshold,
    467                   double (*func) (struct speed_params *),
    468                   mpfr_prec_t pstart)
    469 {
    470   double measure[THRESHOLD_FINAL_WINDOW+1];
    471   double d;
    472   mpfr_prec_t pstep;
    473   int i, numpos, numneg, try;
    474   mpfr_prec_t pmin, pmax, p;
    475 
    476   /* first look for a lower bound within 10% */
    477   pmin = p = pstart;
    478   d = domeasure (threshold, func, pmin);
    479   if (d < 0.0)
    480     {
    481       if (verbose)
    482         printf ("Oops: even for %lu, algo 2 seems to be faster!\n",
    483                 (unsigned long) pmin);
    484       *threshold = MPFR_PREC_MIN;
    485       return;
    486     }
    487   if (d >= 1.00)
    488     for (;;)
    489       {
    490         d = domeasure (threshold, func, pmin);
    491         if (d < 1.00)
    492           break;
    493         p = pmin;
    494         pmin += pmin/2;
    495       }
    496   pmin = p;
    497   for (;;)
    498     {
    499       d = domeasure (threshold, func, pmin);
    500       if (d < 0.10)
    501         break;
    502       pmin += GMP_NUMB_BITS;
    503     }
    504 
    505   /* then look for an upper bound within 20% */
    506   pmax = pmin * 2;
    507   for (;;)
    508     {
    509       d = domeasure (threshold, func, pmax);
    510       if (d < -0.20)
    511         break;
    512       pmax += pmin / 2; /* don't increase too rapidly */
    513     }
    514 
    515   /* The threshold is between pmin and pmax. Affine them */
    516   try = 0;
    517   while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW)
    518     {
    519       pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1);
    520       if (verbose)
    521         printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep);
    522       p = (pmin + pmax) / 2;
    523       for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++)
    524         {
    525           measure[i] = domeasure (threshold, func,
    526                                   p+(i-THRESHOLD_WINDOW/2)*pstep);
    527           if (measure[i] > 0)
    528             numpos ++;
    529           else if (measure[i] < 0)
    530             numneg ++;
    531         }
    532       if (numpos > numneg)
    533         /* We use more often algo 1 than algo 2 */
    534         pmin = p - THRESHOLD_WINDOW/2*pstep;
    535       else if (numpos < numneg)
    536         pmax = p + THRESHOLD_WINDOW/2*pstep;
    537       else
    538         /* numpos == numneg ... */
    539         if (++ try > 2)
    540           {
    541             *threshold = p;
    542             if (verbose)
    543               printf ("Quick find: %lu\n", *threshold);
    544             return ;
    545           }
    546     }
    547 
    548   /* Final tune... */
    549   if (verbose)
    550     printf ("Finalizing in [%lu, %lu]... ", pmin, pmax);
    551   for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++)
    552     measure[i] = domeasure (threshold, func, pmin+i);
    553   i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1);
    554   *threshold = pmin + i;
    555   if (verbose)
    556     printf ("%lu\n", *threshold);
    557   return;
    558 }
    559 
    560 /* Tune a function which behavior depends on both p and x,
    561    in a given direction.
    562    It assumes that for (x,p) close to zero, algo1 is used
    563    and algo2 is used when (x,p) is far from zero.
    564    If algo2 is better for low prec, and algo1 better for high prec,
    565    the behaviour of this function is undefined.
    566    This tuning function tries couples (x,p) of the form (ell*dirx, ell*dirp)
    567    until it finds a point on the boundary. It returns ell.
    568  */
    569 static void
    570 tune_simple_func_in_some_direction (long int *threshold1,
    571                                     long int *threshold2,
    572                                     long int *threshold3,
    573                                     double (*func) (struct speed_params *),
    574                                     mpfr_prec_t pstart,
    575                                     int dirx, int dirp,
    576                                     mpfr_t xres, mpfr_prec_t *pres)
    577 {
    578   double measure[THRESHOLD_FINAL_WINDOW+1];
    579   double d;
    580   mpfr_prec_t pstep;
    581   int i, numpos, numneg, try;
    582   mpfr_prec_t pmin, pmax, p;
    583   mpfr_t xmin, xmax, x;
    584   mpfr_t ratio;
    585 
    586   mpfr_init2 (ratio, MPFR_SMALL_PRECISION);
    587   mpfr_set_si (ratio, dirx, MPFR_RNDN);
    588   mpfr_div_si (ratio, ratio, dirp, MPFR_RNDN);
    589 
    590   mpfr_init2 (xmin, MPFR_SMALL_PRECISION);
    591   mpfr_init2 (xmax, MPFR_SMALL_PRECISION);
    592   mpfr_init2 (x, MPFR_SMALL_PRECISION);
    593 
    594   /* first look for a lower bound within 10% */
    595   pmin = p = pstart;
    596   mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
    597   mpfr_set (x, xmin, MPFR_RNDN);
    598 
    599   d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
    600   if (d < 0.0)
    601     {
    602       if (verbose)
    603         printf ("Oops: even for %lu, algo 2 seems to be faster!\n",
    604                 (unsigned long) pmin);
    605       *pres = MPFR_PREC_MIN;
    606       mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
    607       mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
    608       return;
    609     }
    610   if (d >= 1.00)
    611     for (;;)
    612       {
    613         d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
    614         if (d < 1.00)
    615           break;
    616         p = pmin;
    617         mpfr_set (x, xmin, MPFR_RNDN);
    618         pmin += pmin/2;
    619         mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
    620       }
    621   pmin = p;
    622   mpfr_set (xmin, x, MPFR_RNDN);
    623   for (;;)
    624     {
    625       d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
    626       if (d < 0.10)
    627         break;
    628       pmin += GMP_NUMB_BITS;
    629       mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
    630     }
    631 
    632   /* then look for an upper bound within 20% */
    633   pmax = pmin * 2;
    634   mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
    635   for (;;)
    636     {
    637       d = domeasure2 (threshold1, threshold2, threshold3, func, pmax, xmax);
    638       if (d < -0.20)
    639         break;
    640       pmax += pmin / 2; /* don't increase too rapidly */
    641       mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
    642     }
    643 
    644   /* The threshold is between pmin and pmax. Affine them */
    645   try = 0;
    646   while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW)
    647     {
    648       pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1);
    649       if (verbose)
    650         printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep);
    651       p = (pmin + pmax) / 2;
    652       mpfr_mul_ui (x, ratio, (unsigned int)p, MPFR_RNDN);
    653       for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++)
    654         {
    655           *pres = p+(i-THRESHOLD_WINDOW/2)*pstep;
    656           mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
    657           measure[i] = domeasure2 (threshold1, threshold2, threshold3,
    658                                    func, *pres, xres);
    659           if (measure[i] > 0)
    660             numpos ++;
    661           else if (measure[i] < 0)
    662             numneg ++;
    663         }
    664       if (numpos > numneg)
    665         {
    666           /* We use more often algo 1 than algo 2 */
    667           pmin = p - THRESHOLD_WINDOW/2*pstep;
    668           mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
    669         }
    670       else if (numpos < numneg)
    671         {
    672           pmax = p + THRESHOLD_WINDOW/2*pstep;
    673           mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
    674         }
    675       else
    676         /* numpos == numneg ... */
    677         if (++ try > 2)
    678           {
    679             *pres = p;
    680             mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
    681             if (verbose)
    682               printf ("Quick find: %lu\n", *pres);
    683             mpfr_clear (ratio);
    684             mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
    685             return ;
    686           }
    687     }
    688 
    689   /* Final tune... */
    690   if (verbose)
    691     printf ("Finalizing in [%lu, %lu]... ", pmin, pmax);
    692   for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++)
    693     {
    694       *pres = pmin+i;
    695       mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
    696       measure[i] = domeasure2 (threshold1, threshold2, threshold3,
    697                                func, *pres, xres);
    698     }
    699   i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1);
    700   *pres = pmin + i;
    701   mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
    702   if (verbose)
    703     printf ("%lu\n", *pres);
    704   mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
    705   return;
    706 }
    707 
    708 /************************************
    709  * Tune Mulders' mulhigh function   *
    710  ************************************/
    711 #define TOLERANCE 1.00
    712 #define MULDERS_TABLE_SIZE 1024
    713 #ifndef MPFR_MULHIGH_SIZE
    714 # define MPFR_MULHIGH_SIZE MULDERS_TABLE_SIZE
    715 #endif
    716 #ifndef MPFR_SQRHIGH_SIZE
    717 # define MPFR_SQRHIGH_SIZE MULDERS_TABLE_SIZE
    718 #endif
    719 #ifndef MPFR_DIVHIGH_SIZE
    720 # define MPFR_DIVHIGH_SIZE MULDERS_TABLE_SIZE
    721 #endif
    722 #define MPFR_MULHIGH_TAB_SIZE MPFR_MULHIGH_SIZE
    723 #define MPFR_SQRHIGH_TAB_SIZE MPFR_SQRHIGH_SIZE
    724 #define MPFR_DIVHIGH_TAB_SIZE MPFR_DIVHIGH_SIZE
    725 #include "mulders.c"
    726 
    727 static double
    728 speed_mpfr_mulhigh (struct speed_params *s)
    729 {
    730   SPEED_ROUTINE_MPN_MUL_N (mpfr_mulhigh_n);
    731 }
    732 
    733 static double
    734 speed_mpfr_sqrhigh (struct speed_params *s)
    735 {
    736   SPEED_ROUTINE_MPN_SQR (mpfr_sqrhigh_n);
    737 }
    738 
    739 static double
    740 speed_mpfr_divhigh (struct speed_params *s)
    741 {
    742   SPEED_ROUTINE_MPN_DC_DIVREM_CALL (mpfr_divhigh_n (q, a, d, s->size));
    743 }
    744 
    745 #define MAX_STEPS 513 /* maximum number of values of k tried for a given n */
    746 
    747 /* Tune mpfr_mulhigh_n for size n */
    748 static mp_size_t
    749 tune_mul_mulders_upto (mp_size_t n)
    750 {
    751   struct speed_params s;
    752   mp_size_t k, kbest, step;
    753   double t, tbest;
    754   MPFR_TMP_DECL (marker);
    755 
    756   if (n == 0)
    757     return -1;
    758 
    759   MPFR_TMP_MARK (marker);
    760   s.align_xp = s.align_yp = s.align_wp = 64;
    761   s.size = n;
    762   s.xp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
    763   s.yp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
    764   mpn_random (s.xp, n);
    765   mpn_random (s.yp, n);
    766 
    767   /* Check k == -1, mpn_mul_basecase */
    768   mulhigh_ktab[n] = -1;
    769   kbest = -1;
    770   tbest = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
    771 
    772   /* Check k == 0, mpn_mulhigh_n_basecase */
    773   mulhigh_ktab[n] = 0;
    774   t = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
    775   if (t * TOLERANCE < tbest)
    776     kbest = 0, tbest = t;
    777 
    778   /* Check Mulders with cutoff point k */
    779   step = 1 + n / (2 * MAX_STEPS);
    780   /* we need k >= (n+3)/2, which translates into k >= (n+4)/2 in C */
    781   for (k = (n + 4) / 2 ; k < n ; k += step)
    782     {
    783       mulhigh_ktab[n] = k;
    784       t = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
    785       if (t * TOLERANCE < tbest)
    786         kbest = k, tbest = t;
    787     }
    788 
    789   mulhigh_ktab[n] = kbest;
    790 
    791   MPFR_TMP_FREE (marker);
    792   return kbest;
    793 }
    794 
    795 /* Tune mpfr_sqrhigh_n for size n */
    796 static mp_size_t
    797 tune_sqr_mulders_upto (mp_size_t n)
    798 {
    799   struct speed_params s;
    800   mp_size_t k, kbest, step;
    801   double t, tbest;
    802   MPFR_TMP_DECL (marker);
    803 
    804   if (n == 0)
    805     return -1;
    806 
    807   MPFR_TMP_MARK (marker);
    808   s.align_xp = s.align_wp = 64;
    809   s.size = n;
    810   s.xp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
    811   mpn_random (s.xp, n);
    812 
    813   /* Check k == -1, mpn_sqr_basecase */
    814   sqrhigh_ktab[n] = -1;
    815   kbest = -1;
    816   tbest = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
    817 
    818   /* Check k == 0, mpfr_mulhigh_n_basecase */
    819   sqrhigh_ktab[n] = 0;
    820   t = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
    821   if (t * TOLERANCE < tbest)
    822     kbest = 0, tbest = t;
    823 
    824   /* Check Mulders */
    825   step = 1 + n / (2 * MAX_STEPS);
    826   /* we need k >= (n+3)/2, which translates into k >= (n+4)/2 in C */
    827   for (k = (n + 4) / 2 ; k < n ; k += step)
    828     {
    829       sqrhigh_ktab[n] = k;
    830       t = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
    831       if (t * TOLERANCE < tbest)
    832         kbest = k, tbest = t;
    833     }
    834 
    835   sqrhigh_ktab[n] = kbest;
    836 
    837   MPFR_TMP_FREE (marker);
    838   return kbest;
    839 }
    840 
    841 /* Tune mpfr_divhigh_n for size n */
    842 static mp_size_t
    843 tune_div_mulders_upto (mp_size_t n)
    844 {
    845   struct speed_params s;
    846   mp_size_t k, kbest, step;
    847   double t, tbest;
    848   MPFR_TMP_DECL (marker);
    849 
    850   if (n == 0)
    851     return 0;
    852 
    853   MPFR_TMP_MARK (marker);
    854   s.align_xp = s.align_yp = s.align_wp = s.align_wp2 = 64;
    855   s.size = n;
    856   s.xp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
    857   s.yp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
    858   mpn_random (s.xp, n);
    859   mpn_random (s.yp, n);
    860 
    861   /* Check k == n, i.e., mpn_divrem */
    862   divhigh_ktab[n] = n;
    863   kbest = n;
    864   tbest = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
    865 
    866   /* Check k == 0, i.e., mpfr_divhigh_n_basecase */
    867 #if defined(WANT_GMP_INTERNALS) && defined(HAVE___GMPN_SBPI1_DIVAPPR_Q)
    868   if (n > 2) /* mpn_sbpi1_divappr_q requires dn > 2 */
    869 #endif
    870     {
    871       divhigh_ktab[n] = 0;
    872       t = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
    873       if (t * TOLERANCE < tbest)
    874         kbest = 0, tbest = t;
    875     }
    876 
    877   /* Check Mulders */
    878   step = 1 + n / (2 * MAX_STEPS);
    879   /* we should have (n+3)/2 <= k < n, which translates into
    880      (n+4)/2 <= k < n in C */
    881   for (k = (n + 4) / 2 ; k < n ; k += step)
    882     {
    883       divhigh_ktab[n] = k;
    884       t = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
    885       if (t * TOLERANCE < tbest)
    886         kbest = k, tbest = t;
    887     }
    888 
    889   divhigh_ktab[n] = kbest;
    890 
    891   MPFR_TMP_FREE (marker);
    892 
    893   return kbest;
    894 }
    895 
    896 static void
    897 tune_mul_mulders (FILE *f)
    898 {
    899   mp_size_t k;
    900 
    901   if (verbose)
    902     printf ("Tuning mpfr_mulhigh_n[%d]", (int) MPFR_MULHIGH_TAB_SIZE);
    903   fprintf (f, "#define MPFR_MULHIGH_TAB  \\\n ");
    904   for (k = 0 ; k < MPFR_MULHIGH_TAB_SIZE ; k++)
    905     {
    906       fprintf (f, "%d", (int) tune_mul_mulders_upto (k));
    907       if (k != MPFR_MULHIGH_TAB_SIZE-1)
    908         fputc (',', f);
    909       if ((k+1) % 16 == 0)
    910         fprintf (f, " \\\n ");
    911       if (verbose)
    912         putchar ('.');
    913     }
    914   fprintf (f, " \n");
    915   if (verbose)
    916     putchar ('\n');
    917 }
    918 
    919 static void
    920 tune_sqr_mulders (FILE *f)
    921 {
    922   mp_size_t k;
    923 
    924   if (verbose)
    925     printf ("Tuning mpfr_sqrhigh_n[%d]", (int) MPFR_SQRHIGH_TAB_SIZE);
    926   fprintf (f, "#define MPFR_SQRHIGH_TAB  \\\n ");
    927   for (k = 0 ; k < MPFR_SQRHIGH_TAB_SIZE ; k++)
    928     {
    929       fprintf (f, "%d", (int) tune_sqr_mulders_upto (k));
    930       if (k != MPFR_SQRHIGH_TAB_SIZE-1)
    931         fputc (',', f);
    932       if ((k+1) % 16 == 0)
    933         fprintf (f, " \\\n ");
    934       if (verbose)
    935         putchar ('.');
    936     }
    937   fprintf (f, " \n");
    938   if (verbose)
    939     putchar ('\n');
    940 }
    941 
    942 static void
    943 tune_div_mulders (FILE *f)
    944 {
    945   mp_size_t k;
    946 
    947   if (verbose)
    948     printf ("Tuning mpfr_divhigh_n[%d]", (int) MPFR_DIVHIGH_TAB_SIZE);
    949   fprintf (f, "#define MPFR_DIVHIGH_TAB  \\\n ");
    950   for (k = 0 ; k < MPFR_DIVHIGH_TAB_SIZE ; k++)
    951     {
    952       fprintf (f, "%d", (int) tune_div_mulders_upto (k));
    953       if (k != MPFR_DIVHIGH_TAB_SIZE - 1)
    954         fputc (',', f);
    955       if ((k+1) % 16 == 0)
    956         fprintf (f, " /*%zu-%zu*/ \\\n ", k - 15, k);
    957       if (verbose)
    958         putchar ('.');
    959     }
    960   fprintf (f, " \n");
    961   if (verbose)
    962     putchar ('\n');
    963 }
    964 
    965 /*******************************************************
    966  *            Tuning functions for mpfr_ai             *
    967  *******************************************************/
    968 
    969 long int mpfr_ai_threshold1;
    970 long int mpfr_ai_threshold2;
    971 long int mpfr_ai_threshold3;
    972 #undef  MPFR_AI_THRESHOLD1
    973 #define MPFR_AI_THRESHOLD1 mpfr_ai_threshold1
    974 #undef  MPFR_AI_THRESHOLD2
    975 #define MPFR_AI_THRESHOLD2 mpfr_ai_threshold2
    976 #undef  MPFR_AI_THRESHOLD3
    977 #define MPFR_AI_THRESHOLD3 mpfr_ai_threshold3
    978 
    979 #include "ai.c"
    980 
    981 static double
    982 speed_mpfr_ai (struct speed_params *s)
    983 {
    984   SPEED_MPFR_FUNC_WITH_EXPONENT (mpfr_ai);
    985 }
    986 
    987 
    988 /*******************************************************
    989  *            Tune all the threshold of MPFR           *
    990  * Warning: tune the function in their dependent order!*
    991  *******************************************************/
    992 static void
    993 all (const char *filename)
    994 {
    995   FILE *f;
    996   time_t  start_time, end_time;
    997   struct tm  *tp;
    998   mpfr_t x1, x2, x3, tmp1, tmp2;
    999   mpfr_prec_t p1, p2, p3;
   1000 
   1001   f = fopen (filename, "w");
   1002   if (f == NULL)
   1003     {
   1004       fprintf (stderr, "Can't open file '%s' for writing.\n", filename);
   1005       abort ();
   1006     }
   1007 
   1008   speed_time_init ();
   1009   if (verbose)
   1010     {
   1011       printf ("Using: %s\n", speed_time_string);
   1012       printf ("speed_precision %d", speed_precision);
   1013       if (speed_unittime == 1.0)
   1014         printf (", speed_unittime 1 cycle");
   1015       else
   1016         printf (", speed_unittime %.2e secs", speed_unittime);
   1017       if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
   1018         printf (", CPU freq unknown\n");
   1019       else
   1020         printf (", CPU freq %.2f MHz\n\n", 1e-6/speed_cycletime);
   1021     }
   1022 
   1023   time (&start_time);
   1024   tp = localtime (&start_time);
   1025   fprintf (f, "/* Generated by MPFR's tuneup.c, %d-%02d-%02d, ",
   1026           tp->tm_year+1900, tp->tm_mon+1, tp->tm_mday);
   1027 
   1028 #ifdef __ICC
   1029   fprintf (f, "icc %d.%d.%d */\n", __ICC / 100, __ICC / 10 % 10, __ICC % 10);
   1030 #elif defined(__GNUC__)
   1031 #ifdef __GNUC_PATCHLEVEL__
   1032   fprintf (f, "gcc %d.%d.%d */\n", __GNUC__, __GNUC_MINOR__,
   1033            __GNUC_PATCHLEVEL__);
   1034 #else
   1035   fprintf (f, "gcc %d.%d */\n", __GNUC__, __GNUC_MINOR__);
   1036 #endif
   1037 #elif defined (__SUNPRO_C)
   1038   fprintf (f, "Sun C %d.%d */\n", __SUNPRO_C / 0x100, __SUNPRO_C % 0x100);
   1039 #elif defined (__sgi) && defined (_COMPILER_VERSION)
   1040   fprintf (f, "MIPSpro C %d.%d.%d */\n",
   1041            _COMPILER_VERSION / 100,
   1042            _COMPILER_VERSION / 10 % 10,
   1043            _COMPILER_VERSION % 10);
   1044 #elif defined (__DECC) && defined (__DECC_VER)
   1045   fprintf (f, "DEC C %d */\n", __DECC_VER);
   1046 #else
   1047   fprintf (f, "system compiler */\n");
   1048 #endif
   1049   fprintf (f, "\n\n");
   1050   fprintf (f, "#ifndef MPFR_TUNE_CASE\n");
   1051   fprintf (f, "#define MPFR_TUNE_CASE \"src/mparam.h\"\n");
   1052   fprintf (f, "#endif\n\n");
   1053 
   1054   /* Tune mulhigh */
   1055   tune_mul_mulders (f);
   1056 
   1057   /* Tune sqrhigh */
   1058   tune_sqr_mulders (f);
   1059 
   1060   /* Tune divhigh */
   1061   tune_div_mulders (f);
   1062   fflush (f);
   1063 
   1064   /* Tune mpfr_mul (threshold is in limbs, but it doesn't matter too much) */
   1065   if (verbose)
   1066     printf ("Tuning mpfr_mul...\n");
   1067   tune_simple_func (&mpfr_mul_threshold, speed_mpfr_mul,
   1068                     2*GMP_NUMB_BITS+1);
   1069   fprintf (f, "#define MPFR_MUL_THRESHOLD %lu /* limbs */\n",
   1070            (unsigned long) (mpfr_mul_threshold - 1) / GMP_NUMB_BITS + 1);
   1071 
   1072   /* Tune mpfr_sqr (threshold is in limbs, but it doesn't matter too much) */
   1073   if (verbose)
   1074     printf ("Tuning mpfr_sqr...\n");
   1075   tune_simple_func (&mpfr_sqr_threshold, speed_mpfr_sqr,
   1076                     2*GMP_NUMB_BITS+1);
   1077   fprintf (f, "#define MPFR_SQR_THRESHOLD %lu /* limbs */\n",
   1078            (unsigned long) (mpfr_sqr_threshold - 1) / GMP_NUMB_BITS + 1);
   1079 
   1080   /* Tune mpfr_div (threshold is in limbs, but it doesn't matter too much) */
   1081   if (verbose)
   1082     printf ("Tuning mpfr_div...\n");
   1083   tune_simple_func (&mpfr_div_threshold, speed_mpfr_div,
   1084                     2*GMP_NUMB_BITS+1);
   1085   fprintf (f, "#define MPFR_DIV_THRESHOLD %lu /* limbs */\n",
   1086            (unsigned long) (mpfr_div_threshold - 1) / GMP_NUMB_BITS + 1);
   1087 
   1088   /* Tune mpfr_exp_2 */
   1089   if (verbose)
   1090     printf ("Tuning mpfr_exp_2...\n");
   1091   tune_simple_func (&mpfr_exp_2_threshold, speed_mpfr_exp_2, GMP_NUMB_BITS);
   1092   fprintf (f, "#define MPFR_EXP_2_THRESHOLD %lu /* bits */\n",
   1093            (unsigned long) mpfr_exp_2_threshold);
   1094 
   1095   /* Tune mpfr_exp */
   1096   if (verbose)
   1097     printf ("Tuning mpfr_exp...\n");
   1098   tune_simple_func (&mpfr_exp_threshold, speed_mpfr_exp,
   1099                     MPFR_PREC_MIN+3*GMP_NUMB_BITS);
   1100   fprintf (f, "#define MPFR_EXP_THRESHOLD %lu /* bits */\n",
   1101            (unsigned long) mpfr_exp_threshold);
   1102 
   1103   /* Tune mpfr_sin_cos */
   1104   if (verbose)
   1105     printf ("Tuning mpfr_sin_cos...\n");
   1106   tune_simple_func (&mpfr_sincos_threshold, speed_mpfr_sincos,
   1107                     MPFR_PREC_MIN+3*GMP_NUMB_BITS);
   1108   fprintf (f, "#define MPFR_SINCOS_THRESHOLD %lu /* bits */\n",
   1109            (unsigned long) mpfr_sincos_threshold);
   1110 
   1111   /* Tune mpfr_ai */
   1112   if (verbose)
   1113     printf ("Tuning mpfr_ai...\n");
   1114   mpfr_init2 (x1, MPFR_SMALL_PRECISION);
   1115   mpfr_init2 (x2, MPFR_SMALL_PRECISION);
   1116   mpfr_init2 (x3, MPFR_SMALL_PRECISION);
   1117   mpfr_init2 (tmp1, MPFR_SMALL_PRECISION);
   1118   mpfr_init2 (tmp2, MPFR_SMALL_PRECISION);
   1119 
   1120   tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
   1121                                       &mpfr_ai_threshold3, speed_mpfr_ai,
   1122                                       MPFR_PREC_MIN+GMP_NUMB_BITS,
   1123                                       -60, 200, x1, &p1);
   1124   tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
   1125                                       &mpfr_ai_threshold3, speed_mpfr_ai,
   1126                                       MPFR_PREC_MIN+GMP_NUMB_BITS,
   1127                                       -20, 500, x2, &p2);
   1128   tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
   1129                                       &mpfr_ai_threshold3, speed_mpfr_ai,
   1130                                       MPFR_PREC_MIN+GMP_NUMB_BITS,
   1131                                       40, 200, x3, &p3);
   1132 
   1133   mpfr_mul_ui (tmp1, x2, (unsigned long)p1, MPFR_RNDN);
   1134   mpfr_mul_ui (tmp2, x1, (unsigned long)p2, MPFR_RNDN);
   1135   mpfr_sub (tmp1, tmp1, tmp2, MPFR_RNDN);
   1136   mpfr_div_ui (tmp1, tmp1, MPFR_AI_SCALE, MPFR_RNDN);
   1137 
   1138   mpfr_set_ui (tmp2, (unsigned long)p1, MPFR_RNDN);
   1139   mpfr_sub_ui (tmp2, tmp2, (unsigned long)p2, MPFR_RNDN);
   1140   mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN);
   1141   mpfr_ai_threshold1 = mpfr_get_si (tmp2, MPFR_RNDN);
   1142 
   1143   mpfr_sub (tmp2, x2, x1, MPFR_RNDN);
   1144   mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN);
   1145   mpfr_ai_threshold2 = mpfr_get_si (tmp2, MPFR_RNDN);
   1146 
   1147   mpfr_set_ui (tmp1, (unsigned long)p3, MPFR_RNDN);
   1148   mpfr_mul_si (tmp1, tmp1, mpfr_ai_threshold2, MPFR_RNDN);
   1149   mpfr_ui_sub (tmp1, MPFR_AI_SCALE, tmp1, MPFR_RNDN);
   1150   mpfr_div (tmp1, tmp1, x3, MPFR_RNDN);
   1151   mpfr_ai_threshold3 = mpfr_get_si (tmp1, MPFR_RNDN);
   1152 
   1153   fprintf (f, "#define MPFR_AI_THRESHOLD1 %ld /* threshold for negative input of mpfr_ai */\n", mpfr_ai_threshold1);
   1154   fprintf (f, "#define MPFR_AI_THRESHOLD2 %ld\n", mpfr_ai_threshold2);
   1155   fprintf (f, "#define MPFR_AI_THRESHOLD3 %ld\n", mpfr_ai_threshold3);
   1156 
   1157   mpfr_clear (x1); mpfr_clear (x2); mpfr_clear (x3);
   1158   mpfr_clear (tmp1); mpfr_clear (tmp2);
   1159 
   1160   /* End of tuning */
   1161   time (&end_time);
   1162   fprintf (f, "/* Tuneup completed successfully, took %ld seconds */\n",
   1163            (long) (end_time - start_time));
   1164   if (verbose)
   1165     printf ("Complete (took %ld seconds).\n", (long) (end_time - start_time));
   1166 
   1167   fclose (f);
   1168 }
   1169 
   1170 
   1171 /* Main function */
   1172 int main (int argc, char *argv[])
   1173 {
   1174   /* Unbuffered so if output is redirected to a file it isn't lost if the
   1175      program is killed part way through.  */
   1176   setbuf (stdout, NULL);
   1177   setbuf (stderr, NULL);
   1178 
   1179   verbose = argc > 1;
   1180 
   1181   if (verbose)
   1182     printf ("Tuning MPFR (Coffee time?)...\n");
   1183 
   1184   all ("mparam.h");
   1185 
   1186   return 0;
   1187 }
   1188