Home | History | Annotate | Line # | Download | only in tune
tuneup.c revision 1.1.1.2.2.1
      1 /* Tune various threshold of MPFR
      2 
      3 Copyright 2005-2018 Free Software Foundation, Inc.
      4 Contributed by the AriC and Caramba projects, INRIA.
      5 
      6 This file is part of the GNU MPFR Library.
      7 
      8 The GNU MPFR Library is free software; you can redistribute it and/or modify
      9 it under the terms of the GNU Lesser General Public License as published by
     10 the Free Software Foundation; either version 3 of the License, or (at your
     11 option) any later version.
     12 
     13 The GNU MPFR Library is distributed in the hope that it will be useful, but
     14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     16 License for more details.
     17 
     18 You should have received a copy of the GNU Lesser General Public License
     19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
     20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
     21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
     22 
     23 #include <stdlib.h>
     24 #include <time.h>
     25 
     26 #define MPFR_NEED_LONGLONG_H
     27 #include "mpfr-impl.h"
     28 
     29 #undef _PROTO
     30 #define _PROTO __GMP_PROTO
     31 #include "speed.h"
     32 
     33 /* Undefine static assertion system */
     34 #undef MPFR_DECL_STATIC_ASSERT
     35 #undef MPFR_STAT_STATIC_ASSERT
     36 #define MPFR_DECL_STATIC_ASSERT(a) MPFR_ASSERTN(a)
     37 #define MPFR_STAT_STATIC_ASSERT(a) MPFR_ASSERTN(a)
     38 
     39 int verbose;
     40 
     41 /* template for an unary function */
     42 /* s->size: precision of both input and output
     43    s->xp  : Mantissa of first input
     44    s->yp  : mantissa of second input                    */
     45 #define SPEED_MPFR_FUNC(mean_fun)                       \
     46   do                                                    \
     47     {                                                   \
     48       unsigned  i;                                      \
     49       mpfr_limb_ptr wp;                                 \
     50       double    t;                                      \
     51       mpfr_t    w, x;                                   \
     52       mp_size_t size;                                   \
     53       MPFR_TMP_DECL (marker);                           \
     54                                                         \
     55       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
     56       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
     57       MPFR_TMP_MARK (marker);                           \
     58                                                         \
     59       size = (s->size-1)/GMP_NUMB_BITS+1;               \
     60       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
     61       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
     62       MPFR_SET_EXP (x, 0);                              \
     63                                                         \
     64       MPFR_TMP_INIT (wp, w, s->size, size);             \
     65                                                         \
     66       speed_operand_src (s, s->xp, size);               \
     67       speed_operand_dst (s, wp, size);                  \
     68       speed_cache_fill (s);                             \
     69                                                         \
     70       speed_starttime ();                               \
     71       i = s->reps;                                      \
     72       do                                                \
     73         mean_fun (w, x, MPFR_RNDN);                     \
     74       while (--i != 0);                                 \
     75       t = speed_endtime ();                             \
     76                                                         \
     77       MPFR_TMP_FREE (marker);                           \
     78       return t;                                         \
     79     }                                                   \
     80   while (0)
     81 
     82 /* same as SPEED_MPFR_FUNC, but for say mpfr_sin_cos (y, z, x, r) */
     83 #define SPEED_MPFR_FUNC2(mean_fun)                      \
     84   do                                                    \
     85     {                                                   \
     86       unsigned  i;                                      \
     87       mpfr_limb_ptr vp, wp;                             \
     88       double    t;                                      \
     89       mpfr_t    v, w, x;                                \
     90       mp_size_t size;                                   \
     91       MPFR_TMP_DECL (marker);                           \
     92                                                         \
     93       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
     94       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
     95       MPFR_TMP_MARK (marker);                           \
     96                                                         \
     97       size = (s->size-1)/GMP_NUMB_BITS+1;               \
     98       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
     99       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
    100       MPFR_SET_EXP (x, 0);                              \
    101                                                         \
    102       MPFR_TMP_INIT (vp, v, s->size, size);             \
    103       MPFR_TMP_INIT (wp, w, s->size, size);             \
    104                                                         \
    105       speed_operand_src (s, s->xp, size);               \
    106       speed_operand_dst (s, vp, size);                  \
    107       speed_operand_dst (s, wp, size);                  \
    108       speed_cache_fill (s);                             \
    109                                                         \
    110       speed_starttime ();                               \
    111       i = s->reps;                                      \
    112       do                                                \
    113         mean_fun (v, w, x, MPFR_RNDN);                  \
    114       while (--i != 0);                                 \
    115       t = speed_endtime ();                             \
    116                                                         \
    117       MPFR_TMP_FREE (marker);                           \
    118       return t;                                         \
    119     }                                                   \
    120   while (0)
    121 
    122 /* template for a function like mpfr_mul */
    123 #define SPEED_MPFR_OP(mean_fun)                         \
    124   do                                                    \
    125     {                                                   \
    126       unsigned  i;                                      \
    127       mpfr_limb_ptr wp;                                 \
    128       double    t;                                      \
    129       mpfr_t    w, x, y;                                \
    130       mp_size_t size;                                   \
    131       MPFR_TMP_DECL (marker);                           \
    132                                                         \
    133       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
    134       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
    135       MPFR_TMP_MARK (marker);                           \
    136                                                         \
    137       size = (s->size-1)/GMP_NUMB_BITS+1;               \
    138       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
    139       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
    140       MPFR_SET_EXP (x, 0);                              \
    141       s->yp[size-1] |= MPFR_LIMB_HIGHBIT;               \
    142       MPFR_TMP_INIT1 (s->yp, y, s->size);               \
    143       MPFR_SET_EXP (y, 0);                              \
    144                                                         \
    145       MPFR_TMP_INIT (wp, w, s->size, size);             \
    146                                                         \
    147       speed_operand_src (s, s->xp, size);               \
    148       speed_operand_src (s, s->yp, size);               \
    149       speed_operand_dst (s, wp, size);                  \
    150       speed_cache_fill (s);                             \
    151                                                         \
    152       speed_starttime ();                               \
    153       i = s->reps;                                      \
    154       do                                                \
    155         mean_fun (w, x, y, MPFR_RNDN);                  \
    156       while (--i != 0);                                 \
    157       t = speed_endtime ();                             \
    158                                                         \
    159       MPFR_TMP_FREE (marker);                           \
    160       return t;                                         \
    161     }                                                   \
    162   while (0)
    163 
    164 /* special template for mpfr_mul(a,b,b) */
    165 #define SPEED_MPFR_SQR(mean_fun)                        \
    166   do                                                    \
    167     {                                                   \
    168       unsigned  i;                                      \
    169       mpfr_limb_ptr wp;                                 \
    170       double    t;                                      \
    171       mpfr_t    w, x;                                   \
    172       mp_size_t size;                                   \
    173       MPFR_TMP_DECL (marker);                           \
    174                                                         \
    175       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
    176       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
    177       MPFR_TMP_MARK (marker);                           \
    178                                                         \
    179       size = (s->size-1)/GMP_NUMB_BITS+1;               \
    180       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
    181       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
    182       MPFR_SET_EXP (x, 0);                              \
    183                                                         \
    184       MPFR_TMP_INIT (wp, w, s->size, size);             \
    185                                                         \
    186       speed_operand_src (s, s->xp, size);               \
    187       speed_operand_dst (s, wp, size);                  \
    188       speed_cache_fill (s);                             \
    189                                                         \
    190       speed_starttime ();                               \
    191       i = s->reps;                                      \
    192       do                                                \
    193         mean_fun (w, x, x, MPFR_RNDN);                  \
    194       while (--i != 0);                                 \
    195       t = speed_endtime ();                             \
    196                                                         \
    197       MPFR_TMP_FREE (marker);                           \
    198       return t;                                         \
    199     }                                                   \
    200   while (0)
    201 
    202 /* s->size: precision of both input and output
    203    s->xp  : Mantissa of first input
    204    s->r   : exponent
    205    s->align_xp : sign (1 means positive, 2 means negative)
    206 */
    207 #define SPEED_MPFR_FUNC_WITH_EXPONENT(mean_fun)         \
    208   do                                                    \
    209     {                                                   \
    210       unsigned  i;                                      \
    211       mpfr_limb_ptr wp;                                 \
    212       double    t;                                      \
    213       mpfr_t    w, x;                                   \
    214       mp_size_t size;                                   \
    215       MPFR_TMP_DECL (marker);                           \
    216                                                         \
    217       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
    218       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
    219       MPFR_TMP_MARK (marker);                           \
    220                                                         \
    221       size = (s->size-1)/GMP_NUMB_BITS+1;               \
    222       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
    223       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
    224       MPFR_SET_EXP (x, s->r);                           \
    225       if (s->align_xp == 2) MPFR_SET_NEG (x);           \
    226                                                         \
    227       MPFR_TMP_INIT (wp, w, s->size, size);             \
    228                                                         \
    229       speed_operand_src (s, s->xp, size);               \
    230       speed_operand_dst (s, wp, size);                  \
    231       speed_cache_fill (s);                             \
    232                                                         \
    233       speed_starttime ();                               \
    234       i = s->reps;                                      \
    235       do                                                \
    236         mean_fun (w, x, MPFR_RNDN);                     \
    237       while (--i != 0);                                 \
    238       t = speed_endtime ();                             \
    239                                                         \
    240       MPFR_TMP_FREE (marker);                           \
    241       return t;                                         \
    242     }                                                   \
    243   while (0)
    244 
    245 /* First we include all the functions we want to tune inside this program.
    246    We can't use the GNU MPFR library since the thresholds are fixed macros. */
    247 
    248 /* Setup mpfr_exp_2 */
    249 mpfr_prec_t mpfr_exp_2_threshold;
    250 #undef  MPFR_EXP_2_THRESHOLD
    251 #define MPFR_EXP_2_THRESHOLD mpfr_exp_2_threshold
    252 #include "exp_2.c"
    253 static double
    254 speed_mpfr_exp_2 (struct speed_params *s)
    255 {
    256   SPEED_MPFR_FUNC (mpfr_exp_2);
    257 }
    258 
    259 /* Setup mpfr_exp */
    260 mpfr_prec_t mpfr_exp_threshold;
    261 #undef  MPFR_EXP_THRESHOLD
    262 #define MPFR_EXP_THRESHOLD mpfr_exp_threshold
    263 #include "exp.c"
    264 static double
    265 speed_mpfr_exp (struct speed_params *s)
    266 {
    267   SPEED_MPFR_FUNC (mpfr_exp);
    268 }
    269 
    270 /* Setup mpfr_sin_cos */
    271 mpfr_prec_t mpfr_sincos_threshold;
    272 #undef MPFR_SINCOS_THRESHOLD
    273 #define MPFR_SINCOS_THRESHOLD mpfr_sincos_threshold
    274 #include "sin_cos.c"
    275 #include "cos.c"
    276 static double
    277 speed_mpfr_sincos (struct speed_params *s)
    278 {
    279   SPEED_MPFR_FUNC2 (mpfr_sin_cos);
    280 }
    281 
    282 /* Setup mpfr_mul, mpfr_sqr and mpfr_div */
    283 /* Since mpfr_mul() deals with both mul and sqr, and contains an assert that
    284    the thresholds are >= 1, we initialize both values to 1 to avoid a failed
    285    assertion (let's recall that static assertions are replaced by normal
    286    dynamic assertions here). */
    287 mpfr_prec_t mpfr_mul_threshold = 1;
    288 mpfr_prec_t mpfr_sqr_threshold = 1;
    289 mpfr_prec_t mpfr_div_threshold;
    290 #undef  MPFR_MUL_THRESHOLD
    291 #define MPFR_MUL_THRESHOLD mpfr_mul_threshold
    292 #undef  MPFR_SQR_THRESHOLD
    293 #define MPFR_SQR_THRESHOLD mpfr_sqr_threshold
    294 #undef  MPFR_DIV_THRESHOLD
    295 #define MPFR_DIV_THRESHOLD mpfr_div_threshold
    296 #include "mul.c"
    297 #include "div.c"
    298 static double
    299 speed_mpfr_mul (struct speed_params *s)
    300 {
    301   SPEED_MPFR_OP (mpfr_mul);
    302 }
    303 static double
    304 speed_mpfr_sqr (struct speed_params *s)
    305 {
    306   SPEED_MPFR_SQR (mpfr_mul);
    307 }
    308 static double
    309 speed_mpfr_div (struct speed_params *s)
    310 {
    311   SPEED_MPFR_OP (mpfr_div);
    312 }
    313 
    314 /************************************************
    315  * Common functions (inspired by GMP function)  *
    316  ************************************************/
    317 static int
    318 analyze_data (double *dat, int ndat)
    319 {
    320   double  x, min_x;
    321   int     j, min_j;
    322 
    323   x = 0.0;
    324   for (j = 0; j < ndat; j++)
    325     if (dat[j] > 0.0)
    326       x += dat[j];
    327 
    328   min_x = x;
    329   min_j = 0;
    330 
    331   for (j = 0; j < ndat; x -= dat[j], j++)
    332     {
    333       if (x < min_x)
    334         {
    335           min_x = x;
    336           min_j = j;
    337         }
    338     }
    339   return min_j;
    340 }
    341 
    342 static double
    343 mpfr_speed_measure (speed_function_t fun, struct speed_params *s, char *m)
    344 {
    345   double t = -1.0;
    346   int i;
    347   int number_of_iterations = 30;
    348   for (i = 1; i <= number_of_iterations && t == -1.0; i++)
    349     {
    350       t = speed_measure (fun, s);
    351       if ( (t == -1.0) && (i+1 <= number_of_iterations) )
    352         printf("speed_measure failed for size %lu. Trying again... (%d/%d)\n",
    353                s->size, i+1, number_of_iterations);
    354     }
    355   if (t == -1.0)
    356     {
    357       fprintf (stderr, "Failed to measure %s!\n", m);
    358       fprintf (stderr, "If CPU frequency scaling is enabled, please disable it:\n");
    359       fprintf (stderr, "   under Linux: cpufreq-selector -g performance\n");
    360       fprintf (stderr, "On a multi-core processor, you might also try to load all the cores\n");
    361       abort ();
    362     }
    363   return t;
    364 }
    365 
    366 #define THRESHOLD_WINDOW 16
    367 #define THRESHOLD_FINAL_WINDOW 128
    368 static double
    369 domeasure (mpfr_prec_t *threshold,
    370            double (*func) (struct speed_params *),
    371            mpfr_prec_t p)
    372 {
    373   struct speed_params s;
    374   mp_size_t size;
    375   double t1, t2, d;
    376 
    377   s.align_xp = s.align_yp = s.align_wp = 64;
    378   s.size = p;
    379   size = (p - 1)/GMP_NUMB_BITS+1;
    380   s.xp = malloc (2*size*sizeof (mp_limb_t));
    381   if (s.xp == NULL)
    382     {
    383       fprintf (stderr, "Can't allocate memory.\n");
    384       abort ();
    385     }
    386   mpn_random (s.xp, size);
    387   s.yp = s.xp + size;
    388   mpn_random (s.yp, size);
    389   *threshold = MPFR_PREC_MAX;
    390   t1 = mpfr_speed_measure (func, &s, "function 1");
    391   *threshold = 1;
    392   t2 = mpfr_speed_measure (func, &s, "function 2");
    393   free (s.xp);
    394   /* t1 is the time of the first algo (used for low prec) */
    395   if (t2 >= t1)
    396     d = (t2 - t1) / t2;
    397   else
    398     d = (t2 - t1) / t1;
    399   /* d > 0 if we have to use algo 1.
    400      d < 0 if we have to use algo 2 */
    401   return d;
    402 }
    403 
    404 /* Performs measures when both the precision and the point of evaluation
    405    shall vary. s.yp is ignored and not initialized.
    406    It assumes that func depends on three thresholds with a boundary of the
    407    form threshold1*x + threshold2*p = some scaling factor, if x<0,
    408    and  threshold3*x + threshold2*p = some scaling factor, if x>=0.
    409 */
    410 static double
    411 domeasure2 (long int *threshold1, long int *threshold2, long int *threshold3,
    412             double (*func) (struct speed_params *),
    413             mpfr_prec_t p,
    414             mpfr_t x)
    415 {
    416   struct speed_params s;
    417   mp_size_t size;
    418   double t1, t2, d;
    419   mpfr_t xtmp;
    420 
    421   if (MPFR_IS_SINGULAR (x))
    422     {
    423       mpfr_fprintf (stderr, "x=%RNf is not a regular number.\n");
    424       abort ();
    425     }
    426   if (MPFR_IS_NEG (x))
    427     s.align_xp = 2;
    428   else
    429     s.align_xp = 1;
    430 
    431   s.align_yp = s.align_wp = 64;
    432   s.size = p;
    433   size = (p - 1)/GMP_NUMB_BITS+1;
    434 
    435   mpfr_init2 (xtmp, p);
    436   mpn_random (xtmp->_mpfr_d, size);
    437   xtmp->_mpfr_d[size-1] |= MPFR_LIMB_HIGHBIT;
    438   MPFR_SET_EXP (xtmp, -53);
    439   mpfr_add_ui (xtmp, xtmp, 1, MPFR_RNDN);
    440   mpfr_mul (xtmp, xtmp, x, MPFR_RNDN); /* xtmp = x*(1+perturb)       */
    441                                       /* where perturb ~ 2^(-53) is */
    442                                       /* randomly chosen.           */
    443   s.xp = xtmp->_mpfr_d;
    444   s.r = MPFR_GET_EXP (xtmp);
    445 
    446   *threshold1 = 0;
    447   *threshold2 = 0;
    448   *threshold3 = 0;
    449   t1 = mpfr_speed_measure (func, &s, "function 1");
    450 
    451   if (MPFR_IS_NEG (x))
    452     *threshold1 = INT_MIN;
    453   else
    454     *threshold3 = INT_MAX;
    455   *threshold2 = INT_MAX;
    456   t2 = mpfr_speed_measure (func, &s, "function 2");
    457 
    458   /* t1 is the time of the first algo (used for low prec) */
    459   if (t2 >= t1)
    460     d = (t2 - t1) / t2;
    461   else
    462     d = (t2 - t1) / t1;
    463   /* d > 0 if we have to use algo 1.
    464      d < 0 if we have to use algo 2 */
    465   mpfr_clear (xtmp);
    466   return d;
    467 }
    468 
    469 /* Tune a function with a simple THRESHOLD
    470    The function doesn't depend on another threshold.
    471    It assumes that it uses algo1 if p < THRESHOLD
    472    and algo2 otherwise.
    473    if algo2 is better for low prec, and algo1 better for high prec,
    474    the behaviour of this function is undefined. */
    475 static void
    476 tune_simple_func (mpfr_prec_t *threshold,
    477                   double (*func) (struct speed_params *),
    478                   mpfr_prec_t pstart)
    479 {
    480   double measure[THRESHOLD_FINAL_WINDOW+1];
    481   double d;
    482   mpfr_prec_t pstep;
    483   int i, numpos, numneg, try;
    484   mpfr_prec_t pmin, pmax, p;
    485 
    486   /* first look for a lower bound within 10% */
    487   pmin = p = pstart;
    488   d = domeasure (threshold, func, pmin);
    489   if (d < 0.0)
    490     {
    491       if (verbose)
    492         printf ("Oops: even for %lu, algo 2 seems to be faster!\n",
    493                 (unsigned long) pmin);
    494       *threshold = MPFR_PREC_MIN;
    495       return;
    496     }
    497   if (d >= 1.00)
    498     for (;;)
    499       {
    500         d = domeasure (threshold, func, pmin);
    501         if (d < 1.00)
    502           break;
    503         p = pmin;
    504         pmin += pmin/2;
    505       }
    506   pmin = p;
    507   for (;;)
    508     {
    509       d = domeasure (threshold, func, pmin);
    510       if (d < 0.10)
    511         break;
    512       pmin += GMP_NUMB_BITS;
    513     }
    514 
    515   /* then look for an upper bound within 20% */
    516   pmax = pmin * 2;
    517   for (;;)
    518     {
    519       d = domeasure (threshold, func, pmax);
    520       if (d < -0.20)
    521         break;
    522       pmax += pmin / 2; /* don't increase too rapidly */
    523     }
    524 
    525   /* The threshold is between pmin and pmax. Affine them */
    526   try = 0;
    527   while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW)
    528     {
    529       pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1);
    530       if (verbose)
    531         printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep);
    532       p = (pmin + pmax) / 2;
    533       for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++)
    534         {
    535           measure[i] = domeasure (threshold, func,
    536                                   p+(i-THRESHOLD_WINDOW/2)*pstep);
    537           if (measure[i] > 0)
    538             numpos ++;
    539           else if (measure[i] < 0)
    540             numneg ++;
    541         }
    542       if (numpos > numneg)
    543         /* We use more often algo 1 than algo 2 */
    544         pmin = p - THRESHOLD_WINDOW/2*pstep;
    545       else if (numpos < numneg)
    546         pmax = p + THRESHOLD_WINDOW/2*pstep;
    547       else
    548         /* numpos == numneg ... */
    549         if (++ try > 2)
    550           {
    551             *threshold = p;
    552             if (verbose)
    553               printf ("Quick find: %lu\n", *threshold);
    554             return ;
    555           }
    556     }
    557 
    558   /* Final tune... */
    559   if (verbose)
    560     printf ("Finalizing in [%lu, %lu]... ", pmin, pmax);
    561   for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++)
    562     measure[i] = domeasure (threshold, func, pmin+i);
    563   i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1);
    564   *threshold = pmin + i;
    565   if (verbose)
    566     printf ("%lu\n", *threshold);
    567   return;
    568 }
    569 
    570 /* Tune a function which behavior depends on both p and x,
    571    in a given direction.
    572    It assumes that for (x,p) close to zero, algo1 is used
    573    and algo2 is used when (x,p) is far from zero.
    574    If algo2 is better for low prec, and algo1 better for high prec,
    575    the behavior of this function is undefined.
    576    This tuning function tries couples (x,p) of the form (ell*dirx, ell*dirp)
    577    until it finds a point on the boundary. It returns ell.
    578  */
    579 static void
    580 tune_simple_func_in_some_direction (long int *threshold1,
    581                                     long int *threshold2,
    582                                     long int *threshold3,
    583                                     double (*func) (struct speed_params *),
    584                                     mpfr_prec_t pstart,
    585                                     int dirx, int dirp,
    586                                     mpfr_t xres, mpfr_prec_t *pres)
    587 {
    588   double measure[THRESHOLD_FINAL_WINDOW+1];
    589   double d;
    590   mpfr_prec_t pstep;
    591   int i, numpos, numneg, try;
    592   mpfr_prec_t pmin, pmax, p;
    593   mpfr_t xmin, xmax, x;
    594   mpfr_t ratio;
    595 
    596   mpfr_init2 (ratio, MPFR_SMALL_PRECISION);
    597   mpfr_set_si (ratio, dirx, MPFR_RNDN);
    598   mpfr_div_si (ratio, ratio, dirp, MPFR_RNDN);
    599 
    600   mpfr_init2 (xmin, MPFR_SMALL_PRECISION);
    601   mpfr_init2 (xmax, MPFR_SMALL_PRECISION);
    602   mpfr_init2 (x, MPFR_SMALL_PRECISION);
    603 
    604   /* first look for a lower bound within 10% */
    605   pmin = p = pstart;
    606   mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
    607   mpfr_set (x, xmin, MPFR_RNDN);
    608 
    609   d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
    610   if (d < 0.0)
    611     {
    612       if (verbose)
    613         printf ("Oops: even for %lu, algo 2 seems to be faster!\n",
    614                 (unsigned long) pmin);
    615       *pres = MPFR_PREC_MIN;
    616       mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
    617       mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
    618       return;
    619     }
    620   if (d >= 1.00)
    621     for (;;)
    622       {
    623         d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
    624         if (d < 1.00)
    625           break;
    626         p = pmin;
    627         mpfr_set (x, xmin, MPFR_RNDN);
    628         pmin += pmin/2;
    629         mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
    630       }
    631   pmin = p;
    632   mpfr_set (xmin, x, MPFR_RNDN);
    633   for (;;)
    634     {
    635       d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
    636       if (d < 0.10)
    637         break;
    638       pmin += GMP_NUMB_BITS;
    639       mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
    640     }
    641 
    642   /* then look for an upper bound within 20% */
    643   pmax = pmin * 2;
    644   mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
    645   for (;;)
    646     {
    647       d = domeasure2 (threshold1, threshold2, threshold3, func, pmax, xmax);
    648       if (d < -0.20)
    649         break;
    650       pmax += pmin / 2; /* don't increase too rapidly */
    651       mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
    652     }
    653 
    654   /* The threshold is between pmin and pmax. Affine them */
    655   try = 0;
    656   while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW)
    657     {
    658       pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1);
    659       if (verbose)
    660         printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep);
    661       p = (pmin + pmax) / 2;
    662       mpfr_mul_ui (x, ratio, (unsigned int)p, MPFR_RNDN);
    663       for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++)
    664         {
    665           *pres = p+(i-THRESHOLD_WINDOW/2)*pstep;
    666           mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
    667           measure[i] = domeasure2 (threshold1, threshold2, threshold3,
    668                                    func, *pres, xres);
    669           if (measure[i] > 0)
    670             numpos ++;
    671           else if (measure[i] < 0)
    672             numneg ++;
    673         }
    674       if (numpos > numneg)
    675         {
    676           /* We use more often algo 1 than algo 2 */
    677           pmin = p - THRESHOLD_WINDOW/2*pstep;
    678           mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
    679         }
    680       else if (numpos < numneg)
    681         {
    682           pmax = p + THRESHOLD_WINDOW/2*pstep;
    683           mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
    684         }
    685       else
    686         /* numpos == numneg ... */
    687         if (++ try > 2)
    688           {
    689             *pres = p;
    690             mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
    691             if (verbose)
    692               printf ("Quick find: %lu\n", *pres);
    693             mpfr_clear (ratio);
    694             mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
    695             return ;
    696           }
    697     }
    698 
    699   /* Final tune... */
    700   if (verbose)
    701     printf ("Finalizing in [%lu, %lu]... ", pmin, pmax);
    702   for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++)
    703     {
    704       *pres = pmin+i;
    705       mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
    706       measure[i] = domeasure2 (threshold1, threshold2, threshold3,
    707                                func, *pres, xres);
    708     }
    709   i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1);
    710   *pres = pmin + i;
    711   mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
    712   if (verbose)
    713     printf ("%lu\n", *pres);
    714   mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
    715   return;
    716 }
    717 
    718 /************************************
    719  * Tune Mulders' mulhigh function   *
    720  ************************************/
    721 #define TOLERANCE 1.00
    722 #define MULDERS_TABLE_SIZE 1024
    723 #ifndef MPFR_MULHIGH_SIZE
    724 # define MPFR_MULHIGH_SIZE MULDERS_TABLE_SIZE
    725 #endif
    726 #ifndef MPFR_SQRHIGH_SIZE
    727 # define MPFR_SQRHIGH_SIZE MULDERS_TABLE_SIZE
    728 #endif
    729 #ifndef MPFR_DIVHIGH_SIZE
    730 # define MPFR_DIVHIGH_SIZE MULDERS_TABLE_SIZE
    731 #endif
    732 #define MPFR_MULHIGH_TAB_SIZE MPFR_MULHIGH_SIZE
    733 #define MPFR_SQRHIGH_TAB_SIZE MPFR_SQRHIGH_SIZE
    734 #define MPFR_DIVHIGH_TAB_SIZE MPFR_DIVHIGH_SIZE
    735 #include "mulders.c"
    736 
    737 static double
    738 speed_mpfr_mulhigh (struct speed_params *s)
    739 {
    740   SPEED_ROUTINE_MPN_MUL_N (mpfr_mulhigh_n);
    741 }
    742 
    743 static double
    744 speed_mpfr_sqrhigh (struct speed_params *s)
    745 {
    746   SPEED_ROUTINE_MPN_SQR (mpfr_sqrhigh_n);
    747 }
    748 
    749 static double
    750 speed_mpfr_divhigh (struct speed_params *s)
    751 {
    752   SPEED_ROUTINE_MPN_DC_DIVREM_CALL (mpfr_divhigh_n (q, a, d, s->size));
    753 }
    754 
    755 #define MAX_STEPS 513 /* maximum number of values of k tried for a given n */
    756 
    757 /* Tune mpfr_mulhigh_n for size n */
    758 static mp_size_t
    759 tune_mul_mulders_upto (mp_size_t n)
    760 {
    761   struct speed_params s;
    762   mp_size_t k, kbest, step;
    763   double t, tbest;
    764   MPFR_TMP_DECL (marker);
    765 
    766   if (n == 0)
    767     return -1;
    768 
    769   MPFR_TMP_MARK (marker);
    770   s.align_xp = s.align_yp = s.align_wp = 64;
    771   s.size = n;
    772   s.xp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
    773   s.yp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
    774   mpn_random (s.xp, n);
    775   mpn_random (s.yp, n);
    776 
    777   /* Check k == -1, mpn_mul_basecase */
    778   mulhigh_ktab[n] = -1;
    779   kbest = -1;
    780   tbest = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
    781 
    782   /* Check k == 0, mpn_mulhigh_n_basecase */
    783   mulhigh_ktab[n] = 0;
    784   t = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
    785   if (t * TOLERANCE < tbest)
    786     kbest = 0, tbest = t;
    787 
    788   /* Check Mulders with cutoff point k */
    789   step = 1 + n / (2 * MAX_STEPS);
    790   /* we need k >= (n+3)/2, which translates into k >= (n+4)/2 in C */
    791   for (k = (n + 4) / 2 ; k < n ; k += step)
    792     {
    793       mulhigh_ktab[n] = k;
    794       t = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
    795       if (t * TOLERANCE < tbest)
    796         kbest = k, tbest = t;
    797     }
    798 
    799   mulhigh_ktab[n] = kbest;
    800 
    801   MPFR_TMP_FREE (marker);
    802   return kbest;
    803 }
    804 
    805 /* Tune mpfr_sqrhigh_n for size n */
    806 static mp_size_t
    807 tune_sqr_mulders_upto (mp_size_t n)
    808 {
    809   struct speed_params s;
    810   mp_size_t k, kbest, step;
    811   double t, tbest;
    812   MPFR_TMP_DECL (marker);
    813 
    814   if (n == 0)
    815     return -1;
    816 
    817   MPFR_TMP_MARK (marker);
    818   s.align_xp = s.align_wp = 64;
    819   s.size = n;
    820   s.xp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
    821   mpn_random (s.xp, n);
    822 
    823   /* Check k == -1, mpn_sqr_basecase */
    824   sqrhigh_ktab[n] = -1;
    825   kbest = -1;
    826   tbest = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
    827 
    828   /* Check k == 0, mpfr_mulhigh_n_basecase */
    829   sqrhigh_ktab[n] = 0;
    830   t = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
    831   if (t * TOLERANCE < tbest)
    832     kbest = 0, tbest = t;
    833 
    834   /* Check Mulders */
    835   step = 1 + n / (2 * MAX_STEPS);
    836   /* we need k >= (n+3)/2, which translates into k >= (n+4)/2 in C */
    837   for (k = (n + 4) / 2 ; k < n ; k += step)
    838     {
    839       sqrhigh_ktab[n] = k;
    840       t = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
    841       if (t * TOLERANCE < tbest)
    842         kbest = k, tbest = t;
    843     }
    844 
    845   sqrhigh_ktab[n] = kbest;
    846 
    847   MPFR_TMP_FREE (marker);
    848   return kbest;
    849 }
    850 
    851 /* Tune mpfr_divhigh_n for size n */
    852 static mp_size_t
    853 tune_div_mulders_upto (mp_size_t n)
    854 {
    855   struct speed_params s;
    856   mp_size_t k, kbest, step;
    857   double t, tbest;
    858   MPFR_TMP_DECL (marker);
    859 
    860   if (n == 0)
    861     return 0;
    862 
    863   MPFR_TMP_MARK (marker);
    864   s.align_xp = s.align_yp = s.align_wp = s.align_wp2 = 64;
    865   s.size = n;
    866   s.xp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
    867   s.yp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
    868   mpn_random (s.xp, n);
    869   mpn_random (s.yp, n);
    870 
    871   /* Check k == n, i.e., mpn_divrem */
    872   divhigh_ktab[n] = n;
    873   kbest = n;
    874   tbest = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
    875 
    876   /* Check k == 0, i.e., mpfr_divhigh_n_basecase */
    877 #if defined(WANT_GMP_INTERNALS) && defined(HAVE___GMPN_SBPI1_DIVAPPR_Q)
    878   if (n > 2) /* mpn_sbpi1_divappr_q requires dn > 2 */
    879 #endif
    880     {
    881       divhigh_ktab[n] = 0;
    882       t = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
    883       if (t * TOLERANCE < tbest)
    884         kbest = 0, tbest = t;
    885     }
    886 
    887   /* Check Mulders */
    888   step = 1 + n / (2 * MAX_STEPS);
    889   /* we should have (n+3)/2 <= k < n, which translates into
    890      (n+4)/2 <= k < n in C */
    891   for (k = (n + 4) / 2 ; k < n ; k += step)
    892     {
    893       divhigh_ktab[n] = k;
    894       t = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
    895       if (t * TOLERANCE < tbest)
    896         kbest = k, tbest = t;
    897     }
    898 
    899   divhigh_ktab[n] = kbest;
    900 
    901   MPFR_TMP_FREE (marker);
    902 
    903   return kbest;
    904 }
    905 
    906 static void
    907 tune_mul_mulders (FILE *f)
    908 {
    909   mp_size_t k;
    910 
    911   if (verbose)
    912     printf ("Tuning mpfr_mulhigh_n[%d]", (int) MPFR_MULHIGH_TAB_SIZE);
    913   fprintf (f, "#define MPFR_MULHIGH_TAB  \\\n ");
    914   for (k = 0 ; k < MPFR_MULHIGH_TAB_SIZE ; k++)
    915     {
    916       fprintf (f, "%d", (int) tune_mul_mulders_upto (k));
    917       if (k != MPFR_MULHIGH_TAB_SIZE-1)
    918         fputc (',', f);
    919       if ((k+1) % 16 == 0)
    920         fprintf (f, " \\\n ");
    921       if (verbose)
    922         putchar ('.');
    923     }
    924   fprintf (f, " \n");
    925   if (verbose)
    926     putchar ('\n');
    927 }
    928 
    929 static void
    930 tune_sqr_mulders (FILE *f)
    931 {
    932   mp_size_t k;
    933 
    934   if (verbose)
    935     printf ("Tuning mpfr_sqrhigh_n[%d]", (int) MPFR_SQRHIGH_TAB_SIZE);
    936   fprintf (f, "#define MPFR_SQRHIGH_TAB  \\\n ");
    937   for (k = 0 ; k < MPFR_SQRHIGH_TAB_SIZE ; k++)
    938     {
    939       fprintf (f, "%d", (int) tune_sqr_mulders_upto (k));
    940       if (k != MPFR_SQRHIGH_TAB_SIZE-1)
    941         fputc (',', f);
    942       if ((k+1) % 16 == 0)
    943         fprintf (f, " \\\n ");
    944       if (verbose)
    945         putchar ('.');
    946     }
    947   fprintf (f, " \n");
    948   if (verbose)
    949     putchar ('\n');
    950 }
    951 
    952 static void
    953 tune_div_mulders (FILE *f)
    954 {
    955   mp_size_t k;
    956 
    957   if (verbose)
    958     printf ("Tuning mpfr_divhigh_n[%d]", (int) MPFR_DIVHIGH_TAB_SIZE);
    959   fprintf (f, "#define MPFR_DIVHIGH_TAB  \\\n ");
    960   for (k = 0 ; k < MPFR_DIVHIGH_TAB_SIZE ; k++)
    961     {
    962       fprintf (f, "%d", (int) tune_div_mulders_upto (k));
    963       if (k != MPFR_DIVHIGH_TAB_SIZE - 1)
    964         fputc (',', f);
    965       if ((k+1) % 16 == 0)
    966         fprintf (f, " /*%zu-%zu*/ \\\n ", (size_t) k - 15, (size_t) k);
    967       if (verbose)
    968         putchar ('.');
    969     }
    970   fprintf (f, " \n");
    971   if (verbose)
    972     putchar ('\n');
    973 }
    974 
    975 /*******************************************************
    976  *            Tuning functions for mpfr_ai             *
    977  *******************************************************/
    978 
    979 long int mpfr_ai_threshold1;
    980 long int mpfr_ai_threshold2;
    981 long int mpfr_ai_threshold3;
    982 #undef  MPFR_AI_THRESHOLD1
    983 #define MPFR_AI_THRESHOLD1 mpfr_ai_threshold1
    984 #undef  MPFR_AI_THRESHOLD2
    985 #define MPFR_AI_THRESHOLD2 mpfr_ai_threshold2
    986 #undef  MPFR_AI_THRESHOLD3
    987 #define MPFR_AI_THRESHOLD3 mpfr_ai_threshold3
    988 
    989 #include "ai.c"
    990 
    991 static double
    992 speed_mpfr_ai (struct speed_params *s)
    993 {
    994   SPEED_MPFR_FUNC_WITH_EXPONENT (mpfr_ai);
    995 }
    996 
    997 
    998 /*******************************************************
    999  *            Tune all the threshold of MPFR           *
   1000  * Warning: tune the function in their dependent order!*
   1001  *******************************************************/
   1002 static void
   1003 all (const char *filename)
   1004 {
   1005   FILE *f;
   1006   time_t  start_time, end_time;
   1007   struct tm  *tp;
   1008   mpfr_t x1, x2, x3, tmp1, tmp2;
   1009   mpfr_prec_t p1, p2, p3;
   1010 
   1011   f = fopen (filename, "w");
   1012   if (f == NULL)
   1013     {
   1014       fprintf (stderr, "Can't open file '%s' for writing.\n", filename);
   1015       abort ();
   1016     }
   1017 
   1018   speed_time_init ();
   1019   if (verbose)
   1020     {
   1021       printf ("Using: %s\n", speed_time_string);
   1022       printf ("speed_precision %d", speed_precision);
   1023       if (speed_unittime == 1.0)
   1024         printf (", speed_unittime 1 cycle");
   1025       else
   1026         printf (", speed_unittime %.2e secs", speed_unittime);
   1027       if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
   1028         printf (", CPU freq unknown\n");
   1029       else
   1030         printf (", CPU freq %.2f MHz\n\n", 1e-6/speed_cycletime);
   1031     }
   1032 
   1033   time (&start_time);
   1034   tp = localtime (&start_time);
   1035   fprintf (f, "/* Generated by MPFR's tuneup.c, %d-%02d-%02d, ",
   1036           tp->tm_year+1900, tp->tm_mon+1, tp->tm_mday);
   1037 
   1038 #ifdef __ICC
   1039   fprintf (f, "icc %d.%d.%d */\n", __ICC / 100, __ICC / 10 % 10, __ICC % 10);
   1040 #elif defined(__GNUC__)
   1041 #ifdef __GNUC_PATCHLEVEL__
   1042   fprintf (f, "gcc %d.%d.%d */\n", __GNUC__, __GNUC_MINOR__,
   1043            __GNUC_PATCHLEVEL__);
   1044 #else
   1045   fprintf (f, "gcc %d.%d */\n", __GNUC__, __GNUC_MINOR__);
   1046 #endif
   1047 #elif defined (__SUNPRO_C)
   1048   fprintf (f, "Sun C %d.%d */\n", __SUNPRO_C / 0x100, __SUNPRO_C % 0x100);
   1049 #elif defined (__sgi) && defined (_COMPILER_VERSION)
   1050   fprintf (f, "MIPSpro C %d.%d.%d */\n",
   1051            _COMPILER_VERSION / 100,
   1052            _COMPILER_VERSION / 10 % 10,
   1053            _COMPILER_VERSION % 10);
   1054 #elif defined (__DECC) && defined (__DECC_VER)
   1055   fprintf (f, "DEC C %d */\n", __DECC_VER);
   1056 #else
   1057   fprintf (f, "system compiler */\n");
   1058 #endif
   1059   fprintf (f, "\n\n");
   1060   fprintf (f, "#ifndef MPFR_TUNE_CASE\n");
   1061   fprintf (f, "#define MPFR_TUNE_CASE \"src/mparam.h\"\n");
   1062   fprintf (f, "#endif\n\n");
   1063 
   1064   /* Tune mulhigh */
   1065   tune_mul_mulders (f);
   1066 
   1067   /* Tune sqrhigh */
   1068   tune_sqr_mulders (f);
   1069 
   1070   /* Tune divhigh */
   1071   tune_div_mulders (f);
   1072   fflush (f);
   1073 
   1074   /* Tune mpfr_mul (threshold is in limbs, but it doesn't matter too much) */
   1075   if (verbose)
   1076     printf ("Tuning mpfr_mul...\n");
   1077   tune_simple_func (&mpfr_mul_threshold, speed_mpfr_mul,
   1078                     2*GMP_NUMB_BITS+1);
   1079   fprintf (f, "#define MPFR_MUL_THRESHOLD %lu /* limbs */\n",
   1080            (unsigned long) (mpfr_mul_threshold - 1) / GMP_NUMB_BITS + 1);
   1081 
   1082   /* Tune mpfr_sqr (threshold is in limbs, but it doesn't matter too much) */
   1083   if (verbose)
   1084     printf ("Tuning mpfr_sqr...\n");
   1085   tune_simple_func (&mpfr_sqr_threshold, speed_mpfr_sqr,
   1086                     2*GMP_NUMB_BITS+1);
   1087   fprintf (f, "#define MPFR_SQR_THRESHOLD %lu /* limbs */\n",
   1088            (unsigned long) (mpfr_sqr_threshold - 1) / GMP_NUMB_BITS + 1);
   1089 
   1090   /* Tune mpfr_div (threshold is in limbs, but it doesn't matter too much) */
   1091   if (verbose)
   1092     printf ("Tuning mpfr_div...\n");
   1093   tune_simple_func (&mpfr_div_threshold, speed_mpfr_div,
   1094                     2*GMP_NUMB_BITS+1);
   1095   fprintf (f, "#define MPFR_DIV_THRESHOLD %lu /* limbs */\n",
   1096            (unsigned long) (mpfr_div_threshold - 1) / GMP_NUMB_BITS + 1);
   1097 
   1098   /* Tune mpfr_exp_2 */
   1099   if (verbose)
   1100     printf ("Tuning mpfr_exp_2...\n");
   1101   tune_simple_func (&mpfr_exp_2_threshold, speed_mpfr_exp_2, GMP_NUMB_BITS);
   1102   fprintf (f, "#define MPFR_EXP_2_THRESHOLD %lu /* bits */\n",
   1103            (unsigned long) mpfr_exp_2_threshold);
   1104 
   1105   /* Tune mpfr_exp */
   1106   if (verbose)
   1107     printf ("Tuning mpfr_exp...\n");
   1108   tune_simple_func (&mpfr_exp_threshold, speed_mpfr_exp,
   1109                     MPFR_PREC_MIN+3*GMP_NUMB_BITS);
   1110   fprintf (f, "#define MPFR_EXP_THRESHOLD %lu /* bits */\n",
   1111            (unsigned long) mpfr_exp_threshold);
   1112 
   1113   /* Tune mpfr_sin_cos */
   1114   if (verbose)
   1115     printf ("Tuning mpfr_sin_cos...\n");
   1116   tune_simple_func (&mpfr_sincos_threshold, speed_mpfr_sincos,
   1117                     MPFR_PREC_MIN+3*GMP_NUMB_BITS);
   1118   fprintf (f, "#define MPFR_SINCOS_THRESHOLD %lu /* bits */\n",
   1119            (unsigned long) mpfr_sincos_threshold);
   1120 
   1121   /* Tune mpfr_ai */
   1122   if (verbose)
   1123     printf ("Tuning mpfr_ai...\n");
   1124   mpfr_init2 (x1, MPFR_SMALL_PRECISION);
   1125   mpfr_init2 (x2, MPFR_SMALL_PRECISION);
   1126   mpfr_init2 (x3, MPFR_SMALL_PRECISION);
   1127   mpfr_init2 (tmp1, MPFR_SMALL_PRECISION);
   1128   mpfr_init2 (tmp2, MPFR_SMALL_PRECISION);
   1129 
   1130   tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
   1131                                       &mpfr_ai_threshold3, speed_mpfr_ai,
   1132                                       MPFR_PREC_MIN+GMP_NUMB_BITS,
   1133                                       -60, 200, x1, &p1);
   1134   tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
   1135                                       &mpfr_ai_threshold3, speed_mpfr_ai,
   1136                                       MPFR_PREC_MIN+GMP_NUMB_BITS,
   1137                                       -20, 500, x2, &p2);
   1138   tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
   1139                                       &mpfr_ai_threshold3, speed_mpfr_ai,
   1140                                       MPFR_PREC_MIN+GMP_NUMB_BITS,
   1141                                       40, 200, x3, &p3);
   1142 
   1143   mpfr_mul_ui (tmp1, x2, (unsigned long)p1, MPFR_RNDN);
   1144   mpfr_mul_ui (tmp2, x1, (unsigned long)p2, MPFR_RNDN);
   1145   mpfr_sub (tmp1, tmp1, tmp2, MPFR_RNDN);
   1146   mpfr_div_ui (tmp1, tmp1, MPFR_AI_SCALE, MPFR_RNDN);
   1147 
   1148   mpfr_set_ui (tmp2, (unsigned long)p1, MPFR_RNDN);
   1149   mpfr_sub_ui (tmp2, tmp2, (unsigned long)p2, MPFR_RNDN);
   1150   mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN);
   1151   mpfr_ai_threshold1 = mpfr_get_si (tmp2, MPFR_RNDN);
   1152 
   1153   mpfr_sub (tmp2, x2, x1, MPFR_RNDN);
   1154   mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN);
   1155   mpfr_ai_threshold2 = mpfr_get_si (tmp2, MPFR_RNDN);
   1156 
   1157   mpfr_set_ui (tmp1, (unsigned long)p3, MPFR_RNDN);
   1158   mpfr_mul_si (tmp1, tmp1, mpfr_ai_threshold2, MPFR_RNDN);
   1159   mpfr_ui_sub (tmp1, MPFR_AI_SCALE, tmp1, MPFR_RNDN);
   1160   mpfr_div (tmp1, tmp1, x3, MPFR_RNDN);
   1161   mpfr_ai_threshold3 = mpfr_get_si (tmp1, MPFR_RNDN);
   1162 
   1163   fprintf (f, "#define MPFR_AI_THRESHOLD1 %ld /* threshold for negative input of mpfr_ai */\n", mpfr_ai_threshold1);
   1164   fprintf (f, "#define MPFR_AI_THRESHOLD2 %ld\n", mpfr_ai_threshold2);
   1165   fprintf (f, "#define MPFR_AI_THRESHOLD3 %ld\n", mpfr_ai_threshold3);
   1166 
   1167   mpfr_clear (x1); mpfr_clear (x2); mpfr_clear (x3);
   1168   mpfr_clear (tmp1); mpfr_clear (tmp2);
   1169 
   1170   /* End of tuning */
   1171   time (&end_time);
   1172   fprintf (f, "/* Tuneup completed successfully, took %ld seconds */\n",
   1173            (long) (end_time - start_time));
   1174   if (verbose)
   1175     printf ("Complete (took %ld seconds).\n", (long) (end_time - start_time));
   1176 
   1177   fclose (f);
   1178 }
   1179 
   1180 
   1181 /* Main function */
   1182 int main (int argc, char *argv[])
   1183 {
   1184   /* Unbuffered so if output is redirected to a file it isn't lost if the
   1185      program is killed part way through.  */
   1186   setbuf (stdout, NULL);
   1187   setbuf (stderr, NULL);
   1188 
   1189   verbose = argc > 1;
   1190 
   1191   if (verbose)
   1192     printf ("Tuning MPFR (Coffee time?)...\n");
   1193 
   1194   all ("mparam.h");
   1195 
   1196   return 0;
   1197 }
   1198