Home | History | Annotate | Line # | Download | only in tune
tuneup.c revision 1.1.1.2.4.1
      1          1.1       mrg /* Tune various threshold of MPFR
      2          1.1       mrg 
      3  1.1.1.2.4.1  christos Copyright 2005-2018 Free Software Foundation, Inc.
      4      1.1.1.2       mrg Contributed by the AriC and Caramba projects, INRIA.
      5          1.1       mrg 
      6          1.1       mrg This file is part of the GNU MPFR Library.
      7          1.1       mrg 
      8          1.1       mrg The GNU MPFR Library is free software; you can redistribute it and/or modify
      9          1.1       mrg it under the terms of the GNU Lesser General Public License as published by
     10          1.1       mrg the Free Software Foundation; either version 3 of the License, or (at your
     11          1.1       mrg option) any later version.
     12          1.1       mrg 
     13          1.1       mrg The GNU MPFR Library is distributed in the hope that it will be useful, but
     14          1.1       mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     15          1.1       mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     16          1.1       mrg License for more details.
     17          1.1       mrg 
     18          1.1       mrg You should have received a copy of the GNU Lesser General Public License
     19          1.1       mrg along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
     20          1.1       mrg http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
     21          1.1       mrg 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
     22          1.1       mrg 
     23          1.1       mrg #include <stdlib.h>
     24          1.1       mrg #include <time.h>
     25          1.1       mrg 
     26          1.1       mrg #define MPFR_NEED_LONGLONG_H
     27          1.1       mrg #include "mpfr-impl.h"
     28          1.1       mrg 
     29          1.1       mrg #undef _PROTO
     30          1.1       mrg #define _PROTO __GMP_PROTO
     31          1.1       mrg #include "speed.h"
     32          1.1       mrg 
     33  1.1.1.2.4.1  christos /* Undefine static assertion system */
     34  1.1.1.2.4.1  christos #undef MPFR_DECL_STATIC_ASSERT
     35  1.1.1.2.4.1  christos #undef MPFR_STAT_STATIC_ASSERT
     36  1.1.1.2.4.1  christos #define MPFR_DECL_STATIC_ASSERT(a) MPFR_ASSERTN(a)
     37  1.1.1.2.4.1  christos #define MPFR_STAT_STATIC_ASSERT(a) MPFR_ASSERTN(a)
     38  1.1.1.2.4.1  christos 
     39          1.1       mrg int verbose;
     40          1.1       mrg 
     41          1.1       mrg /* template for an unary function */
     42          1.1       mrg /* s->size: precision of both input and output
     43          1.1       mrg    s->xp  : Mantissa of first input
     44          1.1       mrg    s->yp  : mantissa of second input                    */
     45          1.1       mrg #define SPEED_MPFR_FUNC(mean_fun)                       \
     46          1.1       mrg   do                                                    \
     47          1.1       mrg     {                                                   \
     48          1.1       mrg       unsigned  i;                                      \
     49          1.1       mrg       mpfr_limb_ptr wp;                                 \
     50          1.1       mrg       double    t;                                      \
     51          1.1       mrg       mpfr_t    w, x;                                   \
     52          1.1       mrg       mp_size_t size;                                   \
     53          1.1       mrg       MPFR_TMP_DECL (marker);                           \
     54          1.1       mrg                                                         \
     55          1.1       mrg       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
     56          1.1       mrg       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
     57          1.1       mrg       MPFR_TMP_MARK (marker);                           \
     58          1.1       mrg                                                         \
     59          1.1       mrg       size = (s->size-1)/GMP_NUMB_BITS+1;               \
     60          1.1       mrg       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
     61          1.1       mrg       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
     62          1.1       mrg       MPFR_SET_EXP (x, 0);                              \
     63          1.1       mrg                                                         \
     64          1.1       mrg       MPFR_TMP_INIT (wp, w, s->size, size);             \
     65          1.1       mrg                                                         \
     66          1.1       mrg       speed_operand_src (s, s->xp, size);               \
     67          1.1       mrg       speed_operand_dst (s, wp, size);                  \
     68          1.1       mrg       speed_cache_fill (s);                             \
     69          1.1       mrg                                                         \
     70          1.1       mrg       speed_starttime ();                               \
     71          1.1       mrg       i = s->reps;                                      \
     72          1.1       mrg       do                                                \
     73          1.1       mrg         mean_fun (w, x, MPFR_RNDN);                     \
     74          1.1       mrg       while (--i != 0);                                 \
     75          1.1       mrg       t = speed_endtime ();                             \
     76          1.1       mrg                                                         \
     77          1.1       mrg       MPFR_TMP_FREE (marker);                           \
     78          1.1       mrg       return t;                                         \
     79          1.1       mrg     }                                                   \
     80          1.1       mrg   while (0)
     81          1.1       mrg 
     82          1.1       mrg /* same as SPEED_MPFR_FUNC, but for say mpfr_sin_cos (y, z, x, r) */
     83          1.1       mrg #define SPEED_MPFR_FUNC2(mean_fun)                      \
     84          1.1       mrg   do                                                    \
     85          1.1       mrg     {                                                   \
     86          1.1       mrg       unsigned  i;                                      \
     87          1.1       mrg       mpfr_limb_ptr vp, wp;                             \
     88          1.1       mrg       double    t;                                      \
     89          1.1       mrg       mpfr_t    v, w, x;                                \
     90          1.1       mrg       mp_size_t size;                                   \
     91          1.1       mrg       MPFR_TMP_DECL (marker);                           \
     92          1.1       mrg                                                         \
     93          1.1       mrg       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
     94          1.1       mrg       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
     95          1.1       mrg       MPFR_TMP_MARK (marker);                           \
     96          1.1       mrg                                                         \
     97          1.1       mrg       size = (s->size-1)/GMP_NUMB_BITS+1;               \
     98          1.1       mrg       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
     99          1.1       mrg       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
    100          1.1       mrg       MPFR_SET_EXP (x, 0);                              \
    101          1.1       mrg                                                         \
    102          1.1       mrg       MPFR_TMP_INIT (vp, v, s->size, size);             \
    103          1.1       mrg       MPFR_TMP_INIT (wp, w, s->size, size);             \
    104          1.1       mrg                                                         \
    105          1.1       mrg       speed_operand_src (s, s->xp, size);               \
    106          1.1       mrg       speed_operand_dst (s, vp, size);                  \
    107          1.1       mrg       speed_operand_dst (s, wp, size);                  \
    108          1.1       mrg       speed_cache_fill (s);                             \
    109          1.1       mrg                                                         \
    110          1.1       mrg       speed_starttime ();                               \
    111          1.1       mrg       i = s->reps;                                      \
    112          1.1       mrg       do                                                \
    113          1.1       mrg         mean_fun (v, w, x, MPFR_RNDN);                  \
    114          1.1       mrg       while (--i != 0);                                 \
    115          1.1       mrg       t = speed_endtime ();                             \
    116          1.1       mrg                                                         \
    117          1.1       mrg       MPFR_TMP_FREE (marker);                           \
    118          1.1       mrg       return t;                                         \
    119          1.1       mrg     }                                                   \
    120          1.1       mrg   while (0)
    121          1.1       mrg 
    122          1.1       mrg /* template for a function like mpfr_mul */
    123          1.1       mrg #define SPEED_MPFR_OP(mean_fun)                         \
    124          1.1       mrg   do                                                    \
    125          1.1       mrg     {                                                   \
    126          1.1       mrg       unsigned  i;                                      \
    127          1.1       mrg       mpfr_limb_ptr wp;                                 \
    128          1.1       mrg       double    t;                                      \
    129          1.1       mrg       mpfr_t    w, x, y;                                \
    130          1.1       mrg       mp_size_t size;                                   \
    131          1.1       mrg       MPFR_TMP_DECL (marker);                           \
    132          1.1       mrg                                                         \
    133          1.1       mrg       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
    134          1.1       mrg       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
    135          1.1       mrg       MPFR_TMP_MARK (marker);                           \
    136          1.1       mrg                                                         \
    137          1.1       mrg       size = (s->size-1)/GMP_NUMB_BITS+1;               \
    138          1.1       mrg       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
    139          1.1       mrg       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
    140          1.1       mrg       MPFR_SET_EXP (x, 0);                              \
    141          1.1       mrg       s->yp[size-1] |= MPFR_LIMB_HIGHBIT;               \
    142          1.1       mrg       MPFR_TMP_INIT1 (s->yp, y, s->size);               \
    143          1.1       mrg       MPFR_SET_EXP (y, 0);                              \
    144          1.1       mrg                                                         \
    145          1.1       mrg       MPFR_TMP_INIT (wp, w, s->size, size);             \
    146          1.1       mrg                                                         \
    147          1.1       mrg       speed_operand_src (s, s->xp, size);               \
    148          1.1       mrg       speed_operand_src (s, s->yp, size);               \
    149          1.1       mrg       speed_operand_dst (s, wp, size);                  \
    150          1.1       mrg       speed_cache_fill (s);                             \
    151          1.1       mrg                                                         \
    152          1.1       mrg       speed_starttime ();                               \
    153          1.1       mrg       i = s->reps;                                      \
    154          1.1       mrg       do                                                \
    155          1.1       mrg         mean_fun (w, x, y, MPFR_RNDN);                  \
    156          1.1       mrg       while (--i != 0);                                 \
    157          1.1       mrg       t = speed_endtime ();                             \
    158          1.1       mrg                                                         \
    159          1.1       mrg       MPFR_TMP_FREE (marker);                           \
    160          1.1       mrg       return t;                                         \
    161          1.1       mrg     }                                                   \
    162          1.1       mrg   while (0)
    163          1.1       mrg 
    164          1.1       mrg /* special template for mpfr_mul(a,b,b) */
    165          1.1       mrg #define SPEED_MPFR_SQR(mean_fun)                        \
    166          1.1       mrg   do                                                    \
    167          1.1       mrg     {                                                   \
    168          1.1       mrg       unsigned  i;                                      \
    169          1.1       mrg       mpfr_limb_ptr wp;                                 \
    170          1.1       mrg       double    t;                                      \
    171          1.1       mrg       mpfr_t    w, x;                                   \
    172          1.1       mrg       mp_size_t size;                                   \
    173          1.1       mrg       MPFR_TMP_DECL (marker);                           \
    174          1.1       mrg                                                         \
    175          1.1       mrg       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
    176          1.1       mrg       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
    177          1.1       mrg       MPFR_TMP_MARK (marker);                           \
    178          1.1       mrg                                                         \
    179          1.1       mrg       size = (s->size-1)/GMP_NUMB_BITS+1;               \
    180          1.1       mrg       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
    181          1.1       mrg       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
    182          1.1       mrg       MPFR_SET_EXP (x, 0);                              \
    183          1.1       mrg                                                         \
    184          1.1       mrg       MPFR_TMP_INIT (wp, w, s->size, size);             \
    185          1.1       mrg                                                         \
    186          1.1       mrg       speed_operand_src (s, s->xp, size);               \
    187          1.1       mrg       speed_operand_dst (s, wp, size);                  \
    188          1.1       mrg       speed_cache_fill (s);                             \
    189          1.1       mrg                                                         \
    190          1.1       mrg       speed_starttime ();                               \
    191          1.1       mrg       i = s->reps;                                      \
    192          1.1       mrg       do                                                \
    193          1.1       mrg         mean_fun (w, x, x, MPFR_RNDN);                  \
    194          1.1       mrg       while (--i != 0);                                 \
    195          1.1       mrg       t = speed_endtime ();                             \
    196          1.1       mrg                                                         \
    197          1.1       mrg       MPFR_TMP_FREE (marker);                           \
    198          1.1       mrg       return t;                                         \
    199          1.1       mrg     }                                                   \
    200          1.1       mrg   while (0)
    201          1.1       mrg 
    202          1.1       mrg /* s->size: precision of both input and output
    203          1.1       mrg    s->xp  : Mantissa of first input
    204          1.1       mrg    s->r   : exponent
    205          1.1       mrg    s->align_xp : sign (1 means positive, 2 means negative)
    206          1.1       mrg */
    207          1.1       mrg #define SPEED_MPFR_FUNC_WITH_EXPONENT(mean_fun)         \
    208          1.1       mrg   do                                                    \
    209          1.1       mrg     {                                                   \
    210          1.1       mrg       unsigned  i;                                      \
    211          1.1       mrg       mpfr_limb_ptr wp;                                 \
    212          1.1       mrg       double    t;                                      \
    213          1.1       mrg       mpfr_t    w, x;                                   \
    214          1.1       mrg       mp_size_t size;                                   \
    215          1.1       mrg       MPFR_TMP_DECL (marker);                           \
    216          1.1       mrg                                                         \
    217          1.1       mrg       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
    218          1.1       mrg       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
    219          1.1       mrg       MPFR_TMP_MARK (marker);                           \
    220          1.1       mrg                                                         \
    221          1.1       mrg       size = (s->size-1)/GMP_NUMB_BITS+1;               \
    222          1.1       mrg       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
    223          1.1       mrg       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
    224          1.1       mrg       MPFR_SET_EXP (x, s->r);                           \
    225          1.1       mrg       if (s->align_xp == 2) MPFR_SET_NEG (x);           \
    226          1.1       mrg                                                         \
    227          1.1       mrg       MPFR_TMP_INIT (wp, w, s->size, size);             \
    228          1.1       mrg                                                         \
    229          1.1       mrg       speed_operand_src (s, s->xp, size);               \
    230          1.1       mrg       speed_operand_dst (s, wp, size);                  \
    231          1.1       mrg       speed_cache_fill (s);                             \
    232          1.1       mrg                                                         \
    233          1.1       mrg       speed_starttime ();                               \
    234          1.1       mrg       i = s->reps;                                      \
    235          1.1       mrg       do                                                \
    236          1.1       mrg         mean_fun (w, x, MPFR_RNDN);                     \
    237          1.1       mrg       while (--i != 0);                                 \
    238          1.1       mrg       t = speed_endtime ();                             \
    239          1.1       mrg                                                         \
    240          1.1       mrg       MPFR_TMP_FREE (marker);                           \
    241          1.1       mrg       return t;                                         \
    242          1.1       mrg     }                                                   \
    243          1.1       mrg   while (0)
    244          1.1       mrg 
    245          1.1       mrg /* First we include all the functions we want to tune inside this program.
    246          1.1       mrg    We can't use the GNU MPFR library since the thresholds are fixed macros. */
    247          1.1       mrg 
    248          1.1       mrg /* Setup mpfr_exp_2 */
    249          1.1       mrg mpfr_prec_t mpfr_exp_2_threshold;
    250          1.1       mrg #undef  MPFR_EXP_2_THRESHOLD
    251          1.1       mrg #define MPFR_EXP_2_THRESHOLD mpfr_exp_2_threshold
    252          1.1       mrg #include "exp_2.c"
    253          1.1       mrg static double
    254          1.1       mrg speed_mpfr_exp_2 (struct speed_params *s)
    255          1.1       mrg {
    256          1.1       mrg   SPEED_MPFR_FUNC (mpfr_exp_2);
    257          1.1       mrg }
    258          1.1       mrg 
    259          1.1       mrg /* Setup mpfr_exp */
    260          1.1       mrg mpfr_prec_t mpfr_exp_threshold;
    261          1.1       mrg #undef  MPFR_EXP_THRESHOLD
    262          1.1       mrg #define MPFR_EXP_THRESHOLD mpfr_exp_threshold
    263          1.1       mrg #include "exp.c"
    264          1.1       mrg static double
    265          1.1       mrg speed_mpfr_exp (struct speed_params *s)
    266          1.1       mrg {
    267          1.1       mrg   SPEED_MPFR_FUNC (mpfr_exp);
    268          1.1       mrg }
    269          1.1       mrg 
    270          1.1       mrg /* Setup mpfr_sin_cos */
    271          1.1       mrg mpfr_prec_t mpfr_sincos_threshold;
    272          1.1       mrg #undef MPFR_SINCOS_THRESHOLD
    273          1.1       mrg #define MPFR_SINCOS_THRESHOLD mpfr_sincos_threshold
    274          1.1       mrg #include "sin_cos.c"
    275          1.1       mrg #include "cos.c"
    276          1.1       mrg static double
    277          1.1       mrg speed_mpfr_sincos (struct speed_params *s)
    278          1.1       mrg {
    279          1.1       mrg   SPEED_MPFR_FUNC2 (mpfr_sin_cos);
    280          1.1       mrg }
    281          1.1       mrg 
    282          1.1       mrg /* Setup mpfr_mul, mpfr_sqr and mpfr_div */
    283  1.1.1.2.4.1  christos /* Since mpfr_mul() deals with both mul and sqr, and contains an assert that
    284  1.1.1.2.4.1  christos    the thresholds are >= 1, we initialize both values to 1 to avoid a failed
    285  1.1.1.2.4.1  christos    assertion (let's recall that static assertions are replaced by normal
    286  1.1.1.2.4.1  christos    dynamic assertions here). */
    287  1.1.1.2.4.1  christos mpfr_prec_t mpfr_mul_threshold = 1;
    288  1.1.1.2.4.1  christos mpfr_prec_t mpfr_sqr_threshold = 1;
    289          1.1       mrg mpfr_prec_t mpfr_div_threshold;
    290          1.1       mrg #undef  MPFR_MUL_THRESHOLD
    291          1.1       mrg #define MPFR_MUL_THRESHOLD mpfr_mul_threshold
    292          1.1       mrg #undef  MPFR_SQR_THRESHOLD
    293          1.1       mrg #define MPFR_SQR_THRESHOLD mpfr_sqr_threshold
    294          1.1       mrg #undef  MPFR_DIV_THRESHOLD
    295          1.1       mrg #define MPFR_DIV_THRESHOLD mpfr_div_threshold
    296          1.1       mrg #include "mul.c"
    297          1.1       mrg #include "div.c"
    298          1.1       mrg static double
    299          1.1       mrg speed_mpfr_mul (struct speed_params *s)
    300          1.1       mrg {
    301          1.1       mrg   SPEED_MPFR_OP (mpfr_mul);
    302          1.1       mrg }
    303          1.1       mrg static double
    304          1.1       mrg speed_mpfr_sqr (struct speed_params *s)
    305          1.1       mrg {
    306          1.1       mrg   SPEED_MPFR_SQR (mpfr_mul);
    307          1.1       mrg }
    308          1.1       mrg static double
    309          1.1       mrg speed_mpfr_div (struct speed_params *s)
    310          1.1       mrg {
    311          1.1       mrg   SPEED_MPFR_OP (mpfr_div);
    312          1.1       mrg }
    313          1.1       mrg 
    314          1.1       mrg /************************************************
    315          1.1       mrg  * Common functions (inspired by GMP function)  *
    316          1.1       mrg  ************************************************/
    317          1.1       mrg static int
    318          1.1       mrg analyze_data (double *dat, int ndat)
    319          1.1       mrg {
    320          1.1       mrg   double  x, min_x;
    321          1.1       mrg   int     j, min_j;
    322          1.1       mrg 
    323          1.1       mrg   x = 0.0;
    324          1.1       mrg   for (j = 0; j < ndat; j++)
    325          1.1       mrg     if (dat[j] > 0.0)
    326          1.1       mrg       x += dat[j];
    327          1.1       mrg 
    328          1.1       mrg   min_x = x;
    329          1.1       mrg   min_j = 0;
    330          1.1       mrg 
    331          1.1       mrg   for (j = 0; j < ndat; x -= dat[j], j++)
    332          1.1       mrg     {
    333          1.1       mrg       if (x < min_x)
    334          1.1       mrg         {
    335          1.1       mrg           min_x = x;
    336          1.1       mrg           min_j = j;
    337          1.1       mrg         }
    338          1.1       mrg     }
    339          1.1       mrg   return min_j;
    340          1.1       mrg }
    341          1.1       mrg 
    342          1.1       mrg static double
    343          1.1       mrg mpfr_speed_measure (speed_function_t fun, struct speed_params *s, char *m)
    344          1.1       mrg {
    345          1.1       mrg   double t = -1.0;
    346          1.1       mrg   int i;
    347          1.1       mrg   int number_of_iterations = 30;
    348          1.1       mrg   for (i = 1; i <= number_of_iterations && t == -1.0; i++)
    349          1.1       mrg     {
    350          1.1       mrg       t = speed_measure (fun, s);
    351          1.1       mrg       if ( (t == -1.0) && (i+1 <= number_of_iterations) )
    352          1.1       mrg         printf("speed_measure failed for size %lu. Trying again... (%d/%d)\n",
    353          1.1       mrg                s->size, i+1, number_of_iterations);
    354          1.1       mrg     }
    355          1.1       mrg   if (t == -1.0)
    356          1.1       mrg     {
    357          1.1       mrg       fprintf (stderr, "Failed to measure %s!\n", m);
    358          1.1       mrg       fprintf (stderr, "If CPU frequency scaling is enabled, please disable it:\n");
    359          1.1       mrg       fprintf (stderr, "   under Linux: cpufreq-selector -g performance\n");
    360          1.1       mrg       fprintf (stderr, "On a multi-core processor, you might also try to load all the cores\n");
    361          1.1       mrg       abort ();
    362          1.1       mrg     }
    363          1.1       mrg   return t;
    364          1.1       mrg }
    365          1.1       mrg 
    366          1.1       mrg #define THRESHOLD_WINDOW 16
    367          1.1       mrg #define THRESHOLD_FINAL_WINDOW 128
    368          1.1       mrg static double
    369          1.1       mrg domeasure (mpfr_prec_t *threshold,
    370          1.1       mrg            double (*func) (struct speed_params *),
    371          1.1       mrg            mpfr_prec_t p)
    372          1.1       mrg {
    373          1.1       mrg   struct speed_params s;
    374          1.1       mrg   mp_size_t size;
    375          1.1       mrg   double t1, t2, d;
    376          1.1       mrg 
    377          1.1       mrg   s.align_xp = s.align_yp = s.align_wp = 64;
    378          1.1       mrg   s.size = p;
    379          1.1       mrg   size = (p - 1)/GMP_NUMB_BITS+1;
    380          1.1       mrg   s.xp = malloc (2*size*sizeof (mp_limb_t));
    381          1.1       mrg   if (s.xp == NULL)
    382          1.1       mrg     {
    383          1.1       mrg       fprintf (stderr, "Can't allocate memory.\n");
    384          1.1       mrg       abort ();
    385          1.1       mrg     }
    386          1.1       mrg   mpn_random (s.xp, size);
    387          1.1       mrg   s.yp = s.xp + size;
    388          1.1       mrg   mpn_random (s.yp, size);
    389          1.1       mrg   *threshold = MPFR_PREC_MAX;
    390          1.1       mrg   t1 = mpfr_speed_measure (func, &s, "function 1");
    391          1.1       mrg   *threshold = 1;
    392          1.1       mrg   t2 = mpfr_speed_measure (func, &s, "function 2");
    393          1.1       mrg   free (s.xp);
    394          1.1       mrg   /* t1 is the time of the first algo (used for low prec) */
    395          1.1       mrg   if (t2 >= t1)
    396          1.1       mrg     d = (t2 - t1) / t2;
    397          1.1       mrg   else
    398          1.1       mrg     d = (t2 - t1) / t1;
    399          1.1       mrg   /* d > 0 if we have to use algo 1.
    400          1.1       mrg      d < 0 if we have to use algo 2 */
    401          1.1       mrg   return d;
    402          1.1       mrg }
    403          1.1       mrg 
    404          1.1       mrg /* Performs measures when both the precision and the point of evaluation
    405          1.1       mrg    shall vary. s.yp is ignored and not initialized.
    406          1.1       mrg    It assumes that func depends on three thresholds with a boundary of the
    407          1.1       mrg    form threshold1*x + threshold2*p = some scaling factor, if x<0,
    408          1.1       mrg    and  threshold3*x + threshold2*p = some scaling factor, if x>=0.
    409          1.1       mrg */
    410          1.1       mrg static double
    411          1.1       mrg domeasure2 (long int *threshold1, long int *threshold2, long int *threshold3,
    412          1.1       mrg             double (*func) (struct speed_params *),
    413          1.1       mrg             mpfr_prec_t p,
    414          1.1       mrg             mpfr_t x)
    415          1.1       mrg {
    416          1.1       mrg   struct speed_params s;
    417          1.1       mrg   mp_size_t size;
    418          1.1       mrg   double t1, t2, d;
    419          1.1       mrg   mpfr_t xtmp;
    420          1.1       mrg 
    421          1.1       mrg   if (MPFR_IS_SINGULAR (x))
    422          1.1       mrg     {
    423          1.1       mrg       mpfr_fprintf (stderr, "x=%RNf is not a regular number.\n");
    424          1.1       mrg       abort ();
    425          1.1       mrg     }
    426          1.1       mrg   if (MPFR_IS_NEG (x))
    427          1.1       mrg     s.align_xp = 2;
    428          1.1       mrg   else
    429          1.1       mrg     s.align_xp = 1;
    430          1.1       mrg 
    431          1.1       mrg   s.align_yp = s.align_wp = 64;
    432          1.1       mrg   s.size = p;
    433          1.1       mrg   size = (p - 1)/GMP_NUMB_BITS+1;
    434          1.1       mrg 
    435          1.1       mrg   mpfr_init2 (xtmp, p);
    436          1.1       mrg   mpn_random (xtmp->_mpfr_d, size);
    437          1.1       mrg   xtmp->_mpfr_d[size-1] |= MPFR_LIMB_HIGHBIT;
    438          1.1       mrg   MPFR_SET_EXP (xtmp, -53);
    439          1.1       mrg   mpfr_add_ui (xtmp, xtmp, 1, MPFR_RNDN);
    440          1.1       mrg   mpfr_mul (xtmp, xtmp, x, MPFR_RNDN); /* xtmp = x*(1+perturb)       */
    441          1.1       mrg                                       /* where perturb ~ 2^(-53) is */
    442          1.1       mrg                                       /* randomly chosen.           */
    443          1.1       mrg   s.xp = xtmp->_mpfr_d;
    444          1.1       mrg   s.r = MPFR_GET_EXP (xtmp);
    445          1.1       mrg 
    446          1.1       mrg   *threshold1 = 0;
    447          1.1       mrg   *threshold2 = 0;
    448          1.1       mrg   *threshold3 = 0;
    449          1.1       mrg   t1 = mpfr_speed_measure (func, &s, "function 1");
    450          1.1       mrg 
    451          1.1       mrg   if (MPFR_IS_NEG (x))
    452          1.1       mrg     *threshold1 = INT_MIN;
    453          1.1       mrg   else
    454          1.1       mrg     *threshold3 = INT_MAX;
    455          1.1       mrg   *threshold2 = INT_MAX;
    456          1.1       mrg   t2 = mpfr_speed_measure (func, &s, "function 2");
    457          1.1       mrg 
    458          1.1       mrg   /* t1 is the time of the first algo (used for low prec) */
    459          1.1       mrg   if (t2 >= t1)
    460          1.1       mrg     d = (t2 - t1) / t2;
    461          1.1       mrg   else
    462          1.1       mrg     d = (t2 - t1) / t1;
    463          1.1       mrg   /* d > 0 if we have to use algo 1.
    464          1.1       mrg      d < 0 if we have to use algo 2 */
    465          1.1       mrg   mpfr_clear (xtmp);
    466          1.1       mrg   return d;
    467          1.1       mrg }
    468          1.1       mrg 
    469          1.1       mrg /* Tune a function with a simple THRESHOLD
    470          1.1       mrg    The function doesn't depend on another threshold.
    471          1.1       mrg    It assumes that it uses algo1 if p < THRESHOLD
    472          1.1       mrg    and algo2 otherwise.
    473          1.1       mrg    if algo2 is better for low prec, and algo1 better for high prec,
    474          1.1       mrg    the behaviour of this function is undefined. */
    475          1.1       mrg static void
    476          1.1       mrg tune_simple_func (mpfr_prec_t *threshold,
    477          1.1       mrg                   double (*func) (struct speed_params *),
    478          1.1       mrg                   mpfr_prec_t pstart)
    479          1.1       mrg {
    480          1.1       mrg   double measure[THRESHOLD_FINAL_WINDOW+1];
    481          1.1       mrg   double d;
    482          1.1       mrg   mpfr_prec_t pstep;
    483          1.1       mrg   int i, numpos, numneg, try;
    484          1.1       mrg   mpfr_prec_t pmin, pmax, p;
    485          1.1       mrg 
    486          1.1       mrg   /* first look for a lower bound within 10% */
    487          1.1       mrg   pmin = p = pstart;
    488          1.1       mrg   d = domeasure (threshold, func, pmin);
    489          1.1       mrg   if (d < 0.0)
    490          1.1       mrg     {
    491          1.1       mrg       if (verbose)
    492          1.1       mrg         printf ("Oops: even for %lu, algo 2 seems to be faster!\n",
    493          1.1       mrg                 (unsigned long) pmin);
    494          1.1       mrg       *threshold = MPFR_PREC_MIN;
    495          1.1       mrg       return;
    496          1.1       mrg     }
    497          1.1       mrg   if (d >= 1.00)
    498          1.1       mrg     for (;;)
    499          1.1       mrg       {
    500          1.1       mrg         d = domeasure (threshold, func, pmin);
    501          1.1       mrg         if (d < 1.00)
    502          1.1       mrg           break;
    503          1.1       mrg         p = pmin;
    504          1.1       mrg         pmin += pmin/2;
    505          1.1       mrg       }
    506          1.1       mrg   pmin = p;
    507          1.1       mrg   for (;;)
    508          1.1       mrg     {
    509          1.1       mrg       d = domeasure (threshold, func, pmin);
    510          1.1       mrg       if (d < 0.10)
    511          1.1       mrg         break;
    512          1.1       mrg       pmin += GMP_NUMB_BITS;
    513          1.1       mrg     }
    514          1.1       mrg 
    515          1.1       mrg   /* then look for an upper bound within 20% */
    516          1.1       mrg   pmax = pmin * 2;
    517          1.1       mrg   for (;;)
    518          1.1       mrg     {
    519          1.1       mrg       d = domeasure (threshold, func, pmax);
    520          1.1       mrg       if (d < -0.20)
    521          1.1       mrg         break;
    522          1.1       mrg       pmax += pmin / 2; /* don't increase too rapidly */
    523          1.1       mrg     }
    524          1.1       mrg 
    525          1.1       mrg   /* The threshold is between pmin and pmax. Affine them */
    526          1.1       mrg   try = 0;
    527          1.1       mrg   while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW)
    528          1.1       mrg     {
    529          1.1       mrg       pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1);
    530          1.1       mrg       if (verbose)
    531          1.1       mrg         printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep);
    532          1.1       mrg       p = (pmin + pmax) / 2;
    533          1.1       mrg       for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++)
    534          1.1       mrg         {
    535          1.1       mrg           measure[i] = domeasure (threshold, func,
    536          1.1       mrg                                   p+(i-THRESHOLD_WINDOW/2)*pstep);
    537          1.1       mrg           if (measure[i] > 0)
    538          1.1       mrg             numpos ++;
    539          1.1       mrg           else if (measure[i] < 0)
    540          1.1       mrg             numneg ++;
    541          1.1       mrg         }
    542          1.1       mrg       if (numpos > numneg)
    543          1.1       mrg         /* We use more often algo 1 than algo 2 */
    544          1.1       mrg         pmin = p - THRESHOLD_WINDOW/2*pstep;
    545          1.1       mrg       else if (numpos < numneg)
    546          1.1       mrg         pmax = p + THRESHOLD_WINDOW/2*pstep;
    547          1.1       mrg       else
    548          1.1       mrg         /* numpos == numneg ... */
    549          1.1       mrg         if (++ try > 2)
    550          1.1       mrg           {
    551          1.1       mrg             *threshold = p;
    552          1.1       mrg             if (verbose)
    553          1.1       mrg               printf ("Quick find: %lu\n", *threshold);
    554          1.1       mrg             return ;
    555          1.1       mrg           }
    556          1.1       mrg     }
    557          1.1       mrg 
    558          1.1       mrg   /* Final tune... */
    559          1.1       mrg   if (verbose)
    560          1.1       mrg     printf ("Finalizing in [%lu, %lu]... ", pmin, pmax);
    561          1.1       mrg   for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++)
    562          1.1       mrg     measure[i] = domeasure (threshold, func, pmin+i);
    563          1.1       mrg   i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1);
    564          1.1       mrg   *threshold = pmin + i;
    565          1.1       mrg   if (verbose)
    566          1.1       mrg     printf ("%lu\n", *threshold);
    567          1.1       mrg   return;
    568          1.1       mrg }
    569          1.1       mrg 
    570          1.1       mrg /* Tune a function which behavior depends on both p and x,
    571          1.1       mrg    in a given direction.
    572          1.1       mrg    It assumes that for (x,p) close to zero, algo1 is used
    573          1.1       mrg    and algo2 is used when (x,p) is far from zero.
    574          1.1       mrg    If algo2 is better for low prec, and algo1 better for high prec,
    575  1.1.1.2.4.1  christos    the behavior of this function is undefined.
    576          1.1       mrg    This tuning function tries couples (x,p) of the form (ell*dirx, ell*dirp)
    577          1.1       mrg    until it finds a point on the boundary. It returns ell.
    578          1.1       mrg  */
    579          1.1       mrg static void
    580          1.1       mrg tune_simple_func_in_some_direction (long int *threshold1,
    581          1.1       mrg                                     long int *threshold2,
    582          1.1       mrg                                     long int *threshold3,
    583          1.1       mrg                                     double (*func) (struct speed_params *),
    584          1.1       mrg                                     mpfr_prec_t pstart,
    585          1.1       mrg                                     int dirx, int dirp,
    586          1.1       mrg                                     mpfr_t xres, mpfr_prec_t *pres)
    587          1.1       mrg {
    588          1.1       mrg   double measure[THRESHOLD_FINAL_WINDOW+1];
    589          1.1       mrg   double d;
    590          1.1       mrg   mpfr_prec_t pstep;
    591          1.1       mrg   int i, numpos, numneg, try;
    592          1.1       mrg   mpfr_prec_t pmin, pmax, p;
    593          1.1       mrg   mpfr_t xmin, xmax, x;
    594          1.1       mrg   mpfr_t ratio;
    595          1.1       mrg 
    596          1.1       mrg   mpfr_init2 (ratio, MPFR_SMALL_PRECISION);
    597          1.1       mrg   mpfr_set_si (ratio, dirx, MPFR_RNDN);
    598          1.1       mrg   mpfr_div_si (ratio, ratio, dirp, MPFR_RNDN);
    599          1.1       mrg 
    600          1.1       mrg   mpfr_init2 (xmin, MPFR_SMALL_PRECISION);
    601          1.1       mrg   mpfr_init2 (xmax, MPFR_SMALL_PRECISION);
    602          1.1       mrg   mpfr_init2 (x, MPFR_SMALL_PRECISION);
    603          1.1       mrg 
    604          1.1       mrg   /* first look for a lower bound within 10% */
    605          1.1       mrg   pmin = p = pstart;
    606          1.1       mrg   mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
    607          1.1       mrg   mpfr_set (x, xmin, MPFR_RNDN);
    608          1.1       mrg 
    609          1.1       mrg   d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
    610          1.1       mrg   if (d < 0.0)
    611          1.1       mrg     {
    612          1.1       mrg       if (verbose)
    613          1.1       mrg         printf ("Oops: even for %lu, algo 2 seems to be faster!\n",
    614          1.1       mrg                 (unsigned long) pmin);
    615          1.1       mrg       *pres = MPFR_PREC_MIN;
    616          1.1       mrg       mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
    617          1.1       mrg       mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
    618          1.1       mrg       return;
    619          1.1       mrg     }
    620          1.1       mrg   if (d >= 1.00)
    621          1.1       mrg     for (;;)
    622          1.1       mrg       {
    623          1.1       mrg         d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
    624          1.1       mrg         if (d < 1.00)
    625          1.1       mrg           break;
    626          1.1       mrg         p = pmin;
    627          1.1       mrg         mpfr_set (x, xmin, MPFR_RNDN);
    628          1.1       mrg         pmin += pmin/2;
    629          1.1       mrg         mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
    630          1.1       mrg       }
    631          1.1       mrg   pmin = p;
    632          1.1       mrg   mpfr_set (xmin, x, MPFR_RNDN);
    633          1.1       mrg   for (;;)
    634          1.1       mrg     {
    635          1.1       mrg       d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
    636          1.1       mrg       if (d < 0.10)
    637          1.1       mrg         break;
    638          1.1       mrg       pmin += GMP_NUMB_BITS;
    639          1.1       mrg       mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
    640          1.1       mrg     }
    641          1.1       mrg 
    642          1.1       mrg   /* then look for an upper bound within 20% */
    643          1.1       mrg   pmax = pmin * 2;
    644          1.1       mrg   mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
    645          1.1       mrg   for (;;)
    646          1.1       mrg     {
    647          1.1       mrg       d = domeasure2 (threshold1, threshold2, threshold3, func, pmax, xmax);
    648          1.1       mrg       if (d < -0.20)
    649          1.1       mrg         break;
    650          1.1       mrg       pmax += pmin / 2; /* don't increase too rapidly */
    651          1.1       mrg       mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
    652          1.1       mrg     }
    653          1.1       mrg 
    654          1.1       mrg   /* The threshold is between pmin and pmax. Affine them */
    655          1.1       mrg   try = 0;
    656          1.1       mrg   while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW)
    657          1.1       mrg     {
    658          1.1       mrg       pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1);
    659          1.1       mrg       if (verbose)
    660          1.1       mrg         printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep);
    661          1.1       mrg       p = (pmin + pmax) / 2;
    662          1.1       mrg       mpfr_mul_ui (x, ratio, (unsigned int)p, MPFR_RNDN);
    663          1.1       mrg       for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++)
    664          1.1       mrg         {
    665          1.1       mrg           *pres = p+(i-THRESHOLD_WINDOW/2)*pstep;
    666          1.1       mrg           mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
    667          1.1       mrg           measure[i] = domeasure2 (threshold1, threshold2, threshold3,
    668          1.1       mrg                                    func, *pres, xres);
    669          1.1       mrg           if (measure[i] > 0)
    670          1.1       mrg             numpos ++;
    671          1.1       mrg           else if (measure[i] < 0)
    672          1.1       mrg             numneg ++;
    673          1.1       mrg         }
    674          1.1       mrg       if (numpos > numneg)
    675          1.1       mrg         {
    676          1.1       mrg           /* We use more often algo 1 than algo 2 */
    677          1.1       mrg           pmin = p - THRESHOLD_WINDOW/2*pstep;
    678          1.1       mrg           mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
    679          1.1       mrg         }
    680          1.1       mrg       else if (numpos < numneg)
    681          1.1       mrg         {
    682          1.1       mrg           pmax = p + THRESHOLD_WINDOW/2*pstep;
    683          1.1       mrg           mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
    684          1.1       mrg         }
    685          1.1       mrg       else
    686          1.1       mrg         /* numpos == numneg ... */
    687          1.1       mrg         if (++ try > 2)
    688          1.1       mrg           {
    689          1.1       mrg             *pres = p;
    690          1.1       mrg             mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
    691          1.1       mrg             if (verbose)
    692          1.1       mrg               printf ("Quick find: %lu\n", *pres);
    693          1.1       mrg             mpfr_clear (ratio);
    694          1.1       mrg             mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
    695          1.1       mrg             return ;
    696          1.1       mrg           }
    697          1.1       mrg     }
    698          1.1       mrg 
    699          1.1       mrg   /* Final tune... */
    700          1.1       mrg   if (verbose)
    701          1.1       mrg     printf ("Finalizing in [%lu, %lu]... ", pmin, pmax);
    702          1.1       mrg   for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++)
    703          1.1       mrg     {
    704          1.1       mrg       *pres = pmin+i;
    705          1.1       mrg       mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
    706          1.1       mrg       measure[i] = domeasure2 (threshold1, threshold2, threshold3,
    707          1.1       mrg                                func, *pres, xres);
    708          1.1       mrg     }
    709          1.1       mrg   i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1);
    710          1.1       mrg   *pres = pmin + i;
    711          1.1       mrg   mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
    712          1.1       mrg   if (verbose)
    713          1.1       mrg     printf ("%lu\n", *pres);
    714          1.1       mrg   mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
    715          1.1       mrg   return;
    716          1.1       mrg }
    717          1.1       mrg 
    718          1.1       mrg /************************************
    719          1.1       mrg  * Tune Mulders' mulhigh function   *
    720          1.1       mrg  ************************************/
    721          1.1       mrg #define TOLERANCE 1.00
    722          1.1       mrg #define MULDERS_TABLE_SIZE 1024
    723          1.1       mrg #ifndef MPFR_MULHIGH_SIZE
    724          1.1       mrg # define MPFR_MULHIGH_SIZE MULDERS_TABLE_SIZE
    725          1.1       mrg #endif
    726          1.1       mrg #ifndef MPFR_SQRHIGH_SIZE
    727          1.1       mrg # define MPFR_SQRHIGH_SIZE MULDERS_TABLE_SIZE
    728          1.1       mrg #endif
    729          1.1       mrg #ifndef MPFR_DIVHIGH_SIZE
    730          1.1       mrg # define MPFR_DIVHIGH_SIZE MULDERS_TABLE_SIZE
    731          1.1       mrg #endif
    732          1.1       mrg #define MPFR_MULHIGH_TAB_SIZE MPFR_MULHIGH_SIZE
    733          1.1       mrg #define MPFR_SQRHIGH_TAB_SIZE MPFR_SQRHIGH_SIZE
    734          1.1       mrg #define MPFR_DIVHIGH_TAB_SIZE MPFR_DIVHIGH_SIZE
    735          1.1       mrg #include "mulders.c"
    736          1.1       mrg 
    737          1.1       mrg static double
    738          1.1       mrg speed_mpfr_mulhigh (struct speed_params *s)
    739          1.1       mrg {
    740          1.1       mrg   SPEED_ROUTINE_MPN_MUL_N (mpfr_mulhigh_n);
    741          1.1       mrg }
    742          1.1       mrg 
    743          1.1       mrg static double
    744          1.1       mrg speed_mpfr_sqrhigh (struct speed_params *s)
    745          1.1       mrg {
    746          1.1       mrg   SPEED_ROUTINE_MPN_SQR (mpfr_sqrhigh_n);
    747          1.1       mrg }
    748          1.1       mrg 
    749          1.1       mrg static double
    750          1.1       mrg speed_mpfr_divhigh (struct speed_params *s)
    751          1.1       mrg {
    752          1.1       mrg   SPEED_ROUTINE_MPN_DC_DIVREM_CALL (mpfr_divhigh_n (q, a, d, s->size));
    753          1.1       mrg }
    754          1.1       mrg 
    755          1.1       mrg #define MAX_STEPS 513 /* maximum number of values of k tried for a given n */
    756          1.1       mrg 
    757          1.1       mrg /* Tune mpfr_mulhigh_n for size n */
    758          1.1       mrg static mp_size_t
    759          1.1       mrg tune_mul_mulders_upto (mp_size_t n)
    760          1.1       mrg {
    761          1.1       mrg   struct speed_params s;
    762          1.1       mrg   mp_size_t k, kbest, step;
    763          1.1       mrg   double t, tbest;
    764          1.1       mrg   MPFR_TMP_DECL (marker);
    765          1.1       mrg 
    766          1.1       mrg   if (n == 0)
    767          1.1       mrg     return -1;
    768          1.1       mrg 
    769          1.1       mrg   MPFR_TMP_MARK (marker);
    770          1.1       mrg   s.align_xp = s.align_yp = s.align_wp = 64;
    771          1.1       mrg   s.size = n;
    772          1.1       mrg   s.xp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
    773          1.1       mrg   s.yp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
    774          1.1       mrg   mpn_random (s.xp, n);
    775          1.1       mrg   mpn_random (s.yp, n);
    776          1.1       mrg 
    777          1.1       mrg   /* Check k == -1, mpn_mul_basecase */
    778          1.1       mrg   mulhigh_ktab[n] = -1;
    779          1.1       mrg   kbest = -1;
    780          1.1       mrg   tbest = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
    781          1.1       mrg 
    782          1.1       mrg   /* Check k == 0, mpn_mulhigh_n_basecase */
    783          1.1       mrg   mulhigh_ktab[n] = 0;
    784          1.1       mrg   t = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
    785          1.1       mrg   if (t * TOLERANCE < tbest)
    786          1.1       mrg     kbest = 0, tbest = t;
    787          1.1       mrg 
    788          1.1       mrg   /* Check Mulders with cutoff point k */
    789          1.1       mrg   step = 1 + n / (2 * MAX_STEPS);
    790          1.1       mrg   /* we need k >= (n+3)/2, which translates into k >= (n+4)/2 in C */
    791          1.1       mrg   for (k = (n + 4) / 2 ; k < n ; k += step)
    792          1.1       mrg     {
    793          1.1       mrg       mulhigh_ktab[n] = k;
    794          1.1       mrg       t = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
    795          1.1       mrg       if (t * TOLERANCE < tbest)
    796          1.1       mrg         kbest = k, tbest = t;
    797          1.1       mrg     }
    798          1.1       mrg 
    799          1.1       mrg   mulhigh_ktab[n] = kbest;
    800          1.1       mrg 
    801          1.1       mrg   MPFR_TMP_FREE (marker);
    802          1.1       mrg   return kbest;
    803          1.1       mrg }
    804          1.1       mrg 
    805          1.1       mrg /* Tune mpfr_sqrhigh_n for size n */
    806          1.1       mrg static mp_size_t
    807          1.1       mrg tune_sqr_mulders_upto (mp_size_t n)
    808          1.1       mrg {
    809          1.1       mrg   struct speed_params s;
    810          1.1       mrg   mp_size_t k, kbest, step;
    811          1.1       mrg   double t, tbest;
    812          1.1       mrg   MPFR_TMP_DECL (marker);
    813          1.1       mrg 
    814          1.1       mrg   if (n == 0)
    815          1.1       mrg     return -1;
    816          1.1       mrg 
    817          1.1       mrg   MPFR_TMP_MARK (marker);
    818          1.1       mrg   s.align_xp = s.align_wp = 64;
    819          1.1       mrg   s.size = n;
    820          1.1       mrg   s.xp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
    821          1.1       mrg   mpn_random (s.xp, n);
    822          1.1       mrg 
    823          1.1       mrg   /* Check k == -1, mpn_sqr_basecase */
    824          1.1       mrg   sqrhigh_ktab[n] = -1;
    825          1.1       mrg   kbest = -1;
    826          1.1       mrg   tbest = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
    827          1.1       mrg 
    828          1.1       mrg   /* Check k == 0, mpfr_mulhigh_n_basecase */
    829          1.1       mrg   sqrhigh_ktab[n] = 0;
    830          1.1       mrg   t = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
    831          1.1       mrg   if (t * TOLERANCE < tbest)
    832          1.1       mrg     kbest = 0, tbest = t;
    833          1.1       mrg 
    834          1.1       mrg   /* Check Mulders */
    835          1.1       mrg   step = 1 + n / (2 * MAX_STEPS);
    836          1.1       mrg   /* we need k >= (n+3)/2, which translates into k >= (n+4)/2 in C */
    837          1.1       mrg   for (k = (n + 4) / 2 ; k < n ; k += step)
    838          1.1       mrg     {
    839          1.1       mrg       sqrhigh_ktab[n] = k;
    840          1.1       mrg       t = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
    841          1.1       mrg       if (t * TOLERANCE < tbest)
    842          1.1       mrg         kbest = k, tbest = t;
    843          1.1       mrg     }
    844          1.1       mrg 
    845          1.1       mrg   sqrhigh_ktab[n] = kbest;
    846          1.1       mrg 
    847          1.1       mrg   MPFR_TMP_FREE (marker);
    848          1.1       mrg   return kbest;
    849          1.1       mrg }
    850          1.1       mrg 
    851          1.1       mrg /* Tune mpfr_divhigh_n for size n */
    852          1.1       mrg static mp_size_t
    853          1.1       mrg tune_div_mulders_upto (mp_size_t n)
    854          1.1       mrg {
    855          1.1       mrg   struct speed_params s;
    856          1.1       mrg   mp_size_t k, kbest, step;
    857          1.1       mrg   double t, tbest;
    858          1.1       mrg   MPFR_TMP_DECL (marker);
    859          1.1       mrg 
    860          1.1       mrg   if (n == 0)
    861          1.1       mrg     return 0;
    862          1.1       mrg 
    863          1.1       mrg   MPFR_TMP_MARK (marker);
    864          1.1       mrg   s.align_xp = s.align_yp = s.align_wp = s.align_wp2 = 64;
    865          1.1       mrg   s.size = n;
    866          1.1       mrg   s.xp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
    867          1.1       mrg   s.yp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
    868          1.1       mrg   mpn_random (s.xp, n);
    869          1.1       mrg   mpn_random (s.yp, n);
    870          1.1       mrg 
    871          1.1       mrg   /* Check k == n, i.e., mpn_divrem */
    872          1.1       mrg   divhigh_ktab[n] = n;
    873          1.1       mrg   kbest = n;
    874          1.1       mrg   tbest = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
    875          1.1       mrg 
    876          1.1       mrg   /* Check k == 0, i.e., mpfr_divhigh_n_basecase */
    877          1.1       mrg #if defined(WANT_GMP_INTERNALS) && defined(HAVE___GMPN_SBPI1_DIVAPPR_Q)
    878          1.1       mrg   if (n > 2) /* mpn_sbpi1_divappr_q requires dn > 2 */
    879          1.1       mrg #endif
    880          1.1       mrg     {
    881          1.1       mrg       divhigh_ktab[n] = 0;
    882          1.1       mrg       t = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
    883          1.1       mrg       if (t * TOLERANCE < tbest)
    884          1.1       mrg         kbest = 0, tbest = t;
    885          1.1       mrg     }
    886          1.1       mrg 
    887          1.1       mrg   /* Check Mulders */
    888          1.1       mrg   step = 1 + n / (2 * MAX_STEPS);
    889          1.1       mrg   /* we should have (n+3)/2 <= k < n, which translates into
    890          1.1       mrg      (n+4)/2 <= k < n in C */
    891          1.1       mrg   for (k = (n + 4) / 2 ; k < n ; k += step)
    892          1.1       mrg     {
    893          1.1       mrg       divhigh_ktab[n] = k;
    894          1.1       mrg       t = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
    895          1.1       mrg       if (t * TOLERANCE < tbest)
    896          1.1       mrg         kbest = k, tbest = t;
    897          1.1       mrg     }
    898          1.1       mrg 
    899          1.1       mrg   divhigh_ktab[n] = kbest;
    900          1.1       mrg 
    901          1.1       mrg   MPFR_TMP_FREE (marker);
    902          1.1       mrg 
    903          1.1       mrg   return kbest;
    904          1.1       mrg }
    905          1.1       mrg 
    906          1.1       mrg static void
    907          1.1       mrg tune_mul_mulders (FILE *f)
    908          1.1       mrg {
    909          1.1       mrg   mp_size_t k;
    910          1.1       mrg 
    911          1.1       mrg   if (verbose)
    912          1.1       mrg     printf ("Tuning mpfr_mulhigh_n[%d]", (int) MPFR_MULHIGH_TAB_SIZE);
    913          1.1       mrg   fprintf (f, "#define MPFR_MULHIGH_TAB  \\\n ");
    914          1.1       mrg   for (k = 0 ; k < MPFR_MULHIGH_TAB_SIZE ; k++)
    915          1.1       mrg     {
    916          1.1       mrg       fprintf (f, "%d", (int) tune_mul_mulders_upto (k));
    917          1.1       mrg       if (k != MPFR_MULHIGH_TAB_SIZE-1)
    918          1.1       mrg         fputc (',', f);
    919          1.1       mrg       if ((k+1) % 16 == 0)
    920          1.1       mrg         fprintf (f, " \\\n ");
    921          1.1       mrg       if (verbose)
    922          1.1       mrg         putchar ('.');
    923          1.1       mrg     }
    924          1.1       mrg   fprintf (f, " \n");
    925          1.1       mrg   if (verbose)
    926          1.1       mrg     putchar ('\n');
    927          1.1       mrg }
    928          1.1       mrg 
    929          1.1       mrg static void
    930          1.1       mrg tune_sqr_mulders (FILE *f)
    931          1.1       mrg {
    932          1.1       mrg   mp_size_t k;
    933          1.1       mrg 
    934          1.1       mrg   if (verbose)
    935          1.1       mrg     printf ("Tuning mpfr_sqrhigh_n[%d]", (int) MPFR_SQRHIGH_TAB_SIZE);
    936          1.1       mrg   fprintf (f, "#define MPFR_SQRHIGH_TAB  \\\n ");
    937          1.1       mrg   for (k = 0 ; k < MPFR_SQRHIGH_TAB_SIZE ; k++)
    938          1.1       mrg     {
    939          1.1       mrg       fprintf (f, "%d", (int) tune_sqr_mulders_upto (k));
    940          1.1       mrg       if (k != MPFR_SQRHIGH_TAB_SIZE-1)
    941          1.1       mrg         fputc (',', f);
    942          1.1       mrg       if ((k+1) % 16 == 0)
    943          1.1       mrg         fprintf (f, " \\\n ");
    944          1.1       mrg       if (verbose)
    945          1.1       mrg         putchar ('.');
    946          1.1       mrg     }
    947          1.1       mrg   fprintf (f, " \n");
    948          1.1       mrg   if (verbose)
    949          1.1       mrg     putchar ('\n');
    950          1.1       mrg }
    951          1.1       mrg 
    952          1.1       mrg static void
    953          1.1       mrg tune_div_mulders (FILE *f)
    954          1.1       mrg {
    955          1.1       mrg   mp_size_t k;
    956          1.1       mrg 
    957          1.1       mrg   if (verbose)
    958          1.1       mrg     printf ("Tuning mpfr_divhigh_n[%d]", (int) MPFR_DIVHIGH_TAB_SIZE);
    959          1.1       mrg   fprintf (f, "#define MPFR_DIVHIGH_TAB  \\\n ");
    960          1.1       mrg   for (k = 0 ; k < MPFR_DIVHIGH_TAB_SIZE ; k++)
    961          1.1       mrg     {
    962          1.1       mrg       fprintf (f, "%d", (int) tune_div_mulders_upto (k));
    963          1.1       mrg       if (k != MPFR_DIVHIGH_TAB_SIZE - 1)
    964          1.1       mrg         fputc (',', f);
    965          1.1       mrg       if ((k+1) % 16 == 0)
    966  1.1.1.2.4.1  christos         fprintf (f, " /*%zu-%zu*/ \\\n ", (size_t) k - 15, (size_t) k);
    967          1.1       mrg       if (verbose)
    968          1.1       mrg         putchar ('.');
    969          1.1       mrg     }
    970          1.1       mrg   fprintf (f, " \n");
    971          1.1       mrg   if (verbose)
    972          1.1       mrg     putchar ('\n');
    973          1.1       mrg }
    974          1.1       mrg 
    975          1.1       mrg /*******************************************************
    976          1.1       mrg  *            Tuning functions for mpfr_ai             *
    977          1.1       mrg  *******************************************************/
    978          1.1       mrg 
    979          1.1       mrg long int mpfr_ai_threshold1;
    980          1.1       mrg long int mpfr_ai_threshold2;
    981          1.1       mrg long int mpfr_ai_threshold3;
    982          1.1       mrg #undef  MPFR_AI_THRESHOLD1
    983          1.1       mrg #define MPFR_AI_THRESHOLD1 mpfr_ai_threshold1
    984          1.1       mrg #undef  MPFR_AI_THRESHOLD2
    985          1.1       mrg #define MPFR_AI_THRESHOLD2 mpfr_ai_threshold2
    986          1.1       mrg #undef  MPFR_AI_THRESHOLD3
    987          1.1       mrg #define MPFR_AI_THRESHOLD3 mpfr_ai_threshold3
    988          1.1       mrg 
    989          1.1       mrg #include "ai.c"
    990          1.1       mrg 
    991          1.1       mrg static double
    992          1.1       mrg speed_mpfr_ai (struct speed_params *s)
    993          1.1       mrg {
    994          1.1       mrg   SPEED_MPFR_FUNC_WITH_EXPONENT (mpfr_ai);
    995          1.1       mrg }
    996          1.1       mrg 
    997          1.1       mrg 
    998          1.1       mrg /*******************************************************
    999          1.1       mrg  *            Tune all the threshold of MPFR           *
   1000          1.1       mrg  * Warning: tune the function in their dependent order!*
   1001          1.1       mrg  *******************************************************/
   1002          1.1       mrg static void
   1003          1.1       mrg all (const char *filename)
   1004          1.1       mrg {
   1005          1.1       mrg   FILE *f;
   1006          1.1       mrg   time_t  start_time, end_time;
   1007          1.1       mrg   struct tm  *tp;
   1008          1.1       mrg   mpfr_t x1, x2, x3, tmp1, tmp2;
   1009          1.1       mrg   mpfr_prec_t p1, p2, p3;
   1010          1.1       mrg 
   1011          1.1       mrg   f = fopen (filename, "w");
   1012          1.1       mrg   if (f == NULL)
   1013          1.1       mrg     {
   1014          1.1       mrg       fprintf (stderr, "Can't open file '%s' for writing.\n", filename);
   1015          1.1       mrg       abort ();
   1016          1.1       mrg     }
   1017          1.1       mrg 
   1018          1.1       mrg   speed_time_init ();
   1019          1.1       mrg   if (verbose)
   1020          1.1       mrg     {
   1021          1.1       mrg       printf ("Using: %s\n", speed_time_string);
   1022          1.1       mrg       printf ("speed_precision %d", speed_precision);
   1023          1.1       mrg       if (speed_unittime == 1.0)
   1024          1.1       mrg         printf (", speed_unittime 1 cycle");
   1025          1.1       mrg       else
   1026          1.1       mrg         printf (", speed_unittime %.2e secs", speed_unittime);
   1027          1.1       mrg       if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
   1028          1.1       mrg         printf (", CPU freq unknown\n");
   1029          1.1       mrg       else
   1030          1.1       mrg         printf (", CPU freq %.2f MHz\n\n", 1e-6/speed_cycletime);
   1031          1.1       mrg     }
   1032          1.1       mrg 
   1033          1.1       mrg   time (&start_time);
   1034          1.1       mrg   tp = localtime (&start_time);
   1035          1.1       mrg   fprintf (f, "/* Generated by MPFR's tuneup.c, %d-%02d-%02d, ",
   1036          1.1       mrg           tp->tm_year+1900, tp->tm_mon+1, tp->tm_mday);
   1037          1.1       mrg 
   1038          1.1       mrg #ifdef __ICC
   1039          1.1       mrg   fprintf (f, "icc %d.%d.%d */\n", __ICC / 100, __ICC / 10 % 10, __ICC % 10);
   1040          1.1       mrg #elif defined(__GNUC__)
   1041          1.1       mrg #ifdef __GNUC_PATCHLEVEL__
   1042          1.1       mrg   fprintf (f, "gcc %d.%d.%d */\n", __GNUC__, __GNUC_MINOR__,
   1043          1.1       mrg            __GNUC_PATCHLEVEL__);
   1044          1.1       mrg #else
   1045          1.1       mrg   fprintf (f, "gcc %d.%d */\n", __GNUC__, __GNUC_MINOR__);
   1046          1.1       mrg #endif
   1047          1.1       mrg #elif defined (__SUNPRO_C)
   1048          1.1       mrg   fprintf (f, "Sun C %d.%d */\n", __SUNPRO_C / 0x100, __SUNPRO_C % 0x100);
   1049          1.1       mrg #elif defined (__sgi) && defined (_COMPILER_VERSION)
   1050          1.1       mrg   fprintf (f, "MIPSpro C %d.%d.%d */\n",
   1051          1.1       mrg            _COMPILER_VERSION / 100,
   1052          1.1       mrg            _COMPILER_VERSION / 10 % 10,
   1053          1.1       mrg            _COMPILER_VERSION % 10);
   1054          1.1       mrg #elif defined (__DECC) && defined (__DECC_VER)
   1055          1.1       mrg   fprintf (f, "DEC C %d */\n", __DECC_VER);
   1056          1.1       mrg #else
   1057          1.1       mrg   fprintf (f, "system compiler */\n");
   1058          1.1       mrg #endif
   1059          1.1       mrg   fprintf (f, "\n\n");
   1060          1.1       mrg   fprintf (f, "#ifndef MPFR_TUNE_CASE\n");
   1061          1.1       mrg   fprintf (f, "#define MPFR_TUNE_CASE \"src/mparam.h\"\n");
   1062          1.1       mrg   fprintf (f, "#endif\n\n");
   1063          1.1       mrg 
   1064          1.1       mrg   /* Tune mulhigh */
   1065          1.1       mrg   tune_mul_mulders (f);
   1066          1.1       mrg 
   1067          1.1       mrg   /* Tune sqrhigh */
   1068          1.1       mrg   tune_sqr_mulders (f);
   1069          1.1       mrg 
   1070          1.1       mrg   /* Tune divhigh */
   1071          1.1       mrg   tune_div_mulders (f);
   1072          1.1       mrg   fflush (f);
   1073          1.1       mrg 
   1074          1.1       mrg   /* Tune mpfr_mul (threshold is in limbs, but it doesn't matter too much) */
   1075          1.1       mrg   if (verbose)
   1076          1.1       mrg     printf ("Tuning mpfr_mul...\n");
   1077          1.1       mrg   tune_simple_func (&mpfr_mul_threshold, speed_mpfr_mul,
   1078          1.1       mrg                     2*GMP_NUMB_BITS+1);
   1079          1.1       mrg   fprintf (f, "#define MPFR_MUL_THRESHOLD %lu /* limbs */\n",
   1080          1.1       mrg            (unsigned long) (mpfr_mul_threshold - 1) / GMP_NUMB_BITS + 1);
   1081          1.1       mrg 
   1082          1.1       mrg   /* Tune mpfr_sqr (threshold is in limbs, but it doesn't matter too much) */
   1083          1.1       mrg   if (verbose)
   1084          1.1       mrg     printf ("Tuning mpfr_sqr...\n");
   1085          1.1       mrg   tune_simple_func (&mpfr_sqr_threshold, speed_mpfr_sqr,
   1086          1.1       mrg                     2*GMP_NUMB_BITS+1);
   1087          1.1       mrg   fprintf (f, "#define MPFR_SQR_THRESHOLD %lu /* limbs */\n",
   1088          1.1       mrg            (unsigned long) (mpfr_sqr_threshold - 1) / GMP_NUMB_BITS + 1);
   1089          1.1       mrg 
   1090          1.1       mrg   /* Tune mpfr_div (threshold is in limbs, but it doesn't matter too much) */
   1091          1.1       mrg   if (verbose)
   1092          1.1       mrg     printf ("Tuning mpfr_div...\n");
   1093          1.1       mrg   tune_simple_func (&mpfr_div_threshold, speed_mpfr_div,
   1094          1.1       mrg                     2*GMP_NUMB_BITS+1);
   1095          1.1       mrg   fprintf (f, "#define MPFR_DIV_THRESHOLD %lu /* limbs */\n",
   1096          1.1       mrg            (unsigned long) (mpfr_div_threshold - 1) / GMP_NUMB_BITS + 1);
   1097          1.1       mrg 
   1098          1.1       mrg   /* Tune mpfr_exp_2 */
   1099          1.1       mrg   if (verbose)
   1100          1.1       mrg     printf ("Tuning mpfr_exp_2...\n");
   1101          1.1       mrg   tune_simple_func (&mpfr_exp_2_threshold, speed_mpfr_exp_2, GMP_NUMB_BITS);
   1102          1.1       mrg   fprintf (f, "#define MPFR_EXP_2_THRESHOLD %lu /* bits */\n",
   1103          1.1       mrg            (unsigned long) mpfr_exp_2_threshold);
   1104          1.1       mrg 
   1105          1.1       mrg   /* Tune mpfr_exp */
   1106          1.1       mrg   if (verbose)
   1107          1.1       mrg     printf ("Tuning mpfr_exp...\n");
   1108          1.1       mrg   tune_simple_func (&mpfr_exp_threshold, speed_mpfr_exp,
   1109          1.1       mrg                     MPFR_PREC_MIN+3*GMP_NUMB_BITS);
   1110          1.1       mrg   fprintf (f, "#define MPFR_EXP_THRESHOLD %lu /* bits */\n",
   1111          1.1       mrg            (unsigned long) mpfr_exp_threshold);
   1112          1.1       mrg 
   1113          1.1       mrg   /* Tune mpfr_sin_cos */
   1114          1.1       mrg   if (verbose)
   1115          1.1       mrg     printf ("Tuning mpfr_sin_cos...\n");
   1116          1.1       mrg   tune_simple_func (&mpfr_sincos_threshold, speed_mpfr_sincos,
   1117          1.1       mrg                     MPFR_PREC_MIN+3*GMP_NUMB_BITS);
   1118          1.1       mrg   fprintf (f, "#define MPFR_SINCOS_THRESHOLD %lu /* bits */\n",
   1119          1.1       mrg            (unsigned long) mpfr_sincos_threshold);
   1120          1.1       mrg 
   1121          1.1       mrg   /* Tune mpfr_ai */
   1122          1.1       mrg   if (verbose)
   1123          1.1       mrg     printf ("Tuning mpfr_ai...\n");
   1124          1.1       mrg   mpfr_init2 (x1, MPFR_SMALL_PRECISION);
   1125          1.1       mrg   mpfr_init2 (x2, MPFR_SMALL_PRECISION);
   1126          1.1       mrg   mpfr_init2 (x3, MPFR_SMALL_PRECISION);
   1127          1.1       mrg   mpfr_init2 (tmp1, MPFR_SMALL_PRECISION);
   1128          1.1       mrg   mpfr_init2 (tmp2, MPFR_SMALL_PRECISION);
   1129          1.1       mrg 
   1130          1.1       mrg   tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
   1131          1.1       mrg                                       &mpfr_ai_threshold3, speed_mpfr_ai,
   1132          1.1       mrg                                       MPFR_PREC_MIN+GMP_NUMB_BITS,
   1133          1.1       mrg                                       -60, 200, x1, &p1);
   1134          1.1       mrg   tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
   1135          1.1       mrg                                       &mpfr_ai_threshold3, speed_mpfr_ai,
   1136          1.1       mrg                                       MPFR_PREC_MIN+GMP_NUMB_BITS,
   1137          1.1       mrg                                       -20, 500, x2, &p2);
   1138          1.1       mrg   tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
   1139          1.1       mrg                                       &mpfr_ai_threshold3, speed_mpfr_ai,
   1140          1.1       mrg                                       MPFR_PREC_MIN+GMP_NUMB_BITS,
   1141          1.1       mrg                                       40, 200, x3, &p3);
   1142          1.1       mrg 
   1143          1.1       mrg   mpfr_mul_ui (tmp1, x2, (unsigned long)p1, MPFR_RNDN);
   1144          1.1       mrg   mpfr_mul_ui (tmp2, x1, (unsigned long)p2, MPFR_RNDN);
   1145          1.1       mrg   mpfr_sub (tmp1, tmp1, tmp2, MPFR_RNDN);
   1146          1.1       mrg   mpfr_div_ui (tmp1, tmp1, MPFR_AI_SCALE, MPFR_RNDN);
   1147          1.1       mrg 
   1148          1.1       mrg   mpfr_set_ui (tmp2, (unsigned long)p1, MPFR_RNDN);
   1149          1.1       mrg   mpfr_sub_ui (tmp2, tmp2, (unsigned long)p2, MPFR_RNDN);
   1150          1.1       mrg   mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN);
   1151          1.1       mrg   mpfr_ai_threshold1 = mpfr_get_si (tmp2, MPFR_RNDN);
   1152          1.1       mrg 
   1153          1.1       mrg   mpfr_sub (tmp2, x2, x1, MPFR_RNDN);
   1154          1.1       mrg   mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN);
   1155          1.1       mrg   mpfr_ai_threshold2 = mpfr_get_si (tmp2, MPFR_RNDN);
   1156          1.1       mrg 
   1157          1.1       mrg   mpfr_set_ui (tmp1, (unsigned long)p3, MPFR_RNDN);
   1158          1.1       mrg   mpfr_mul_si (tmp1, tmp1, mpfr_ai_threshold2, MPFR_RNDN);
   1159          1.1       mrg   mpfr_ui_sub (tmp1, MPFR_AI_SCALE, tmp1, MPFR_RNDN);
   1160          1.1       mrg   mpfr_div (tmp1, tmp1, x3, MPFR_RNDN);
   1161          1.1       mrg   mpfr_ai_threshold3 = mpfr_get_si (tmp1, MPFR_RNDN);
   1162          1.1       mrg 
   1163          1.1       mrg   fprintf (f, "#define MPFR_AI_THRESHOLD1 %ld /* threshold for negative input of mpfr_ai */\n", mpfr_ai_threshold1);
   1164          1.1       mrg   fprintf (f, "#define MPFR_AI_THRESHOLD2 %ld\n", mpfr_ai_threshold2);
   1165          1.1       mrg   fprintf (f, "#define MPFR_AI_THRESHOLD3 %ld\n", mpfr_ai_threshold3);
   1166          1.1       mrg 
   1167          1.1       mrg   mpfr_clear (x1); mpfr_clear (x2); mpfr_clear (x3);
   1168          1.1       mrg   mpfr_clear (tmp1); mpfr_clear (tmp2);
   1169          1.1       mrg 
   1170          1.1       mrg   /* End of tuning */
   1171          1.1       mrg   time (&end_time);
   1172          1.1       mrg   fprintf (f, "/* Tuneup completed successfully, took %ld seconds */\n",
   1173          1.1       mrg            (long) (end_time - start_time));
   1174          1.1       mrg   if (verbose)
   1175          1.1       mrg     printf ("Complete (took %ld seconds).\n", (long) (end_time - start_time));
   1176          1.1       mrg 
   1177          1.1       mrg   fclose (f);
   1178          1.1       mrg }
   1179          1.1       mrg 
   1180          1.1       mrg 
   1181          1.1       mrg /* Main function */
   1182          1.1       mrg int main (int argc, char *argv[])
   1183          1.1       mrg {
   1184          1.1       mrg   /* Unbuffered so if output is redirected to a file it isn't lost if the
   1185          1.1       mrg      program is killed part way through.  */
   1186          1.1       mrg   setbuf (stdout, NULL);
   1187          1.1       mrg   setbuf (stderr, NULL);
   1188          1.1       mrg 
   1189          1.1       mrg   verbose = argc > 1;
   1190          1.1       mrg 
   1191          1.1       mrg   if (verbose)
   1192          1.1       mrg     printf ("Tuning MPFR (Coffee time?)...\n");
   1193          1.1       mrg 
   1194          1.1       mrg   all ("mparam.h");
   1195          1.1       mrg 
   1196          1.1       mrg   return 0;
   1197          1.1       mrg }
   1198