tuneup.c revision 1.1.1.1 1 /* Tune various threshold of MPFR
2
3 Copyright 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramel projects, INRIA.
5
6 This file is part of the GNU MPFR Library.
7
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23 #include <stdlib.h>
24 #include <time.h>
25
26 #define MPFR_NEED_LONGLONG_H
27 #include "mpfr-impl.h"
28
29 #undef _PROTO
30 #define _PROTO __GMP_PROTO
31 #include "speed.h"
32
33 int verbose;
34
35 /* template for an unary function */
36 /* s->size: precision of both input and output
37 s->xp : Mantissa of first input
38 s->yp : mantissa of second input */
39 #define SPEED_MPFR_FUNC(mean_fun) \
40 do \
41 { \
42 unsigned i; \
43 mpfr_limb_ptr wp; \
44 double t; \
45 mpfr_t w, x; \
46 mp_size_t size; \
47 MPFR_TMP_DECL (marker); \
48 \
49 SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
50 SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
51 MPFR_TMP_MARK (marker); \
52 \
53 size = (s->size-1)/GMP_NUMB_BITS+1; \
54 s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
55 MPFR_TMP_INIT1 (s->xp, x, s->size); \
56 MPFR_SET_EXP (x, 0); \
57 \
58 MPFR_TMP_INIT (wp, w, s->size, size); \
59 \
60 speed_operand_src (s, s->xp, size); \
61 speed_operand_dst (s, wp, size); \
62 speed_cache_fill (s); \
63 \
64 speed_starttime (); \
65 i = s->reps; \
66 do \
67 mean_fun (w, x, MPFR_RNDN); \
68 while (--i != 0); \
69 t = speed_endtime (); \
70 \
71 MPFR_TMP_FREE (marker); \
72 return t; \
73 } \
74 while (0)
75
76 /* same as SPEED_MPFR_FUNC, but for say mpfr_sin_cos (y, z, x, r) */
77 #define SPEED_MPFR_FUNC2(mean_fun) \
78 do \
79 { \
80 unsigned i; \
81 mpfr_limb_ptr vp, wp; \
82 double t; \
83 mpfr_t v, w, x; \
84 mp_size_t size; \
85 MPFR_TMP_DECL (marker); \
86 \
87 SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
88 SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
89 MPFR_TMP_MARK (marker); \
90 \
91 size = (s->size-1)/GMP_NUMB_BITS+1; \
92 s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
93 MPFR_TMP_INIT1 (s->xp, x, s->size); \
94 MPFR_SET_EXP (x, 0); \
95 \
96 MPFR_TMP_INIT (vp, v, s->size, size); \
97 MPFR_TMP_INIT (wp, w, s->size, size); \
98 \
99 speed_operand_src (s, s->xp, size); \
100 speed_operand_dst (s, vp, size); \
101 speed_operand_dst (s, wp, size); \
102 speed_cache_fill (s); \
103 \
104 speed_starttime (); \
105 i = s->reps; \
106 do \
107 mean_fun (v, w, x, MPFR_RNDN); \
108 while (--i != 0); \
109 t = speed_endtime (); \
110 \
111 MPFR_TMP_FREE (marker); \
112 return t; \
113 } \
114 while (0)
115
116 /* template for a function like mpfr_mul */
117 #define SPEED_MPFR_OP(mean_fun) \
118 do \
119 { \
120 unsigned i; \
121 mpfr_limb_ptr wp; \
122 double t; \
123 mpfr_t w, x, y; \
124 mp_size_t size; \
125 MPFR_TMP_DECL (marker); \
126 \
127 SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
128 SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
129 MPFR_TMP_MARK (marker); \
130 \
131 size = (s->size-1)/GMP_NUMB_BITS+1; \
132 s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
133 MPFR_TMP_INIT1 (s->xp, x, s->size); \
134 MPFR_SET_EXP (x, 0); \
135 s->yp[size-1] |= MPFR_LIMB_HIGHBIT; \
136 MPFR_TMP_INIT1 (s->yp, y, s->size); \
137 MPFR_SET_EXP (y, 0); \
138 \
139 MPFR_TMP_INIT (wp, w, s->size, size); \
140 \
141 speed_operand_src (s, s->xp, size); \
142 speed_operand_src (s, s->yp, size); \
143 speed_operand_dst (s, wp, size); \
144 speed_cache_fill (s); \
145 \
146 speed_starttime (); \
147 i = s->reps; \
148 do \
149 mean_fun (w, x, y, MPFR_RNDN); \
150 while (--i != 0); \
151 t = speed_endtime (); \
152 \
153 MPFR_TMP_FREE (marker); \
154 return t; \
155 } \
156 while (0)
157
158 /* special template for mpfr_mul(a,b,b) */
159 #define SPEED_MPFR_SQR(mean_fun) \
160 do \
161 { \
162 unsigned i; \
163 mpfr_limb_ptr wp; \
164 double t; \
165 mpfr_t w, x; \
166 mp_size_t size; \
167 MPFR_TMP_DECL (marker); \
168 \
169 SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
170 SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
171 MPFR_TMP_MARK (marker); \
172 \
173 size = (s->size-1)/GMP_NUMB_BITS+1; \
174 s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
175 MPFR_TMP_INIT1 (s->xp, x, s->size); \
176 MPFR_SET_EXP (x, 0); \
177 \
178 MPFR_TMP_INIT (wp, w, s->size, size); \
179 \
180 speed_operand_src (s, s->xp, size); \
181 speed_operand_dst (s, wp, size); \
182 speed_cache_fill (s); \
183 \
184 speed_starttime (); \
185 i = s->reps; \
186 do \
187 mean_fun (w, x, x, MPFR_RNDN); \
188 while (--i != 0); \
189 t = speed_endtime (); \
190 \
191 MPFR_TMP_FREE (marker); \
192 return t; \
193 } \
194 while (0)
195
196 /* s->size: precision of both input and output
197 s->xp : Mantissa of first input
198 s->r : exponent
199 s->align_xp : sign (1 means positive, 2 means negative)
200 */
201 #define SPEED_MPFR_FUNC_WITH_EXPONENT(mean_fun) \
202 do \
203 { \
204 unsigned i; \
205 mpfr_limb_ptr wp; \
206 double t; \
207 mpfr_t w, x; \
208 mp_size_t size; \
209 MPFR_TMP_DECL (marker); \
210 \
211 SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
212 SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
213 MPFR_TMP_MARK (marker); \
214 \
215 size = (s->size-1)/GMP_NUMB_BITS+1; \
216 s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
217 MPFR_TMP_INIT1 (s->xp, x, s->size); \
218 MPFR_SET_EXP (x, s->r); \
219 if (s->align_xp == 2) MPFR_SET_NEG (x); \
220 \
221 MPFR_TMP_INIT (wp, w, s->size, size); \
222 \
223 speed_operand_src (s, s->xp, size); \
224 speed_operand_dst (s, wp, size); \
225 speed_cache_fill (s); \
226 \
227 speed_starttime (); \
228 i = s->reps; \
229 do \
230 mean_fun (w, x, MPFR_RNDN); \
231 while (--i != 0); \
232 t = speed_endtime (); \
233 \
234 MPFR_TMP_FREE (marker); \
235 return t; \
236 } \
237 while (0)
238
239 /* First we include all the functions we want to tune inside this program.
240 We can't use the GNU MPFR library since the thresholds are fixed macros. */
241
242 /* Setup mpfr_exp_2 */
243 mpfr_prec_t mpfr_exp_2_threshold;
244 #undef MPFR_EXP_2_THRESHOLD
245 #define MPFR_EXP_2_THRESHOLD mpfr_exp_2_threshold
246 #include "exp_2.c"
247 static double
248 speed_mpfr_exp_2 (struct speed_params *s)
249 {
250 SPEED_MPFR_FUNC (mpfr_exp_2);
251 }
252
253 /* Setup mpfr_exp */
254 mpfr_prec_t mpfr_exp_threshold;
255 #undef MPFR_EXP_THRESHOLD
256 #define MPFR_EXP_THRESHOLD mpfr_exp_threshold
257 #include "exp.c"
258 static double
259 speed_mpfr_exp (struct speed_params *s)
260 {
261 SPEED_MPFR_FUNC (mpfr_exp);
262 }
263
264 /* Setup mpfr_sin_cos */
265 mpfr_prec_t mpfr_sincos_threshold;
266 #undef MPFR_SINCOS_THRESHOLD
267 #define MPFR_SINCOS_THRESHOLD mpfr_sincos_threshold
268 #include "sin_cos.c"
269 #include "cos.c"
270 static double
271 speed_mpfr_sincos (struct speed_params *s)
272 {
273 SPEED_MPFR_FUNC2 (mpfr_sin_cos);
274 }
275
276 /* Setup mpfr_mul, mpfr_sqr and mpfr_div */
277 mpfr_prec_t mpfr_mul_threshold;
278 mpfr_prec_t mpfr_sqr_threshold;
279 mpfr_prec_t mpfr_div_threshold;
280 #undef MPFR_MUL_THRESHOLD
281 #define MPFR_MUL_THRESHOLD mpfr_mul_threshold
282 #undef MPFR_SQR_THRESHOLD
283 #define MPFR_SQR_THRESHOLD mpfr_sqr_threshold
284 #undef MPFR_DIV_THRESHOLD
285 #define MPFR_DIV_THRESHOLD mpfr_div_threshold
286 #include "mul.c"
287 #include "div.c"
288 static double
289 speed_mpfr_mul (struct speed_params *s)
290 {
291 SPEED_MPFR_OP (mpfr_mul);
292 }
293 static double
294 speed_mpfr_sqr (struct speed_params *s)
295 {
296 SPEED_MPFR_SQR (mpfr_mul);
297 }
298 static double
299 speed_mpfr_div (struct speed_params *s)
300 {
301 SPEED_MPFR_OP (mpfr_div);
302 }
303
304 /************************************************
305 * Common functions (inspired by GMP function) *
306 ************************************************/
307 static int
308 analyze_data (double *dat, int ndat)
309 {
310 double x, min_x;
311 int j, min_j;
312
313 x = 0.0;
314 for (j = 0; j < ndat; j++)
315 if (dat[j] > 0.0)
316 x += dat[j];
317
318 min_x = x;
319 min_j = 0;
320
321 for (j = 0; j < ndat; x -= dat[j], j++)
322 {
323 if (x < min_x)
324 {
325 min_x = x;
326 min_j = j;
327 }
328 }
329 return min_j;
330 }
331
332 static double
333 mpfr_speed_measure (speed_function_t fun, struct speed_params *s, char *m)
334 {
335 double t = -1.0;
336 int i;
337 int number_of_iterations = 30;
338 for (i = 1; i <= number_of_iterations && t == -1.0; i++)
339 {
340 t = speed_measure (fun, s);
341 if ( (t == -1.0) && (i+1 <= number_of_iterations) )
342 printf("speed_measure failed for size %lu. Trying again... (%d/%d)\n",
343 s->size, i+1, number_of_iterations);
344 }
345 if (t == -1.0)
346 {
347 fprintf (stderr, "Failed to measure %s!\n", m);
348 fprintf (stderr, "If CPU frequency scaling is enabled, please disable it:\n");
349 fprintf (stderr, " under Linux: cpufreq-selector -g performance\n");
350 fprintf (stderr, "On a multi-core processor, you might also try to load all the cores\n");
351 abort ();
352 }
353 return t;
354 }
355
356 #define THRESHOLD_WINDOW 16
357 #define THRESHOLD_FINAL_WINDOW 128
358 static double
359 domeasure (mpfr_prec_t *threshold,
360 double (*func) (struct speed_params *),
361 mpfr_prec_t p)
362 {
363 struct speed_params s;
364 mp_size_t size;
365 double t1, t2, d;
366
367 s.align_xp = s.align_yp = s.align_wp = 64;
368 s.size = p;
369 size = (p - 1)/GMP_NUMB_BITS+1;
370 s.xp = malloc (2*size*sizeof (mp_limb_t));
371 if (s.xp == NULL)
372 {
373 fprintf (stderr, "Can't allocate memory.\n");
374 abort ();
375 }
376 mpn_random (s.xp, size);
377 s.yp = s.xp + size;
378 mpn_random (s.yp, size);
379 *threshold = MPFR_PREC_MAX;
380 t1 = mpfr_speed_measure (func, &s, "function 1");
381 *threshold = 1;
382 t2 = mpfr_speed_measure (func, &s, "function 2");
383 free (s.xp);
384 /* t1 is the time of the first algo (used for low prec) */
385 if (t2 >= t1)
386 d = (t2 - t1) / t2;
387 else
388 d = (t2 - t1) / t1;
389 /* d > 0 if we have to use algo 1.
390 d < 0 if we have to use algo 2 */
391 return d;
392 }
393
394 /* Performs measures when both the precision and the point of evaluation
395 shall vary. s.yp is ignored and not initialized.
396 It assumes that func depends on three thresholds with a boundary of the
397 form threshold1*x + threshold2*p = some scaling factor, if x<0,
398 and threshold3*x + threshold2*p = some scaling factor, if x>=0.
399 */
400 static double
401 domeasure2 (long int *threshold1, long int *threshold2, long int *threshold3,
402 double (*func) (struct speed_params *),
403 mpfr_prec_t p,
404 mpfr_t x)
405 {
406 struct speed_params s;
407 mp_size_t size;
408 double t1, t2, d;
409 mpfr_t xtmp;
410
411 if (MPFR_IS_SINGULAR (x))
412 {
413 mpfr_fprintf (stderr, "x=%RNf is not a regular number.\n");
414 abort ();
415 }
416 if (MPFR_IS_NEG (x))
417 s.align_xp = 2;
418 else
419 s.align_xp = 1;
420
421 s.align_yp = s.align_wp = 64;
422 s.size = p;
423 size = (p - 1)/GMP_NUMB_BITS+1;
424
425 mpfr_init2 (xtmp, p);
426 mpn_random (xtmp->_mpfr_d, size);
427 xtmp->_mpfr_d[size-1] |= MPFR_LIMB_HIGHBIT;
428 MPFR_SET_EXP (xtmp, -53);
429 mpfr_add_ui (xtmp, xtmp, 1, MPFR_RNDN);
430 mpfr_mul (xtmp, xtmp, x, MPFR_RNDN); /* xtmp = x*(1+perturb) */
431 /* where perturb ~ 2^(-53) is */
432 /* randomly chosen. */
433 s.xp = xtmp->_mpfr_d;
434 s.r = MPFR_GET_EXP (xtmp);
435
436 *threshold1 = 0;
437 *threshold2 = 0;
438 *threshold3 = 0;
439 t1 = mpfr_speed_measure (func, &s, "function 1");
440
441 if (MPFR_IS_NEG (x))
442 *threshold1 = INT_MIN;
443 else
444 *threshold3 = INT_MAX;
445 *threshold2 = INT_MAX;
446 t2 = mpfr_speed_measure (func, &s, "function 2");
447
448 /* t1 is the time of the first algo (used for low prec) */
449 if (t2 >= t1)
450 d = (t2 - t1) / t2;
451 else
452 d = (t2 - t1) / t1;
453 /* d > 0 if we have to use algo 1.
454 d < 0 if we have to use algo 2 */
455 mpfr_clear (xtmp);
456 return d;
457 }
458
459 /* Tune a function with a simple THRESHOLD
460 The function doesn't depend on another threshold.
461 It assumes that it uses algo1 if p < THRESHOLD
462 and algo2 otherwise.
463 if algo2 is better for low prec, and algo1 better for high prec,
464 the behaviour of this function is undefined. */
465 static void
466 tune_simple_func (mpfr_prec_t *threshold,
467 double (*func) (struct speed_params *),
468 mpfr_prec_t pstart)
469 {
470 double measure[THRESHOLD_FINAL_WINDOW+1];
471 double d;
472 mpfr_prec_t pstep;
473 int i, numpos, numneg, try;
474 mpfr_prec_t pmin, pmax, p;
475
476 /* first look for a lower bound within 10% */
477 pmin = p = pstart;
478 d = domeasure (threshold, func, pmin);
479 if (d < 0.0)
480 {
481 if (verbose)
482 printf ("Oops: even for %lu, algo 2 seems to be faster!\n",
483 (unsigned long) pmin);
484 *threshold = MPFR_PREC_MIN;
485 return;
486 }
487 if (d >= 1.00)
488 for (;;)
489 {
490 d = domeasure (threshold, func, pmin);
491 if (d < 1.00)
492 break;
493 p = pmin;
494 pmin += pmin/2;
495 }
496 pmin = p;
497 for (;;)
498 {
499 d = domeasure (threshold, func, pmin);
500 if (d < 0.10)
501 break;
502 pmin += GMP_NUMB_BITS;
503 }
504
505 /* then look for an upper bound within 20% */
506 pmax = pmin * 2;
507 for (;;)
508 {
509 d = domeasure (threshold, func, pmax);
510 if (d < -0.20)
511 break;
512 pmax += pmin / 2; /* don't increase too rapidly */
513 }
514
515 /* The threshold is between pmin and pmax. Affine them */
516 try = 0;
517 while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW)
518 {
519 pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1);
520 if (verbose)
521 printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep);
522 p = (pmin + pmax) / 2;
523 for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++)
524 {
525 measure[i] = domeasure (threshold, func,
526 p+(i-THRESHOLD_WINDOW/2)*pstep);
527 if (measure[i] > 0)
528 numpos ++;
529 else if (measure[i] < 0)
530 numneg ++;
531 }
532 if (numpos > numneg)
533 /* We use more often algo 1 than algo 2 */
534 pmin = p - THRESHOLD_WINDOW/2*pstep;
535 else if (numpos < numneg)
536 pmax = p + THRESHOLD_WINDOW/2*pstep;
537 else
538 /* numpos == numneg ... */
539 if (++ try > 2)
540 {
541 *threshold = p;
542 if (verbose)
543 printf ("Quick find: %lu\n", *threshold);
544 return ;
545 }
546 }
547
548 /* Final tune... */
549 if (verbose)
550 printf ("Finalizing in [%lu, %lu]... ", pmin, pmax);
551 for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++)
552 measure[i] = domeasure (threshold, func, pmin+i);
553 i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1);
554 *threshold = pmin + i;
555 if (verbose)
556 printf ("%lu\n", *threshold);
557 return;
558 }
559
560 /* Tune a function which behavior depends on both p and x,
561 in a given direction.
562 It assumes that for (x,p) close to zero, algo1 is used
563 and algo2 is used when (x,p) is far from zero.
564 If algo2 is better for low prec, and algo1 better for high prec,
565 the behaviour of this function is undefined.
566 This tuning function tries couples (x,p) of the form (ell*dirx, ell*dirp)
567 until it finds a point on the boundary. It returns ell.
568 */
569 static void
570 tune_simple_func_in_some_direction (long int *threshold1,
571 long int *threshold2,
572 long int *threshold3,
573 double (*func) (struct speed_params *),
574 mpfr_prec_t pstart,
575 int dirx, int dirp,
576 mpfr_t xres, mpfr_prec_t *pres)
577 {
578 double measure[THRESHOLD_FINAL_WINDOW+1];
579 double d;
580 mpfr_prec_t pstep;
581 int i, numpos, numneg, try;
582 mpfr_prec_t pmin, pmax, p;
583 mpfr_t xmin, xmax, x;
584 mpfr_t ratio;
585
586 mpfr_init2 (ratio, MPFR_SMALL_PRECISION);
587 mpfr_set_si (ratio, dirx, MPFR_RNDN);
588 mpfr_div_si (ratio, ratio, dirp, MPFR_RNDN);
589
590 mpfr_init2 (xmin, MPFR_SMALL_PRECISION);
591 mpfr_init2 (xmax, MPFR_SMALL_PRECISION);
592 mpfr_init2 (x, MPFR_SMALL_PRECISION);
593
594 /* first look for a lower bound within 10% */
595 pmin = p = pstart;
596 mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
597 mpfr_set (x, xmin, MPFR_RNDN);
598
599 d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
600 if (d < 0.0)
601 {
602 if (verbose)
603 printf ("Oops: even for %lu, algo 2 seems to be faster!\n",
604 (unsigned long) pmin);
605 *pres = MPFR_PREC_MIN;
606 mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
607 mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
608 return;
609 }
610 if (d >= 1.00)
611 for (;;)
612 {
613 d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
614 if (d < 1.00)
615 break;
616 p = pmin;
617 mpfr_set (x, xmin, MPFR_RNDN);
618 pmin += pmin/2;
619 mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
620 }
621 pmin = p;
622 mpfr_set (xmin, x, MPFR_RNDN);
623 for (;;)
624 {
625 d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
626 if (d < 0.10)
627 break;
628 pmin += GMP_NUMB_BITS;
629 mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
630 }
631
632 /* then look for an upper bound within 20% */
633 pmax = pmin * 2;
634 mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
635 for (;;)
636 {
637 d = domeasure2 (threshold1, threshold2, threshold3, func, pmax, xmax);
638 if (d < -0.20)
639 break;
640 pmax += pmin / 2; /* don't increase too rapidly */
641 mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
642 }
643
644 /* The threshold is between pmin and pmax. Affine them */
645 try = 0;
646 while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW)
647 {
648 pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1);
649 if (verbose)
650 printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep);
651 p = (pmin + pmax) / 2;
652 mpfr_mul_ui (x, ratio, (unsigned int)p, MPFR_RNDN);
653 for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++)
654 {
655 *pres = p+(i-THRESHOLD_WINDOW/2)*pstep;
656 mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
657 measure[i] = domeasure2 (threshold1, threshold2, threshold3,
658 func, *pres, xres);
659 if (measure[i] > 0)
660 numpos ++;
661 else if (measure[i] < 0)
662 numneg ++;
663 }
664 if (numpos > numneg)
665 {
666 /* We use more often algo 1 than algo 2 */
667 pmin = p - THRESHOLD_WINDOW/2*pstep;
668 mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
669 }
670 else if (numpos < numneg)
671 {
672 pmax = p + THRESHOLD_WINDOW/2*pstep;
673 mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
674 }
675 else
676 /* numpos == numneg ... */
677 if (++ try > 2)
678 {
679 *pres = p;
680 mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
681 if (verbose)
682 printf ("Quick find: %lu\n", *pres);
683 mpfr_clear (ratio);
684 mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
685 return ;
686 }
687 }
688
689 /* Final tune... */
690 if (verbose)
691 printf ("Finalizing in [%lu, %lu]... ", pmin, pmax);
692 for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++)
693 {
694 *pres = pmin+i;
695 mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
696 measure[i] = domeasure2 (threshold1, threshold2, threshold3,
697 func, *pres, xres);
698 }
699 i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1);
700 *pres = pmin + i;
701 mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
702 if (verbose)
703 printf ("%lu\n", *pres);
704 mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
705 return;
706 }
707
708 /************************************
709 * Tune Mulders' mulhigh function *
710 ************************************/
711 #define TOLERANCE 1.00
712 #define MULDERS_TABLE_SIZE 1024
713 #ifndef MPFR_MULHIGH_SIZE
714 # define MPFR_MULHIGH_SIZE MULDERS_TABLE_SIZE
715 #endif
716 #ifndef MPFR_SQRHIGH_SIZE
717 # define MPFR_SQRHIGH_SIZE MULDERS_TABLE_SIZE
718 #endif
719 #ifndef MPFR_DIVHIGH_SIZE
720 # define MPFR_DIVHIGH_SIZE MULDERS_TABLE_SIZE
721 #endif
722 #define MPFR_MULHIGH_TAB_SIZE MPFR_MULHIGH_SIZE
723 #define MPFR_SQRHIGH_TAB_SIZE MPFR_SQRHIGH_SIZE
724 #define MPFR_DIVHIGH_TAB_SIZE MPFR_DIVHIGH_SIZE
725 #include "mulders.c"
726
727 static double
728 speed_mpfr_mulhigh (struct speed_params *s)
729 {
730 SPEED_ROUTINE_MPN_MUL_N (mpfr_mulhigh_n);
731 }
732
733 static double
734 speed_mpfr_sqrhigh (struct speed_params *s)
735 {
736 SPEED_ROUTINE_MPN_SQR (mpfr_sqrhigh_n);
737 }
738
739 static double
740 speed_mpfr_divhigh (struct speed_params *s)
741 {
742 SPEED_ROUTINE_MPN_DC_DIVREM_CALL (mpfr_divhigh_n (q, a, d, s->size));
743 }
744
745 #define MAX_STEPS 513 /* maximum number of values of k tried for a given n */
746
747 /* Tune mpfr_mulhigh_n for size n */
748 static mp_size_t
749 tune_mul_mulders_upto (mp_size_t n)
750 {
751 struct speed_params s;
752 mp_size_t k, kbest, step;
753 double t, tbest;
754 MPFR_TMP_DECL (marker);
755
756 if (n == 0)
757 return -1;
758
759 MPFR_TMP_MARK (marker);
760 s.align_xp = s.align_yp = s.align_wp = 64;
761 s.size = n;
762 s.xp = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
763 s.yp = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
764 mpn_random (s.xp, n);
765 mpn_random (s.yp, n);
766
767 /* Check k == -1, mpn_mul_basecase */
768 mulhigh_ktab[n] = -1;
769 kbest = -1;
770 tbest = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
771
772 /* Check k == 0, mpn_mulhigh_n_basecase */
773 mulhigh_ktab[n] = 0;
774 t = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
775 if (t * TOLERANCE < tbest)
776 kbest = 0, tbest = t;
777
778 /* Check Mulders with cutoff point k */
779 step = 1 + n / (2 * MAX_STEPS);
780 /* we need k >= (n+3)/2, which translates into k >= (n+4)/2 in C */
781 for (k = (n + 4) / 2 ; k < n ; k += step)
782 {
783 mulhigh_ktab[n] = k;
784 t = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
785 if (t * TOLERANCE < tbest)
786 kbest = k, tbest = t;
787 }
788
789 mulhigh_ktab[n] = kbest;
790
791 MPFR_TMP_FREE (marker);
792 return kbest;
793 }
794
795 /* Tune mpfr_sqrhigh_n for size n */
796 static mp_size_t
797 tune_sqr_mulders_upto (mp_size_t n)
798 {
799 struct speed_params s;
800 mp_size_t k, kbest, step;
801 double t, tbest;
802 MPFR_TMP_DECL (marker);
803
804 if (n == 0)
805 return -1;
806
807 MPFR_TMP_MARK (marker);
808 s.align_xp = s.align_wp = 64;
809 s.size = n;
810 s.xp = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
811 mpn_random (s.xp, n);
812
813 /* Check k == -1, mpn_sqr_basecase */
814 sqrhigh_ktab[n] = -1;
815 kbest = -1;
816 tbest = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
817
818 /* Check k == 0, mpfr_mulhigh_n_basecase */
819 sqrhigh_ktab[n] = 0;
820 t = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
821 if (t * TOLERANCE < tbest)
822 kbest = 0, tbest = t;
823
824 /* Check Mulders */
825 step = 1 + n / (2 * MAX_STEPS);
826 /* we need k >= (n+3)/2, which translates into k >= (n+4)/2 in C */
827 for (k = (n + 4) / 2 ; k < n ; k += step)
828 {
829 sqrhigh_ktab[n] = k;
830 t = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
831 if (t * TOLERANCE < tbest)
832 kbest = k, tbest = t;
833 }
834
835 sqrhigh_ktab[n] = kbest;
836
837 MPFR_TMP_FREE (marker);
838 return kbest;
839 }
840
841 /* Tune mpfr_divhigh_n for size n */
842 static mp_size_t
843 tune_div_mulders_upto (mp_size_t n)
844 {
845 struct speed_params s;
846 mp_size_t k, kbest, step;
847 double t, tbest;
848 MPFR_TMP_DECL (marker);
849
850 if (n == 0)
851 return 0;
852
853 MPFR_TMP_MARK (marker);
854 s.align_xp = s.align_yp = s.align_wp = s.align_wp2 = 64;
855 s.size = n;
856 s.xp = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
857 s.yp = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
858 mpn_random (s.xp, n);
859 mpn_random (s.yp, n);
860
861 /* Check k == n, i.e., mpn_divrem */
862 divhigh_ktab[n] = n;
863 kbest = n;
864 tbest = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
865
866 /* Check k == 0, i.e., mpfr_divhigh_n_basecase */
867 #if defined(WANT_GMP_INTERNALS) && defined(HAVE___GMPN_SBPI1_DIVAPPR_Q)
868 if (n > 2) /* mpn_sbpi1_divappr_q requires dn > 2 */
869 #endif
870 {
871 divhigh_ktab[n] = 0;
872 t = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
873 if (t * TOLERANCE < tbest)
874 kbest = 0, tbest = t;
875 }
876
877 /* Check Mulders */
878 step = 1 + n / (2 * MAX_STEPS);
879 /* we should have (n+3)/2 <= k < n, which translates into
880 (n+4)/2 <= k < n in C */
881 for (k = (n + 4) / 2 ; k < n ; k += step)
882 {
883 divhigh_ktab[n] = k;
884 t = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
885 if (t * TOLERANCE < tbest)
886 kbest = k, tbest = t;
887 }
888
889 divhigh_ktab[n] = kbest;
890
891 MPFR_TMP_FREE (marker);
892
893 return kbest;
894 }
895
896 static void
897 tune_mul_mulders (FILE *f)
898 {
899 mp_size_t k;
900
901 if (verbose)
902 printf ("Tuning mpfr_mulhigh_n[%d]", (int) MPFR_MULHIGH_TAB_SIZE);
903 fprintf (f, "#define MPFR_MULHIGH_TAB \\\n ");
904 for (k = 0 ; k < MPFR_MULHIGH_TAB_SIZE ; k++)
905 {
906 fprintf (f, "%d", (int) tune_mul_mulders_upto (k));
907 if (k != MPFR_MULHIGH_TAB_SIZE-1)
908 fputc (',', f);
909 if ((k+1) % 16 == 0)
910 fprintf (f, " \\\n ");
911 if (verbose)
912 putchar ('.');
913 }
914 fprintf (f, " \n");
915 if (verbose)
916 putchar ('\n');
917 }
918
919 static void
920 tune_sqr_mulders (FILE *f)
921 {
922 mp_size_t k;
923
924 if (verbose)
925 printf ("Tuning mpfr_sqrhigh_n[%d]", (int) MPFR_SQRHIGH_TAB_SIZE);
926 fprintf (f, "#define MPFR_SQRHIGH_TAB \\\n ");
927 for (k = 0 ; k < MPFR_SQRHIGH_TAB_SIZE ; k++)
928 {
929 fprintf (f, "%d", (int) tune_sqr_mulders_upto (k));
930 if (k != MPFR_SQRHIGH_TAB_SIZE-1)
931 fputc (',', f);
932 if ((k+1) % 16 == 0)
933 fprintf (f, " \\\n ");
934 if (verbose)
935 putchar ('.');
936 }
937 fprintf (f, " \n");
938 if (verbose)
939 putchar ('\n');
940 }
941
942 static void
943 tune_div_mulders (FILE *f)
944 {
945 mp_size_t k;
946
947 if (verbose)
948 printf ("Tuning mpfr_divhigh_n[%d]", (int) MPFR_DIVHIGH_TAB_SIZE);
949 fprintf (f, "#define MPFR_DIVHIGH_TAB \\\n ");
950 for (k = 0 ; k < MPFR_DIVHIGH_TAB_SIZE ; k++)
951 {
952 fprintf (f, "%d", (int) tune_div_mulders_upto (k));
953 if (k != MPFR_DIVHIGH_TAB_SIZE - 1)
954 fputc (',', f);
955 if ((k+1) % 16 == 0)
956 fprintf (f, " /*%zu-%zu*/ \\\n ", k - 15, k);
957 if (verbose)
958 putchar ('.');
959 }
960 fprintf (f, " \n");
961 if (verbose)
962 putchar ('\n');
963 }
964
965 /*******************************************************
966 * Tuning functions for mpfr_ai *
967 *******************************************************/
968
969 long int mpfr_ai_threshold1;
970 long int mpfr_ai_threshold2;
971 long int mpfr_ai_threshold3;
972 #undef MPFR_AI_THRESHOLD1
973 #define MPFR_AI_THRESHOLD1 mpfr_ai_threshold1
974 #undef MPFR_AI_THRESHOLD2
975 #define MPFR_AI_THRESHOLD2 mpfr_ai_threshold2
976 #undef MPFR_AI_THRESHOLD3
977 #define MPFR_AI_THRESHOLD3 mpfr_ai_threshold3
978
979 #include "ai.c"
980
981 static double
982 speed_mpfr_ai (struct speed_params *s)
983 {
984 SPEED_MPFR_FUNC_WITH_EXPONENT (mpfr_ai);
985 }
986
987
988 /*******************************************************
989 * Tune all the threshold of MPFR *
990 * Warning: tune the function in their dependent order!*
991 *******************************************************/
992 static void
993 all (const char *filename)
994 {
995 FILE *f;
996 time_t start_time, end_time;
997 struct tm *tp;
998 mpfr_t x1, x2, x3, tmp1, tmp2;
999 mpfr_prec_t p1, p2, p3;
1000
1001 f = fopen (filename, "w");
1002 if (f == NULL)
1003 {
1004 fprintf (stderr, "Can't open file '%s' for writing.\n", filename);
1005 abort ();
1006 }
1007
1008 speed_time_init ();
1009 if (verbose)
1010 {
1011 printf ("Using: %s\n", speed_time_string);
1012 printf ("speed_precision %d", speed_precision);
1013 if (speed_unittime == 1.0)
1014 printf (", speed_unittime 1 cycle");
1015 else
1016 printf (", speed_unittime %.2e secs", speed_unittime);
1017 if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
1018 printf (", CPU freq unknown\n");
1019 else
1020 printf (", CPU freq %.2f MHz\n\n", 1e-6/speed_cycletime);
1021 }
1022
1023 time (&start_time);
1024 tp = localtime (&start_time);
1025 fprintf (f, "/* Generated by MPFR's tuneup.c, %d-%02d-%02d, ",
1026 tp->tm_year+1900, tp->tm_mon+1, tp->tm_mday);
1027
1028 #ifdef __ICC
1029 fprintf (f, "icc %d.%d.%d */\n", __ICC / 100, __ICC / 10 % 10, __ICC % 10);
1030 #elif defined(__GNUC__)
1031 #ifdef __GNUC_PATCHLEVEL__
1032 fprintf (f, "gcc %d.%d.%d */\n", __GNUC__, __GNUC_MINOR__,
1033 __GNUC_PATCHLEVEL__);
1034 #else
1035 fprintf (f, "gcc %d.%d */\n", __GNUC__, __GNUC_MINOR__);
1036 #endif
1037 #elif defined (__SUNPRO_C)
1038 fprintf (f, "Sun C %d.%d */\n", __SUNPRO_C / 0x100, __SUNPRO_C % 0x100);
1039 #elif defined (__sgi) && defined (_COMPILER_VERSION)
1040 fprintf (f, "MIPSpro C %d.%d.%d */\n",
1041 _COMPILER_VERSION / 100,
1042 _COMPILER_VERSION / 10 % 10,
1043 _COMPILER_VERSION % 10);
1044 #elif defined (__DECC) && defined (__DECC_VER)
1045 fprintf (f, "DEC C %d */\n", __DECC_VER);
1046 #else
1047 fprintf (f, "system compiler */\n");
1048 #endif
1049 fprintf (f, "\n\n");
1050 fprintf (f, "#ifndef MPFR_TUNE_CASE\n");
1051 fprintf (f, "#define MPFR_TUNE_CASE \"src/mparam.h\"\n");
1052 fprintf (f, "#endif\n\n");
1053
1054 /* Tune mulhigh */
1055 tune_mul_mulders (f);
1056
1057 /* Tune sqrhigh */
1058 tune_sqr_mulders (f);
1059
1060 /* Tune divhigh */
1061 tune_div_mulders (f);
1062 fflush (f);
1063
1064 /* Tune mpfr_mul (threshold is in limbs, but it doesn't matter too much) */
1065 if (verbose)
1066 printf ("Tuning mpfr_mul...\n");
1067 tune_simple_func (&mpfr_mul_threshold, speed_mpfr_mul,
1068 2*GMP_NUMB_BITS+1);
1069 fprintf (f, "#define MPFR_MUL_THRESHOLD %lu /* limbs */\n",
1070 (unsigned long) (mpfr_mul_threshold - 1) / GMP_NUMB_BITS + 1);
1071
1072 /* Tune mpfr_sqr (threshold is in limbs, but it doesn't matter too much) */
1073 if (verbose)
1074 printf ("Tuning mpfr_sqr...\n");
1075 tune_simple_func (&mpfr_sqr_threshold, speed_mpfr_sqr,
1076 2*GMP_NUMB_BITS+1);
1077 fprintf (f, "#define MPFR_SQR_THRESHOLD %lu /* limbs */\n",
1078 (unsigned long) (mpfr_sqr_threshold - 1) / GMP_NUMB_BITS + 1);
1079
1080 /* Tune mpfr_div (threshold is in limbs, but it doesn't matter too much) */
1081 if (verbose)
1082 printf ("Tuning mpfr_div...\n");
1083 tune_simple_func (&mpfr_div_threshold, speed_mpfr_div,
1084 2*GMP_NUMB_BITS+1);
1085 fprintf (f, "#define MPFR_DIV_THRESHOLD %lu /* limbs */\n",
1086 (unsigned long) (mpfr_div_threshold - 1) / GMP_NUMB_BITS + 1);
1087
1088 /* Tune mpfr_exp_2 */
1089 if (verbose)
1090 printf ("Tuning mpfr_exp_2...\n");
1091 tune_simple_func (&mpfr_exp_2_threshold, speed_mpfr_exp_2, GMP_NUMB_BITS);
1092 fprintf (f, "#define MPFR_EXP_2_THRESHOLD %lu /* bits */\n",
1093 (unsigned long) mpfr_exp_2_threshold);
1094
1095 /* Tune mpfr_exp */
1096 if (verbose)
1097 printf ("Tuning mpfr_exp...\n");
1098 tune_simple_func (&mpfr_exp_threshold, speed_mpfr_exp,
1099 MPFR_PREC_MIN+3*GMP_NUMB_BITS);
1100 fprintf (f, "#define MPFR_EXP_THRESHOLD %lu /* bits */\n",
1101 (unsigned long) mpfr_exp_threshold);
1102
1103 /* Tune mpfr_sin_cos */
1104 if (verbose)
1105 printf ("Tuning mpfr_sin_cos...\n");
1106 tune_simple_func (&mpfr_sincos_threshold, speed_mpfr_sincos,
1107 MPFR_PREC_MIN+3*GMP_NUMB_BITS);
1108 fprintf (f, "#define MPFR_SINCOS_THRESHOLD %lu /* bits */\n",
1109 (unsigned long) mpfr_sincos_threshold);
1110
1111 /* Tune mpfr_ai */
1112 if (verbose)
1113 printf ("Tuning mpfr_ai...\n");
1114 mpfr_init2 (x1, MPFR_SMALL_PRECISION);
1115 mpfr_init2 (x2, MPFR_SMALL_PRECISION);
1116 mpfr_init2 (x3, MPFR_SMALL_PRECISION);
1117 mpfr_init2 (tmp1, MPFR_SMALL_PRECISION);
1118 mpfr_init2 (tmp2, MPFR_SMALL_PRECISION);
1119
1120 tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
1121 &mpfr_ai_threshold3, speed_mpfr_ai,
1122 MPFR_PREC_MIN+GMP_NUMB_BITS,
1123 -60, 200, x1, &p1);
1124 tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
1125 &mpfr_ai_threshold3, speed_mpfr_ai,
1126 MPFR_PREC_MIN+GMP_NUMB_BITS,
1127 -20, 500, x2, &p2);
1128 tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
1129 &mpfr_ai_threshold3, speed_mpfr_ai,
1130 MPFR_PREC_MIN+GMP_NUMB_BITS,
1131 40, 200, x3, &p3);
1132
1133 mpfr_mul_ui (tmp1, x2, (unsigned long)p1, MPFR_RNDN);
1134 mpfr_mul_ui (tmp2, x1, (unsigned long)p2, MPFR_RNDN);
1135 mpfr_sub (tmp1, tmp1, tmp2, MPFR_RNDN);
1136 mpfr_div_ui (tmp1, tmp1, MPFR_AI_SCALE, MPFR_RNDN);
1137
1138 mpfr_set_ui (tmp2, (unsigned long)p1, MPFR_RNDN);
1139 mpfr_sub_ui (tmp2, tmp2, (unsigned long)p2, MPFR_RNDN);
1140 mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN);
1141 mpfr_ai_threshold1 = mpfr_get_si (tmp2, MPFR_RNDN);
1142
1143 mpfr_sub (tmp2, x2, x1, MPFR_RNDN);
1144 mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN);
1145 mpfr_ai_threshold2 = mpfr_get_si (tmp2, MPFR_RNDN);
1146
1147 mpfr_set_ui (tmp1, (unsigned long)p3, MPFR_RNDN);
1148 mpfr_mul_si (tmp1, tmp1, mpfr_ai_threshold2, MPFR_RNDN);
1149 mpfr_ui_sub (tmp1, MPFR_AI_SCALE, tmp1, MPFR_RNDN);
1150 mpfr_div (tmp1, tmp1, x3, MPFR_RNDN);
1151 mpfr_ai_threshold3 = mpfr_get_si (tmp1, MPFR_RNDN);
1152
1153 fprintf (f, "#define MPFR_AI_THRESHOLD1 %ld /* threshold for negative input of mpfr_ai */\n", mpfr_ai_threshold1);
1154 fprintf (f, "#define MPFR_AI_THRESHOLD2 %ld\n", mpfr_ai_threshold2);
1155 fprintf (f, "#define MPFR_AI_THRESHOLD3 %ld\n", mpfr_ai_threshold3);
1156
1157 mpfr_clear (x1); mpfr_clear (x2); mpfr_clear (x3);
1158 mpfr_clear (tmp1); mpfr_clear (tmp2);
1159
1160 /* End of tuning */
1161 time (&end_time);
1162 fprintf (f, "/* Tuneup completed successfully, took %ld seconds */\n",
1163 (long) (end_time - start_time));
1164 if (verbose)
1165 printf ("Complete (took %ld seconds).\n", (long) (end_time - start_time));
1166
1167 fclose (f);
1168 }
1169
1170
1171 /* Main function */
1172 int main (int argc, char *argv[])
1173 {
1174 /* Unbuffered so if output is redirected to a file it isn't lost if the
1175 program is killed part way through. */
1176 setbuf (stdout, NULL);
1177 setbuf (stderr, NULL);
1178
1179 verbose = argc > 1;
1180
1181 if (verbose)
1182 printf ("Tuning MPFR (Coffee time?)...\n");
1183
1184 all ("mparam.h");
1185
1186 return 0;
1187 }
1188