tuneup.c revision 1.1.1.1 1 1.1 mrg /* Tune various threshold of MPFR
2 1.1 mrg
3 1.1 mrg Copyright 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4 1.1 mrg Contributed by the AriC and Caramel projects, INRIA.
5 1.1 mrg
6 1.1 mrg This file is part of the GNU MPFR Library.
7 1.1 mrg
8 1.1 mrg The GNU MPFR Library is free software; you can redistribute it and/or modify
9 1.1 mrg it under the terms of the GNU Lesser General Public License as published by
10 1.1 mrg the Free Software Foundation; either version 3 of the License, or (at your
11 1.1 mrg option) any later version.
12 1.1 mrg
13 1.1 mrg The GNU MPFR Library is distributed in the hope that it will be useful, but
14 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 1.1 mrg License for more details.
17 1.1 mrg
18 1.1 mrg You should have received a copy of the GNU Lesser General Public License
19 1.1 mrg along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
20 1.1 mrg http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 1.1 mrg 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 1.1 mrg
23 1.1 mrg #include <stdlib.h>
24 1.1 mrg #include <time.h>
25 1.1 mrg
26 1.1 mrg #define MPFR_NEED_LONGLONG_H
27 1.1 mrg #include "mpfr-impl.h"
28 1.1 mrg
29 1.1 mrg #undef _PROTO
30 1.1 mrg #define _PROTO __GMP_PROTO
31 1.1 mrg #include "speed.h"
32 1.1 mrg
33 1.1 mrg int verbose;
34 1.1 mrg
35 1.1 mrg /* template for an unary function */
36 1.1 mrg /* s->size: precision of both input and output
37 1.1 mrg s->xp : Mantissa of first input
38 1.1 mrg s->yp : mantissa of second input */
39 1.1 mrg #define SPEED_MPFR_FUNC(mean_fun) \
40 1.1 mrg do \
41 1.1 mrg { \
42 1.1 mrg unsigned i; \
43 1.1 mrg mpfr_limb_ptr wp; \
44 1.1 mrg double t; \
45 1.1 mrg mpfr_t w, x; \
46 1.1 mrg mp_size_t size; \
47 1.1 mrg MPFR_TMP_DECL (marker); \
48 1.1 mrg \
49 1.1 mrg SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
50 1.1 mrg SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
51 1.1 mrg MPFR_TMP_MARK (marker); \
52 1.1 mrg \
53 1.1 mrg size = (s->size-1)/GMP_NUMB_BITS+1; \
54 1.1 mrg s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
55 1.1 mrg MPFR_TMP_INIT1 (s->xp, x, s->size); \
56 1.1 mrg MPFR_SET_EXP (x, 0); \
57 1.1 mrg \
58 1.1 mrg MPFR_TMP_INIT (wp, w, s->size, size); \
59 1.1 mrg \
60 1.1 mrg speed_operand_src (s, s->xp, size); \
61 1.1 mrg speed_operand_dst (s, wp, size); \
62 1.1 mrg speed_cache_fill (s); \
63 1.1 mrg \
64 1.1 mrg speed_starttime (); \
65 1.1 mrg i = s->reps; \
66 1.1 mrg do \
67 1.1 mrg mean_fun (w, x, MPFR_RNDN); \
68 1.1 mrg while (--i != 0); \
69 1.1 mrg t = speed_endtime (); \
70 1.1 mrg \
71 1.1 mrg MPFR_TMP_FREE (marker); \
72 1.1 mrg return t; \
73 1.1 mrg } \
74 1.1 mrg while (0)
75 1.1 mrg
76 1.1 mrg /* same as SPEED_MPFR_FUNC, but for say mpfr_sin_cos (y, z, x, r) */
77 1.1 mrg #define SPEED_MPFR_FUNC2(mean_fun) \
78 1.1 mrg do \
79 1.1 mrg { \
80 1.1 mrg unsigned i; \
81 1.1 mrg mpfr_limb_ptr vp, wp; \
82 1.1 mrg double t; \
83 1.1 mrg mpfr_t v, w, x; \
84 1.1 mrg mp_size_t size; \
85 1.1 mrg MPFR_TMP_DECL (marker); \
86 1.1 mrg \
87 1.1 mrg SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
88 1.1 mrg SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
89 1.1 mrg MPFR_TMP_MARK (marker); \
90 1.1 mrg \
91 1.1 mrg size = (s->size-1)/GMP_NUMB_BITS+1; \
92 1.1 mrg s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
93 1.1 mrg MPFR_TMP_INIT1 (s->xp, x, s->size); \
94 1.1 mrg MPFR_SET_EXP (x, 0); \
95 1.1 mrg \
96 1.1 mrg MPFR_TMP_INIT (vp, v, s->size, size); \
97 1.1 mrg MPFR_TMP_INIT (wp, w, s->size, size); \
98 1.1 mrg \
99 1.1 mrg speed_operand_src (s, s->xp, size); \
100 1.1 mrg speed_operand_dst (s, vp, size); \
101 1.1 mrg speed_operand_dst (s, wp, size); \
102 1.1 mrg speed_cache_fill (s); \
103 1.1 mrg \
104 1.1 mrg speed_starttime (); \
105 1.1 mrg i = s->reps; \
106 1.1 mrg do \
107 1.1 mrg mean_fun (v, w, x, MPFR_RNDN); \
108 1.1 mrg while (--i != 0); \
109 1.1 mrg t = speed_endtime (); \
110 1.1 mrg \
111 1.1 mrg MPFR_TMP_FREE (marker); \
112 1.1 mrg return t; \
113 1.1 mrg } \
114 1.1 mrg while (0)
115 1.1 mrg
116 1.1 mrg /* template for a function like mpfr_mul */
117 1.1 mrg #define SPEED_MPFR_OP(mean_fun) \
118 1.1 mrg do \
119 1.1 mrg { \
120 1.1 mrg unsigned i; \
121 1.1 mrg mpfr_limb_ptr wp; \
122 1.1 mrg double t; \
123 1.1 mrg mpfr_t w, x, y; \
124 1.1 mrg mp_size_t size; \
125 1.1 mrg MPFR_TMP_DECL (marker); \
126 1.1 mrg \
127 1.1 mrg SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
128 1.1 mrg SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
129 1.1 mrg MPFR_TMP_MARK (marker); \
130 1.1 mrg \
131 1.1 mrg size = (s->size-1)/GMP_NUMB_BITS+1; \
132 1.1 mrg s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
133 1.1 mrg MPFR_TMP_INIT1 (s->xp, x, s->size); \
134 1.1 mrg MPFR_SET_EXP (x, 0); \
135 1.1 mrg s->yp[size-1] |= MPFR_LIMB_HIGHBIT; \
136 1.1 mrg MPFR_TMP_INIT1 (s->yp, y, s->size); \
137 1.1 mrg MPFR_SET_EXP (y, 0); \
138 1.1 mrg \
139 1.1 mrg MPFR_TMP_INIT (wp, w, s->size, size); \
140 1.1 mrg \
141 1.1 mrg speed_operand_src (s, s->xp, size); \
142 1.1 mrg speed_operand_src (s, s->yp, size); \
143 1.1 mrg speed_operand_dst (s, wp, size); \
144 1.1 mrg speed_cache_fill (s); \
145 1.1 mrg \
146 1.1 mrg speed_starttime (); \
147 1.1 mrg i = s->reps; \
148 1.1 mrg do \
149 1.1 mrg mean_fun (w, x, y, MPFR_RNDN); \
150 1.1 mrg while (--i != 0); \
151 1.1 mrg t = speed_endtime (); \
152 1.1 mrg \
153 1.1 mrg MPFR_TMP_FREE (marker); \
154 1.1 mrg return t; \
155 1.1 mrg } \
156 1.1 mrg while (0)
157 1.1 mrg
158 1.1 mrg /* special template for mpfr_mul(a,b,b) */
159 1.1 mrg #define SPEED_MPFR_SQR(mean_fun) \
160 1.1 mrg do \
161 1.1 mrg { \
162 1.1 mrg unsigned i; \
163 1.1 mrg mpfr_limb_ptr wp; \
164 1.1 mrg double t; \
165 1.1 mrg mpfr_t w, x; \
166 1.1 mrg mp_size_t size; \
167 1.1 mrg MPFR_TMP_DECL (marker); \
168 1.1 mrg \
169 1.1 mrg SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
170 1.1 mrg SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
171 1.1 mrg MPFR_TMP_MARK (marker); \
172 1.1 mrg \
173 1.1 mrg size = (s->size-1)/GMP_NUMB_BITS+1; \
174 1.1 mrg s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
175 1.1 mrg MPFR_TMP_INIT1 (s->xp, x, s->size); \
176 1.1 mrg MPFR_SET_EXP (x, 0); \
177 1.1 mrg \
178 1.1 mrg MPFR_TMP_INIT (wp, w, s->size, size); \
179 1.1 mrg \
180 1.1 mrg speed_operand_src (s, s->xp, size); \
181 1.1 mrg speed_operand_dst (s, wp, size); \
182 1.1 mrg speed_cache_fill (s); \
183 1.1 mrg \
184 1.1 mrg speed_starttime (); \
185 1.1 mrg i = s->reps; \
186 1.1 mrg do \
187 1.1 mrg mean_fun (w, x, x, MPFR_RNDN); \
188 1.1 mrg while (--i != 0); \
189 1.1 mrg t = speed_endtime (); \
190 1.1 mrg \
191 1.1 mrg MPFR_TMP_FREE (marker); \
192 1.1 mrg return t; \
193 1.1 mrg } \
194 1.1 mrg while (0)
195 1.1 mrg
196 1.1 mrg /* s->size: precision of both input and output
197 1.1 mrg s->xp : Mantissa of first input
198 1.1 mrg s->r : exponent
199 1.1 mrg s->align_xp : sign (1 means positive, 2 means negative)
200 1.1 mrg */
201 1.1 mrg #define SPEED_MPFR_FUNC_WITH_EXPONENT(mean_fun) \
202 1.1 mrg do \
203 1.1 mrg { \
204 1.1 mrg unsigned i; \
205 1.1 mrg mpfr_limb_ptr wp; \
206 1.1 mrg double t; \
207 1.1 mrg mpfr_t w, x; \
208 1.1 mrg mp_size_t size; \
209 1.1 mrg MPFR_TMP_DECL (marker); \
210 1.1 mrg \
211 1.1 mrg SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
212 1.1 mrg SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
213 1.1 mrg MPFR_TMP_MARK (marker); \
214 1.1 mrg \
215 1.1 mrg size = (s->size-1)/GMP_NUMB_BITS+1; \
216 1.1 mrg s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
217 1.1 mrg MPFR_TMP_INIT1 (s->xp, x, s->size); \
218 1.1 mrg MPFR_SET_EXP (x, s->r); \
219 1.1 mrg if (s->align_xp == 2) MPFR_SET_NEG (x); \
220 1.1 mrg \
221 1.1 mrg MPFR_TMP_INIT (wp, w, s->size, size); \
222 1.1 mrg \
223 1.1 mrg speed_operand_src (s, s->xp, size); \
224 1.1 mrg speed_operand_dst (s, wp, size); \
225 1.1 mrg speed_cache_fill (s); \
226 1.1 mrg \
227 1.1 mrg speed_starttime (); \
228 1.1 mrg i = s->reps; \
229 1.1 mrg do \
230 1.1 mrg mean_fun (w, x, MPFR_RNDN); \
231 1.1 mrg while (--i != 0); \
232 1.1 mrg t = speed_endtime (); \
233 1.1 mrg \
234 1.1 mrg MPFR_TMP_FREE (marker); \
235 1.1 mrg return t; \
236 1.1 mrg } \
237 1.1 mrg while (0)
238 1.1 mrg
239 1.1 mrg /* First we include all the functions we want to tune inside this program.
240 1.1 mrg We can't use the GNU MPFR library since the thresholds are fixed macros. */
241 1.1 mrg
242 1.1 mrg /* Setup mpfr_exp_2 */
243 1.1 mrg mpfr_prec_t mpfr_exp_2_threshold;
244 1.1 mrg #undef MPFR_EXP_2_THRESHOLD
245 1.1 mrg #define MPFR_EXP_2_THRESHOLD mpfr_exp_2_threshold
246 1.1 mrg #include "exp_2.c"
247 1.1 mrg static double
248 1.1 mrg speed_mpfr_exp_2 (struct speed_params *s)
249 1.1 mrg {
250 1.1 mrg SPEED_MPFR_FUNC (mpfr_exp_2);
251 1.1 mrg }
252 1.1 mrg
253 1.1 mrg /* Setup mpfr_exp */
254 1.1 mrg mpfr_prec_t mpfr_exp_threshold;
255 1.1 mrg #undef MPFR_EXP_THRESHOLD
256 1.1 mrg #define MPFR_EXP_THRESHOLD mpfr_exp_threshold
257 1.1 mrg #include "exp.c"
258 1.1 mrg static double
259 1.1 mrg speed_mpfr_exp (struct speed_params *s)
260 1.1 mrg {
261 1.1 mrg SPEED_MPFR_FUNC (mpfr_exp);
262 1.1 mrg }
263 1.1 mrg
264 1.1 mrg /* Setup mpfr_sin_cos */
265 1.1 mrg mpfr_prec_t mpfr_sincos_threshold;
266 1.1 mrg #undef MPFR_SINCOS_THRESHOLD
267 1.1 mrg #define MPFR_SINCOS_THRESHOLD mpfr_sincos_threshold
268 1.1 mrg #include "sin_cos.c"
269 1.1 mrg #include "cos.c"
270 1.1 mrg static double
271 1.1 mrg speed_mpfr_sincos (struct speed_params *s)
272 1.1 mrg {
273 1.1 mrg SPEED_MPFR_FUNC2 (mpfr_sin_cos);
274 1.1 mrg }
275 1.1 mrg
276 1.1 mrg /* Setup mpfr_mul, mpfr_sqr and mpfr_div */
277 1.1 mrg mpfr_prec_t mpfr_mul_threshold;
278 1.1 mrg mpfr_prec_t mpfr_sqr_threshold;
279 1.1 mrg mpfr_prec_t mpfr_div_threshold;
280 1.1 mrg #undef MPFR_MUL_THRESHOLD
281 1.1 mrg #define MPFR_MUL_THRESHOLD mpfr_mul_threshold
282 1.1 mrg #undef MPFR_SQR_THRESHOLD
283 1.1 mrg #define MPFR_SQR_THRESHOLD mpfr_sqr_threshold
284 1.1 mrg #undef MPFR_DIV_THRESHOLD
285 1.1 mrg #define MPFR_DIV_THRESHOLD mpfr_div_threshold
286 1.1 mrg #include "mul.c"
287 1.1 mrg #include "div.c"
288 1.1 mrg static double
289 1.1 mrg speed_mpfr_mul (struct speed_params *s)
290 1.1 mrg {
291 1.1 mrg SPEED_MPFR_OP (mpfr_mul);
292 1.1 mrg }
293 1.1 mrg static double
294 1.1 mrg speed_mpfr_sqr (struct speed_params *s)
295 1.1 mrg {
296 1.1 mrg SPEED_MPFR_SQR (mpfr_mul);
297 1.1 mrg }
298 1.1 mrg static double
299 1.1 mrg speed_mpfr_div (struct speed_params *s)
300 1.1 mrg {
301 1.1 mrg SPEED_MPFR_OP (mpfr_div);
302 1.1 mrg }
303 1.1 mrg
304 1.1 mrg /************************************************
305 1.1 mrg * Common functions (inspired by GMP function) *
306 1.1 mrg ************************************************/
307 1.1 mrg static int
308 1.1 mrg analyze_data (double *dat, int ndat)
309 1.1 mrg {
310 1.1 mrg double x, min_x;
311 1.1 mrg int j, min_j;
312 1.1 mrg
313 1.1 mrg x = 0.0;
314 1.1 mrg for (j = 0; j < ndat; j++)
315 1.1 mrg if (dat[j] > 0.0)
316 1.1 mrg x += dat[j];
317 1.1 mrg
318 1.1 mrg min_x = x;
319 1.1 mrg min_j = 0;
320 1.1 mrg
321 1.1 mrg for (j = 0; j < ndat; x -= dat[j], j++)
322 1.1 mrg {
323 1.1 mrg if (x < min_x)
324 1.1 mrg {
325 1.1 mrg min_x = x;
326 1.1 mrg min_j = j;
327 1.1 mrg }
328 1.1 mrg }
329 1.1 mrg return min_j;
330 1.1 mrg }
331 1.1 mrg
332 1.1 mrg static double
333 1.1 mrg mpfr_speed_measure (speed_function_t fun, struct speed_params *s, char *m)
334 1.1 mrg {
335 1.1 mrg double t = -1.0;
336 1.1 mrg int i;
337 1.1 mrg int number_of_iterations = 30;
338 1.1 mrg for (i = 1; i <= number_of_iterations && t == -1.0; i++)
339 1.1 mrg {
340 1.1 mrg t = speed_measure (fun, s);
341 1.1 mrg if ( (t == -1.0) && (i+1 <= number_of_iterations) )
342 1.1 mrg printf("speed_measure failed for size %lu. Trying again... (%d/%d)\n",
343 1.1 mrg s->size, i+1, number_of_iterations);
344 1.1 mrg }
345 1.1 mrg if (t == -1.0)
346 1.1 mrg {
347 1.1 mrg fprintf (stderr, "Failed to measure %s!\n", m);
348 1.1 mrg fprintf (stderr, "If CPU frequency scaling is enabled, please disable it:\n");
349 1.1 mrg fprintf (stderr, " under Linux: cpufreq-selector -g performance\n");
350 1.1 mrg fprintf (stderr, "On a multi-core processor, you might also try to load all the cores\n");
351 1.1 mrg abort ();
352 1.1 mrg }
353 1.1 mrg return t;
354 1.1 mrg }
355 1.1 mrg
356 1.1 mrg #define THRESHOLD_WINDOW 16
357 1.1 mrg #define THRESHOLD_FINAL_WINDOW 128
358 1.1 mrg static double
359 1.1 mrg domeasure (mpfr_prec_t *threshold,
360 1.1 mrg double (*func) (struct speed_params *),
361 1.1 mrg mpfr_prec_t p)
362 1.1 mrg {
363 1.1 mrg struct speed_params s;
364 1.1 mrg mp_size_t size;
365 1.1 mrg double t1, t2, d;
366 1.1 mrg
367 1.1 mrg s.align_xp = s.align_yp = s.align_wp = 64;
368 1.1 mrg s.size = p;
369 1.1 mrg size = (p - 1)/GMP_NUMB_BITS+1;
370 1.1 mrg s.xp = malloc (2*size*sizeof (mp_limb_t));
371 1.1 mrg if (s.xp == NULL)
372 1.1 mrg {
373 1.1 mrg fprintf (stderr, "Can't allocate memory.\n");
374 1.1 mrg abort ();
375 1.1 mrg }
376 1.1 mrg mpn_random (s.xp, size);
377 1.1 mrg s.yp = s.xp + size;
378 1.1 mrg mpn_random (s.yp, size);
379 1.1 mrg *threshold = MPFR_PREC_MAX;
380 1.1 mrg t1 = mpfr_speed_measure (func, &s, "function 1");
381 1.1 mrg *threshold = 1;
382 1.1 mrg t2 = mpfr_speed_measure (func, &s, "function 2");
383 1.1 mrg free (s.xp);
384 1.1 mrg /* t1 is the time of the first algo (used for low prec) */
385 1.1 mrg if (t2 >= t1)
386 1.1 mrg d = (t2 - t1) / t2;
387 1.1 mrg else
388 1.1 mrg d = (t2 - t1) / t1;
389 1.1 mrg /* d > 0 if we have to use algo 1.
390 1.1 mrg d < 0 if we have to use algo 2 */
391 1.1 mrg return d;
392 1.1 mrg }
393 1.1 mrg
394 1.1 mrg /* Performs measures when both the precision and the point of evaluation
395 1.1 mrg shall vary. s.yp is ignored and not initialized.
396 1.1 mrg It assumes that func depends on three thresholds with a boundary of the
397 1.1 mrg form threshold1*x + threshold2*p = some scaling factor, if x<0,
398 1.1 mrg and threshold3*x + threshold2*p = some scaling factor, if x>=0.
399 1.1 mrg */
400 1.1 mrg static double
401 1.1 mrg domeasure2 (long int *threshold1, long int *threshold2, long int *threshold3,
402 1.1 mrg double (*func) (struct speed_params *),
403 1.1 mrg mpfr_prec_t p,
404 1.1 mrg mpfr_t x)
405 1.1 mrg {
406 1.1 mrg struct speed_params s;
407 1.1 mrg mp_size_t size;
408 1.1 mrg double t1, t2, d;
409 1.1 mrg mpfr_t xtmp;
410 1.1 mrg
411 1.1 mrg if (MPFR_IS_SINGULAR (x))
412 1.1 mrg {
413 1.1 mrg mpfr_fprintf (stderr, "x=%RNf is not a regular number.\n");
414 1.1 mrg abort ();
415 1.1 mrg }
416 1.1 mrg if (MPFR_IS_NEG (x))
417 1.1 mrg s.align_xp = 2;
418 1.1 mrg else
419 1.1 mrg s.align_xp = 1;
420 1.1 mrg
421 1.1 mrg s.align_yp = s.align_wp = 64;
422 1.1 mrg s.size = p;
423 1.1 mrg size = (p - 1)/GMP_NUMB_BITS+1;
424 1.1 mrg
425 1.1 mrg mpfr_init2 (xtmp, p);
426 1.1 mrg mpn_random (xtmp->_mpfr_d, size);
427 1.1 mrg xtmp->_mpfr_d[size-1] |= MPFR_LIMB_HIGHBIT;
428 1.1 mrg MPFR_SET_EXP (xtmp, -53);
429 1.1 mrg mpfr_add_ui (xtmp, xtmp, 1, MPFR_RNDN);
430 1.1 mrg mpfr_mul (xtmp, xtmp, x, MPFR_RNDN); /* xtmp = x*(1+perturb) */
431 1.1 mrg /* where perturb ~ 2^(-53) is */
432 1.1 mrg /* randomly chosen. */
433 1.1 mrg s.xp = xtmp->_mpfr_d;
434 1.1 mrg s.r = MPFR_GET_EXP (xtmp);
435 1.1 mrg
436 1.1 mrg *threshold1 = 0;
437 1.1 mrg *threshold2 = 0;
438 1.1 mrg *threshold3 = 0;
439 1.1 mrg t1 = mpfr_speed_measure (func, &s, "function 1");
440 1.1 mrg
441 1.1 mrg if (MPFR_IS_NEG (x))
442 1.1 mrg *threshold1 = INT_MIN;
443 1.1 mrg else
444 1.1 mrg *threshold3 = INT_MAX;
445 1.1 mrg *threshold2 = INT_MAX;
446 1.1 mrg t2 = mpfr_speed_measure (func, &s, "function 2");
447 1.1 mrg
448 1.1 mrg /* t1 is the time of the first algo (used for low prec) */
449 1.1 mrg if (t2 >= t1)
450 1.1 mrg d = (t2 - t1) / t2;
451 1.1 mrg else
452 1.1 mrg d = (t2 - t1) / t1;
453 1.1 mrg /* d > 0 if we have to use algo 1.
454 1.1 mrg d < 0 if we have to use algo 2 */
455 1.1 mrg mpfr_clear (xtmp);
456 1.1 mrg return d;
457 1.1 mrg }
458 1.1 mrg
459 1.1 mrg /* Tune a function with a simple THRESHOLD
460 1.1 mrg The function doesn't depend on another threshold.
461 1.1 mrg It assumes that it uses algo1 if p < THRESHOLD
462 1.1 mrg and algo2 otherwise.
463 1.1 mrg if algo2 is better for low prec, and algo1 better for high prec,
464 1.1 mrg the behaviour of this function is undefined. */
465 1.1 mrg static void
466 1.1 mrg tune_simple_func (mpfr_prec_t *threshold,
467 1.1 mrg double (*func) (struct speed_params *),
468 1.1 mrg mpfr_prec_t pstart)
469 1.1 mrg {
470 1.1 mrg double measure[THRESHOLD_FINAL_WINDOW+1];
471 1.1 mrg double d;
472 1.1 mrg mpfr_prec_t pstep;
473 1.1 mrg int i, numpos, numneg, try;
474 1.1 mrg mpfr_prec_t pmin, pmax, p;
475 1.1 mrg
476 1.1 mrg /* first look for a lower bound within 10% */
477 1.1 mrg pmin = p = pstart;
478 1.1 mrg d = domeasure (threshold, func, pmin);
479 1.1 mrg if (d < 0.0)
480 1.1 mrg {
481 1.1 mrg if (verbose)
482 1.1 mrg printf ("Oops: even for %lu, algo 2 seems to be faster!\n",
483 1.1 mrg (unsigned long) pmin);
484 1.1 mrg *threshold = MPFR_PREC_MIN;
485 1.1 mrg return;
486 1.1 mrg }
487 1.1 mrg if (d >= 1.00)
488 1.1 mrg for (;;)
489 1.1 mrg {
490 1.1 mrg d = domeasure (threshold, func, pmin);
491 1.1 mrg if (d < 1.00)
492 1.1 mrg break;
493 1.1 mrg p = pmin;
494 1.1 mrg pmin += pmin/2;
495 1.1 mrg }
496 1.1 mrg pmin = p;
497 1.1 mrg for (;;)
498 1.1 mrg {
499 1.1 mrg d = domeasure (threshold, func, pmin);
500 1.1 mrg if (d < 0.10)
501 1.1 mrg break;
502 1.1 mrg pmin += GMP_NUMB_BITS;
503 1.1 mrg }
504 1.1 mrg
505 1.1 mrg /* then look for an upper bound within 20% */
506 1.1 mrg pmax = pmin * 2;
507 1.1 mrg for (;;)
508 1.1 mrg {
509 1.1 mrg d = domeasure (threshold, func, pmax);
510 1.1 mrg if (d < -0.20)
511 1.1 mrg break;
512 1.1 mrg pmax += pmin / 2; /* don't increase too rapidly */
513 1.1 mrg }
514 1.1 mrg
515 1.1 mrg /* The threshold is between pmin and pmax. Affine them */
516 1.1 mrg try = 0;
517 1.1 mrg while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW)
518 1.1 mrg {
519 1.1 mrg pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1);
520 1.1 mrg if (verbose)
521 1.1 mrg printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep);
522 1.1 mrg p = (pmin + pmax) / 2;
523 1.1 mrg for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++)
524 1.1 mrg {
525 1.1 mrg measure[i] = domeasure (threshold, func,
526 1.1 mrg p+(i-THRESHOLD_WINDOW/2)*pstep);
527 1.1 mrg if (measure[i] > 0)
528 1.1 mrg numpos ++;
529 1.1 mrg else if (measure[i] < 0)
530 1.1 mrg numneg ++;
531 1.1 mrg }
532 1.1 mrg if (numpos > numneg)
533 1.1 mrg /* We use more often algo 1 than algo 2 */
534 1.1 mrg pmin = p - THRESHOLD_WINDOW/2*pstep;
535 1.1 mrg else if (numpos < numneg)
536 1.1 mrg pmax = p + THRESHOLD_WINDOW/2*pstep;
537 1.1 mrg else
538 1.1 mrg /* numpos == numneg ... */
539 1.1 mrg if (++ try > 2)
540 1.1 mrg {
541 1.1 mrg *threshold = p;
542 1.1 mrg if (verbose)
543 1.1 mrg printf ("Quick find: %lu\n", *threshold);
544 1.1 mrg return ;
545 1.1 mrg }
546 1.1 mrg }
547 1.1 mrg
548 1.1 mrg /* Final tune... */
549 1.1 mrg if (verbose)
550 1.1 mrg printf ("Finalizing in [%lu, %lu]... ", pmin, pmax);
551 1.1 mrg for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++)
552 1.1 mrg measure[i] = domeasure (threshold, func, pmin+i);
553 1.1 mrg i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1);
554 1.1 mrg *threshold = pmin + i;
555 1.1 mrg if (verbose)
556 1.1 mrg printf ("%lu\n", *threshold);
557 1.1 mrg return;
558 1.1 mrg }
559 1.1 mrg
560 1.1 mrg /* Tune a function which behavior depends on both p and x,
561 1.1 mrg in a given direction.
562 1.1 mrg It assumes that for (x,p) close to zero, algo1 is used
563 1.1 mrg and algo2 is used when (x,p) is far from zero.
564 1.1 mrg If algo2 is better for low prec, and algo1 better for high prec,
565 1.1 mrg the behaviour of this function is undefined.
566 1.1 mrg This tuning function tries couples (x,p) of the form (ell*dirx, ell*dirp)
567 1.1 mrg until it finds a point on the boundary. It returns ell.
568 1.1 mrg */
569 1.1 mrg static void
570 1.1 mrg tune_simple_func_in_some_direction (long int *threshold1,
571 1.1 mrg long int *threshold2,
572 1.1 mrg long int *threshold3,
573 1.1 mrg double (*func) (struct speed_params *),
574 1.1 mrg mpfr_prec_t pstart,
575 1.1 mrg int dirx, int dirp,
576 1.1 mrg mpfr_t xres, mpfr_prec_t *pres)
577 1.1 mrg {
578 1.1 mrg double measure[THRESHOLD_FINAL_WINDOW+1];
579 1.1 mrg double d;
580 1.1 mrg mpfr_prec_t pstep;
581 1.1 mrg int i, numpos, numneg, try;
582 1.1 mrg mpfr_prec_t pmin, pmax, p;
583 1.1 mrg mpfr_t xmin, xmax, x;
584 1.1 mrg mpfr_t ratio;
585 1.1 mrg
586 1.1 mrg mpfr_init2 (ratio, MPFR_SMALL_PRECISION);
587 1.1 mrg mpfr_set_si (ratio, dirx, MPFR_RNDN);
588 1.1 mrg mpfr_div_si (ratio, ratio, dirp, MPFR_RNDN);
589 1.1 mrg
590 1.1 mrg mpfr_init2 (xmin, MPFR_SMALL_PRECISION);
591 1.1 mrg mpfr_init2 (xmax, MPFR_SMALL_PRECISION);
592 1.1 mrg mpfr_init2 (x, MPFR_SMALL_PRECISION);
593 1.1 mrg
594 1.1 mrg /* first look for a lower bound within 10% */
595 1.1 mrg pmin = p = pstart;
596 1.1 mrg mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
597 1.1 mrg mpfr_set (x, xmin, MPFR_RNDN);
598 1.1 mrg
599 1.1 mrg d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
600 1.1 mrg if (d < 0.0)
601 1.1 mrg {
602 1.1 mrg if (verbose)
603 1.1 mrg printf ("Oops: even for %lu, algo 2 seems to be faster!\n",
604 1.1 mrg (unsigned long) pmin);
605 1.1 mrg *pres = MPFR_PREC_MIN;
606 1.1 mrg mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
607 1.1 mrg mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
608 1.1 mrg return;
609 1.1 mrg }
610 1.1 mrg if (d >= 1.00)
611 1.1 mrg for (;;)
612 1.1 mrg {
613 1.1 mrg d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
614 1.1 mrg if (d < 1.00)
615 1.1 mrg break;
616 1.1 mrg p = pmin;
617 1.1 mrg mpfr_set (x, xmin, MPFR_RNDN);
618 1.1 mrg pmin += pmin/2;
619 1.1 mrg mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
620 1.1 mrg }
621 1.1 mrg pmin = p;
622 1.1 mrg mpfr_set (xmin, x, MPFR_RNDN);
623 1.1 mrg for (;;)
624 1.1 mrg {
625 1.1 mrg d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
626 1.1 mrg if (d < 0.10)
627 1.1 mrg break;
628 1.1 mrg pmin += GMP_NUMB_BITS;
629 1.1 mrg mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
630 1.1 mrg }
631 1.1 mrg
632 1.1 mrg /* then look for an upper bound within 20% */
633 1.1 mrg pmax = pmin * 2;
634 1.1 mrg mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
635 1.1 mrg for (;;)
636 1.1 mrg {
637 1.1 mrg d = domeasure2 (threshold1, threshold2, threshold3, func, pmax, xmax);
638 1.1 mrg if (d < -0.20)
639 1.1 mrg break;
640 1.1 mrg pmax += pmin / 2; /* don't increase too rapidly */
641 1.1 mrg mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
642 1.1 mrg }
643 1.1 mrg
644 1.1 mrg /* The threshold is between pmin and pmax. Affine them */
645 1.1 mrg try = 0;
646 1.1 mrg while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW)
647 1.1 mrg {
648 1.1 mrg pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1);
649 1.1 mrg if (verbose)
650 1.1 mrg printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep);
651 1.1 mrg p = (pmin + pmax) / 2;
652 1.1 mrg mpfr_mul_ui (x, ratio, (unsigned int)p, MPFR_RNDN);
653 1.1 mrg for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++)
654 1.1 mrg {
655 1.1 mrg *pres = p+(i-THRESHOLD_WINDOW/2)*pstep;
656 1.1 mrg mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
657 1.1 mrg measure[i] = domeasure2 (threshold1, threshold2, threshold3,
658 1.1 mrg func, *pres, xres);
659 1.1 mrg if (measure[i] > 0)
660 1.1 mrg numpos ++;
661 1.1 mrg else if (measure[i] < 0)
662 1.1 mrg numneg ++;
663 1.1 mrg }
664 1.1 mrg if (numpos > numneg)
665 1.1 mrg {
666 1.1 mrg /* We use more often algo 1 than algo 2 */
667 1.1 mrg pmin = p - THRESHOLD_WINDOW/2*pstep;
668 1.1 mrg mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
669 1.1 mrg }
670 1.1 mrg else if (numpos < numneg)
671 1.1 mrg {
672 1.1 mrg pmax = p + THRESHOLD_WINDOW/2*pstep;
673 1.1 mrg mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
674 1.1 mrg }
675 1.1 mrg else
676 1.1 mrg /* numpos == numneg ... */
677 1.1 mrg if (++ try > 2)
678 1.1 mrg {
679 1.1 mrg *pres = p;
680 1.1 mrg mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
681 1.1 mrg if (verbose)
682 1.1 mrg printf ("Quick find: %lu\n", *pres);
683 1.1 mrg mpfr_clear (ratio);
684 1.1 mrg mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
685 1.1 mrg return ;
686 1.1 mrg }
687 1.1 mrg }
688 1.1 mrg
689 1.1 mrg /* Final tune... */
690 1.1 mrg if (verbose)
691 1.1 mrg printf ("Finalizing in [%lu, %lu]... ", pmin, pmax);
692 1.1 mrg for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++)
693 1.1 mrg {
694 1.1 mrg *pres = pmin+i;
695 1.1 mrg mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
696 1.1 mrg measure[i] = domeasure2 (threshold1, threshold2, threshold3,
697 1.1 mrg func, *pres, xres);
698 1.1 mrg }
699 1.1 mrg i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1);
700 1.1 mrg *pres = pmin + i;
701 1.1 mrg mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
702 1.1 mrg if (verbose)
703 1.1 mrg printf ("%lu\n", *pres);
704 1.1 mrg mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
705 1.1 mrg return;
706 1.1 mrg }
707 1.1 mrg
708 1.1 mrg /************************************
709 1.1 mrg * Tune Mulders' mulhigh function *
710 1.1 mrg ************************************/
711 1.1 mrg #define TOLERANCE 1.00
712 1.1 mrg #define MULDERS_TABLE_SIZE 1024
713 1.1 mrg #ifndef MPFR_MULHIGH_SIZE
714 1.1 mrg # define MPFR_MULHIGH_SIZE MULDERS_TABLE_SIZE
715 1.1 mrg #endif
716 1.1 mrg #ifndef MPFR_SQRHIGH_SIZE
717 1.1 mrg # define MPFR_SQRHIGH_SIZE MULDERS_TABLE_SIZE
718 1.1 mrg #endif
719 1.1 mrg #ifndef MPFR_DIVHIGH_SIZE
720 1.1 mrg # define MPFR_DIVHIGH_SIZE MULDERS_TABLE_SIZE
721 1.1 mrg #endif
722 1.1 mrg #define MPFR_MULHIGH_TAB_SIZE MPFR_MULHIGH_SIZE
723 1.1 mrg #define MPFR_SQRHIGH_TAB_SIZE MPFR_SQRHIGH_SIZE
724 1.1 mrg #define MPFR_DIVHIGH_TAB_SIZE MPFR_DIVHIGH_SIZE
725 1.1 mrg #include "mulders.c"
726 1.1 mrg
727 1.1 mrg static double
728 1.1 mrg speed_mpfr_mulhigh (struct speed_params *s)
729 1.1 mrg {
730 1.1 mrg SPEED_ROUTINE_MPN_MUL_N (mpfr_mulhigh_n);
731 1.1 mrg }
732 1.1 mrg
733 1.1 mrg static double
734 1.1 mrg speed_mpfr_sqrhigh (struct speed_params *s)
735 1.1 mrg {
736 1.1 mrg SPEED_ROUTINE_MPN_SQR (mpfr_sqrhigh_n);
737 1.1 mrg }
738 1.1 mrg
739 1.1 mrg static double
740 1.1 mrg speed_mpfr_divhigh (struct speed_params *s)
741 1.1 mrg {
742 1.1 mrg SPEED_ROUTINE_MPN_DC_DIVREM_CALL (mpfr_divhigh_n (q, a, d, s->size));
743 1.1 mrg }
744 1.1 mrg
745 1.1 mrg #define MAX_STEPS 513 /* maximum number of values of k tried for a given n */
746 1.1 mrg
747 1.1 mrg /* Tune mpfr_mulhigh_n for size n */
748 1.1 mrg static mp_size_t
749 1.1 mrg tune_mul_mulders_upto (mp_size_t n)
750 1.1 mrg {
751 1.1 mrg struct speed_params s;
752 1.1 mrg mp_size_t k, kbest, step;
753 1.1 mrg double t, tbest;
754 1.1 mrg MPFR_TMP_DECL (marker);
755 1.1 mrg
756 1.1 mrg if (n == 0)
757 1.1 mrg return -1;
758 1.1 mrg
759 1.1 mrg MPFR_TMP_MARK (marker);
760 1.1 mrg s.align_xp = s.align_yp = s.align_wp = 64;
761 1.1 mrg s.size = n;
762 1.1 mrg s.xp = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
763 1.1 mrg s.yp = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
764 1.1 mrg mpn_random (s.xp, n);
765 1.1 mrg mpn_random (s.yp, n);
766 1.1 mrg
767 1.1 mrg /* Check k == -1, mpn_mul_basecase */
768 1.1 mrg mulhigh_ktab[n] = -1;
769 1.1 mrg kbest = -1;
770 1.1 mrg tbest = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
771 1.1 mrg
772 1.1 mrg /* Check k == 0, mpn_mulhigh_n_basecase */
773 1.1 mrg mulhigh_ktab[n] = 0;
774 1.1 mrg t = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
775 1.1 mrg if (t * TOLERANCE < tbest)
776 1.1 mrg kbest = 0, tbest = t;
777 1.1 mrg
778 1.1 mrg /* Check Mulders with cutoff point k */
779 1.1 mrg step = 1 + n / (2 * MAX_STEPS);
780 1.1 mrg /* we need k >= (n+3)/2, which translates into k >= (n+4)/2 in C */
781 1.1 mrg for (k = (n + 4) / 2 ; k < n ; k += step)
782 1.1 mrg {
783 1.1 mrg mulhigh_ktab[n] = k;
784 1.1 mrg t = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
785 1.1 mrg if (t * TOLERANCE < tbest)
786 1.1 mrg kbest = k, tbest = t;
787 1.1 mrg }
788 1.1 mrg
789 1.1 mrg mulhigh_ktab[n] = kbest;
790 1.1 mrg
791 1.1 mrg MPFR_TMP_FREE (marker);
792 1.1 mrg return kbest;
793 1.1 mrg }
794 1.1 mrg
795 1.1 mrg /* Tune mpfr_sqrhigh_n for size n */
796 1.1 mrg static mp_size_t
797 1.1 mrg tune_sqr_mulders_upto (mp_size_t n)
798 1.1 mrg {
799 1.1 mrg struct speed_params s;
800 1.1 mrg mp_size_t k, kbest, step;
801 1.1 mrg double t, tbest;
802 1.1 mrg MPFR_TMP_DECL (marker);
803 1.1 mrg
804 1.1 mrg if (n == 0)
805 1.1 mrg return -1;
806 1.1 mrg
807 1.1 mrg MPFR_TMP_MARK (marker);
808 1.1 mrg s.align_xp = s.align_wp = 64;
809 1.1 mrg s.size = n;
810 1.1 mrg s.xp = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
811 1.1 mrg mpn_random (s.xp, n);
812 1.1 mrg
813 1.1 mrg /* Check k == -1, mpn_sqr_basecase */
814 1.1 mrg sqrhigh_ktab[n] = -1;
815 1.1 mrg kbest = -1;
816 1.1 mrg tbest = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
817 1.1 mrg
818 1.1 mrg /* Check k == 0, mpfr_mulhigh_n_basecase */
819 1.1 mrg sqrhigh_ktab[n] = 0;
820 1.1 mrg t = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
821 1.1 mrg if (t * TOLERANCE < tbest)
822 1.1 mrg kbest = 0, tbest = t;
823 1.1 mrg
824 1.1 mrg /* Check Mulders */
825 1.1 mrg step = 1 + n / (2 * MAX_STEPS);
826 1.1 mrg /* we need k >= (n+3)/2, which translates into k >= (n+4)/2 in C */
827 1.1 mrg for (k = (n + 4) / 2 ; k < n ; k += step)
828 1.1 mrg {
829 1.1 mrg sqrhigh_ktab[n] = k;
830 1.1 mrg t = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
831 1.1 mrg if (t * TOLERANCE < tbest)
832 1.1 mrg kbest = k, tbest = t;
833 1.1 mrg }
834 1.1 mrg
835 1.1 mrg sqrhigh_ktab[n] = kbest;
836 1.1 mrg
837 1.1 mrg MPFR_TMP_FREE (marker);
838 1.1 mrg return kbest;
839 1.1 mrg }
840 1.1 mrg
841 1.1 mrg /* Tune mpfr_divhigh_n for size n */
842 1.1 mrg static mp_size_t
843 1.1 mrg tune_div_mulders_upto (mp_size_t n)
844 1.1 mrg {
845 1.1 mrg struct speed_params s;
846 1.1 mrg mp_size_t k, kbest, step;
847 1.1 mrg double t, tbest;
848 1.1 mrg MPFR_TMP_DECL (marker);
849 1.1 mrg
850 1.1 mrg if (n == 0)
851 1.1 mrg return 0;
852 1.1 mrg
853 1.1 mrg MPFR_TMP_MARK (marker);
854 1.1 mrg s.align_xp = s.align_yp = s.align_wp = s.align_wp2 = 64;
855 1.1 mrg s.size = n;
856 1.1 mrg s.xp = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
857 1.1 mrg s.yp = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
858 1.1 mrg mpn_random (s.xp, n);
859 1.1 mrg mpn_random (s.yp, n);
860 1.1 mrg
861 1.1 mrg /* Check k == n, i.e., mpn_divrem */
862 1.1 mrg divhigh_ktab[n] = n;
863 1.1 mrg kbest = n;
864 1.1 mrg tbest = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
865 1.1 mrg
866 1.1 mrg /* Check k == 0, i.e., mpfr_divhigh_n_basecase */
867 1.1 mrg #if defined(WANT_GMP_INTERNALS) && defined(HAVE___GMPN_SBPI1_DIVAPPR_Q)
868 1.1 mrg if (n > 2) /* mpn_sbpi1_divappr_q requires dn > 2 */
869 1.1 mrg #endif
870 1.1 mrg {
871 1.1 mrg divhigh_ktab[n] = 0;
872 1.1 mrg t = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
873 1.1 mrg if (t * TOLERANCE < tbest)
874 1.1 mrg kbest = 0, tbest = t;
875 1.1 mrg }
876 1.1 mrg
877 1.1 mrg /* Check Mulders */
878 1.1 mrg step = 1 + n / (2 * MAX_STEPS);
879 1.1 mrg /* we should have (n+3)/2 <= k < n, which translates into
880 1.1 mrg (n+4)/2 <= k < n in C */
881 1.1 mrg for (k = (n + 4) / 2 ; k < n ; k += step)
882 1.1 mrg {
883 1.1 mrg divhigh_ktab[n] = k;
884 1.1 mrg t = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
885 1.1 mrg if (t * TOLERANCE < tbest)
886 1.1 mrg kbest = k, tbest = t;
887 1.1 mrg }
888 1.1 mrg
889 1.1 mrg divhigh_ktab[n] = kbest;
890 1.1 mrg
891 1.1 mrg MPFR_TMP_FREE (marker);
892 1.1 mrg
893 1.1 mrg return kbest;
894 1.1 mrg }
895 1.1 mrg
896 1.1 mrg static void
897 1.1 mrg tune_mul_mulders (FILE *f)
898 1.1 mrg {
899 1.1 mrg mp_size_t k;
900 1.1 mrg
901 1.1 mrg if (verbose)
902 1.1 mrg printf ("Tuning mpfr_mulhigh_n[%d]", (int) MPFR_MULHIGH_TAB_SIZE);
903 1.1 mrg fprintf (f, "#define MPFR_MULHIGH_TAB \\\n ");
904 1.1 mrg for (k = 0 ; k < MPFR_MULHIGH_TAB_SIZE ; k++)
905 1.1 mrg {
906 1.1 mrg fprintf (f, "%d", (int) tune_mul_mulders_upto (k));
907 1.1 mrg if (k != MPFR_MULHIGH_TAB_SIZE-1)
908 1.1 mrg fputc (',', f);
909 1.1 mrg if ((k+1) % 16 == 0)
910 1.1 mrg fprintf (f, " \\\n ");
911 1.1 mrg if (verbose)
912 1.1 mrg putchar ('.');
913 1.1 mrg }
914 1.1 mrg fprintf (f, " \n");
915 1.1 mrg if (verbose)
916 1.1 mrg putchar ('\n');
917 1.1 mrg }
918 1.1 mrg
919 1.1 mrg static void
920 1.1 mrg tune_sqr_mulders (FILE *f)
921 1.1 mrg {
922 1.1 mrg mp_size_t k;
923 1.1 mrg
924 1.1 mrg if (verbose)
925 1.1 mrg printf ("Tuning mpfr_sqrhigh_n[%d]", (int) MPFR_SQRHIGH_TAB_SIZE);
926 1.1 mrg fprintf (f, "#define MPFR_SQRHIGH_TAB \\\n ");
927 1.1 mrg for (k = 0 ; k < MPFR_SQRHIGH_TAB_SIZE ; k++)
928 1.1 mrg {
929 1.1 mrg fprintf (f, "%d", (int) tune_sqr_mulders_upto (k));
930 1.1 mrg if (k != MPFR_SQRHIGH_TAB_SIZE-1)
931 1.1 mrg fputc (',', f);
932 1.1 mrg if ((k+1) % 16 == 0)
933 1.1 mrg fprintf (f, " \\\n ");
934 1.1 mrg if (verbose)
935 1.1 mrg putchar ('.');
936 1.1 mrg }
937 1.1 mrg fprintf (f, " \n");
938 1.1 mrg if (verbose)
939 1.1 mrg putchar ('\n');
940 1.1 mrg }
941 1.1 mrg
942 1.1 mrg static void
943 1.1 mrg tune_div_mulders (FILE *f)
944 1.1 mrg {
945 1.1 mrg mp_size_t k;
946 1.1 mrg
947 1.1 mrg if (verbose)
948 1.1 mrg printf ("Tuning mpfr_divhigh_n[%d]", (int) MPFR_DIVHIGH_TAB_SIZE);
949 1.1 mrg fprintf (f, "#define MPFR_DIVHIGH_TAB \\\n ");
950 1.1 mrg for (k = 0 ; k < MPFR_DIVHIGH_TAB_SIZE ; k++)
951 1.1 mrg {
952 1.1 mrg fprintf (f, "%d", (int) tune_div_mulders_upto (k));
953 1.1 mrg if (k != MPFR_DIVHIGH_TAB_SIZE - 1)
954 1.1 mrg fputc (',', f);
955 1.1 mrg if ((k+1) % 16 == 0)
956 1.1 mrg fprintf (f, " /*%zu-%zu*/ \\\n ", k - 15, k);
957 1.1 mrg if (verbose)
958 1.1 mrg putchar ('.');
959 1.1 mrg }
960 1.1 mrg fprintf (f, " \n");
961 1.1 mrg if (verbose)
962 1.1 mrg putchar ('\n');
963 1.1 mrg }
964 1.1 mrg
965 1.1 mrg /*******************************************************
966 1.1 mrg * Tuning functions for mpfr_ai *
967 1.1 mrg *******************************************************/
968 1.1 mrg
969 1.1 mrg long int mpfr_ai_threshold1;
970 1.1 mrg long int mpfr_ai_threshold2;
971 1.1 mrg long int mpfr_ai_threshold3;
972 1.1 mrg #undef MPFR_AI_THRESHOLD1
973 1.1 mrg #define MPFR_AI_THRESHOLD1 mpfr_ai_threshold1
974 1.1 mrg #undef MPFR_AI_THRESHOLD2
975 1.1 mrg #define MPFR_AI_THRESHOLD2 mpfr_ai_threshold2
976 1.1 mrg #undef MPFR_AI_THRESHOLD3
977 1.1 mrg #define MPFR_AI_THRESHOLD3 mpfr_ai_threshold3
978 1.1 mrg
979 1.1 mrg #include "ai.c"
980 1.1 mrg
981 1.1 mrg static double
982 1.1 mrg speed_mpfr_ai (struct speed_params *s)
983 1.1 mrg {
984 1.1 mrg SPEED_MPFR_FUNC_WITH_EXPONENT (mpfr_ai);
985 1.1 mrg }
986 1.1 mrg
987 1.1 mrg
988 1.1 mrg /*******************************************************
989 1.1 mrg * Tune all the threshold of MPFR *
990 1.1 mrg * Warning: tune the function in their dependent order!*
991 1.1 mrg *******************************************************/
992 1.1 mrg static void
993 1.1 mrg all (const char *filename)
994 1.1 mrg {
995 1.1 mrg FILE *f;
996 1.1 mrg time_t start_time, end_time;
997 1.1 mrg struct tm *tp;
998 1.1 mrg mpfr_t x1, x2, x3, tmp1, tmp2;
999 1.1 mrg mpfr_prec_t p1, p2, p3;
1000 1.1 mrg
1001 1.1 mrg f = fopen (filename, "w");
1002 1.1 mrg if (f == NULL)
1003 1.1 mrg {
1004 1.1 mrg fprintf (stderr, "Can't open file '%s' for writing.\n", filename);
1005 1.1 mrg abort ();
1006 1.1 mrg }
1007 1.1 mrg
1008 1.1 mrg speed_time_init ();
1009 1.1 mrg if (verbose)
1010 1.1 mrg {
1011 1.1 mrg printf ("Using: %s\n", speed_time_string);
1012 1.1 mrg printf ("speed_precision %d", speed_precision);
1013 1.1 mrg if (speed_unittime == 1.0)
1014 1.1 mrg printf (", speed_unittime 1 cycle");
1015 1.1 mrg else
1016 1.1 mrg printf (", speed_unittime %.2e secs", speed_unittime);
1017 1.1 mrg if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
1018 1.1 mrg printf (", CPU freq unknown\n");
1019 1.1 mrg else
1020 1.1 mrg printf (", CPU freq %.2f MHz\n\n", 1e-6/speed_cycletime);
1021 1.1 mrg }
1022 1.1 mrg
1023 1.1 mrg time (&start_time);
1024 1.1 mrg tp = localtime (&start_time);
1025 1.1 mrg fprintf (f, "/* Generated by MPFR's tuneup.c, %d-%02d-%02d, ",
1026 1.1 mrg tp->tm_year+1900, tp->tm_mon+1, tp->tm_mday);
1027 1.1 mrg
1028 1.1 mrg #ifdef __ICC
1029 1.1 mrg fprintf (f, "icc %d.%d.%d */\n", __ICC / 100, __ICC / 10 % 10, __ICC % 10);
1030 1.1 mrg #elif defined(__GNUC__)
1031 1.1 mrg #ifdef __GNUC_PATCHLEVEL__
1032 1.1 mrg fprintf (f, "gcc %d.%d.%d */\n", __GNUC__, __GNUC_MINOR__,
1033 1.1 mrg __GNUC_PATCHLEVEL__);
1034 1.1 mrg #else
1035 1.1 mrg fprintf (f, "gcc %d.%d */\n", __GNUC__, __GNUC_MINOR__);
1036 1.1 mrg #endif
1037 1.1 mrg #elif defined (__SUNPRO_C)
1038 1.1 mrg fprintf (f, "Sun C %d.%d */\n", __SUNPRO_C / 0x100, __SUNPRO_C % 0x100);
1039 1.1 mrg #elif defined (__sgi) && defined (_COMPILER_VERSION)
1040 1.1 mrg fprintf (f, "MIPSpro C %d.%d.%d */\n",
1041 1.1 mrg _COMPILER_VERSION / 100,
1042 1.1 mrg _COMPILER_VERSION / 10 % 10,
1043 1.1 mrg _COMPILER_VERSION % 10);
1044 1.1 mrg #elif defined (__DECC) && defined (__DECC_VER)
1045 1.1 mrg fprintf (f, "DEC C %d */\n", __DECC_VER);
1046 1.1 mrg #else
1047 1.1 mrg fprintf (f, "system compiler */\n");
1048 1.1 mrg #endif
1049 1.1 mrg fprintf (f, "\n\n");
1050 1.1 mrg fprintf (f, "#ifndef MPFR_TUNE_CASE\n");
1051 1.1 mrg fprintf (f, "#define MPFR_TUNE_CASE \"src/mparam.h\"\n");
1052 1.1 mrg fprintf (f, "#endif\n\n");
1053 1.1 mrg
1054 1.1 mrg /* Tune mulhigh */
1055 1.1 mrg tune_mul_mulders (f);
1056 1.1 mrg
1057 1.1 mrg /* Tune sqrhigh */
1058 1.1 mrg tune_sqr_mulders (f);
1059 1.1 mrg
1060 1.1 mrg /* Tune divhigh */
1061 1.1 mrg tune_div_mulders (f);
1062 1.1 mrg fflush (f);
1063 1.1 mrg
1064 1.1 mrg /* Tune mpfr_mul (threshold is in limbs, but it doesn't matter too much) */
1065 1.1 mrg if (verbose)
1066 1.1 mrg printf ("Tuning mpfr_mul...\n");
1067 1.1 mrg tune_simple_func (&mpfr_mul_threshold, speed_mpfr_mul,
1068 1.1 mrg 2*GMP_NUMB_BITS+1);
1069 1.1 mrg fprintf (f, "#define MPFR_MUL_THRESHOLD %lu /* limbs */\n",
1070 1.1 mrg (unsigned long) (mpfr_mul_threshold - 1) / GMP_NUMB_BITS + 1);
1071 1.1 mrg
1072 1.1 mrg /* Tune mpfr_sqr (threshold is in limbs, but it doesn't matter too much) */
1073 1.1 mrg if (verbose)
1074 1.1 mrg printf ("Tuning mpfr_sqr...\n");
1075 1.1 mrg tune_simple_func (&mpfr_sqr_threshold, speed_mpfr_sqr,
1076 1.1 mrg 2*GMP_NUMB_BITS+1);
1077 1.1 mrg fprintf (f, "#define MPFR_SQR_THRESHOLD %lu /* limbs */\n",
1078 1.1 mrg (unsigned long) (mpfr_sqr_threshold - 1) / GMP_NUMB_BITS + 1);
1079 1.1 mrg
1080 1.1 mrg /* Tune mpfr_div (threshold is in limbs, but it doesn't matter too much) */
1081 1.1 mrg if (verbose)
1082 1.1 mrg printf ("Tuning mpfr_div...\n");
1083 1.1 mrg tune_simple_func (&mpfr_div_threshold, speed_mpfr_div,
1084 1.1 mrg 2*GMP_NUMB_BITS+1);
1085 1.1 mrg fprintf (f, "#define MPFR_DIV_THRESHOLD %lu /* limbs */\n",
1086 1.1 mrg (unsigned long) (mpfr_div_threshold - 1) / GMP_NUMB_BITS + 1);
1087 1.1 mrg
1088 1.1 mrg /* Tune mpfr_exp_2 */
1089 1.1 mrg if (verbose)
1090 1.1 mrg printf ("Tuning mpfr_exp_2...\n");
1091 1.1 mrg tune_simple_func (&mpfr_exp_2_threshold, speed_mpfr_exp_2, GMP_NUMB_BITS);
1092 1.1 mrg fprintf (f, "#define MPFR_EXP_2_THRESHOLD %lu /* bits */\n",
1093 1.1 mrg (unsigned long) mpfr_exp_2_threshold);
1094 1.1 mrg
1095 1.1 mrg /* Tune mpfr_exp */
1096 1.1 mrg if (verbose)
1097 1.1 mrg printf ("Tuning mpfr_exp...\n");
1098 1.1 mrg tune_simple_func (&mpfr_exp_threshold, speed_mpfr_exp,
1099 1.1 mrg MPFR_PREC_MIN+3*GMP_NUMB_BITS);
1100 1.1 mrg fprintf (f, "#define MPFR_EXP_THRESHOLD %lu /* bits */\n",
1101 1.1 mrg (unsigned long) mpfr_exp_threshold);
1102 1.1 mrg
1103 1.1 mrg /* Tune mpfr_sin_cos */
1104 1.1 mrg if (verbose)
1105 1.1 mrg printf ("Tuning mpfr_sin_cos...\n");
1106 1.1 mrg tune_simple_func (&mpfr_sincos_threshold, speed_mpfr_sincos,
1107 1.1 mrg MPFR_PREC_MIN+3*GMP_NUMB_BITS);
1108 1.1 mrg fprintf (f, "#define MPFR_SINCOS_THRESHOLD %lu /* bits */\n",
1109 1.1 mrg (unsigned long) mpfr_sincos_threshold);
1110 1.1 mrg
1111 1.1 mrg /* Tune mpfr_ai */
1112 1.1 mrg if (verbose)
1113 1.1 mrg printf ("Tuning mpfr_ai...\n");
1114 1.1 mrg mpfr_init2 (x1, MPFR_SMALL_PRECISION);
1115 1.1 mrg mpfr_init2 (x2, MPFR_SMALL_PRECISION);
1116 1.1 mrg mpfr_init2 (x3, MPFR_SMALL_PRECISION);
1117 1.1 mrg mpfr_init2 (tmp1, MPFR_SMALL_PRECISION);
1118 1.1 mrg mpfr_init2 (tmp2, MPFR_SMALL_PRECISION);
1119 1.1 mrg
1120 1.1 mrg tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
1121 1.1 mrg &mpfr_ai_threshold3, speed_mpfr_ai,
1122 1.1 mrg MPFR_PREC_MIN+GMP_NUMB_BITS,
1123 1.1 mrg -60, 200, x1, &p1);
1124 1.1 mrg tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
1125 1.1 mrg &mpfr_ai_threshold3, speed_mpfr_ai,
1126 1.1 mrg MPFR_PREC_MIN+GMP_NUMB_BITS,
1127 1.1 mrg -20, 500, x2, &p2);
1128 1.1 mrg tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
1129 1.1 mrg &mpfr_ai_threshold3, speed_mpfr_ai,
1130 1.1 mrg MPFR_PREC_MIN+GMP_NUMB_BITS,
1131 1.1 mrg 40, 200, x3, &p3);
1132 1.1 mrg
1133 1.1 mrg mpfr_mul_ui (tmp1, x2, (unsigned long)p1, MPFR_RNDN);
1134 1.1 mrg mpfr_mul_ui (tmp2, x1, (unsigned long)p2, MPFR_RNDN);
1135 1.1 mrg mpfr_sub (tmp1, tmp1, tmp2, MPFR_RNDN);
1136 1.1 mrg mpfr_div_ui (tmp1, tmp1, MPFR_AI_SCALE, MPFR_RNDN);
1137 1.1 mrg
1138 1.1 mrg mpfr_set_ui (tmp2, (unsigned long)p1, MPFR_RNDN);
1139 1.1 mrg mpfr_sub_ui (tmp2, tmp2, (unsigned long)p2, MPFR_RNDN);
1140 1.1 mrg mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN);
1141 1.1 mrg mpfr_ai_threshold1 = mpfr_get_si (tmp2, MPFR_RNDN);
1142 1.1 mrg
1143 1.1 mrg mpfr_sub (tmp2, x2, x1, MPFR_RNDN);
1144 1.1 mrg mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN);
1145 1.1 mrg mpfr_ai_threshold2 = mpfr_get_si (tmp2, MPFR_RNDN);
1146 1.1 mrg
1147 1.1 mrg mpfr_set_ui (tmp1, (unsigned long)p3, MPFR_RNDN);
1148 1.1 mrg mpfr_mul_si (tmp1, tmp1, mpfr_ai_threshold2, MPFR_RNDN);
1149 1.1 mrg mpfr_ui_sub (tmp1, MPFR_AI_SCALE, tmp1, MPFR_RNDN);
1150 1.1 mrg mpfr_div (tmp1, tmp1, x3, MPFR_RNDN);
1151 1.1 mrg mpfr_ai_threshold3 = mpfr_get_si (tmp1, MPFR_RNDN);
1152 1.1 mrg
1153 1.1 mrg fprintf (f, "#define MPFR_AI_THRESHOLD1 %ld /* threshold for negative input of mpfr_ai */\n", mpfr_ai_threshold1);
1154 1.1 mrg fprintf (f, "#define MPFR_AI_THRESHOLD2 %ld\n", mpfr_ai_threshold2);
1155 1.1 mrg fprintf (f, "#define MPFR_AI_THRESHOLD3 %ld\n", mpfr_ai_threshold3);
1156 1.1 mrg
1157 1.1 mrg mpfr_clear (x1); mpfr_clear (x2); mpfr_clear (x3);
1158 1.1 mrg mpfr_clear (tmp1); mpfr_clear (tmp2);
1159 1.1 mrg
1160 1.1 mrg /* End of tuning */
1161 1.1 mrg time (&end_time);
1162 1.1 mrg fprintf (f, "/* Tuneup completed successfully, took %ld seconds */\n",
1163 1.1 mrg (long) (end_time - start_time));
1164 1.1 mrg if (verbose)
1165 1.1 mrg printf ("Complete (took %ld seconds).\n", (long) (end_time - start_time));
1166 1.1 mrg
1167 1.1 mrg fclose (f);
1168 1.1 mrg }
1169 1.1 mrg
1170 1.1 mrg
1171 1.1 mrg /* Main function */
1172 1.1 mrg int main (int argc, char *argv[])
1173 1.1 mrg {
1174 1.1 mrg /* Unbuffered so if output is redirected to a file it isn't lost if the
1175 1.1 mrg program is killed part way through. */
1176 1.1 mrg setbuf (stdout, NULL);
1177 1.1 mrg setbuf (stderr, NULL);
1178 1.1 mrg
1179 1.1 mrg verbose = argc > 1;
1180 1.1 mrg
1181 1.1 mrg if (verbose)
1182 1.1 mrg printf ("Tuning MPFR (Coffee time?)...\n");
1183 1.1 mrg
1184 1.1 mrg all ("mparam.h");
1185 1.1 mrg
1186 1.1 mrg return 0;
1187 1.1 mrg }
1188