tuneup.c revision 1.1.1.2.4.1 1 1.1 mrg /* Tune various threshold of MPFR
2 1.1 mrg
3 1.1.1.2.4.1 christos Copyright 2005-2018 Free Software Foundation, Inc.
4 1.1.1.2 mrg Contributed by the AriC and Caramba projects, INRIA.
5 1.1 mrg
6 1.1 mrg This file is part of the GNU MPFR Library.
7 1.1 mrg
8 1.1 mrg The GNU MPFR Library is free software; you can redistribute it and/or modify
9 1.1 mrg it under the terms of the GNU Lesser General Public License as published by
10 1.1 mrg the Free Software Foundation; either version 3 of the License, or (at your
11 1.1 mrg option) any later version.
12 1.1 mrg
13 1.1 mrg The GNU MPFR Library is distributed in the hope that it will be useful, but
14 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 1.1 mrg License for more details.
17 1.1 mrg
18 1.1 mrg You should have received a copy of the GNU Lesser General Public License
19 1.1 mrg along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
20 1.1 mrg http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 1.1 mrg 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 1.1 mrg
23 1.1 mrg #include <stdlib.h>
24 1.1 mrg #include <time.h>
25 1.1 mrg
26 1.1 mrg #define MPFR_NEED_LONGLONG_H
27 1.1 mrg #include "mpfr-impl.h"
28 1.1 mrg
29 1.1 mrg #undef _PROTO
30 1.1 mrg #define _PROTO __GMP_PROTO
31 1.1 mrg #include "speed.h"
32 1.1 mrg
33 1.1.1.2.4.1 christos /* Undefine static assertion system */
34 1.1.1.2.4.1 christos #undef MPFR_DECL_STATIC_ASSERT
35 1.1.1.2.4.1 christos #undef MPFR_STAT_STATIC_ASSERT
36 1.1.1.2.4.1 christos #define MPFR_DECL_STATIC_ASSERT(a) MPFR_ASSERTN(a)
37 1.1.1.2.4.1 christos #define MPFR_STAT_STATIC_ASSERT(a) MPFR_ASSERTN(a)
38 1.1.1.2.4.1 christos
39 1.1 mrg int verbose;
40 1.1 mrg
41 1.1 mrg /* template for an unary function */
42 1.1 mrg /* s->size: precision of both input and output
43 1.1 mrg s->xp : Mantissa of first input
44 1.1 mrg s->yp : mantissa of second input */
45 1.1 mrg #define SPEED_MPFR_FUNC(mean_fun) \
46 1.1 mrg do \
47 1.1 mrg { \
48 1.1 mrg unsigned i; \
49 1.1 mrg mpfr_limb_ptr wp; \
50 1.1 mrg double t; \
51 1.1 mrg mpfr_t w, x; \
52 1.1 mrg mp_size_t size; \
53 1.1 mrg MPFR_TMP_DECL (marker); \
54 1.1 mrg \
55 1.1 mrg SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
56 1.1 mrg SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
57 1.1 mrg MPFR_TMP_MARK (marker); \
58 1.1 mrg \
59 1.1 mrg size = (s->size-1)/GMP_NUMB_BITS+1; \
60 1.1 mrg s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
61 1.1 mrg MPFR_TMP_INIT1 (s->xp, x, s->size); \
62 1.1 mrg MPFR_SET_EXP (x, 0); \
63 1.1 mrg \
64 1.1 mrg MPFR_TMP_INIT (wp, w, s->size, size); \
65 1.1 mrg \
66 1.1 mrg speed_operand_src (s, s->xp, size); \
67 1.1 mrg speed_operand_dst (s, wp, size); \
68 1.1 mrg speed_cache_fill (s); \
69 1.1 mrg \
70 1.1 mrg speed_starttime (); \
71 1.1 mrg i = s->reps; \
72 1.1 mrg do \
73 1.1 mrg mean_fun (w, x, MPFR_RNDN); \
74 1.1 mrg while (--i != 0); \
75 1.1 mrg t = speed_endtime (); \
76 1.1 mrg \
77 1.1 mrg MPFR_TMP_FREE (marker); \
78 1.1 mrg return t; \
79 1.1 mrg } \
80 1.1 mrg while (0)
81 1.1 mrg
82 1.1 mrg /* same as SPEED_MPFR_FUNC, but for say mpfr_sin_cos (y, z, x, r) */
83 1.1 mrg #define SPEED_MPFR_FUNC2(mean_fun) \
84 1.1 mrg do \
85 1.1 mrg { \
86 1.1 mrg unsigned i; \
87 1.1 mrg mpfr_limb_ptr vp, wp; \
88 1.1 mrg double t; \
89 1.1 mrg mpfr_t v, w, x; \
90 1.1 mrg mp_size_t size; \
91 1.1 mrg MPFR_TMP_DECL (marker); \
92 1.1 mrg \
93 1.1 mrg SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
94 1.1 mrg SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
95 1.1 mrg MPFR_TMP_MARK (marker); \
96 1.1 mrg \
97 1.1 mrg size = (s->size-1)/GMP_NUMB_BITS+1; \
98 1.1 mrg s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
99 1.1 mrg MPFR_TMP_INIT1 (s->xp, x, s->size); \
100 1.1 mrg MPFR_SET_EXP (x, 0); \
101 1.1 mrg \
102 1.1 mrg MPFR_TMP_INIT (vp, v, s->size, size); \
103 1.1 mrg MPFR_TMP_INIT (wp, w, s->size, size); \
104 1.1 mrg \
105 1.1 mrg speed_operand_src (s, s->xp, size); \
106 1.1 mrg speed_operand_dst (s, vp, size); \
107 1.1 mrg speed_operand_dst (s, wp, size); \
108 1.1 mrg speed_cache_fill (s); \
109 1.1 mrg \
110 1.1 mrg speed_starttime (); \
111 1.1 mrg i = s->reps; \
112 1.1 mrg do \
113 1.1 mrg mean_fun (v, w, x, MPFR_RNDN); \
114 1.1 mrg while (--i != 0); \
115 1.1 mrg t = speed_endtime (); \
116 1.1 mrg \
117 1.1 mrg MPFR_TMP_FREE (marker); \
118 1.1 mrg return t; \
119 1.1 mrg } \
120 1.1 mrg while (0)
121 1.1 mrg
122 1.1 mrg /* template for a function like mpfr_mul */
123 1.1 mrg #define SPEED_MPFR_OP(mean_fun) \
124 1.1 mrg do \
125 1.1 mrg { \
126 1.1 mrg unsigned i; \
127 1.1 mrg mpfr_limb_ptr wp; \
128 1.1 mrg double t; \
129 1.1 mrg mpfr_t w, x, y; \
130 1.1 mrg mp_size_t size; \
131 1.1 mrg MPFR_TMP_DECL (marker); \
132 1.1 mrg \
133 1.1 mrg SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
134 1.1 mrg SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
135 1.1 mrg MPFR_TMP_MARK (marker); \
136 1.1 mrg \
137 1.1 mrg size = (s->size-1)/GMP_NUMB_BITS+1; \
138 1.1 mrg s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
139 1.1 mrg MPFR_TMP_INIT1 (s->xp, x, s->size); \
140 1.1 mrg MPFR_SET_EXP (x, 0); \
141 1.1 mrg s->yp[size-1] |= MPFR_LIMB_HIGHBIT; \
142 1.1 mrg MPFR_TMP_INIT1 (s->yp, y, s->size); \
143 1.1 mrg MPFR_SET_EXP (y, 0); \
144 1.1 mrg \
145 1.1 mrg MPFR_TMP_INIT (wp, w, s->size, size); \
146 1.1 mrg \
147 1.1 mrg speed_operand_src (s, s->xp, size); \
148 1.1 mrg speed_operand_src (s, s->yp, size); \
149 1.1 mrg speed_operand_dst (s, wp, size); \
150 1.1 mrg speed_cache_fill (s); \
151 1.1 mrg \
152 1.1 mrg speed_starttime (); \
153 1.1 mrg i = s->reps; \
154 1.1 mrg do \
155 1.1 mrg mean_fun (w, x, y, MPFR_RNDN); \
156 1.1 mrg while (--i != 0); \
157 1.1 mrg t = speed_endtime (); \
158 1.1 mrg \
159 1.1 mrg MPFR_TMP_FREE (marker); \
160 1.1 mrg return t; \
161 1.1 mrg } \
162 1.1 mrg while (0)
163 1.1 mrg
164 1.1 mrg /* special template for mpfr_mul(a,b,b) */
165 1.1 mrg #define SPEED_MPFR_SQR(mean_fun) \
166 1.1 mrg do \
167 1.1 mrg { \
168 1.1 mrg unsigned i; \
169 1.1 mrg mpfr_limb_ptr wp; \
170 1.1 mrg double t; \
171 1.1 mrg mpfr_t w, x; \
172 1.1 mrg mp_size_t size; \
173 1.1 mrg MPFR_TMP_DECL (marker); \
174 1.1 mrg \
175 1.1 mrg SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
176 1.1 mrg SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
177 1.1 mrg MPFR_TMP_MARK (marker); \
178 1.1 mrg \
179 1.1 mrg size = (s->size-1)/GMP_NUMB_BITS+1; \
180 1.1 mrg s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
181 1.1 mrg MPFR_TMP_INIT1 (s->xp, x, s->size); \
182 1.1 mrg MPFR_SET_EXP (x, 0); \
183 1.1 mrg \
184 1.1 mrg MPFR_TMP_INIT (wp, w, s->size, size); \
185 1.1 mrg \
186 1.1 mrg speed_operand_src (s, s->xp, size); \
187 1.1 mrg speed_operand_dst (s, wp, size); \
188 1.1 mrg speed_cache_fill (s); \
189 1.1 mrg \
190 1.1 mrg speed_starttime (); \
191 1.1 mrg i = s->reps; \
192 1.1 mrg do \
193 1.1 mrg mean_fun (w, x, x, MPFR_RNDN); \
194 1.1 mrg while (--i != 0); \
195 1.1 mrg t = speed_endtime (); \
196 1.1 mrg \
197 1.1 mrg MPFR_TMP_FREE (marker); \
198 1.1 mrg return t; \
199 1.1 mrg } \
200 1.1 mrg while (0)
201 1.1 mrg
202 1.1 mrg /* s->size: precision of both input and output
203 1.1 mrg s->xp : Mantissa of first input
204 1.1 mrg s->r : exponent
205 1.1 mrg s->align_xp : sign (1 means positive, 2 means negative)
206 1.1 mrg */
207 1.1 mrg #define SPEED_MPFR_FUNC_WITH_EXPONENT(mean_fun) \
208 1.1 mrg do \
209 1.1 mrg { \
210 1.1 mrg unsigned i; \
211 1.1 mrg mpfr_limb_ptr wp; \
212 1.1 mrg double t; \
213 1.1 mrg mpfr_t w, x; \
214 1.1 mrg mp_size_t size; \
215 1.1 mrg MPFR_TMP_DECL (marker); \
216 1.1 mrg \
217 1.1 mrg SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
218 1.1 mrg SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
219 1.1 mrg MPFR_TMP_MARK (marker); \
220 1.1 mrg \
221 1.1 mrg size = (s->size-1)/GMP_NUMB_BITS+1; \
222 1.1 mrg s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
223 1.1 mrg MPFR_TMP_INIT1 (s->xp, x, s->size); \
224 1.1 mrg MPFR_SET_EXP (x, s->r); \
225 1.1 mrg if (s->align_xp == 2) MPFR_SET_NEG (x); \
226 1.1 mrg \
227 1.1 mrg MPFR_TMP_INIT (wp, w, s->size, size); \
228 1.1 mrg \
229 1.1 mrg speed_operand_src (s, s->xp, size); \
230 1.1 mrg speed_operand_dst (s, wp, size); \
231 1.1 mrg speed_cache_fill (s); \
232 1.1 mrg \
233 1.1 mrg speed_starttime (); \
234 1.1 mrg i = s->reps; \
235 1.1 mrg do \
236 1.1 mrg mean_fun (w, x, MPFR_RNDN); \
237 1.1 mrg while (--i != 0); \
238 1.1 mrg t = speed_endtime (); \
239 1.1 mrg \
240 1.1 mrg MPFR_TMP_FREE (marker); \
241 1.1 mrg return t; \
242 1.1 mrg } \
243 1.1 mrg while (0)
244 1.1 mrg
245 1.1 mrg /* First we include all the functions we want to tune inside this program.
246 1.1 mrg We can't use the GNU MPFR library since the thresholds are fixed macros. */
247 1.1 mrg
248 1.1 mrg /* Setup mpfr_exp_2 */
249 1.1 mrg mpfr_prec_t mpfr_exp_2_threshold;
250 1.1 mrg #undef MPFR_EXP_2_THRESHOLD
251 1.1 mrg #define MPFR_EXP_2_THRESHOLD mpfr_exp_2_threshold
252 1.1 mrg #include "exp_2.c"
253 1.1 mrg static double
254 1.1 mrg speed_mpfr_exp_2 (struct speed_params *s)
255 1.1 mrg {
256 1.1 mrg SPEED_MPFR_FUNC (mpfr_exp_2);
257 1.1 mrg }
258 1.1 mrg
259 1.1 mrg /* Setup mpfr_exp */
260 1.1 mrg mpfr_prec_t mpfr_exp_threshold;
261 1.1 mrg #undef MPFR_EXP_THRESHOLD
262 1.1 mrg #define MPFR_EXP_THRESHOLD mpfr_exp_threshold
263 1.1 mrg #include "exp.c"
264 1.1 mrg static double
265 1.1 mrg speed_mpfr_exp (struct speed_params *s)
266 1.1 mrg {
267 1.1 mrg SPEED_MPFR_FUNC (mpfr_exp);
268 1.1 mrg }
269 1.1 mrg
270 1.1 mrg /* Setup mpfr_sin_cos */
271 1.1 mrg mpfr_prec_t mpfr_sincos_threshold;
272 1.1 mrg #undef MPFR_SINCOS_THRESHOLD
273 1.1 mrg #define MPFR_SINCOS_THRESHOLD mpfr_sincos_threshold
274 1.1 mrg #include "sin_cos.c"
275 1.1 mrg #include "cos.c"
276 1.1 mrg static double
277 1.1 mrg speed_mpfr_sincos (struct speed_params *s)
278 1.1 mrg {
279 1.1 mrg SPEED_MPFR_FUNC2 (mpfr_sin_cos);
280 1.1 mrg }
281 1.1 mrg
282 1.1 mrg /* Setup mpfr_mul, mpfr_sqr and mpfr_div */
283 1.1.1.2.4.1 christos /* Since mpfr_mul() deals with both mul and sqr, and contains an assert that
284 1.1.1.2.4.1 christos the thresholds are >= 1, we initialize both values to 1 to avoid a failed
285 1.1.1.2.4.1 christos assertion (let's recall that static assertions are replaced by normal
286 1.1.1.2.4.1 christos dynamic assertions here). */
287 1.1.1.2.4.1 christos mpfr_prec_t mpfr_mul_threshold = 1;
288 1.1.1.2.4.1 christos mpfr_prec_t mpfr_sqr_threshold = 1;
289 1.1 mrg mpfr_prec_t mpfr_div_threshold;
290 1.1 mrg #undef MPFR_MUL_THRESHOLD
291 1.1 mrg #define MPFR_MUL_THRESHOLD mpfr_mul_threshold
292 1.1 mrg #undef MPFR_SQR_THRESHOLD
293 1.1 mrg #define MPFR_SQR_THRESHOLD mpfr_sqr_threshold
294 1.1 mrg #undef MPFR_DIV_THRESHOLD
295 1.1 mrg #define MPFR_DIV_THRESHOLD mpfr_div_threshold
296 1.1 mrg #include "mul.c"
297 1.1 mrg #include "div.c"
298 1.1 mrg static double
299 1.1 mrg speed_mpfr_mul (struct speed_params *s)
300 1.1 mrg {
301 1.1 mrg SPEED_MPFR_OP (mpfr_mul);
302 1.1 mrg }
303 1.1 mrg static double
304 1.1 mrg speed_mpfr_sqr (struct speed_params *s)
305 1.1 mrg {
306 1.1 mrg SPEED_MPFR_SQR (mpfr_mul);
307 1.1 mrg }
308 1.1 mrg static double
309 1.1 mrg speed_mpfr_div (struct speed_params *s)
310 1.1 mrg {
311 1.1 mrg SPEED_MPFR_OP (mpfr_div);
312 1.1 mrg }
313 1.1 mrg
314 1.1 mrg /************************************************
315 1.1 mrg * Common functions (inspired by GMP function) *
316 1.1 mrg ************************************************/
317 1.1 mrg static int
318 1.1 mrg analyze_data (double *dat, int ndat)
319 1.1 mrg {
320 1.1 mrg double x, min_x;
321 1.1 mrg int j, min_j;
322 1.1 mrg
323 1.1 mrg x = 0.0;
324 1.1 mrg for (j = 0; j < ndat; j++)
325 1.1 mrg if (dat[j] > 0.0)
326 1.1 mrg x += dat[j];
327 1.1 mrg
328 1.1 mrg min_x = x;
329 1.1 mrg min_j = 0;
330 1.1 mrg
331 1.1 mrg for (j = 0; j < ndat; x -= dat[j], j++)
332 1.1 mrg {
333 1.1 mrg if (x < min_x)
334 1.1 mrg {
335 1.1 mrg min_x = x;
336 1.1 mrg min_j = j;
337 1.1 mrg }
338 1.1 mrg }
339 1.1 mrg return min_j;
340 1.1 mrg }
341 1.1 mrg
342 1.1 mrg static double
343 1.1 mrg mpfr_speed_measure (speed_function_t fun, struct speed_params *s, char *m)
344 1.1 mrg {
345 1.1 mrg double t = -1.0;
346 1.1 mrg int i;
347 1.1 mrg int number_of_iterations = 30;
348 1.1 mrg for (i = 1; i <= number_of_iterations && t == -1.0; i++)
349 1.1 mrg {
350 1.1 mrg t = speed_measure (fun, s);
351 1.1 mrg if ( (t == -1.0) && (i+1 <= number_of_iterations) )
352 1.1 mrg printf("speed_measure failed for size %lu. Trying again... (%d/%d)\n",
353 1.1 mrg s->size, i+1, number_of_iterations);
354 1.1 mrg }
355 1.1 mrg if (t == -1.0)
356 1.1 mrg {
357 1.1 mrg fprintf (stderr, "Failed to measure %s!\n", m);
358 1.1 mrg fprintf (stderr, "If CPU frequency scaling is enabled, please disable it:\n");
359 1.1 mrg fprintf (stderr, " under Linux: cpufreq-selector -g performance\n");
360 1.1 mrg fprintf (stderr, "On a multi-core processor, you might also try to load all the cores\n");
361 1.1 mrg abort ();
362 1.1 mrg }
363 1.1 mrg return t;
364 1.1 mrg }
365 1.1 mrg
366 1.1 mrg #define THRESHOLD_WINDOW 16
367 1.1 mrg #define THRESHOLD_FINAL_WINDOW 128
368 1.1 mrg static double
369 1.1 mrg domeasure (mpfr_prec_t *threshold,
370 1.1 mrg double (*func) (struct speed_params *),
371 1.1 mrg mpfr_prec_t p)
372 1.1 mrg {
373 1.1 mrg struct speed_params s;
374 1.1 mrg mp_size_t size;
375 1.1 mrg double t1, t2, d;
376 1.1 mrg
377 1.1 mrg s.align_xp = s.align_yp = s.align_wp = 64;
378 1.1 mrg s.size = p;
379 1.1 mrg size = (p - 1)/GMP_NUMB_BITS+1;
380 1.1 mrg s.xp = malloc (2*size*sizeof (mp_limb_t));
381 1.1 mrg if (s.xp == NULL)
382 1.1 mrg {
383 1.1 mrg fprintf (stderr, "Can't allocate memory.\n");
384 1.1 mrg abort ();
385 1.1 mrg }
386 1.1 mrg mpn_random (s.xp, size);
387 1.1 mrg s.yp = s.xp + size;
388 1.1 mrg mpn_random (s.yp, size);
389 1.1 mrg *threshold = MPFR_PREC_MAX;
390 1.1 mrg t1 = mpfr_speed_measure (func, &s, "function 1");
391 1.1 mrg *threshold = 1;
392 1.1 mrg t2 = mpfr_speed_measure (func, &s, "function 2");
393 1.1 mrg free (s.xp);
394 1.1 mrg /* t1 is the time of the first algo (used for low prec) */
395 1.1 mrg if (t2 >= t1)
396 1.1 mrg d = (t2 - t1) / t2;
397 1.1 mrg else
398 1.1 mrg d = (t2 - t1) / t1;
399 1.1 mrg /* d > 0 if we have to use algo 1.
400 1.1 mrg d < 0 if we have to use algo 2 */
401 1.1 mrg return d;
402 1.1 mrg }
403 1.1 mrg
404 1.1 mrg /* Performs measures when both the precision and the point of evaluation
405 1.1 mrg shall vary. s.yp is ignored and not initialized.
406 1.1 mrg It assumes that func depends on three thresholds with a boundary of the
407 1.1 mrg form threshold1*x + threshold2*p = some scaling factor, if x<0,
408 1.1 mrg and threshold3*x + threshold2*p = some scaling factor, if x>=0.
409 1.1 mrg */
410 1.1 mrg static double
411 1.1 mrg domeasure2 (long int *threshold1, long int *threshold2, long int *threshold3,
412 1.1 mrg double (*func) (struct speed_params *),
413 1.1 mrg mpfr_prec_t p,
414 1.1 mrg mpfr_t x)
415 1.1 mrg {
416 1.1 mrg struct speed_params s;
417 1.1 mrg mp_size_t size;
418 1.1 mrg double t1, t2, d;
419 1.1 mrg mpfr_t xtmp;
420 1.1 mrg
421 1.1 mrg if (MPFR_IS_SINGULAR (x))
422 1.1 mrg {
423 1.1 mrg mpfr_fprintf (stderr, "x=%RNf is not a regular number.\n");
424 1.1 mrg abort ();
425 1.1 mrg }
426 1.1 mrg if (MPFR_IS_NEG (x))
427 1.1 mrg s.align_xp = 2;
428 1.1 mrg else
429 1.1 mrg s.align_xp = 1;
430 1.1 mrg
431 1.1 mrg s.align_yp = s.align_wp = 64;
432 1.1 mrg s.size = p;
433 1.1 mrg size = (p - 1)/GMP_NUMB_BITS+1;
434 1.1 mrg
435 1.1 mrg mpfr_init2 (xtmp, p);
436 1.1 mrg mpn_random (xtmp->_mpfr_d, size);
437 1.1 mrg xtmp->_mpfr_d[size-1] |= MPFR_LIMB_HIGHBIT;
438 1.1 mrg MPFR_SET_EXP (xtmp, -53);
439 1.1 mrg mpfr_add_ui (xtmp, xtmp, 1, MPFR_RNDN);
440 1.1 mrg mpfr_mul (xtmp, xtmp, x, MPFR_RNDN); /* xtmp = x*(1+perturb) */
441 1.1 mrg /* where perturb ~ 2^(-53) is */
442 1.1 mrg /* randomly chosen. */
443 1.1 mrg s.xp = xtmp->_mpfr_d;
444 1.1 mrg s.r = MPFR_GET_EXP (xtmp);
445 1.1 mrg
446 1.1 mrg *threshold1 = 0;
447 1.1 mrg *threshold2 = 0;
448 1.1 mrg *threshold3 = 0;
449 1.1 mrg t1 = mpfr_speed_measure (func, &s, "function 1");
450 1.1 mrg
451 1.1 mrg if (MPFR_IS_NEG (x))
452 1.1 mrg *threshold1 = INT_MIN;
453 1.1 mrg else
454 1.1 mrg *threshold3 = INT_MAX;
455 1.1 mrg *threshold2 = INT_MAX;
456 1.1 mrg t2 = mpfr_speed_measure (func, &s, "function 2");
457 1.1 mrg
458 1.1 mrg /* t1 is the time of the first algo (used for low prec) */
459 1.1 mrg if (t2 >= t1)
460 1.1 mrg d = (t2 - t1) / t2;
461 1.1 mrg else
462 1.1 mrg d = (t2 - t1) / t1;
463 1.1 mrg /* d > 0 if we have to use algo 1.
464 1.1 mrg d < 0 if we have to use algo 2 */
465 1.1 mrg mpfr_clear (xtmp);
466 1.1 mrg return d;
467 1.1 mrg }
468 1.1 mrg
469 1.1 mrg /* Tune a function with a simple THRESHOLD
470 1.1 mrg The function doesn't depend on another threshold.
471 1.1 mrg It assumes that it uses algo1 if p < THRESHOLD
472 1.1 mrg and algo2 otherwise.
473 1.1 mrg if algo2 is better for low prec, and algo1 better for high prec,
474 1.1 mrg the behaviour of this function is undefined. */
475 1.1 mrg static void
476 1.1 mrg tune_simple_func (mpfr_prec_t *threshold,
477 1.1 mrg double (*func) (struct speed_params *),
478 1.1 mrg mpfr_prec_t pstart)
479 1.1 mrg {
480 1.1 mrg double measure[THRESHOLD_FINAL_WINDOW+1];
481 1.1 mrg double d;
482 1.1 mrg mpfr_prec_t pstep;
483 1.1 mrg int i, numpos, numneg, try;
484 1.1 mrg mpfr_prec_t pmin, pmax, p;
485 1.1 mrg
486 1.1 mrg /* first look for a lower bound within 10% */
487 1.1 mrg pmin = p = pstart;
488 1.1 mrg d = domeasure (threshold, func, pmin);
489 1.1 mrg if (d < 0.0)
490 1.1 mrg {
491 1.1 mrg if (verbose)
492 1.1 mrg printf ("Oops: even for %lu, algo 2 seems to be faster!\n",
493 1.1 mrg (unsigned long) pmin);
494 1.1 mrg *threshold = MPFR_PREC_MIN;
495 1.1 mrg return;
496 1.1 mrg }
497 1.1 mrg if (d >= 1.00)
498 1.1 mrg for (;;)
499 1.1 mrg {
500 1.1 mrg d = domeasure (threshold, func, pmin);
501 1.1 mrg if (d < 1.00)
502 1.1 mrg break;
503 1.1 mrg p = pmin;
504 1.1 mrg pmin += pmin/2;
505 1.1 mrg }
506 1.1 mrg pmin = p;
507 1.1 mrg for (;;)
508 1.1 mrg {
509 1.1 mrg d = domeasure (threshold, func, pmin);
510 1.1 mrg if (d < 0.10)
511 1.1 mrg break;
512 1.1 mrg pmin += GMP_NUMB_BITS;
513 1.1 mrg }
514 1.1 mrg
515 1.1 mrg /* then look for an upper bound within 20% */
516 1.1 mrg pmax = pmin * 2;
517 1.1 mrg for (;;)
518 1.1 mrg {
519 1.1 mrg d = domeasure (threshold, func, pmax);
520 1.1 mrg if (d < -0.20)
521 1.1 mrg break;
522 1.1 mrg pmax += pmin / 2; /* don't increase too rapidly */
523 1.1 mrg }
524 1.1 mrg
525 1.1 mrg /* The threshold is between pmin and pmax. Affine them */
526 1.1 mrg try = 0;
527 1.1 mrg while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW)
528 1.1 mrg {
529 1.1 mrg pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1);
530 1.1 mrg if (verbose)
531 1.1 mrg printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep);
532 1.1 mrg p = (pmin + pmax) / 2;
533 1.1 mrg for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++)
534 1.1 mrg {
535 1.1 mrg measure[i] = domeasure (threshold, func,
536 1.1 mrg p+(i-THRESHOLD_WINDOW/2)*pstep);
537 1.1 mrg if (measure[i] > 0)
538 1.1 mrg numpos ++;
539 1.1 mrg else if (measure[i] < 0)
540 1.1 mrg numneg ++;
541 1.1 mrg }
542 1.1 mrg if (numpos > numneg)
543 1.1 mrg /* We use more often algo 1 than algo 2 */
544 1.1 mrg pmin = p - THRESHOLD_WINDOW/2*pstep;
545 1.1 mrg else if (numpos < numneg)
546 1.1 mrg pmax = p + THRESHOLD_WINDOW/2*pstep;
547 1.1 mrg else
548 1.1 mrg /* numpos == numneg ... */
549 1.1 mrg if (++ try > 2)
550 1.1 mrg {
551 1.1 mrg *threshold = p;
552 1.1 mrg if (verbose)
553 1.1 mrg printf ("Quick find: %lu\n", *threshold);
554 1.1 mrg return ;
555 1.1 mrg }
556 1.1 mrg }
557 1.1 mrg
558 1.1 mrg /* Final tune... */
559 1.1 mrg if (verbose)
560 1.1 mrg printf ("Finalizing in [%lu, %lu]... ", pmin, pmax);
561 1.1 mrg for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++)
562 1.1 mrg measure[i] = domeasure (threshold, func, pmin+i);
563 1.1 mrg i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1);
564 1.1 mrg *threshold = pmin + i;
565 1.1 mrg if (verbose)
566 1.1 mrg printf ("%lu\n", *threshold);
567 1.1 mrg return;
568 1.1 mrg }
569 1.1 mrg
570 1.1 mrg /* Tune a function which behavior depends on both p and x,
571 1.1 mrg in a given direction.
572 1.1 mrg It assumes that for (x,p) close to zero, algo1 is used
573 1.1 mrg and algo2 is used when (x,p) is far from zero.
574 1.1 mrg If algo2 is better for low prec, and algo1 better for high prec,
575 1.1.1.2.4.1 christos the behavior of this function is undefined.
576 1.1 mrg This tuning function tries couples (x,p) of the form (ell*dirx, ell*dirp)
577 1.1 mrg until it finds a point on the boundary. It returns ell.
578 1.1 mrg */
579 1.1 mrg static void
580 1.1 mrg tune_simple_func_in_some_direction (long int *threshold1,
581 1.1 mrg long int *threshold2,
582 1.1 mrg long int *threshold3,
583 1.1 mrg double (*func) (struct speed_params *),
584 1.1 mrg mpfr_prec_t pstart,
585 1.1 mrg int dirx, int dirp,
586 1.1 mrg mpfr_t xres, mpfr_prec_t *pres)
587 1.1 mrg {
588 1.1 mrg double measure[THRESHOLD_FINAL_WINDOW+1];
589 1.1 mrg double d;
590 1.1 mrg mpfr_prec_t pstep;
591 1.1 mrg int i, numpos, numneg, try;
592 1.1 mrg mpfr_prec_t pmin, pmax, p;
593 1.1 mrg mpfr_t xmin, xmax, x;
594 1.1 mrg mpfr_t ratio;
595 1.1 mrg
596 1.1 mrg mpfr_init2 (ratio, MPFR_SMALL_PRECISION);
597 1.1 mrg mpfr_set_si (ratio, dirx, MPFR_RNDN);
598 1.1 mrg mpfr_div_si (ratio, ratio, dirp, MPFR_RNDN);
599 1.1 mrg
600 1.1 mrg mpfr_init2 (xmin, MPFR_SMALL_PRECISION);
601 1.1 mrg mpfr_init2 (xmax, MPFR_SMALL_PRECISION);
602 1.1 mrg mpfr_init2 (x, MPFR_SMALL_PRECISION);
603 1.1 mrg
604 1.1 mrg /* first look for a lower bound within 10% */
605 1.1 mrg pmin = p = pstart;
606 1.1 mrg mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
607 1.1 mrg mpfr_set (x, xmin, MPFR_RNDN);
608 1.1 mrg
609 1.1 mrg d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
610 1.1 mrg if (d < 0.0)
611 1.1 mrg {
612 1.1 mrg if (verbose)
613 1.1 mrg printf ("Oops: even for %lu, algo 2 seems to be faster!\n",
614 1.1 mrg (unsigned long) pmin);
615 1.1 mrg *pres = MPFR_PREC_MIN;
616 1.1 mrg mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
617 1.1 mrg mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
618 1.1 mrg return;
619 1.1 mrg }
620 1.1 mrg if (d >= 1.00)
621 1.1 mrg for (;;)
622 1.1 mrg {
623 1.1 mrg d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
624 1.1 mrg if (d < 1.00)
625 1.1 mrg break;
626 1.1 mrg p = pmin;
627 1.1 mrg mpfr_set (x, xmin, MPFR_RNDN);
628 1.1 mrg pmin += pmin/2;
629 1.1 mrg mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
630 1.1 mrg }
631 1.1 mrg pmin = p;
632 1.1 mrg mpfr_set (xmin, x, MPFR_RNDN);
633 1.1 mrg for (;;)
634 1.1 mrg {
635 1.1 mrg d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
636 1.1 mrg if (d < 0.10)
637 1.1 mrg break;
638 1.1 mrg pmin += GMP_NUMB_BITS;
639 1.1 mrg mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
640 1.1 mrg }
641 1.1 mrg
642 1.1 mrg /* then look for an upper bound within 20% */
643 1.1 mrg pmax = pmin * 2;
644 1.1 mrg mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
645 1.1 mrg for (;;)
646 1.1 mrg {
647 1.1 mrg d = domeasure2 (threshold1, threshold2, threshold3, func, pmax, xmax);
648 1.1 mrg if (d < -0.20)
649 1.1 mrg break;
650 1.1 mrg pmax += pmin / 2; /* don't increase too rapidly */
651 1.1 mrg mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
652 1.1 mrg }
653 1.1 mrg
654 1.1 mrg /* The threshold is between pmin and pmax. Affine them */
655 1.1 mrg try = 0;
656 1.1 mrg while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW)
657 1.1 mrg {
658 1.1 mrg pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1);
659 1.1 mrg if (verbose)
660 1.1 mrg printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep);
661 1.1 mrg p = (pmin + pmax) / 2;
662 1.1 mrg mpfr_mul_ui (x, ratio, (unsigned int)p, MPFR_RNDN);
663 1.1 mrg for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++)
664 1.1 mrg {
665 1.1 mrg *pres = p+(i-THRESHOLD_WINDOW/2)*pstep;
666 1.1 mrg mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
667 1.1 mrg measure[i] = domeasure2 (threshold1, threshold2, threshold3,
668 1.1 mrg func, *pres, xres);
669 1.1 mrg if (measure[i] > 0)
670 1.1 mrg numpos ++;
671 1.1 mrg else if (measure[i] < 0)
672 1.1 mrg numneg ++;
673 1.1 mrg }
674 1.1 mrg if (numpos > numneg)
675 1.1 mrg {
676 1.1 mrg /* We use more often algo 1 than algo 2 */
677 1.1 mrg pmin = p - THRESHOLD_WINDOW/2*pstep;
678 1.1 mrg mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
679 1.1 mrg }
680 1.1 mrg else if (numpos < numneg)
681 1.1 mrg {
682 1.1 mrg pmax = p + THRESHOLD_WINDOW/2*pstep;
683 1.1 mrg mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
684 1.1 mrg }
685 1.1 mrg else
686 1.1 mrg /* numpos == numneg ... */
687 1.1 mrg if (++ try > 2)
688 1.1 mrg {
689 1.1 mrg *pres = p;
690 1.1 mrg mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
691 1.1 mrg if (verbose)
692 1.1 mrg printf ("Quick find: %lu\n", *pres);
693 1.1 mrg mpfr_clear (ratio);
694 1.1 mrg mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
695 1.1 mrg return ;
696 1.1 mrg }
697 1.1 mrg }
698 1.1 mrg
699 1.1 mrg /* Final tune... */
700 1.1 mrg if (verbose)
701 1.1 mrg printf ("Finalizing in [%lu, %lu]... ", pmin, pmax);
702 1.1 mrg for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++)
703 1.1 mrg {
704 1.1 mrg *pres = pmin+i;
705 1.1 mrg mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
706 1.1 mrg measure[i] = domeasure2 (threshold1, threshold2, threshold3,
707 1.1 mrg func, *pres, xres);
708 1.1 mrg }
709 1.1 mrg i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1);
710 1.1 mrg *pres = pmin + i;
711 1.1 mrg mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
712 1.1 mrg if (verbose)
713 1.1 mrg printf ("%lu\n", *pres);
714 1.1 mrg mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
715 1.1 mrg return;
716 1.1 mrg }
717 1.1 mrg
718 1.1 mrg /************************************
719 1.1 mrg * Tune Mulders' mulhigh function *
720 1.1 mrg ************************************/
721 1.1 mrg #define TOLERANCE 1.00
722 1.1 mrg #define MULDERS_TABLE_SIZE 1024
723 1.1 mrg #ifndef MPFR_MULHIGH_SIZE
724 1.1 mrg # define MPFR_MULHIGH_SIZE MULDERS_TABLE_SIZE
725 1.1 mrg #endif
726 1.1 mrg #ifndef MPFR_SQRHIGH_SIZE
727 1.1 mrg # define MPFR_SQRHIGH_SIZE MULDERS_TABLE_SIZE
728 1.1 mrg #endif
729 1.1 mrg #ifndef MPFR_DIVHIGH_SIZE
730 1.1 mrg # define MPFR_DIVHIGH_SIZE MULDERS_TABLE_SIZE
731 1.1 mrg #endif
732 1.1 mrg #define MPFR_MULHIGH_TAB_SIZE MPFR_MULHIGH_SIZE
733 1.1 mrg #define MPFR_SQRHIGH_TAB_SIZE MPFR_SQRHIGH_SIZE
734 1.1 mrg #define MPFR_DIVHIGH_TAB_SIZE MPFR_DIVHIGH_SIZE
735 1.1 mrg #include "mulders.c"
736 1.1 mrg
737 1.1 mrg static double
738 1.1 mrg speed_mpfr_mulhigh (struct speed_params *s)
739 1.1 mrg {
740 1.1 mrg SPEED_ROUTINE_MPN_MUL_N (mpfr_mulhigh_n);
741 1.1 mrg }
742 1.1 mrg
743 1.1 mrg static double
744 1.1 mrg speed_mpfr_sqrhigh (struct speed_params *s)
745 1.1 mrg {
746 1.1 mrg SPEED_ROUTINE_MPN_SQR (mpfr_sqrhigh_n);
747 1.1 mrg }
748 1.1 mrg
749 1.1 mrg static double
750 1.1 mrg speed_mpfr_divhigh (struct speed_params *s)
751 1.1 mrg {
752 1.1 mrg SPEED_ROUTINE_MPN_DC_DIVREM_CALL (mpfr_divhigh_n (q, a, d, s->size));
753 1.1 mrg }
754 1.1 mrg
755 1.1 mrg #define MAX_STEPS 513 /* maximum number of values of k tried for a given n */
756 1.1 mrg
757 1.1 mrg /* Tune mpfr_mulhigh_n for size n */
758 1.1 mrg static mp_size_t
759 1.1 mrg tune_mul_mulders_upto (mp_size_t n)
760 1.1 mrg {
761 1.1 mrg struct speed_params s;
762 1.1 mrg mp_size_t k, kbest, step;
763 1.1 mrg double t, tbest;
764 1.1 mrg MPFR_TMP_DECL (marker);
765 1.1 mrg
766 1.1 mrg if (n == 0)
767 1.1 mrg return -1;
768 1.1 mrg
769 1.1 mrg MPFR_TMP_MARK (marker);
770 1.1 mrg s.align_xp = s.align_yp = s.align_wp = 64;
771 1.1 mrg s.size = n;
772 1.1 mrg s.xp = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
773 1.1 mrg s.yp = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
774 1.1 mrg mpn_random (s.xp, n);
775 1.1 mrg mpn_random (s.yp, n);
776 1.1 mrg
777 1.1 mrg /* Check k == -1, mpn_mul_basecase */
778 1.1 mrg mulhigh_ktab[n] = -1;
779 1.1 mrg kbest = -1;
780 1.1 mrg tbest = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
781 1.1 mrg
782 1.1 mrg /* Check k == 0, mpn_mulhigh_n_basecase */
783 1.1 mrg mulhigh_ktab[n] = 0;
784 1.1 mrg t = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
785 1.1 mrg if (t * TOLERANCE < tbest)
786 1.1 mrg kbest = 0, tbest = t;
787 1.1 mrg
788 1.1 mrg /* Check Mulders with cutoff point k */
789 1.1 mrg step = 1 + n / (2 * MAX_STEPS);
790 1.1 mrg /* we need k >= (n+3)/2, which translates into k >= (n+4)/2 in C */
791 1.1 mrg for (k = (n + 4) / 2 ; k < n ; k += step)
792 1.1 mrg {
793 1.1 mrg mulhigh_ktab[n] = k;
794 1.1 mrg t = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
795 1.1 mrg if (t * TOLERANCE < tbest)
796 1.1 mrg kbest = k, tbest = t;
797 1.1 mrg }
798 1.1 mrg
799 1.1 mrg mulhigh_ktab[n] = kbest;
800 1.1 mrg
801 1.1 mrg MPFR_TMP_FREE (marker);
802 1.1 mrg return kbest;
803 1.1 mrg }
804 1.1 mrg
805 1.1 mrg /* Tune mpfr_sqrhigh_n for size n */
806 1.1 mrg static mp_size_t
807 1.1 mrg tune_sqr_mulders_upto (mp_size_t n)
808 1.1 mrg {
809 1.1 mrg struct speed_params s;
810 1.1 mrg mp_size_t k, kbest, step;
811 1.1 mrg double t, tbest;
812 1.1 mrg MPFR_TMP_DECL (marker);
813 1.1 mrg
814 1.1 mrg if (n == 0)
815 1.1 mrg return -1;
816 1.1 mrg
817 1.1 mrg MPFR_TMP_MARK (marker);
818 1.1 mrg s.align_xp = s.align_wp = 64;
819 1.1 mrg s.size = n;
820 1.1 mrg s.xp = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
821 1.1 mrg mpn_random (s.xp, n);
822 1.1 mrg
823 1.1 mrg /* Check k == -1, mpn_sqr_basecase */
824 1.1 mrg sqrhigh_ktab[n] = -1;
825 1.1 mrg kbest = -1;
826 1.1 mrg tbest = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
827 1.1 mrg
828 1.1 mrg /* Check k == 0, mpfr_mulhigh_n_basecase */
829 1.1 mrg sqrhigh_ktab[n] = 0;
830 1.1 mrg t = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
831 1.1 mrg if (t * TOLERANCE < tbest)
832 1.1 mrg kbest = 0, tbest = t;
833 1.1 mrg
834 1.1 mrg /* Check Mulders */
835 1.1 mrg step = 1 + n / (2 * MAX_STEPS);
836 1.1 mrg /* we need k >= (n+3)/2, which translates into k >= (n+4)/2 in C */
837 1.1 mrg for (k = (n + 4) / 2 ; k < n ; k += step)
838 1.1 mrg {
839 1.1 mrg sqrhigh_ktab[n] = k;
840 1.1 mrg t = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
841 1.1 mrg if (t * TOLERANCE < tbest)
842 1.1 mrg kbest = k, tbest = t;
843 1.1 mrg }
844 1.1 mrg
845 1.1 mrg sqrhigh_ktab[n] = kbest;
846 1.1 mrg
847 1.1 mrg MPFR_TMP_FREE (marker);
848 1.1 mrg return kbest;
849 1.1 mrg }
850 1.1 mrg
851 1.1 mrg /* Tune mpfr_divhigh_n for size n */
852 1.1 mrg static mp_size_t
853 1.1 mrg tune_div_mulders_upto (mp_size_t n)
854 1.1 mrg {
855 1.1 mrg struct speed_params s;
856 1.1 mrg mp_size_t k, kbest, step;
857 1.1 mrg double t, tbest;
858 1.1 mrg MPFR_TMP_DECL (marker);
859 1.1 mrg
860 1.1 mrg if (n == 0)
861 1.1 mrg return 0;
862 1.1 mrg
863 1.1 mrg MPFR_TMP_MARK (marker);
864 1.1 mrg s.align_xp = s.align_yp = s.align_wp = s.align_wp2 = 64;
865 1.1 mrg s.size = n;
866 1.1 mrg s.xp = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
867 1.1 mrg s.yp = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
868 1.1 mrg mpn_random (s.xp, n);
869 1.1 mrg mpn_random (s.yp, n);
870 1.1 mrg
871 1.1 mrg /* Check k == n, i.e., mpn_divrem */
872 1.1 mrg divhigh_ktab[n] = n;
873 1.1 mrg kbest = n;
874 1.1 mrg tbest = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
875 1.1 mrg
876 1.1 mrg /* Check k == 0, i.e., mpfr_divhigh_n_basecase */
877 1.1 mrg #if defined(WANT_GMP_INTERNALS) && defined(HAVE___GMPN_SBPI1_DIVAPPR_Q)
878 1.1 mrg if (n > 2) /* mpn_sbpi1_divappr_q requires dn > 2 */
879 1.1 mrg #endif
880 1.1 mrg {
881 1.1 mrg divhigh_ktab[n] = 0;
882 1.1 mrg t = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
883 1.1 mrg if (t * TOLERANCE < tbest)
884 1.1 mrg kbest = 0, tbest = t;
885 1.1 mrg }
886 1.1 mrg
887 1.1 mrg /* Check Mulders */
888 1.1 mrg step = 1 + n / (2 * MAX_STEPS);
889 1.1 mrg /* we should have (n+3)/2 <= k < n, which translates into
890 1.1 mrg (n+4)/2 <= k < n in C */
891 1.1 mrg for (k = (n + 4) / 2 ; k < n ; k += step)
892 1.1 mrg {
893 1.1 mrg divhigh_ktab[n] = k;
894 1.1 mrg t = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
895 1.1 mrg if (t * TOLERANCE < tbest)
896 1.1 mrg kbest = k, tbest = t;
897 1.1 mrg }
898 1.1 mrg
899 1.1 mrg divhigh_ktab[n] = kbest;
900 1.1 mrg
901 1.1 mrg MPFR_TMP_FREE (marker);
902 1.1 mrg
903 1.1 mrg return kbest;
904 1.1 mrg }
905 1.1 mrg
906 1.1 mrg static void
907 1.1 mrg tune_mul_mulders (FILE *f)
908 1.1 mrg {
909 1.1 mrg mp_size_t k;
910 1.1 mrg
911 1.1 mrg if (verbose)
912 1.1 mrg printf ("Tuning mpfr_mulhigh_n[%d]", (int) MPFR_MULHIGH_TAB_SIZE);
913 1.1 mrg fprintf (f, "#define MPFR_MULHIGH_TAB \\\n ");
914 1.1 mrg for (k = 0 ; k < MPFR_MULHIGH_TAB_SIZE ; k++)
915 1.1 mrg {
916 1.1 mrg fprintf (f, "%d", (int) tune_mul_mulders_upto (k));
917 1.1 mrg if (k != MPFR_MULHIGH_TAB_SIZE-1)
918 1.1 mrg fputc (',', f);
919 1.1 mrg if ((k+1) % 16 == 0)
920 1.1 mrg fprintf (f, " \\\n ");
921 1.1 mrg if (verbose)
922 1.1 mrg putchar ('.');
923 1.1 mrg }
924 1.1 mrg fprintf (f, " \n");
925 1.1 mrg if (verbose)
926 1.1 mrg putchar ('\n');
927 1.1 mrg }
928 1.1 mrg
929 1.1 mrg static void
930 1.1 mrg tune_sqr_mulders (FILE *f)
931 1.1 mrg {
932 1.1 mrg mp_size_t k;
933 1.1 mrg
934 1.1 mrg if (verbose)
935 1.1 mrg printf ("Tuning mpfr_sqrhigh_n[%d]", (int) MPFR_SQRHIGH_TAB_SIZE);
936 1.1 mrg fprintf (f, "#define MPFR_SQRHIGH_TAB \\\n ");
937 1.1 mrg for (k = 0 ; k < MPFR_SQRHIGH_TAB_SIZE ; k++)
938 1.1 mrg {
939 1.1 mrg fprintf (f, "%d", (int) tune_sqr_mulders_upto (k));
940 1.1 mrg if (k != MPFR_SQRHIGH_TAB_SIZE-1)
941 1.1 mrg fputc (',', f);
942 1.1 mrg if ((k+1) % 16 == 0)
943 1.1 mrg fprintf (f, " \\\n ");
944 1.1 mrg if (verbose)
945 1.1 mrg putchar ('.');
946 1.1 mrg }
947 1.1 mrg fprintf (f, " \n");
948 1.1 mrg if (verbose)
949 1.1 mrg putchar ('\n');
950 1.1 mrg }
951 1.1 mrg
952 1.1 mrg static void
953 1.1 mrg tune_div_mulders (FILE *f)
954 1.1 mrg {
955 1.1 mrg mp_size_t k;
956 1.1 mrg
957 1.1 mrg if (verbose)
958 1.1 mrg printf ("Tuning mpfr_divhigh_n[%d]", (int) MPFR_DIVHIGH_TAB_SIZE);
959 1.1 mrg fprintf (f, "#define MPFR_DIVHIGH_TAB \\\n ");
960 1.1 mrg for (k = 0 ; k < MPFR_DIVHIGH_TAB_SIZE ; k++)
961 1.1 mrg {
962 1.1 mrg fprintf (f, "%d", (int) tune_div_mulders_upto (k));
963 1.1 mrg if (k != MPFR_DIVHIGH_TAB_SIZE - 1)
964 1.1 mrg fputc (',', f);
965 1.1 mrg if ((k+1) % 16 == 0)
966 1.1.1.2.4.1 christos fprintf (f, " /*%zu-%zu*/ \\\n ", (size_t) k - 15, (size_t) k);
967 1.1 mrg if (verbose)
968 1.1 mrg putchar ('.');
969 1.1 mrg }
970 1.1 mrg fprintf (f, " \n");
971 1.1 mrg if (verbose)
972 1.1 mrg putchar ('\n');
973 1.1 mrg }
974 1.1 mrg
975 1.1 mrg /*******************************************************
976 1.1 mrg * Tuning functions for mpfr_ai *
977 1.1 mrg *******************************************************/
978 1.1 mrg
979 1.1 mrg long int mpfr_ai_threshold1;
980 1.1 mrg long int mpfr_ai_threshold2;
981 1.1 mrg long int mpfr_ai_threshold3;
982 1.1 mrg #undef MPFR_AI_THRESHOLD1
983 1.1 mrg #define MPFR_AI_THRESHOLD1 mpfr_ai_threshold1
984 1.1 mrg #undef MPFR_AI_THRESHOLD2
985 1.1 mrg #define MPFR_AI_THRESHOLD2 mpfr_ai_threshold2
986 1.1 mrg #undef MPFR_AI_THRESHOLD3
987 1.1 mrg #define MPFR_AI_THRESHOLD3 mpfr_ai_threshold3
988 1.1 mrg
989 1.1 mrg #include "ai.c"
990 1.1 mrg
991 1.1 mrg static double
992 1.1 mrg speed_mpfr_ai (struct speed_params *s)
993 1.1 mrg {
994 1.1 mrg SPEED_MPFR_FUNC_WITH_EXPONENT (mpfr_ai);
995 1.1 mrg }
996 1.1 mrg
997 1.1 mrg
998 1.1 mrg /*******************************************************
999 1.1 mrg * Tune all the threshold of MPFR *
1000 1.1 mrg * Warning: tune the function in their dependent order!*
1001 1.1 mrg *******************************************************/
1002 1.1 mrg static void
1003 1.1 mrg all (const char *filename)
1004 1.1 mrg {
1005 1.1 mrg FILE *f;
1006 1.1 mrg time_t start_time, end_time;
1007 1.1 mrg struct tm *tp;
1008 1.1 mrg mpfr_t x1, x2, x3, tmp1, tmp2;
1009 1.1 mrg mpfr_prec_t p1, p2, p3;
1010 1.1 mrg
1011 1.1 mrg f = fopen (filename, "w");
1012 1.1 mrg if (f == NULL)
1013 1.1 mrg {
1014 1.1 mrg fprintf (stderr, "Can't open file '%s' for writing.\n", filename);
1015 1.1 mrg abort ();
1016 1.1 mrg }
1017 1.1 mrg
1018 1.1 mrg speed_time_init ();
1019 1.1 mrg if (verbose)
1020 1.1 mrg {
1021 1.1 mrg printf ("Using: %s\n", speed_time_string);
1022 1.1 mrg printf ("speed_precision %d", speed_precision);
1023 1.1 mrg if (speed_unittime == 1.0)
1024 1.1 mrg printf (", speed_unittime 1 cycle");
1025 1.1 mrg else
1026 1.1 mrg printf (", speed_unittime %.2e secs", speed_unittime);
1027 1.1 mrg if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
1028 1.1 mrg printf (", CPU freq unknown\n");
1029 1.1 mrg else
1030 1.1 mrg printf (", CPU freq %.2f MHz\n\n", 1e-6/speed_cycletime);
1031 1.1 mrg }
1032 1.1 mrg
1033 1.1 mrg time (&start_time);
1034 1.1 mrg tp = localtime (&start_time);
1035 1.1 mrg fprintf (f, "/* Generated by MPFR's tuneup.c, %d-%02d-%02d, ",
1036 1.1 mrg tp->tm_year+1900, tp->tm_mon+1, tp->tm_mday);
1037 1.1 mrg
1038 1.1 mrg #ifdef __ICC
1039 1.1 mrg fprintf (f, "icc %d.%d.%d */\n", __ICC / 100, __ICC / 10 % 10, __ICC % 10);
1040 1.1 mrg #elif defined(__GNUC__)
1041 1.1 mrg #ifdef __GNUC_PATCHLEVEL__
1042 1.1 mrg fprintf (f, "gcc %d.%d.%d */\n", __GNUC__, __GNUC_MINOR__,
1043 1.1 mrg __GNUC_PATCHLEVEL__);
1044 1.1 mrg #else
1045 1.1 mrg fprintf (f, "gcc %d.%d */\n", __GNUC__, __GNUC_MINOR__);
1046 1.1 mrg #endif
1047 1.1 mrg #elif defined (__SUNPRO_C)
1048 1.1 mrg fprintf (f, "Sun C %d.%d */\n", __SUNPRO_C / 0x100, __SUNPRO_C % 0x100);
1049 1.1 mrg #elif defined (__sgi) && defined (_COMPILER_VERSION)
1050 1.1 mrg fprintf (f, "MIPSpro C %d.%d.%d */\n",
1051 1.1 mrg _COMPILER_VERSION / 100,
1052 1.1 mrg _COMPILER_VERSION / 10 % 10,
1053 1.1 mrg _COMPILER_VERSION % 10);
1054 1.1 mrg #elif defined (__DECC) && defined (__DECC_VER)
1055 1.1 mrg fprintf (f, "DEC C %d */\n", __DECC_VER);
1056 1.1 mrg #else
1057 1.1 mrg fprintf (f, "system compiler */\n");
1058 1.1 mrg #endif
1059 1.1 mrg fprintf (f, "\n\n");
1060 1.1 mrg fprintf (f, "#ifndef MPFR_TUNE_CASE\n");
1061 1.1 mrg fprintf (f, "#define MPFR_TUNE_CASE \"src/mparam.h\"\n");
1062 1.1 mrg fprintf (f, "#endif\n\n");
1063 1.1 mrg
1064 1.1 mrg /* Tune mulhigh */
1065 1.1 mrg tune_mul_mulders (f);
1066 1.1 mrg
1067 1.1 mrg /* Tune sqrhigh */
1068 1.1 mrg tune_sqr_mulders (f);
1069 1.1 mrg
1070 1.1 mrg /* Tune divhigh */
1071 1.1 mrg tune_div_mulders (f);
1072 1.1 mrg fflush (f);
1073 1.1 mrg
1074 1.1 mrg /* Tune mpfr_mul (threshold is in limbs, but it doesn't matter too much) */
1075 1.1 mrg if (verbose)
1076 1.1 mrg printf ("Tuning mpfr_mul...\n");
1077 1.1 mrg tune_simple_func (&mpfr_mul_threshold, speed_mpfr_mul,
1078 1.1 mrg 2*GMP_NUMB_BITS+1);
1079 1.1 mrg fprintf (f, "#define MPFR_MUL_THRESHOLD %lu /* limbs */\n",
1080 1.1 mrg (unsigned long) (mpfr_mul_threshold - 1) / GMP_NUMB_BITS + 1);
1081 1.1 mrg
1082 1.1 mrg /* Tune mpfr_sqr (threshold is in limbs, but it doesn't matter too much) */
1083 1.1 mrg if (verbose)
1084 1.1 mrg printf ("Tuning mpfr_sqr...\n");
1085 1.1 mrg tune_simple_func (&mpfr_sqr_threshold, speed_mpfr_sqr,
1086 1.1 mrg 2*GMP_NUMB_BITS+1);
1087 1.1 mrg fprintf (f, "#define MPFR_SQR_THRESHOLD %lu /* limbs */\n",
1088 1.1 mrg (unsigned long) (mpfr_sqr_threshold - 1) / GMP_NUMB_BITS + 1);
1089 1.1 mrg
1090 1.1 mrg /* Tune mpfr_div (threshold is in limbs, but it doesn't matter too much) */
1091 1.1 mrg if (verbose)
1092 1.1 mrg printf ("Tuning mpfr_div...\n");
1093 1.1 mrg tune_simple_func (&mpfr_div_threshold, speed_mpfr_div,
1094 1.1 mrg 2*GMP_NUMB_BITS+1);
1095 1.1 mrg fprintf (f, "#define MPFR_DIV_THRESHOLD %lu /* limbs */\n",
1096 1.1 mrg (unsigned long) (mpfr_div_threshold - 1) / GMP_NUMB_BITS + 1);
1097 1.1 mrg
1098 1.1 mrg /* Tune mpfr_exp_2 */
1099 1.1 mrg if (verbose)
1100 1.1 mrg printf ("Tuning mpfr_exp_2...\n");
1101 1.1 mrg tune_simple_func (&mpfr_exp_2_threshold, speed_mpfr_exp_2, GMP_NUMB_BITS);
1102 1.1 mrg fprintf (f, "#define MPFR_EXP_2_THRESHOLD %lu /* bits */\n",
1103 1.1 mrg (unsigned long) mpfr_exp_2_threshold);
1104 1.1 mrg
1105 1.1 mrg /* Tune mpfr_exp */
1106 1.1 mrg if (verbose)
1107 1.1 mrg printf ("Tuning mpfr_exp...\n");
1108 1.1 mrg tune_simple_func (&mpfr_exp_threshold, speed_mpfr_exp,
1109 1.1 mrg MPFR_PREC_MIN+3*GMP_NUMB_BITS);
1110 1.1 mrg fprintf (f, "#define MPFR_EXP_THRESHOLD %lu /* bits */\n",
1111 1.1 mrg (unsigned long) mpfr_exp_threshold);
1112 1.1 mrg
1113 1.1 mrg /* Tune mpfr_sin_cos */
1114 1.1 mrg if (verbose)
1115 1.1 mrg printf ("Tuning mpfr_sin_cos...\n");
1116 1.1 mrg tune_simple_func (&mpfr_sincos_threshold, speed_mpfr_sincos,
1117 1.1 mrg MPFR_PREC_MIN+3*GMP_NUMB_BITS);
1118 1.1 mrg fprintf (f, "#define MPFR_SINCOS_THRESHOLD %lu /* bits */\n",
1119 1.1 mrg (unsigned long) mpfr_sincos_threshold);
1120 1.1 mrg
1121 1.1 mrg /* Tune mpfr_ai */
1122 1.1 mrg if (verbose)
1123 1.1 mrg printf ("Tuning mpfr_ai...\n");
1124 1.1 mrg mpfr_init2 (x1, MPFR_SMALL_PRECISION);
1125 1.1 mrg mpfr_init2 (x2, MPFR_SMALL_PRECISION);
1126 1.1 mrg mpfr_init2 (x3, MPFR_SMALL_PRECISION);
1127 1.1 mrg mpfr_init2 (tmp1, MPFR_SMALL_PRECISION);
1128 1.1 mrg mpfr_init2 (tmp2, MPFR_SMALL_PRECISION);
1129 1.1 mrg
1130 1.1 mrg tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
1131 1.1 mrg &mpfr_ai_threshold3, speed_mpfr_ai,
1132 1.1 mrg MPFR_PREC_MIN+GMP_NUMB_BITS,
1133 1.1 mrg -60, 200, x1, &p1);
1134 1.1 mrg tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
1135 1.1 mrg &mpfr_ai_threshold3, speed_mpfr_ai,
1136 1.1 mrg MPFR_PREC_MIN+GMP_NUMB_BITS,
1137 1.1 mrg -20, 500, x2, &p2);
1138 1.1 mrg tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
1139 1.1 mrg &mpfr_ai_threshold3, speed_mpfr_ai,
1140 1.1 mrg MPFR_PREC_MIN+GMP_NUMB_BITS,
1141 1.1 mrg 40, 200, x3, &p3);
1142 1.1 mrg
1143 1.1 mrg mpfr_mul_ui (tmp1, x2, (unsigned long)p1, MPFR_RNDN);
1144 1.1 mrg mpfr_mul_ui (tmp2, x1, (unsigned long)p2, MPFR_RNDN);
1145 1.1 mrg mpfr_sub (tmp1, tmp1, tmp2, MPFR_RNDN);
1146 1.1 mrg mpfr_div_ui (tmp1, tmp1, MPFR_AI_SCALE, MPFR_RNDN);
1147 1.1 mrg
1148 1.1 mrg mpfr_set_ui (tmp2, (unsigned long)p1, MPFR_RNDN);
1149 1.1 mrg mpfr_sub_ui (tmp2, tmp2, (unsigned long)p2, MPFR_RNDN);
1150 1.1 mrg mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN);
1151 1.1 mrg mpfr_ai_threshold1 = mpfr_get_si (tmp2, MPFR_RNDN);
1152 1.1 mrg
1153 1.1 mrg mpfr_sub (tmp2, x2, x1, MPFR_RNDN);
1154 1.1 mrg mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN);
1155 1.1 mrg mpfr_ai_threshold2 = mpfr_get_si (tmp2, MPFR_RNDN);
1156 1.1 mrg
1157 1.1 mrg mpfr_set_ui (tmp1, (unsigned long)p3, MPFR_RNDN);
1158 1.1 mrg mpfr_mul_si (tmp1, tmp1, mpfr_ai_threshold2, MPFR_RNDN);
1159 1.1 mrg mpfr_ui_sub (tmp1, MPFR_AI_SCALE, tmp1, MPFR_RNDN);
1160 1.1 mrg mpfr_div (tmp1, tmp1, x3, MPFR_RNDN);
1161 1.1 mrg mpfr_ai_threshold3 = mpfr_get_si (tmp1, MPFR_RNDN);
1162 1.1 mrg
1163 1.1 mrg fprintf (f, "#define MPFR_AI_THRESHOLD1 %ld /* threshold for negative input of mpfr_ai */\n", mpfr_ai_threshold1);
1164 1.1 mrg fprintf (f, "#define MPFR_AI_THRESHOLD2 %ld\n", mpfr_ai_threshold2);
1165 1.1 mrg fprintf (f, "#define MPFR_AI_THRESHOLD3 %ld\n", mpfr_ai_threshold3);
1166 1.1 mrg
1167 1.1 mrg mpfr_clear (x1); mpfr_clear (x2); mpfr_clear (x3);
1168 1.1 mrg mpfr_clear (tmp1); mpfr_clear (tmp2);
1169 1.1 mrg
1170 1.1 mrg /* End of tuning */
1171 1.1 mrg time (&end_time);
1172 1.1 mrg fprintf (f, "/* Tuneup completed successfully, took %ld seconds */\n",
1173 1.1 mrg (long) (end_time - start_time));
1174 1.1 mrg if (verbose)
1175 1.1 mrg printf ("Complete (took %ld seconds).\n", (long) (end_time - start_time));
1176 1.1 mrg
1177 1.1 mrg fclose (f);
1178 1.1 mrg }
1179 1.1 mrg
1180 1.1 mrg
1181 1.1 mrg /* Main function */
1182 1.1 mrg int main (int argc, char *argv[])
1183 1.1 mrg {
1184 1.1 mrg /* Unbuffered so if output is redirected to a file it isn't lost if the
1185 1.1 mrg program is killed part way through. */
1186 1.1 mrg setbuf (stdout, NULL);
1187 1.1 mrg setbuf (stderr, NULL);
1188 1.1 mrg
1189 1.1 mrg verbose = argc > 1;
1190 1.1 mrg
1191 1.1 mrg if (verbose)
1192 1.1 mrg printf ("Tuning MPFR (Coffee time?)...\n");
1193 1.1 mrg
1194 1.1 mrg all ("mparam.h");
1195 1.1 mrg
1196 1.1 mrg return 0;
1197 1.1 mrg }
1198