1 1.1 mrg /* Speed measuring program. 2 1.1 mrg 3 1.1.1.4 mrg Copyright 1999-2003, 2005, 2006, 2008-2019 Free Software Foundation, Inc. 4 1.1 mrg 5 1.1 mrg This file is part of the GNU MP Library. 6 1.1 mrg 7 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify 8 1.1.1.3 mrg it under the terms of either: 9 1.1.1.3 mrg 10 1.1.1.3 mrg * the GNU Lesser General Public License as published by the Free 11 1.1.1.3 mrg Software Foundation; either version 3 of the License, or (at your 12 1.1.1.3 mrg option) any later version. 13 1.1.1.3 mrg 14 1.1.1.3 mrg or 15 1.1.1.3 mrg 16 1.1.1.3 mrg * the GNU General Public License as published by the Free Software 17 1.1.1.3 mrg Foundation; either version 2 of the License, or (at your option) any 18 1.1.1.3 mrg later version. 19 1.1.1.3 mrg 20 1.1.1.3 mrg or both in parallel, as here. 21 1.1 mrg 22 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but 23 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24 1.1.1.3 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25 1.1.1.3 mrg for more details. 26 1.1 mrg 27 1.1.1.3 mrg You should have received copies of the GNU General Public License and the 28 1.1.1.3 mrg GNU Lesser General Public License along with the GNU MP Library. If not, 29 1.1.1.3 mrg see https://www.gnu.org/licenses/. */ 30 1.1 mrg 31 1.1 mrg /* Usage message is in the code below, run with no arguments to print it. 32 1.1 mrg See README for interesting applications. 33 1.1 mrg 34 1.1 mrg To add a new routine foo(), create a speed_foo() function in the style of 35 1.1 mrg the existing ones and add an entry in the routine[] array. Put FLAG_R if 36 1.1 mrg speed_foo() wants an "r" parameter. 37 1.1 mrg 38 1.1 mrg The routines don't have help messages or descriptions, but most have 39 1.1.1.5 mrg suggestive names. See the source code for full details. */ 40 1.1 mrg 41 1.1 mrg #include "config.h" 42 1.1 mrg 43 1.1 mrg #include <limits.h> 44 1.1 mrg #include <stdio.h> 45 1.1 mrg #include <stdlib.h> 46 1.1 mrg #include <string.h> 47 1.1 mrg 48 1.1 mrg #if HAVE_UNISTD_H 49 1.1 mrg #include <unistd.h> /* for getpid, R_OK */ 50 1.1 mrg #endif 51 1.1 mrg 52 1.1 mrg #if TIME_WITH_SYS_TIME 53 1.1 mrg # include <sys/time.h> /* for struct timeval */ 54 1.1 mrg # include <time.h> 55 1.1 mrg #else 56 1.1 mrg # if HAVE_SYS_TIME_H 57 1.1 mrg # include <sys/time.h> 58 1.1 mrg # else 59 1.1 mrg # include <time.h> 60 1.1 mrg # endif 61 1.1 mrg #endif 62 1.1 mrg 63 1.1 mrg #if HAVE_SYS_RESOURCE_H 64 1.1 mrg #include <sys/resource.h> /* for getrusage() */ 65 1.1 mrg #endif 66 1.1 mrg 67 1.1 mrg 68 1.1 mrg #include "gmp-impl.h" 69 1.1 mrg #include "longlong.h" /* for the benefit of speed-many.c */ 70 1.1 mrg #include "tests.h" 71 1.1 mrg #include "speed.h" 72 1.1 mrg 73 1.1 mrg 74 1.1 mrg #if !HAVE_DECL_OPTARG 75 1.1 mrg extern char *optarg; 76 1.1 mrg extern int optind, opterr; 77 1.1 mrg #endif 78 1.1 mrg 79 1.1 mrg #if !HAVE_STRTOUL 80 1.1 mrg #define strtoul(p,e,b) (unsigned long) strtol(p,e,b) 81 1.1 mrg #endif 82 1.1 mrg 83 1.1 mrg #ifdef SPEED_EXTRA_PROTOS 84 1.1 mrg SPEED_EXTRA_PROTOS 85 1.1 mrg #endif 86 1.1 mrg #ifdef SPEED_EXTRA_PROTOS2 87 1.1 mrg SPEED_EXTRA_PROTOS2 88 1.1 mrg #endif 89 1.1 mrg 90 1.1 mrg 91 1.1 mrg #if GMP_LIMB_BITS == 32 92 1.1 mrg #define GMP_NUMB_0xAA (CNST_LIMB(0xAAAAAAAA) & GMP_NUMB_MASK) 93 1.1 mrg #endif 94 1.1 mrg #if GMP_LIMB_BITS == 64 95 1.1 mrg #define GMP_NUMB_0xAA (CNST_LIMB(0xAAAAAAAAAAAAAAAA) & GMP_NUMB_MASK) 96 1.1 mrg #endif 97 1.1 mrg 98 1.1 mrg 99 1.1 mrg #define CMP_ABSOLUTE 1 100 1.1 mrg #define CMP_RATIO 2 101 1.1 mrg #define CMP_DIFFERENCE 3 102 1.1 mrg #define CMP_DIFFPREV 4 103 1.1 mrg int option_cmp = CMP_ABSOLUTE; 104 1.1 mrg 105 1.1 mrg #define UNIT_SECONDS 1 106 1.1 mrg #define UNIT_CYCLES 2 107 1.1 mrg #define UNIT_CYCLESPERLIMB 3 108 1.1 mrg int option_unit = UNIT_SECONDS; 109 1.1 mrg 110 1.1 mrg #define DATA_RANDOM 1 111 1.1 mrg #define DATA_RANDOM2 2 112 1.1 mrg #define DATA_ZEROS 3 113 1.1 mrg #define DATA_AAS 4 114 1.1 mrg #define DATA_FFS 5 115 1.1 mrg #define DATA_2FD 6 116 1.1 mrg int option_data = DATA_RANDOM; 117 1.1 mrg 118 1.1 mrg int option_square = 0; 119 1.1 mrg double option_factor = 0.0; 120 1.1 mrg mp_size_t option_step = 1; 121 1.1 mrg int option_gnuplot = 0; 122 1.1 mrg char *option_gnuplot_basename; 123 1.1 mrg struct size_array_t { 124 1.1 mrg mp_size_t start, end; 125 1.1 mrg } *size_array = NULL; 126 1.1 mrg mp_size_t size_num = 0; 127 1.1 mrg mp_size_t size_allocnum = 0; 128 1.1 mrg int option_resource_usage = 0; 129 1.1 mrg long option_seed = 123456789; 130 1.1 mrg 131 1.1 mrg struct speed_params sp; 132 1.1 mrg 133 1.1 mrg #define COLUMN_WIDTH 13 /* for the free-form output */ 134 1.1 mrg 135 1.1 mrg #define FLAG_R (1<<0) /* require ".r" */ 136 1.1 mrg #define FLAG_R_OPTIONAL (1<<1) /* optional ".r" */ 137 1.1 mrg #define FLAG_RSIZE (1<<2) 138 1.1 mrg #define FLAG_NODATA (1<<3) /* don't alloc xp, yp */ 139 1.1 mrg 140 1.1 mrg const struct routine_t { 141 1.1 mrg /* constants */ 142 1.1 mrg const char *name; 143 1.1 mrg speed_function_t fun; 144 1.1 mrg int flag; 145 1.1 mrg } routine[] = { 146 1.1 mrg 147 1.1 mrg { "noop", speed_noop }, 148 1.1 mrg { "noop_wxs", speed_noop_wxs }, 149 1.1 mrg { "noop_wxys", speed_noop_wxys }, 150 1.1 mrg 151 1.1 mrg { "mpn_add_n", speed_mpn_add_n, FLAG_R_OPTIONAL }, 152 1.1 mrg { "mpn_sub_n", speed_mpn_sub_n, FLAG_R_OPTIONAL }, 153 1.1.1.3 mrg { "mpn_add_1", speed_mpn_add_1, FLAG_R }, 154 1.1.1.3 mrg { "mpn_add_1_inplace", speed_mpn_add_1_inplace, FLAG_R }, 155 1.1.1.3 mrg { "mpn_sub_1", speed_mpn_sub_1, FLAG_R }, 156 1.1.1.3 mrg { "mpn_sub_1_inplace", speed_mpn_sub_1_inplace, FLAG_R }, 157 1.1 mrg 158 1.1.1.2 mrg { "mpn_add_err1_n", speed_mpn_add_err1_n }, 159 1.1.1.2 mrg { "mpn_add_err2_n", speed_mpn_add_err2_n }, 160 1.1.1.2 mrg { "mpn_add_err3_n", speed_mpn_add_err3_n }, 161 1.1.1.2 mrg { "mpn_sub_err1_n", speed_mpn_sub_err1_n }, 162 1.1.1.2 mrg { "mpn_sub_err2_n", speed_mpn_sub_err2_n }, 163 1.1.1.2 mrg { "mpn_sub_err3_n", speed_mpn_sub_err3_n }, 164 1.1.1.2 mrg 165 1.1 mrg #if HAVE_NATIVE_mpn_add_n_sub_n 166 1.1 mrg { "mpn_add_n_sub_n", speed_mpn_add_n_sub_n, FLAG_R_OPTIONAL }, 167 1.1 mrg #endif 168 1.1 mrg 169 1.1 mrg { "mpn_addmul_1", speed_mpn_addmul_1, FLAG_R }, 170 1.1 mrg { "mpn_submul_1", speed_mpn_submul_1, FLAG_R }, 171 1.1 mrg #if HAVE_NATIVE_mpn_addmul_2 172 1.1 mrg { "mpn_addmul_2", speed_mpn_addmul_2, FLAG_R_OPTIONAL }, 173 1.1 mrg #endif 174 1.1 mrg #if HAVE_NATIVE_mpn_addmul_3 175 1.1 mrg { "mpn_addmul_3", speed_mpn_addmul_3, FLAG_R_OPTIONAL }, 176 1.1 mrg #endif 177 1.1 mrg #if HAVE_NATIVE_mpn_addmul_4 178 1.1 mrg { "mpn_addmul_4", speed_mpn_addmul_4, FLAG_R_OPTIONAL }, 179 1.1 mrg #endif 180 1.1 mrg #if HAVE_NATIVE_mpn_addmul_5 181 1.1 mrg { "mpn_addmul_5", speed_mpn_addmul_5, FLAG_R_OPTIONAL }, 182 1.1 mrg #endif 183 1.1 mrg #if HAVE_NATIVE_mpn_addmul_6 184 1.1 mrg { "mpn_addmul_6", speed_mpn_addmul_6, FLAG_R_OPTIONAL }, 185 1.1 mrg #endif 186 1.1 mrg #if HAVE_NATIVE_mpn_addmul_7 187 1.1 mrg { "mpn_addmul_7", speed_mpn_addmul_7, FLAG_R_OPTIONAL }, 188 1.1 mrg #endif 189 1.1 mrg #if HAVE_NATIVE_mpn_addmul_8 190 1.1 mrg { "mpn_addmul_8", speed_mpn_addmul_8, FLAG_R_OPTIONAL }, 191 1.1 mrg #endif 192 1.1 mrg { "mpn_mul_1", speed_mpn_mul_1, FLAG_R }, 193 1.1 mrg { "mpn_mul_1_inplace", speed_mpn_mul_1_inplace, FLAG_R }, 194 1.1 mrg #if HAVE_NATIVE_mpn_mul_2 195 1.1 mrg { "mpn_mul_2", speed_mpn_mul_2, FLAG_R_OPTIONAL }, 196 1.1 mrg #endif 197 1.1 mrg #if HAVE_NATIVE_mpn_mul_3 198 1.1 mrg { "mpn_mul_3", speed_mpn_mul_3, FLAG_R_OPTIONAL }, 199 1.1 mrg #endif 200 1.1 mrg #if HAVE_NATIVE_mpn_mul_4 201 1.1 mrg { "mpn_mul_4", speed_mpn_mul_4, FLAG_R_OPTIONAL }, 202 1.1 mrg #endif 203 1.1.1.2 mrg #if HAVE_NATIVE_mpn_mul_5 204 1.1.1.2 mrg { "mpn_mul_5", speed_mpn_mul_5, FLAG_R_OPTIONAL }, 205 1.1.1.2 mrg #endif 206 1.1.1.2 mrg #if HAVE_NATIVE_mpn_mul_6 207 1.1.1.2 mrg { "mpn_mul_6", speed_mpn_mul_6, FLAG_R_OPTIONAL }, 208 1.1.1.2 mrg #endif 209 1.1 mrg 210 1.1 mrg { "mpn_divrem_1", speed_mpn_divrem_1, FLAG_R }, 211 1.1 mrg { "mpn_divrem_1f", speed_mpn_divrem_1f, FLAG_R }, 212 1.1 mrg #if HAVE_NATIVE_mpn_divrem_1c 213 1.1 mrg { "mpn_divrem_1c", speed_mpn_divrem_1c, FLAG_R }, 214 1.1 mrg { "mpn_divrem_1cf", speed_mpn_divrem_1cf,FLAG_R }, 215 1.1 mrg #endif 216 1.1.1.2 mrg { "mpn_mod_1", speed_mpn_mod_1, FLAG_R }, 217 1.1 mrg #if HAVE_NATIVE_mpn_mod_1c 218 1.1.1.2 mrg { "mpn_mod_1c", speed_mpn_mod_1c, FLAG_R }, 219 1.1 mrg #endif 220 1.1 mrg { "mpn_preinv_divrem_1", speed_mpn_preinv_divrem_1, FLAG_R }, 221 1.1 mrg { "mpn_preinv_divrem_1f", speed_mpn_preinv_divrem_1f, FLAG_R }, 222 1.1 mrg { "mpn_preinv_mod_1", speed_mpn_preinv_mod_1, FLAG_R }, 223 1.1 mrg 224 1.1.1.2 mrg { "mpn_mod_1_1", speed_mpn_mod_1_1, FLAG_R }, 225 1.1.1.2 mrg { "mpn_mod_1_1_1", speed_mpn_mod_1_1_1, FLAG_R }, 226 1.1.1.2 mrg { "mpn_mod_1_1_2", speed_mpn_mod_1_1_2, FLAG_R }, 227 1.1.1.2 mrg { "mpn_mod_1s_2", speed_mpn_mod_1_2, FLAG_R }, 228 1.1.1.2 mrg { "mpn_mod_1s_3", speed_mpn_mod_1_3, FLAG_R }, 229 1.1.1.2 mrg { "mpn_mod_1s_4", speed_mpn_mod_1_4, FLAG_R }, 230 1.1 mrg 231 1.1 mrg { "mpn_divrem_1_div", speed_mpn_divrem_1_div, FLAG_R }, 232 1.1 mrg { "mpn_divrem_1_inv", speed_mpn_divrem_1_inv, FLAG_R }, 233 1.1 mrg { "mpn_divrem_1f_div", speed_mpn_divrem_1f_div, FLAG_R }, 234 1.1 mrg { "mpn_divrem_1f_inv", speed_mpn_divrem_1f_inv, FLAG_R }, 235 1.1 mrg { "mpn_mod_1_div", speed_mpn_mod_1_div, FLAG_R }, 236 1.1 mrg { "mpn_mod_1_inv", speed_mpn_mod_1_inv, FLAG_R }, 237 1.1 mrg 238 1.1 mrg { "mpn_divrem_2", speed_mpn_divrem_2, }, 239 1.1 mrg { "mpn_divrem_2_div", speed_mpn_divrem_2_div, }, 240 1.1 mrg { "mpn_divrem_2_inv", speed_mpn_divrem_2_inv, }, 241 1.1 mrg 242 1.1.1.3 mrg { "mpn_div_qr_1n_pi1", speed_mpn_div_qr_1n_pi1, FLAG_R }, 243 1.1.1.3 mrg { "mpn_div_qr_1n_pi1_1",speed_mpn_div_qr_1n_pi1_1, FLAG_R }, 244 1.1.1.3 mrg { "mpn_div_qr_1n_pi1_2",speed_mpn_div_qr_1n_pi1_2, FLAG_R }, 245 1.1.1.3 mrg { "mpn_div_qr_1", speed_mpn_div_qr_1, FLAG_R }, 246 1.1.1.3 mrg 247 1.1.1.2 mrg { "mpn_div_qr_2n", speed_mpn_div_qr_2n, }, 248 1.1.1.2 mrg { "mpn_div_qr_2u", speed_mpn_div_qr_2u, }, 249 1.1.1.2 mrg 250 1.1 mrg { "mpn_divexact_1", speed_mpn_divexact_1, FLAG_R }, 251 1.1 mrg { "mpn_divexact_by3", speed_mpn_divexact_by3 }, 252 1.1 mrg 253 1.1.1.2 mrg { "mpn_bdiv_q_1", speed_mpn_bdiv_q_1, FLAG_R }, 254 1.1 mrg { "mpn_pi1_bdiv_q_1", speed_mpn_pi1_bdiv_q_1, FLAG_R_OPTIONAL }, 255 1.1 mrg { "mpn_bdiv_dbm1c", speed_mpn_bdiv_dbm1c, FLAG_R_OPTIONAL }, 256 1.1 mrg 257 1.1 mrg #if HAVE_NATIVE_mpn_modexact_1_odd 258 1.1 mrg { "mpn_modexact_1_odd", speed_mpn_modexact_1_odd, FLAG_R }, 259 1.1 mrg #endif 260 1.1 mrg { "mpn_modexact_1c_odd", speed_mpn_modexact_1c_odd, FLAG_R }, 261 1.1 mrg 262 1.1 mrg #if GMP_NUMB_BITS % 4 == 0 263 1.1 mrg { "mpn_mod_34lsub1", speed_mpn_mod_34lsub1 }, 264 1.1 mrg #endif 265 1.1 mrg 266 1.1 mrg { "mpn_lshift", speed_mpn_lshift, FLAG_R }, 267 1.1 mrg { "mpn_lshiftc", speed_mpn_lshiftc, FLAG_R }, 268 1.1 mrg { "mpn_rshift", speed_mpn_rshift, FLAG_R }, 269 1.1 mrg 270 1.1 mrg { "mpn_and_n", speed_mpn_and_n, FLAG_R_OPTIONAL }, 271 1.1 mrg { "mpn_andn_n", speed_mpn_andn_n, FLAG_R_OPTIONAL }, 272 1.1 mrg { "mpn_nand_n", speed_mpn_nand_n, FLAG_R_OPTIONAL }, 273 1.1 mrg { "mpn_ior_n", speed_mpn_ior_n, FLAG_R_OPTIONAL }, 274 1.1 mrg { "mpn_iorn_n", speed_mpn_iorn_n, FLAG_R_OPTIONAL }, 275 1.1 mrg { "mpn_nior_n", speed_mpn_nior_n, FLAG_R_OPTIONAL }, 276 1.1 mrg { "mpn_xor_n", speed_mpn_xor_n, FLAG_R_OPTIONAL }, 277 1.1 mrg { "mpn_xnor_n", speed_mpn_xnor_n, FLAG_R_OPTIONAL }, 278 1.1 mrg { "mpn_com", speed_mpn_com }, 279 1.1.1.3 mrg { "mpn_neg", speed_mpn_neg }, 280 1.1 mrg 281 1.1 mrg { "mpn_popcount", speed_mpn_popcount }, 282 1.1 mrg { "mpn_hamdist", speed_mpn_hamdist }, 283 1.1 mrg 284 1.1 mrg { "mpn_matrix22_mul", speed_mpn_matrix22_mul }, 285 1.1 mrg 286 1.1.1.4 mrg { "mpn_hgcd2", speed_mpn_hgcd2, FLAG_NODATA }, 287 1.1.1.4 mrg { "mpn_hgcd2_1", speed_mpn_hgcd2_1, FLAG_NODATA }, 288 1.1.1.4 mrg { "mpn_hgcd2_2", speed_mpn_hgcd2_2, FLAG_NODATA }, 289 1.1.1.4 mrg { "mpn_hgcd2_3", speed_mpn_hgcd2_3, FLAG_NODATA }, 290 1.1.1.4 mrg { "mpn_hgcd2_4", speed_mpn_hgcd2_4, FLAG_NODATA }, 291 1.1.1.4 mrg { "mpn_hgcd2_5", speed_mpn_hgcd2_5, FLAG_NODATA }, 292 1.1 mrg { "mpn_hgcd", speed_mpn_hgcd }, 293 1.1 mrg { "mpn_hgcd_lehmer", speed_mpn_hgcd_lehmer }, 294 1.1.1.2 mrg { "mpn_hgcd_appr", speed_mpn_hgcd_appr }, 295 1.1.1.2 mrg { "mpn_hgcd_appr_lehmer", speed_mpn_hgcd_appr_lehmer }, 296 1.1.1.2 mrg 297 1.1.1.2 mrg { "mpn_hgcd_reduce", speed_mpn_hgcd_reduce }, 298 1.1.1.2 mrg { "mpn_hgcd_reduce_1", speed_mpn_hgcd_reduce_1 }, 299 1.1.1.2 mrg { "mpn_hgcd_reduce_2", speed_mpn_hgcd_reduce_2 }, 300 1.1 mrg 301 1.1 mrg { "mpn_gcd_1", speed_mpn_gcd_1, FLAG_R_OPTIONAL }, 302 1.1.1.4 mrg { "mpn_gcd_11", speed_mpn_gcd_11, FLAG_R_OPTIONAL }, 303 1.1 mrg { "mpn_gcd_1N", speed_mpn_gcd_1N, FLAG_R_OPTIONAL }, 304 1.1.1.4 mrg { "mpn_gcd_22", speed_mpn_gcd_22, FLAG_R_OPTIONAL }, 305 1.1 mrg 306 1.1 mrg { "mpn_gcd", speed_mpn_gcd }, 307 1.1 mrg 308 1.1 mrg { "mpn_gcdext", speed_mpn_gcdext }, 309 1.1 mrg { "mpn_gcdext_single", speed_mpn_gcdext_single }, 310 1.1 mrg { "mpn_gcdext_double", speed_mpn_gcdext_double }, 311 1.1 mrg { "mpn_gcdext_one_single", speed_mpn_gcdext_one_single }, 312 1.1 mrg { "mpn_gcdext_one_double", speed_mpn_gcdext_one_double }, 313 1.1 mrg #if 0 314 1.1 mrg { "mpn_gcdext_lehmer", speed_mpn_gcdext_lehmer }, 315 1.1 mrg #endif 316 1.1.1.4 mrg 317 1.1.1.4 mrg { "mpz_nextprime", speed_mpz_nextprime }, 318 1.1.1.4 mrg 319 1.1 mrg { "mpz_jacobi", speed_mpz_jacobi }, 320 1.1 mrg { "mpn_jacobi_base", speed_mpn_jacobi_base }, 321 1.1 mrg { "mpn_jacobi_base_1", speed_mpn_jacobi_base_1 }, 322 1.1 mrg { "mpn_jacobi_base_2", speed_mpn_jacobi_base_2 }, 323 1.1 mrg { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3 }, 324 1.1.1.2 mrg { "mpn_jacobi_base_4", speed_mpn_jacobi_base_4 }, 325 1.1 mrg 326 1.1 mrg { "mpn_mul", speed_mpn_mul, FLAG_R_OPTIONAL }, 327 1.1 mrg { "mpn_mul_basecase", speed_mpn_mul_basecase,FLAG_R_OPTIONAL }, 328 1.1 mrg { "mpn_sqr_basecase", speed_mpn_sqr_basecase }, 329 1.1 mrg #if HAVE_NATIVE_mpn_sqr_diagonal 330 1.1 mrg { "mpn_sqr_diagonal", speed_mpn_sqr_diagonal }, 331 1.1 mrg #endif 332 1.1.1.2 mrg #if HAVE_NATIVE_mpn_sqr_diag_addlsh1 333 1.1.1.2 mrg { "mpn_sqr_diag_addlsh1", speed_mpn_sqr_diag_addlsh1 }, 334 1.1.1.2 mrg #endif 335 1.1 mrg 336 1.1 mrg { "mpn_mul_n", speed_mpn_mul_n }, 337 1.1 mrg { "mpn_sqr", speed_mpn_sqr }, 338 1.1 mrg 339 1.1 mrg { "mpn_toom2_sqr", speed_mpn_toom2_sqr }, 340 1.1 mrg { "mpn_toom3_sqr", speed_mpn_toom3_sqr }, 341 1.1 mrg { "mpn_toom4_sqr", speed_mpn_toom4_sqr }, 342 1.1 mrg { "mpn_toom6_sqr", speed_mpn_toom6_sqr }, 343 1.1 mrg { "mpn_toom8_sqr", speed_mpn_toom8_sqr }, 344 1.1 mrg { "mpn_toom22_mul", speed_mpn_toom22_mul }, 345 1.1 mrg { "mpn_toom33_mul", speed_mpn_toom33_mul }, 346 1.1 mrg { "mpn_toom44_mul", speed_mpn_toom44_mul }, 347 1.1 mrg { "mpn_toom6h_mul", speed_mpn_toom6h_mul }, 348 1.1 mrg { "mpn_toom8h_mul", speed_mpn_toom8h_mul }, 349 1.1 mrg { "mpn_toom32_mul", speed_mpn_toom32_mul }, 350 1.1 mrg { "mpn_toom42_mul", speed_mpn_toom42_mul }, 351 1.1 mrg { "mpn_toom43_mul", speed_mpn_toom43_mul }, 352 1.1 mrg { "mpn_toom63_mul", speed_mpn_toom63_mul }, 353 1.1 mrg { "mpn_nussbaumer_mul", speed_mpn_nussbaumer_mul }, 354 1.1 mrg { "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr}, 355 1.1 mrg #if WANT_OLD_FFT_FULL 356 1.1 mrg { "mpn_mul_fft_full", speed_mpn_mul_fft_full }, 357 1.1 mrg { "mpn_mul_fft_full_sqr", speed_mpn_mul_fft_full_sqr }, 358 1.1 mrg #endif 359 1.1 mrg { "mpn_mul_fft", speed_mpn_mul_fft, FLAG_R_OPTIONAL }, 360 1.1 mrg { "mpn_mul_fft_sqr", speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL }, 361 1.1 mrg 362 1.1.1.3 mrg { "mpn_sqrlo", speed_mpn_sqrlo }, 363 1.1.1.3 mrg { "mpn_sqrlo_basecase", speed_mpn_sqrlo_basecase }, 364 1.1 mrg { "mpn_mullo_n", speed_mpn_mullo_n }, 365 1.1 mrg { "mpn_mullo_basecase", speed_mpn_mullo_basecase }, 366 1.1 mrg 367 1.1.1.2 mrg { "mpn_mulmid_basecase", speed_mpn_mulmid_basecase, FLAG_R_OPTIONAL }, 368 1.1.1.2 mrg { "mpn_toom42_mulmid", speed_mpn_toom42_mulmid }, 369 1.1.1.2 mrg { "mpn_mulmid_n", speed_mpn_mulmid_n }, 370 1.1.1.2 mrg { "mpn_mulmid", speed_mpn_mulmid, FLAG_R_OPTIONAL }, 371 1.1.1.2 mrg 372 1.1 mrg { "mpn_bc_mulmod_bnm1", speed_mpn_bc_mulmod_bnm1 }, 373 1.1 mrg { "mpn_mulmod_bnm1", speed_mpn_mulmod_bnm1 }, 374 1.1 mrg { "mpn_mulmod_bnm1_rounded", speed_mpn_mulmod_bnm1_rounded }, 375 1.1 mrg { "mpn_sqrmod_bnm1", speed_mpn_sqrmod_bnm1 }, 376 1.1 mrg 377 1.1 mrg { "mpn_invert", speed_mpn_invert }, 378 1.1 mrg { "mpn_invertappr", speed_mpn_invertappr }, 379 1.1 mrg { "mpn_ni_invertappr", speed_mpn_ni_invertappr }, 380 1.1 mrg { "mpn_binvert", speed_mpn_binvert }, 381 1.1.1.3 mrg { "mpn_sec_invert", speed_mpn_sec_invert }, 382 1.1 mrg 383 1.1 mrg { "mpn_sbpi1_div_qr", speed_mpn_sbpi1_div_qr, FLAG_R_OPTIONAL}, 384 1.1 mrg { "mpn_dcpi1_div_qr", speed_mpn_dcpi1_div_qr, FLAG_R_OPTIONAL}, 385 1.1 mrg { "mpn_mu_div_qr", speed_mpn_mu_div_qr, FLAG_R_OPTIONAL}, 386 1.1 mrg { "mpn_mupi_div_qr", speed_mpn_mupi_div_qr, FLAG_R_OPTIONAL}, 387 1.1 mrg { "mpn_sbpi1_divappr_q", speed_mpn_sbpi1_divappr_q, FLAG_R_OPTIONAL}, 388 1.1 mrg { "mpn_dcpi1_divappr_q", speed_mpn_dcpi1_divappr_q, FLAG_R_OPTIONAL}, 389 1.1 mrg 390 1.1 mrg { "mpn_sbpi1_bdiv_qr", speed_mpn_sbpi1_bdiv_qr }, 391 1.1 mrg { "mpn_dcpi1_bdiv_qr", speed_mpn_dcpi1_bdiv_qr }, 392 1.1 mrg { "mpn_sbpi1_bdiv_q", speed_mpn_sbpi1_bdiv_q }, 393 1.1 mrg { "mpn_dcpi1_bdiv_q", speed_mpn_dcpi1_bdiv_q }, 394 1.1.1.4 mrg { "mpn_sbpi1_bdiv_r", speed_mpn_sbpi1_bdiv_r }, 395 1.1 mrg 396 1.1.1.2 mrg { "mpn_broot", speed_mpn_broot, FLAG_R }, 397 1.1.1.2 mrg { "mpn_broot_invm1", speed_mpn_broot_invm1, FLAG_R }, 398 1.1.1.2 mrg { "mpn_brootinv", speed_mpn_brootinv, FLAG_R }, 399 1.1.1.2 mrg 400 1.1 mrg { "mpn_get_str", speed_mpn_get_str, FLAG_R_OPTIONAL }, 401 1.1 mrg { "mpn_set_str", speed_mpn_set_str, FLAG_R_OPTIONAL }, 402 1.1 mrg { "mpn_set_str_basecase", speed_mpn_bc_set_str, FLAG_R_OPTIONAL }, 403 1.1 mrg 404 1.1 mrg { "mpn_sqrtrem", speed_mpn_sqrtrem }, 405 1.1 mrg { "mpn_rootrem", speed_mpn_rootrem, FLAG_R }, 406 1.1.1.3 mrg { "mpn_sqrt", speed_mpn_sqrt }, 407 1.1.1.3 mrg { "mpn_root", speed_mpn_root, FLAG_R }, 408 1.1 mrg 409 1.1.1.4 mrg { "mpn_perfect_power_p", speed_mpn_perfect_power_p, }, 410 1.1.1.4 mrg { "mpn_perfect_square_p", speed_mpn_perfect_square_p, }, 411 1.1.1.4 mrg 412 1.1 mrg { "mpn_fib2_ui", speed_mpn_fib2_ui, FLAG_NODATA }, 413 1.1 mrg { "mpz_fib_ui", speed_mpz_fib_ui, FLAG_NODATA }, 414 1.1 mrg { "mpz_fib2_ui", speed_mpz_fib2_ui, FLAG_NODATA }, 415 1.1 mrg { "mpz_lucnum_ui", speed_mpz_lucnum_ui, FLAG_NODATA }, 416 1.1 mrg { "mpz_lucnum2_ui", speed_mpz_lucnum2_ui, FLAG_NODATA }, 417 1.1 mrg 418 1.1 mrg { "mpz_add", speed_mpz_add }, 419 1.1.1.4 mrg { "mpz_invert", speed_mpz_invert, FLAG_R_OPTIONAL }, 420 1.1 mrg { "mpz_bin_uiui", speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL }, 421 1.1.1.2 mrg { "mpz_bin_ui", speed_mpz_bin_ui, FLAG_NODATA | FLAG_R_OPTIONAL }, 422 1.1 mrg { "mpz_fac_ui", speed_mpz_fac_ui, FLAG_NODATA }, 423 1.1.1.3 mrg { "mpz_2fac_ui", speed_mpz_2fac_ui, FLAG_NODATA }, 424 1.1.1.4 mrg { "mpz_mfac_uiui", speed_mpz_mfac_uiui, FLAG_NODATA | FLAG_R_OPTIONAL }, 425 1.1.1.4 mrg { "mpz_primorial_ui", speed_mpz_primorial_ui, FLAG_NODATA }, 426 1.1.1.4 mrg { "mpz_powm", speed_mpz_powm, FLAG_R_OPTIONAL }, 427 1.1 mrg { "mpz_powm_mod", speed_mpz_powm_mod }, 428 1.1 mrg { "mpz_powm_redc", speed_mpz_powm_redc }, 429 1.1.1.2 mrg { "mpz_powm_sec", speed_mpz_powm_sec }, 430 1.1 mrg { "mpz_powm_ui", speed_mpz_powm_ui, FLAG_R_OPTIONAL }, 431 1.1 mrg 432 1.1 mrg { "mpz_mod", speed_mpz_mod }, 433 1.1 mrg { "mpn_redc_1", speed_mpn_redc_1 }, 434 1.1 mrg { "mpn_redc_2", speed_mpn_redc_2 }, 435 1.1 mrg { "mpn_redc_n", speed_mpn_redc_n }, 436 1.1 mrg 437 1.1 mrg { "MPN_COPY", speed_MPN_COPY }, 438 1.1 mrg { "MPN_COPY_INCR", speed_MPN_COPY_INCR }, 439 1.1 mrg { "MPN_COPY_DECR", speed_MPN_COPY_DECR }, 440 1.1 mrg { "memcpy", speed_memcpy }, 441 1.1 mrg #if HAVE_NATIVE_mpn_copyi 442 1.1 mrg { "mpn_copyi", speed_mpn_copyi }, 443 1.1 mrg #endif 444 1.1 mrg #if HAVE_NATIVE_mpn_copyd 445 1.1 mrg { "mpn_copyd", speed_mpn_copyd }, 446 1.1 mrg #endif 447 1.1.1.3 mrg { "mpn_sec_tabselect", speed_mpn_sec_tabselect, FLAG_R_OPTIONAL }, 448 1.1.1.3 mrg #if HAVE_NATIVE_mpn_addlsh1_n == 1 449 1.1.1.2 mrg { "mpn_addlsh1_n", speed_mpn_addlsh1_n, FLAG_R_OPTIONAL }, 450 1.1 mrg #endif 451 1.1.1.3 mrg #if HAVE_NATIVE_mpn_sublsh1_n == 1 452 1.1.1.2 mrg { "mpn_sublsh1_n", speed_mpn_sublsh1_n, FLAG_R_OPTIONAL }, 453 1.1.1.2 mrg #endif 454 1.1.1.2 mrg #if HAVE_NATIVE_mpn_addlsh1_n_ip1 455 1.1.1.2 mrg { "mpn_addlsh1_n_ip1", speed_mpn_addlsh1_n_ip1 }, 456 1.1.1.2 mrg #endif 457 1.1.1.2 mrg #if HAVE_NATIVE_mpn_addlsh1_n_ip2 458 1.1.1.2 mrg { "mpn_addlsh1_n_ip2", speed_mpn_addlsh1_n_ip2 }, 459 1.1.1.2 mrg #endif 460 1.1.1.2 mrg #if HAVE_NATIVE_mpn_sublsh1_n_ip1 461 1.1.1.2 mrg { "mpn_sublsh1_n_ip1", speed_mpn_sublsh1_n_ip1 }, 462 1.1 mrg #endif 463 1.1.1.3 mrg #if HAVE_NATIVE_mpn_rsblsh1_n == 1 464 1.1.1.2 mrg { "mpn_rsblsh1_n", speed_mpn_rsblsh1_n, FLAG_R_OPTIONAL }, 465 1.1 mrg #endif 466 1.1.1.3 mrg #if HAVE_NATIVE_mpn_addlsh2_n == 1 467 1.1.1.2 mrg { "mpn_addlsh2_n", speed_mpn_addlsh2_n, FLAG_R_OPTIONAL }, 468 1.1 mrg #endif 469 1.1.1.3 mrg #if HAVE_NATIVE_mpn_sublsh2_n == 1 470 1.1.1.2 mrg { "mpn_sublsh2_n", speed_mpn_sublsh2_n, FLAG_R_OPTIONAL }, 471 1.1.1.2 mrg #endif 472 1.1.1.2 mrg #if HAVE_NATIVE_mpn_addlsh2_n_ip1 473 1.1.1.2 mrg { "mpn_addlsh2_n_ip1", speed_mpn_addlsh2_n_ip1 }, 474 1.1.1.2 mrg #endif 475 1.1.1.2 mrg #if HAVE_NATIVE_mpn_addlsh2_n_ip2 476 1.1.1.2 mrg { "mpn_addlsh2_n_ip2", speed_mpn_addlsh2_n_ip2 }, 477 1.1.1.2 mrg #endif 478 1.1.1.2 mrg #if HAVE_NATIVE_mpn_sublsh2_n_ip1 479 1.1.1.2 mrg { "mpn_sublsh2_n_ip1", speed_mpn_sublsh2_n_ip1 }, 480 1.1 mrg #endif 481 1.1.1.3 mrg #if HAVE_NATIVE_mpn_rsblsh2_n == 1 482 1.1.1.2 mrg { "mpn_rsblsh2_n", speed_mpn_rsblsh2_n, FLAG_R_OPTIONAL }, 483 1.1.1.2 mrg #endif 484 1.1.1.2 mrg #if HAVE_NATIVE_mpn_addlsh_n 485 1.1.1.2 mrg { "mpn_addlsh_n", speed_mpn_addlsh_n, FLAG_R_OPTIONAL }, 486 1.1.1.2 mrg #endif 487 1.1.1.2 mrg #if HAVE_NATIVE_mpn_sublsh_n 488 1.1.1.2 mrg { "mpn_sublsh_n", speed_mpn_sublsh_n, FLAG_R_OPTIONAL }, 489 1.1.1.2 mrg #endif 490 1.1.1.2 mrg #if HAVE_NATIVE_mpn_addlsh_n_ip1 491 1.1.1.2 mrg { "mpn_addlsh_n_ip1", speed_mpn_addlsh_n_ip1 }, 492 1.1.1.2 mrg #endif 493 1.1.1.2 mrg #if HAVE_NATIVE_mpn_addlsh_n_ip2 494 1.1.1.2 mrg { "mpn_addlsh_n_ip2", speed_mpn_addlsh_n_ip2 }, 495 1.1.1.2 mrg #endif 496 1.1.1.2 mrg #if HAVE_NATIVE_mpn_sublsh_n_ip1 497 1.1.1.2 mrg { "mpn_sublsh_n_ip1", speed_mpn_sublsh_n_ip1 }, 498 1.1.1.2 mrg #endif 499 1.1.1.2 mrg #if HAVE_NATIVE_mpn_rsblsh_n 500 1.1.1.2 mrg { "mpn_rsblsh_n", speed_mpn_rsblsh_n, FLAG_R_OPTIONAL }, 501 1.1 mrg #endif 502 1.1 mrg #if HAVE_NATIVE_mpn_rsh1add_n 503 1.1.1.2 mrg { "mpn_rsh1add_n", speed_mpn_rsh1add_n, FLAG_R_OPTIONAL }, 504 1.1 mrg #endif 505 1.1 mrg #if HAVE_NATIVE_mpn_rsh1sub_n 506 1.1.1.2 mrg { "mpn_rsh1sub_n", speed_mpn_rsh1sub_n, FLAG_R_OPTIONAL }, 507 1.1 mrg #endif 508 1.1 mrg 509 1.1.1.3 mrg { "mpn_cnd_add_n", speed_mpn_cnd_add_n, FLAG_R_OPTIONAL }, 510 1.1.1.3 mrg { "mpn_cnd_sub_n", speed_mpn_cnd_sub_n, FLAG_R_OPTIONAL }, 511 1.1.1.2 mrg 512 1.1 mrg { "MPN_ZERO", speed_MPN_ZERO }, 513 1.1 mrg 514 1.1 mrg { "binvert_limb", speed_binvert_limb, FLAG_NODATA }, 515 1.1 mrg { "binvert_limb_mul1", speed_binvert_limb_mul1, FLAG_NODATA }, 516 1.1 mrg { "binvert_limb_loop", speed_binvert_limb_loop, FLAG_NODATA }, 517 1.1 mrg { "binvert_limb_cond", speed_binvert_limb_cond, FLAG_NODATA }, 518 1.1 mrg { "binvert_limb_arith", speed_binvert_limb_arith, FLAG_NODATA }, 519 1.1 mrg 520 1.1 mrg { "malloc_free", speed_malloc_free }, 521 1.1 mrg { "malloc_realloc_free", speed_malloc_realloc_free }, 522 1.1 mrg { "gmp_allocate_free", speed_gmp_allocate_free }, 523 1.1 mrg { "gmp_allocate_reallocate_free", speed_gmp_allocate_reallocate_free }, 524 1.1 mrg { "mpz_init_clear", speed_mpz_init_clear }, 525 1.1 mrg { "mpq_init_clear", speed_mpq_init_clear }, 526 1.1 mrg { "mpf_init_clear", speed_mpf_init_clear }, 527 1.1 mrg { "mpz_init_realloc_clear", speed_mpz_init_realloc_clear }, 528 1.1 mrg 529 1.1 mrg { "umul_ppmm", speed_umul_ppmm, FLAG_R_OPTIONAL }, 530 1.1 mrg #if HAVE_NATIVE_mpn_umul_ppmm 531 1.1 mrg { "mpn_umul_ppmm", speed_mpn_umul_ppmm, FLAG_R_OPTIONAL }, 532 1.1 mrg #endif 533 1.1 mrg #if HAVE_NATIVE_mpn_umul_ppmm_r 534 1.1 mrg { "mpn_umul_ppmm_r", speed_mpn_umul_ppmm_r, FLAG_R_OPTIONAL }, 535 1.1 mrg #endif 536 1.1 mrg 537 1.1 mrg { "count_leading_zeros", speed_count_leading_zeros, FLAG_NODATA | FLAG_R_OPTIONAL }, 538 1.1 mrg { "count_trailing_zeros", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL }, 539 1.1 mrg 540 1.1 mrg { "udiv_qrnnd", speed_udiv_qrnnd, FLAG_R_OPTIONAL }, 541 1.1 mrg { "udiv_qrnnd_c", speed_udiv_qrnnd_c, FLAG_R_OPTIONAL }, 542 1.1 mrg #if HAVE_NATIVE_mpn_udiv_qrnnd 543 1.1 mrg { "mpn_udiv_qrnnd", speed_mpn_udiv_qrnnd, FLAG_R_OPTIONAL }, 544 1.1 mrg #endif 545 1.1 mrg #if HAVE_NATIVE_mpn_udiv_qrnnd_r 546 1.1 mrg { "mpn_udiv_qrnnd_r", speed_mpn_udiv_qrnnd_r, FLAG_R_OPTIONAL }, 547 1.1 mrg #endif 548 1.1 mrg { "invert_limb", speed_invert_limb, FLAG_R_OPTIONAL }, 549 1.1 mrg 550 1.1 mrg { "operator_div", speed_operator_div, FLAG_R_OPTIONAL }, 551 1.1 mrg { "operator_mod", speed_operator_mod, FLAG_R_OPTIONAL }, 552 1.1 mrg 553 1.1 mrg { "gmp_randseed", speed_gmp_randseed, FLAG_R_OPTIONAL }, 554 1.1 mrg { "gmp_randseed_ui", speed_gmp_randseed_ui, FLAG_R_OPTIONAL | FLAG_NODATA }, 555 1.1 mrg { "mpz_urandomb", speed_mpz_urandomb, FLAG_R_OPTIONAL | FLAG_NODATA }, 556 1.1 mrg 557 1.1 mrg #ifdef SPEED_EXTRA_ROUTINES 558 1.1 mrg SPEED_EXTRA_ROUTINES 559 1.1 mrg #endif 560 1.1 mrg #ifdef SPEED_EXTRA_ROUTINES2 561 1.1 mrg SPEED_EXTRA_ROUTINES2 562 1.1 mrg #endif 563 1.1 mrg }; 564 1.1 mrg 565 1.1 mrg 566 1.1 mrg struct choice_t { 567 1.1 mrg const struct routine_t *p; 568 1.1 mrg mp_limb_t r; 569 1.1 mrg double scale; 570 1.1 mrg double time; 571 1.1 mrg int no_time; 572 1.1 mrg double prev_time; 573 1.1 mrg const char *name; 574 1.1 mrg }; 575 1.1 mrg struct choice_t *choice; 576 1.1 mrg int num_choices = 0; 577 1.1 mrg 578 1.1 mrg 579 1.1 mrg void 580 1.1 mrg data_fill (mp_ptr ptr, mp_size_t size) 581 1.1 mrg { 582 1.1 mrg switch (option_data) { 583 1.1 mrg case DATA_RANDOM: 584 1.1 mrg mpn_random (ptr, size); 585 1.1 mrg break; 586 1.1 mrg case DATA_RANDOM2: 587 1.1 mrg mpn_random2 (ptr, size); 588 1.1 mrg break; 589 1.1 mrg case DATA_ZEROS: 590 1.1 mrg MPN_ZERO (ptr, size); 591 1.1 mrg break; 592 1.1 mrg case DATA_AAS: 593 1.1 mrg MPN_FILL (ptr, size, GMP_NUMB_0xAA); 594 1.1 mrg break; 595 1.1 mrg case DATA_FFS: 596 1.1 mrg MPN_FILL (ptr, size, GMP_NUMB_MAX); 597 1.1 mrg break; 598 1.1 mrg case DATA_2FD: 599 1.1 mrg MPN_FILL (ptr, size, GMP_NUMB_MAX); 600 1.1 mrg ptr[0] -= 2; 601 1.1 mrg break; 602 1.1 mrg default: 603 1.1 mrg abort(); 604 1.1 mrg /*NOTREACHED*/ 605 1.1 mrg } 606 1.1 mrg } 607 1.1 mrg 608 1.1 mrg /* The code here handling the various combinations of output options isn't 609 1.1 mrg too attractive, but it works and is fairly clean. */ 610 1.1 mrg 611 1.1 mrg #define SIZE_TO_DIVISOR(n) \ 612 1.1 mrg (option_square == 1 ? (n)*(n) \ 613 1.1 mrg : option_square == 2 ? (n)*((n)+1)/2 \ 614 1.1 mrg : (n)) 615 1.1 mrg 616 1.1 mrg void 617 1.1 mrg run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size) 618 1.1 mrg { 619 1.1 mrg const char *first_open_fastest, *first_open_notfastest, *first_close; 620 1.1 mrg int i, fastest, want_data; 621 1.1 mrg double fastest_time; 622 1.1 mrg TMP_DECL; 623 1.1 mrg 624 1.1 mrg TMP_MARK; 625 1.1 mrg 626 1.1 mrg /* allocate data, unless all routines are NODATA */ 627 1.1 mrg want_data = 0; 628 1.1 mrg for (i = 0; i < num_choices; i++) 629 1.1 mrg want_data |= ((choice[i].p->flag & FLAG_NODATA) == 0); 630 1.1 mrg 631 1.1 mrg if (want_data) 632 1.1 mrg { 633 1.1 mrg SPEED_TMP_ALLOC_LIMBS (sp.xp, s->size, s->align_xp); 634 1.1 mrg SPEED_TMP_ALLOC_LIMBS (sp.yp, s->size, s->align_yp); 635 1.1 mrg 636 1.1 mrg data_fill (s->xp, s->size); 637 1.1 mrg data_fill (s->yp, s->size); 638 1.1 mrg } 639 1.1 mrg else 640 1.1 mrg { 641 1.1 mrg sp.xp = NULL; 642 1.1 mrg sp.yp = NULL; 643 1.1 mrg } 644 1.1 mrg 645 1.1 mrg if (prev_size == -1 && option_cmp == CMP_DIFFPREV) 646 1.1 mrg { 647 1.1 mrg first_open_fastest = "(#"; 648 1.1 mrg first_open_notfastest = " ("; 649 1.1 mrg first_close = ")"; 650 1.1 mrg } 651 1.1 mrg else 652 1.1 mrg { 653 1.1 mrg first_open_fastest = "#"; 654 1.1 mrg first_open_notfastest = " "; 655 1.1 mrg first_close = ""; 656 1.1 mrg } 657 1.1 mrg 658 1.1 mrg fastest = -1; 659 1.1 mrg fastest_time = -1.0; 660 1.1 mrg for (i = 0; i < num_choices; i++) 661 1.1 mrg { 662 1.1 mrg s->r = choice[i].r; 663 1.1 mrg choice[i].time = speed_measure (choice[i].p->fun, s); 664 1.1 mrg choice[i].no_time = (choice[i].time == -1.0); 665 1.1 mrg if (! choice[i].no_time) 666 1.1 mrg choice[i].time *= choice[i].scale; 667 1.1 mrg 668 1.1 mrg /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time 669 1.1 mrg is before any differences. */ 670 1.1 mrg { 671 1.1 mrg double t; 672 1.1 mrg t = choice[i].time; 673 1.1 mrg if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1) 674 1.1 mrg { 675 1.1 mrg if (choice[i].prev_time == -1.0) 676 1.1 mrg choice[i].no_time = 1; 677 1.1 mrg else 678 1.1 mrg choice[i].time = choice[i].time - choice[i].prev_time; 679 1.1 mrg } 680 1.1 mrg choice[i].prev_time = t; 681 1.1 mrg } 682 1.1 mrg 683 1.1 mrg if (choice[i].no_time) 684 1.1 mrg continue; 685 1.1 mrg 686 1.1 mrg /* Look for the fastest after CMP_DIFFPREV has been applied, but 687 1.1 mrg before CMP_RATIO or CMP_DIFFERENCE. There's only a fastest shown 688 1.1 mrg if there's more than one routine. */ 689 1.1 mrg if (num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time)) 690 1.1 mrg { 691 1.1 mrg fastest = i; 692 1.1 mrg fastest_time = choice[i].time; 693 1.1 mrg } 694 1.1 mrg 695 1.1 mrg if (option_cmp == CMP_DIFFPREV) 696 1.1 mrg { 697 1.1 mrg /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */ 698 1.1 mrg if (option_unit == UNIT_CYCLES) 699 1.1 mrg choice[i].time /= speed_cycletime; 700 1.1 mrg else if (option_unit == UNIT_CYCLESPERLIMB) 701 1.1 mrg { 702 1.1 mrg if (prev_size == -1) 703 1.1 mrg choice[i].time /= speed_cycletime; 704 1.1 mrg else 705 1.1 mrg choice[i].time /= (speed_cycletime 706 1.1 mrg * (SIZE_TO_DIVISOR(s->size) 707 1.1 mrg - SIZE_TO_DIVISOR(prev_size))); 708 1.1 mrg } 709 1.1 mrg } 710 1.1 mrg else 711 1.1 mrg { 712 1.1 mrg if (option_unit == UNIT_CYCLES) 713 1.1 mrg choice[i].time /= speed_cycletime; 714 1.1 mrg else if (option_unit == UNIT_CYCLESPERLIMB) 715 1.1 mrg choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size)); 716 1.1 mrg 717 1.1 mrg if (option_cmp == CMP_RATIO && i > 0) 718 1.1 mrg { 719 1.1 mrg /* A ratio isn't affected by the units chosen. */ 720 1.1 mrg if (choice[0].no_time || choice[0].time == 0.0) 721 1.1 mrg choice[i].no_time = 1; 722 1.1 mrg else 723 1.1 mrg choice[i].time /= choice[0].time; 724 1.1 mrg } 725 1.1 mrg else if (option_cmp == CMP_DIFFERENCE && i > 0) 726 1.1 mrg { 727 1.1 mrg if (choice[0].no_time) 728 1.1 mrg { 729 1.1 mrg choice[i].no_time = 1; 730 1.1 mrg continue; 731 1.1 mrg } 732 1.1 mrg choice[i].time -= choice[0].time; 733 1.1 mrg } 734 1.1 mrg } 735 1.1 mrg } 736 1.1 mrg 737 1.1 mrg if (option_gnuplot) 738 1.1 mrg { 739 1.1 mrg /* In CMP_DIFFPREV, don't print anything for the first size, start 740 1.1 mrg with the second where an actual difference is available. 741 1.1 mrg 742 1.1 mrg In CMP_RATIO, print the first column as 1.0. 743 1.1 mrg 744 1.1 mrg The 9 decimals printed is much more than the expected precision of 745 1.1 mrg the measurements actually. */ 746 1.1 mrg 747 1.1 mrg if (! (option_cmp == CMP_DIFFPREV && prev_size == -1)) 748 1.1 mrg { 749 1.1 mrg fprintf (fp, "%-6ld ", s->size); 750 1.1 mrg for (i = 0; i < num_choices; i++) 751 1.1 mrg fprintf (fp, " %.9e", 752 1.1 mrg choice[i].no_time ? 0.0 753 1.1 mrg : (option_cmp == CMP_RATIO && i == 0) ? 1.0 754 1.1 mrg : choice[i].time); 755 1.1 mrg fprintf (fp, "\n"); 756 1.1 mrg } 757 1.1 mrg } 758 1.1 mrg else 759 1.1 mrg { 760 1.1 mrg fprintf (fp, "%-6ld ", s->size); 761 1.1 mrg for (i = 0; i < num_choices; i++) 762 1.1 mrg { 763 1.1 mrg char buf[128]; 764 1.1 mrg int decimals; 765 1.1 mrg 766 1.1 mrg if (choice[i].no_time) 767 1.1 mrg { 768 1.1 mrg fprintf (fp, " %*s", COLUMN_WIDTH, "n/a"); 769 1.1 mrg } 770 1.1 mrg else 771 1.1 mrg {if (option_unit == UNIT_CYCLESPERLIMB 772 1.1 mrg || (option_cmp == CMP_RATIO && i > 0)) 773 1.1 mrg decimals = 4; 774 1.1 mrg else if (option_unit == UNIT_CYCLES) 775 1.1 mrg decimals = 2; 776 1.1 mrg else 777 1.1 mrg decimals = 9; 778 1.1 mrg 779 1.1 mrg sprintf (buf, "%s%.*f%s", 780 1.1 mrg i == fastest ? first_open_fastest : first_open_notfastest, 781 1.1 mrg decimals, choice[i].time, first_close); 782 1.1 mrg fprintf (fp, " %*s", COLUMN_WIDTH, buf); 783 1.1 mrg } 784 1.1 mrg } 785 1.1 mrg fprintf (fp, "\n"); 786 1.1 mrg } 787 1.1 mrg 788 1.1 mrg TMP_FREE; 789 1.1 mrg } 790 1.1 mrg 791 1.1 mrg void 792 1.1 mrg run_all (FILE *fp) 793 1.1 mrg { 794 1.1 mrg mp_size_t prev_size; 795 1.1 mrg int i; 796 1.1 mrg TMP_DECL; 797 1.1 mrg 798 1.1 mrg TMP_MARK; 799 1.1 mrg SPEED_TMP_ALLOC_LIMBS (sp.xp_block, SPEED_BLOCK_SIZE, sp.align_xp); 800 1.1 mrg SPEED_TMP_ALLOC_LIMBS (sp.yp_block, SPEED_BLOCK_SIZE, sp.align_yp); 801 1.1 mrg 802 1.1 mrg data_fill (sp.xp_block, SPEED_BLOCK_SIZE); 803 1.1 mrg data_fill (sp.yp_block, SPEED_BLOCK_SIZE); 804 1.1 mrg 805 1.1 mrg for (i = 0; i < size_num; i++) 806 1.1 mrg { 807 1.1 mrg sp.size = size_array[i].start; 808 1.1 mrg prev_size = -1; 809 1.1 mrg for (;;) 810 1.1 mrg { 811 1.1 mrg mp_size_t step; 812 1.1 mrg 813 1.1 mrg if (option_data == DATA_2FD && sp.size >= 2) 814 1.1 mrg sp.xp[sp.size-1] = 2; 815 1.1 mrg 816 1.1 mrg run_one (fp, &sp, prev_size); 817 1.1 mrg prev_size = sp.size; 818 1.1 mrg 819 1.1 mrg if (option_data == DATA_2FD && sp.size >= 2) 820 1.1 mrg sp.xp[sp.size-1] = MP_LIMB_T_MAX; 821 1.1 mrg 822 1.1 mrg if (option_factor != 0.0) 823 1.1 mrg { 824 1.1 mrg step = (mp_size_t) (sp.size * option_factor - sp.size); 825 1.1 mrg if (step < 1) 826 1.1 mrg step = 1; 827 1.1 mrg } 828 1.1 mrg else 829 1.1 mrg step = 1; 830 1.1 mrg if (step < option_step) 831 1.1 mrg step = option_step; 832 1.1 mrg 833 1.1 mrg sp.size += step; 834 1.1 mrg if (sp.size > size_array[i].end) 835 1.1 mrg break; 836 1.1 mrg } 837 1.1 mrg } 838 1.1 mrg 839 1.1 mrg TMP_FREE; 840 1.1 mrg } 841 1.1 mrg 842 1.1 mrg 843 1.1 mrg FILE * 844 1.1 mrg fopen_for_write (const char *filename) 845 1.1 mrg { 846 1.1 mrg FILE *fp; 847 1.1 mrg if ((fp = fopen (filename, "w")) == NULL) 848 1.1 mrg { 849 1.1 mrg fprintf (stderr, "Cannot create %s\n", filename); 850 1.1 mrg exit(1); 851 1.1 mrg } 852 1.1 mrg return fp; 853 1.1 mrg } 854 1.1 mrg 855 1.1 mrg void 856 1.1 mrg fclose_written (FILE *fp, const char *filename) 857 1.1 mrg { 858 1.1 mrg int err; 859 1.1 mrg 860 1.1 mrg err = ferror (fp); 861 1.1 mrg err |= fclose (fp); 862 1.1 mrg 863 1.1 mrg if (err) 864 1.1 mrg { 865 1.1 mrg fprintf (stderr, "Error writing %s\n", filename); 866 1.1 mrg exit(1); 867 1.1 mrg } 868 1.1 mrg } 869 1.1 mrg 870 1.1 mrg 871 1.1 mrg void 872 1.1 mrg run_gnuplot (int argc, char *argv[]) 873 1.1 mrg { 874 1.1 mrg char *plot_filename; 875 1.1 mrg char *data_filename; 876 1.1 mrg FILE *fp; 877 1.1 mrg int i; 878 1.1 mrg 879 1.1 mrg plot_filename = (char *) (*__gmp_allocate_func) 880 1.1 mrg (strlen (option_gnuplot_basename) + 20); 881 1.1 mrg data_filename = (char *) (*__gmp_allocate_func) 882 1.1 mrg (strlen (option_gnuplot_basename) + 20); 883 1.1 mrg 884 1.1 mrg sprintf (plot_filename, "%s.gnuplot", option_gnuplot_basename); 885 1.1 mrg sprintf (data_filename, "%s.data", option_gnuplot_basename); 886 1.1 mrg 887 1.1 mrg fp = fopen_for_write (plot_filename); 888 1.1 mrg 889 1.1 mrg fprintf (fp, "# Generated with:\n"); 890 1.1 mrg fprintf (fp, "#"); 891 1.1 mrg for (i = 0; i < argc; i++) 892 1.1 mrg fprintf (fp, " %s", argv[i]); 893 1.1 mrg fprintf (fp, "\n"); 894 1.1 mrg fprintf (fp, "\n"); 895 1.1 mrg 896 1.1 mrg fprintf (fp, "reset\n"); 897 1.1 mrg 898 1.1 mrg /* Putting the key at the top left is usually good, and you can change it 899 1.1 mrg interactively if it's not. */ 900 1.1 mrg fprintf (fp, "set key left\n"); 901 1.1 mrg 902 1.1.1.4 mrg /* write underscores, not subscripts */ 903 1.1.1.4 mrg fprintf (fp, "set termoption noenhanced\n"); 904 1.1.1.4 mrg 905 1.1 mrg /* designed to make it possible to see crossovers easily */ 906 1.1.1.2 mrg fprintf (fp, "set style data lines\n"); 907 1.1 mrg 908 1.1 mrg fprintf (fp, "plot "); 909 1.1 mrg for (i = 0; i < num_choices; i++) 910 1.1 mrg { 911 1.1 mrg fprintf (fp, " \"%s\" using 1:%d", data_filename, i+2); 912 1.1 mrg fprintf (fp, " title \"%s\"", choice[i].name); 913 1.1 mrg 914 1.1 mrg if (i != num_choices-1) 915 1.1 mrg fprintf (fp, ", \\"); 916 1.1 mrg fprintf (fp, "\n"); 917 1.1 mrg } 918 1.1 mrg 919 1.1 mrg fprintf (fp, "load \"-\"\n"); 920 1.1 mrg fclose_written (fp, plot_filename); 921 1.1 mrg 922 1.1 mrg fp = fopen_for_write (data_filename); 923 1.1 mrg 924 1.1 mrg /* Unbuffered so you can see where the program was up to if it crashes or 925 1.1 mrg you kill it. */ 926 1.1 mrg setbuf (fp, NULL); 927 1.1 mrg 928 1.1 mrg run_all (fp); 929 1.1 mrg fclose_written (fp, data_filename); 930 1.1 mrg } 931 1.1 mrg 932 1.1 mrg 933 1.1 mrg /* Return a limb with n many one bits (starting from the least significant) */ 934 1.1 mrg 935 1.1 mrg #define LIMB_ONES(n) \ 936 1.1 mrg ((n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX \ 937 1.1 mrg : (n) == 0 ? CNST_LIMB(0) \ 938 1.1 mrg : (CNST_LIMB(1) << (n)) - 1) 939 1.1 mrg 940 1.1 mrg mp_limb_t 941 1.1 mrg r_string (const char *s) 942 1.1 mrg { 943 1.1 mrg const char *s_orig = s; 944 1.1 mrg long n; 945 1.1 mrg 946 1.1 mrg if (strcmp (s, "aas") == 0) 947 1.1 mrg return GMP_NUMB_0xAA; 948 1.1 mrg 949 1.1 mrg { 950 1.1 mrg mpz_t z; 951 1.1 mrg mp_limb_t l; 952 1.1 mrg int set, siz; 953 1.1 mrg 954 1.1 mrg mpz_init (z); 955 1.1 mrg set = mpz_set_str (z, s, 0); 956 1.1 mrg siz = SIZ(z); 957 1.1 mrg l = (siz == 0 ? 0 : siz > 0 ? PTR(z)[0] : -PTR(z)[0]); 958 1.1 mrg mpz_clear (z); 959 1.1 mrg if (set == 0) 960 1.1 mrg { 961 1.1 mrg if (siz > 1 || siz < -1) 962 1.1 mrg printf ("Warning, r parameter %s truncated to %d bits\n", 963 1.1 mrg s_orig, GMP_LIMB_BITS); 964 1.1 mrg return l; 965 1.1 mrg } 966 1.1 mrg } 967 1.1 mrg 968 1.1 mrg if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) 969 1.1 mrg n = strtoul (s+2, (char **) &s, 16); 970 1.1 mrg else 971 1.1 mrg n = strtol (s, (char **) &s, 10); 972 1.1 mrg 973 1.1 mrg if (strcmp (s, "bits") == 0) 974 1.1 mrg { 975 1.1 mrg mp_limb_t l; 976 1.1 mrg if (n > GMP_LIMB_BITS) 977 1.1 mrg { 978 1.1 mrg fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n", 979 1.1 mrg n, GMP_LIMB_BITS); 980 1.1 mrg exit (1); 981 1.1 mrg } 982 1.1 mrg mpn_random (&l, 1); 983 1.1 mrg return (l | (CNST_LIMB(1) << (n-1))) & LIMB_ONES(n); 984 1.1 mrg } 985 1.1 mrg else if (strcmp (s, "ones") == 0) 986 1.1 mrg { 987 1.1 mrg if (n > GMP_LIMB_BITS) 988 1.1 mrg { 989 1.1 mrg fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n", 990 1.1 mrg n, GMP_LIMB_BITS); 991 1.1 mrg exit (1); 992 1.1 mrg } 993 1.1 mrg return LIMB_ONES (n); 994 1.1 mrg } 995 1.1 mrg else if (*s != '\0') 996 1.1 mrg { 997 1.1 mrg fprintf (stderr, "invalid r parameter: %s\n", s_orig); 998 1.1 mrg exit (1); 999 1.1 mrg } 1000 1.1 mrg 1001 1.1 mrg return n; 1002 1.1 mrg } 1003 1.1 mrg 1004 1.1 mrg 1005 1.1 mrg void 1006 1.1 mrg routine_find (struct choice_t *c, const char *s_orig) 1007 1.1 mrg { 1008 1.1 mrg const char *s; 1009 1.1 mrg int i; 1010 1.1 mrg size_t nlen; 1011 1.1 mrg 1012 1.1 mrg c->name = s_orig; 1013 1.1 mrg s = strchr (s_orig, '*'); 1014 1.1 mrg if (s != NULL) 1015 1.1 mrg { 1016 1.1 mrg c->scale = atof(s_orig); 1017 1.1 mrg s++; 1018 1.1 mrg } 1019 1.1 mrg else 1020 1.1 mrg { 1021 1.1 mrg c->scale = 1.0; 1022 1.1 mrg s = s_orig; 1023 1.1 mrg } 1024 1.1 mrg 1025 1.1 mrg for (i = 0; i < numberof (routine); i++) 1026 1.1 mrg { 1027 1.1 mrg nlen = strlen (routine[i].name); 1028 1.1 mrg if (memcmp (s, routine[i].name, nlen) != 0) 1029 1.1 mrg continue; 1030 1.1 mrg 1031 1.1 mrg if (s[nlen] == '.') 1032 1.1 mrg { 1033 1.1 mrg /* match, with a .r parameter */ 1034 1.1 mrg 1035 1.1 mrg if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL))) 1036 1.1 mrg { 1037 1.1 mrg fprintf (stderr, 1038 1.1 mrg "Choice %s bad: doesn't take a \".<r>\" parameter\n", 1039 1.1 mrg s_orig); 1040 1.1 mrg exit (1); 1041 1.1 mrg } 1042 1.1 mrg 1043 1.1 mrg c->p = &routine[i]; 1044 1.1 mrg c->r = r_string (s + nlen + 1); 1045 1.1 mrg return; 1046 1.1 mrg } 1047 1.1 mrg 1048 1.1 mrg if (s[nlen] == '\0') 1049 1.1 mrg { 1050 1.1 mrg /* match, with no parameter */ 1051 1.1 mrg 1052 1.1 mrg if (routine[i].flag & FLAG_R) 1053 1.1 mrg { 1054 1.1 mrg fprintf (stderr, 1055 1.1 mrg "Choice %s bad: needs a \".<r>\" parameter\n", 1056 1.1 mrg s_orig); 1057 1.1 mrg exit (1); 1058 1.1 mrg } 1059 1.1 mrg 1060 1.1 mrg c->p = &routine[i]; 1061 1.1 mrg c->r = 0; 1062 1.1 mrg return; 1063 1.1 mrg } 1064 1.1 mrg } 1065 1.1 mrg 1066 1.1 mrg fprintf (stderr, "Choice %s unrecognised\n", s_orig); 1067 1.1 mrg exit (1); 1068 1.1 mrg } 1069 1.1 mrg 1070 1.1 mrg 1071 1.1 mrg void 1072 1.1 mrg usage (void) 1073 1.1 mrg { 1074 1.1 mrg int i; 1075 1.1 mrg 1076 1.1 mrg speed_time_init (); 1077 1.1 mrg 1078 1.1 mrg printf ("Usage: speed [-options] -s size <routine>...\n"); 1079 1.1 mrg printf ("Measure the speed of some routines.\n"); 1080 1.1 mrg printf ("Times are in seconds, accuracy is shown.\n"); 1081 1.1 mrg printf ("\n"); 1082 1.1 mrg printf (" -p num set precision as number of time units each routine must run\n"); 1083 1.1 mrg printf (" -s size[-end][,size[-end]]... sizes to measure\n"); 1084 1.1 mrg printf (" single sizes or ranges, sep with comma or use multiple -s\n"); 1085 1.1 mrg printf (" -t step step through sizes by given amount\n"); 1086 1.1 mrg printf (" -f factor step through sizes by given factor (eg. 1.05)\n"); 1087 1.1 mrg printf (" -r show times as ratios of the first routine\n"); 1088 1.1 mrg printf (" -d show times as difference from the first routine\n"); 1089 1.1 mrg printf (" -D show times as difference from previous size shown\n"); 1090 1.1 mrg printf (" -c show times in CPU cycles\n"); 1091 1.1 mrg printf (" -C show times in cycles per limb\n"); 1092 1.1 mrg printf (" -u print resource usage (memory) at end\n"); 1093 1.1 mrg printf (" -P name output plot files \"name.gnuplot\" and \"name.data\"\n"); 1094 1.1 mrg printf (" -a <type> use given data: random(default), random2, zeros, aas, ffs, 2fd\n"); 1095 1.1 mrg printf (" -x, -y, -w, -W <align> specify data alignments, sources and dests\n"); 1096 1.1 mrg printf (" -o addrs print addresses of data blocks\n"); 1097 1.1 mrg printf ("\n"); 1098 1.1 mrg printf ("If both -t and -f are used, it means step by the factor or the step, whichever\n"); 1099 1.1 mrg printf ("is greater.\n"); 1100 1.1 mrg printf ("If both -C and -D are used, it means cycles per however many limbs between a\n"); 1101 1.1 mrg printf ("size and the previous size.\n"); 1102 1.1 mrg printf ("\n"); 1103 1.1 mrg printf ("After running with -P, plots can be viewed with Gnuplot or Quickplot.\n"); 1104 1.1 mrg printf ("\"gnuplot name.gnuplot\" (use \"set logscale xy; replot\" at the prompt for\n"); 1105 1.1 mrg printf ("a log/log plot).\n"); 1106 1.1 mrg printf ("\"quickplot -s name.data\" (has interactive zooming, and note -s is important\n"); 1107 1.1 mrg printf ("when viewing more than one routine, it means same axis scales for all data).\n"); 1108 1.1 mrg printf ("\n"); 1109 1.1 mrg printf ("The available routines are as follows.\n"); 1110 1.1 mrg printf ("\n"); 1111 1.1 mrg 1112 1.1 mrg for (i = 0; i < numberof (routine); i++) 1113 1.1 mrg { 1114 1.1 mrg if (routine[i].flag & FLAG_R) 1115 1.1 mrg printf ("\t%s.r\n", routine[i].name); 1116 1.1 mrg else if (routine[i].flag & FLAG_R_OPTIONAL) 1117 1.1 mrg printf ("\t%s (optional .r)\n", routine[i].name); 1118 1.1 mrg else 1119 1.1 mrg printf ("\t%s\n", routine[i].name); 1120 1.1 mrg } 1121 1.1 mrg printf ("\n"); 1122 1.1 mrg printf ("Routines with a \".r\" need an extra parameter, for example mpn_lshift.6\n"); 1123 1.1 mrg printf ("r should be in decimal, or use 0xN for hexadecimal.\n"); 1124 1.1 mrg printf ("\n"); 1125 1.1 mrg printf ("Special forms for r are \"<N>bits\" for a random N bit number, \"<N>ones\" for\n"); 1126 1.1 mrg printf ("N one bits, or \"aas\" for 0xAA..AA.\n"); 1127 1.1 mrg printf ("\n"); 1128 1.1 mrg printf ("Times for sizes out of the range accepted by a routine are shown as 0.\n"); 1129 1.1 mrg printf ("The fastest routine at each size is marked with a # (free form output only).\n"); 1130 1.1 mrg printf ("\n"); 1131 1.1 mrg printf ("%s", speed_time_string); 1132 1.1 mrg printf ("\n"); 1133 1.1 mrg printf ("Gnuplot home page http://www.gnuplot.info/\n"); 1134 1.1 mrg printf ("Quickplot home page http://quickplot.sourceforge.net/\n"); 1135 1.1 mrg } 1136 1.1 mrg 1137 1.1 mrg void 1138 1.1 mrg check_align_option (const char *name, mp_size_t align) 1139 1.1 mrg { 1140 1.1 mrg if (align < 0 || align > SPEED_TMP_ALLOC_ADJUST_MASK) 1141 1.1 mrg { 1142 1.1 mrg fprintf (stderr, "Alignment request out of range: %s %ld\n", 1143 1.1 mrg name, (long) align); 1144 1.1 mrg fprintf (stderr, " should be 0 to %d (limbs), inclusive\n", 1145 1.1 mrg SPEED_TMP_ALLOC_ADJUST_MASK); 1146 1.1 mrg exit (1); 1147 1.1 mrg } 1148 1.1 mrg } 1149 1.1 mrg 1150 1.1 mrg int 1151 1.1 mrg main (int argc, char *argv[]) 1152 1.1 mrg { 1153 1.1 mrg int i; 1154 1.1 mrg int opt; 1155 1.1 mrg 1156 1.1 mrg /* Unbuffered so output goes straight out when directed to a pipe or file 1157 1.1 mrg and isn't lost on killing the program half way. */ 1158 1.1 mrg setbuf (stdout, NULL); 1159 1.1 mrg 1160 1.1 mrg for (;;) 1161 1.1 mrg { 1162 1.1 mrg opt = getopt(argc, argv, "a:CcDdEFf:o:p:P:rRs:t:ux:y:w:W:z"); 1163 1.1 mrg if (opt == EOF) 1164 1.1 mrg break; 1165 1.1 mrg 1166 1.1 mrg switch (opt) { 1167 1.1 mrg case 'a': 1168 1.1 mrg if (strcmp (optarg, "random") == 0) option_data = DATA_RANDOM; 1169 1.1 mrg else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2; 1170 1.1 mrg else if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS; 1171 1.1 mrg else if (strcmp (optarg, "aas") == 0) option_data = DATA_AAS; 1172 1.1 mrg else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS; 1173 1.1 mrg else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD; 1174 1.1 mrg else 1175 1.1 mrg { 1176 1.1 mrg fprintf (stderr, "unrecognised data option: %s\n", optarg); 1177 1.1 mrg exit (1); 1178 1.1 mrg } 1179 1.1 mrg break; 1180 1.1 mrg case 'C': 1181 1.1 mrg if (option_unit != UNIT_SECONDS) goto bad_unit; 1182 1.1 mrg option_unit = UNIT_CYCLESPERLIMB; 1183 1.1 mrg break; 1184 1.1 mrg case 'c': 1185 1.1 mrg if (option_unit != UNIT_SECONDS) 1186 1.1 mrg { 1187 1.1 mrg bad_unit: 1188 1.1 mrg fprintf (stderr, "cannot use more than one of -c, -C\n"); 1189 1.1 mrg exit (1); 1190 1.1 mrg } 1191 1.1 mrg option_unit = UNIT_CYCLES; 1192 1.1 mrg break; 1193 1.1 mrg case 'D': 1194 1.1 mrg if (option_cmp != CMP_ABSOLUTE) goto bad_cmp; 1195 1.1 mrg option_cmp = CMP_DIFFPREV; 1196 1.1 mrg break; 1197 1.1 mrg case 'd': 1198 1.1 mrg if (option_cmp != CMP_ABSOLUTE) 1199 1.1 mrg { 1200 1.1 mrg bad_cmp: 1201 1.1 mrg fprintf (stderr, "cannot use more than one of -d, -D, -r\n"); 1202 1.1 mrg exit (1); 1203 1.1 mrg } 1204 1.1 mrg option_cmp = CMP_DIFFERENCE; 1205 1.1 mrg break; 1206 1.1 mrg case 'E': 1207 1.1 mrg option_square = 1; 1208 1.1 mrg break; 1209 1.1 mrg case 'F': 1210 1.1 mrg option_square = 2; 1211 1.1 mrg break; 1212 1.1 mrg case 'f': 1213 1.1 mrg option_factor = atof (optarg); 1214 1.1 mrg if (option_factor <= 1.0) 1215 1.1 mrg { 1216 1.1 mrg fprintf (stderr, "-f factor must be > 1.0\n"); 1217 1.1 mrg exit (1); 1218 1.1 mrg } 1219 1.1 mrg break; 1220 1.1 mrg case 'o': 1221 1.1 mrg speed_option_set (optarg); 1222 1.1 mrg break; 1223 1.1 mrg case 'P': 1224 1.1 mrg option_gnuplot = 1; 1225 1.1 mrg option_gnuplot_basename = optarg; 1226 1.1 mrg break; 1227 1.1 mrg case 'p': 1228 1.1 mrg speed_precision = atoi (optarg); 1229 1.1 mrg break; 1230 1.1 mrg case 'R': 1231 1.1 mrg option_seed = time (NULL); 1232 1.1 mrg break; 1233 1.1 mrg case 'r': 1234 1.1 mrg if (option_cmp != CMP_ABSOLUTE) 1235 1.1 mrg goto bad_cmp; 1236 1.1 mrg option_cmp = CMP_RATIO; 1237 1.1 mrg break; 1238 1.1 mrg case 's': 1239 1.1 mrg { 1240 1.1 mrg char *s; 1241 1.1 mrg for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ",")) 1242 1.1 mrg { 1243 1.1 mrg if (size_num == size_allocnum) 1244 1.1 mrg { 1245 1.1 mrg size_array = (struct size_array_t *) 1246 1.1 mrg __gmp_allocate_or_reallocate 1247 1.1 mrg (size_array, 1248 1.1 mrg size_allocnum * sizeof(size_array[0]), 1249 1.1 mrg (size_allocnum+10) * sizeof(size_array[0])); 1250 1.1 mrg size_allocnum += 10; 1251 1.1 mrg } 1252 1.1 mrg if (sscanf (s, "%ld-%ld", 1253 1.1 mrg &size_array[size_num].start, 1254 1.1 mrg &size_array[size_num].end) != 2) 1255 1.1 mrg { 1256 1.1 mrg size_array[size_num].start = size_array[size_num].end 1257 1.1 mrg = atol (s); 1258 1.1 mrg } 1259 1.1 mrg 1260 1.1 mrg if (size_array[size_num].start < 0 1261 1.1 mrg || size_array[size_num].end < 0 1262 1.1 mrg || size_array[size_num].start > size_array[size_num].end) 1263 1.1 mrg { 1264 1.1 mrg fprintf (stderr, "invalid size parameter: %s\n", s); 1265 1.1 mrg exit (1); 1266 1.1 mrg } 1267 1.1 mrg 1268 1.1 mrg size_num++; 1269 1.1 mrg } 1270 1.1 mrg } 1271 1.1 mrg break; 1272 1.1 mrg case 't': 1273 1.1 mrg option_step = atol (optarg); 1274 1.1 mrg if (option_step < 1) 1275 1.1 mrg { 1276 1.1 mrg fprintf (stderr, "-t step must be >= 1\n"); 1277 1.1 mrg exit (1); 1278 1.1 mrg } 1279 1.1 mrg break; 1280 1.1 mrg case 'u': 1281 1.1 mrg option_resource_usage = 1; 1282 1.1 mrg break; 1283 1.1 mrg case 'z': 1284 1.1 mrg sp.cache = 1; 1285 1.1 mrg break; 1286 1.1 mrg case 'x': 1287 1.1 mrg sp.align_xp = atol (optarg); 1288 1.1 mrg check_align_option ("-x", sp.align_xp); 1289 1.1 mrg break; 1290 1.1 mrg case 'y': 1291 1.1 mrg sp.align_yp = atol (optarg); 1292 1.1 mrg check_align_option ("-y", sp.align_yp); 1293 1.1 mrg break; 1294 1.1 mrg case 'w': 1295 1.1 mrg sp.align_wp = atol (optarg); 1296 1.1 mrg check_align_option ("-w", sp.align_wp); 1297 1.1 mrg break; 1298 1.1 mrg case 'W': 1299 1.1 mrg sp.align_wp2 = atol (optarg); 1300 1.1 mrg check_align_option ("-W", sp.align_wp2); 1301 1.1 mrg break; 1302 1.1 mrg case '?': 1303 1.1 mrg exit(1); 1304 1.1 mrg } 1305 1.1 mrg } 1306 1.1 mrg 1307 1.1 mrg if (optind >= argc) 1308 1.1 mrg { 1309 1.1 mrg usage (); 1310 1.1 mrg exit (1); 1311 1.1 mrg } 1312 1.1 mrg 1313 1.1 mrg if (size_num == 0) 1314 1.1 mrg { 1315 1.1 mrg fprintf (stderr, "-s <size> must be specified\n"); 1316 1.1 mrg exit (1); 1317 1.1 mrg } 1318 1.1 mrg 1319 1.1 mrg gmp_randinit_default (__gmp_rands); 1320 1.1 mrg __gmp_rands_initialized = 1; 1321 1.1 mrg gmp_randseed_ui (__gmp_rands, option_seed); 1322 1.1 mrg 1323 1.1 mrg choice = (struct choice_t *) (*__gmp_allocate_func) 1324 1.1 mrg ((argc - optind) * sizeof(choice[0])); 1325 1.1 mrg for ( ; optind < argc; optind++) 1326 1.1 mrg { 1327 1.1 mrg struct choice_t c; 1328 1.1 mrg routine_find (&c, argv[optind]); 1329 1.1 mrg choice[num_choices] = c; 1330 1.1 mrg num_choices++; 1331 1.1 mrg } 1332 1.1 mrg 1333 1.1 mrg if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) && 1334 1.1 mrg num_choices < 2) 1335 1.1 mrg { 1336 1.1 mrg fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n"); 1337 1.1 mrg } 1338 1.1 mrg 1339 1.1 mrg speed_time_init (); 1340 1.1 mrg if (option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB) 1341 1.1 mrg speed_cycletime_need_cycles (); 1342 1.1 mrg else 1343 1.1 mrg speed_cycletime_need_seconds (); 1344 1.1 mrg 1345 1.1 mrg if (option_gnuplot) 1346 1.1 mrg { 1347 1.1 mrg run_gnuplot (argc, argv); 1348 1.1 mrg } 1349 1.1 mrg else 1350 1.1 mrg { 1351 1.1 mrg if (option_unit == UNIT_SECONDS) 1352 1.1 mrg printf ("overhead %.9f secs", speed_measure (speed_noop, NULL)); 1353 1.1 mrg else 1354 1.1 mrg printf ("overhead %.2f cycles", 1355 1.1 mrg speed_measure (speed_noop, NULL) / speed_cycletime); 1356 1.1 mrg printf (", precision %d units of %.2e secs", 1357 1.1 mrg speed_precision, speed_unittime); 1358 1.1 mrg 1359 1.1 mrg if (speed_cycletime == 1.0 || speed_cycletime == 0.0) 1360 1.1 mrg printf (", CPU freq unknown\n"); 1361 1.1 mrg else 1362 1.1 mrg printf (", CPU freq %.2f MHz\n", 1e-6/speed_cycletime); 1363 1.1 mrg 1364 1.1 mrg printf (" "); 1365 1.1 mrg for (i = 0; i < num_choices; i++) 1366 1.1 mrg printf (" %*s", COLUMN_WIDTH, choice[i].name); 1367 1.1 mrg printf ("\n"); 1368 1.1 mrg 1369 1.1 mrg run_all (stdout); 1370 1.1 mrg } 1371 1.1 mrg 1372 1.1 mrg if (option_resource_usage) 1373 1.1 mrg { 1374 1.1 mrg #if HAVE_GETRUSAGE 1375 1.1 mrg { 1376 1.1 mrg /* This doesn't give data sizes on linux 2.0.x, only utime. */ 1377 1.1 mrg struct rusage r; 1378 1.1 mrg if (getrusage (RUSAGE_SELF, &r) != 0) 1379 1.1 mrg perror ("getrusage"); 1380 1.1 mrg else 1381 1.1 mrg printf ("getrusage(): utime %ld.%06ld data %ld stack %ld maxresident %ld\n", 1382 1.1.1.5 mrg (long) r.ru_utime.tv_sec, (long) r.ru_utime.tv_usec, 1383 1.1 mrg r.ru_idrss, r.ru_isrss, r.ru_ixrss); 1384 1.1 mrg } 1385 1.1 mrg #else 1386 1.1 mrg printf ("getrusage() not available\n"); 1387 1.1 mrg #endif 1388 1.1 mrg 1389 1.1 mrg /* Linux kernel. */ 1390 1.1 mrg { 1391 1.1 mrg char buf[128]; 1392 1.1 mrg sprintf (buf, "/proc/%d/status", getpid()); 1393 1.1 mrg if (access (buf, R_OK) == 0) 1394 1.1 mrg { 1395 1.1 mrg sprintf (buf, "cat /proc/%d/status", getpid()); 1396 1.1 mrg system (buf); 1397 1.1 mrg } 1398 1.1 mrg 1399 1.1 mrg } 1400 1.1 mrg } 1401 1.1 mrg 1402 1.1 mrg return 0; 1403 1.1 mrg } 1404