1 1.1 mrg /* Generate perfect square testing data. 2 1.1 mrg 3 1.1.1.3 mrg Copyright 2002-2004, 2012, 2014 Free Software Foundation, Inc. 4 1.1 mrg 5 1.1 mrg This file is part of the GNU MP Library. 6 1.1 mrg 7 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify 8 1.1.1.3 mrg it under the terms of either: 9 1.1.1.3 mrg 10 1.1.1.3 mrg * the GNU Lesser General Public License as published by the Free 11 1.1.1.3 mrg Software Foundation; either version 3 of the License, or (at your 12 1.1.1.3 mrg option) any later version. 13 1.1.1.3 mrg 14 1.1.1.3 mrg or 15 1.1.1.3 mrg 16 1.1.1.3 mrg * the GNU General Public License as published by the Free Software 17 1.1.1.3 mrg Foundation; either version 2 of the License, or (at your option) any 18 1.1.1.3 mrg later version. 19 1.1.1.3 mrg 20 1.1.1.3 mrg or both in parallel, as here. 21 1.1 mrg 22 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but 23 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24 1.1.1.3 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25 1.1.1.3 mrg for more details. 26 1.1 mrg 27 1.1.1.3 mrg You should have received copies of the GNU General Public License and the 28 1.1.1.3 mrg GNU Lesser General Public License along with the GNU MP Library. If not, 29 1.1.1.3 mrg see https://www.gnu.org/licenses/. */ 30 1.1 mrg 31 1.1 mrg #include <stdio.h> 32 1.1 mrg #include <stdlib.h> 33 1.1 mrg 34 1.1.1.2 mrg #include "bootstrap.c" 35 1.1 mrg 36 1.1 mrg 37 1.1 mrg /* The aim of this program is to choose either mpn_mod_34lsub1 or mpn_mod_1 38 1.1 mrg (plus a PERFSQR_PP modulus), and generate tables indicating quadratic 39 1.1 mrg residues and non-residues modulo small factors of that modulus. 40 1.1 mrg 41 1.1 mrg For the usual 32 or 64 bit cases mpn_mod_34lsub1 gets used. That 42 1.1 mrg function exists specifically because 2^24-1 and 2^48-1 have nice sets of 43 1.1 mrg prime factors. For other limb sizes it's considered, but if it doesn't 44 1.1 mrg have good factors then mpn_mod_1 will be used instead. 45 1.1 mrg 46 1.1 mrg When mpn_mod_1 is used, the modulus PERFSQR_PP is created from a 47 1.1 mrg selection of small primes, chosen to fill PERFSQR_MOD_BITS of a limb, 48 1.1 mrg with that bit count chosen so (2*GMP_LIMB_BITS)*2^PERFSQR_MOD_BITS <= 49 1.1 mrg GMP_LIMB_MAX, allowing PERFSQR_MOD_IDX in mpn/generic/perfsqr.c to do its 50 1.1 mrg calculation within a single limb. 51 1.1 mrg 52 1.1 mrg In either case primes can be combined to make divisors. The table data 53 1.1 mrg then effectively indicates remainders which are quadratic residues mod 54 1.1 mrg all the primes. This sort of combining reduces the number of steps 55 1.1 mrg needed after mpn_mod_34lsub1 or mpn_mod_1, saving code size and time. 56 1.1 mrg Nothing is gained or lost in terms of detections, the same total fraction 57 1.1 mrg of non-residues will be identified. 58 1.1 mrg 59 1.1 mrg Nothing particularly sophisticated is attempted for combining factors to 60 1.1 mrg make divisors. This is probably a kind of knapsack problem so it'd be 61 1.1 mrg too hard to attempt anything completely general. For the usual 32 and 64 62 1.1 mrg bit limbs we get a good enough result just pairing the biggest and 63 1.1 mrg smallest which fit together, repeatedly. 64 1.1 mrg 65 1.1 mrg Another aim is to get powerful combinations, ie. divisors which identify 66 1.1 mrg biggest fraction of non-residues, and have those run first. Again for 67 1.1 mrg the usual 32 and 64 bits it seems good enough just to pair for big 68 1.1 mrg divisors then sort according to the resulting fraction of non-residues 69 1.1 mrg identified. 70 1.1 mrg 71 1.1 mrg Also in this program, a table sq_res_0x100 of residues modulo 256 is 72 1.1 mrg generated. This simply fills bits into limbs of the appropriate 73 1.1 mrg build-time GMP_LIMB_BITS each. 74 1.1 mrg 75 1.1 mrg */ 76 1.1 mrg 77 1.1 mrg 78 1.1 mrg /* Normally we aren't using const in gen*.c programs, so as not to have to 79 1.1 mrg bother figuring out if it works, but using it with f_cmp_divisor and 80 1.1 mrg f_cmp_fraction avoids warnings from the qsort calls. */ 81 1.1 mrg 82 1.1 mrg /* Same tests as gmp.h. */ 83 1.1 mrg #if defined (__STDC__) \ 84 1.1 mrg || defined (__cplusplus) \ 85 1.1 mrg || defined (_AIX) \ 86 1.1 mrg || defined (__DECC) \ 87 1.1 mrg || (defined (__mips) && defined (_SYSTYPE_SVR4)) \ 88 1.1 mrg || defined (_MSC_VER) \ 89 1.1 mrg || defined (_WIN32) 90 1.1 mrg #define HAVE_CONST 1 91 1.1 mrg #endif 92 1.1 mrg 93 1.1 mrg #if ! HAVE_CONST 94 1.1 mrg #define const 95 1.1 mrg #endif 96 1.1 mrg 97 1.1 mrg 98 1.1 mrg mpz_t *sq_res_0x100; /* table of limbs */ 99 1.1 mrg int nsq_res_0x100; /* elements in sq_res_0x100 array */ 100 1.1 mrg int sq_res_0x100_num; /* squares in sq_res_0x100 */ 101 1.1 mrg double sq_res_0x100_fraction; /* sq_res_0x100_num / 256 */ 102 1.1 mrg 103 1.1 mrg int mod34_bits; /* 3*GMP_NUMB_BITS/4 */ 104 1.1 mrg int mod_bits; /* bits from PERFSQR_MOD_34 or MOD_PP */ 105 1.1 mrg int max_divisor; /* all divisors <= max_divisor */ 106 1.1 mrg int max_divisor_bits; /* ceil(log2(max_divisor)) */ 107 1.1 mrg double total_fraction; /* of squares */ 108 1.1 mrg mpz_t pp; /* product of primes, or 0 if mod_34lsub1 used */ 109 1.1 mrg mpz_t pp_norm; /* pp shifted so NUMB high bit set */ 110 1.1 mrg mpz_t pp_inverted; /* invert_limb style inverse */ 111 1.1 mrg mpz_t mod_mask; /* 2^mod_bits-1 */ 112 1.1 mrg char mod34_excuse[128]; /* why mod_34lsub1 not used (if it's not) */ 113 1.1 mrg 114 1.1 mrg /* raw list of divisors of 2^mod34_bits-1 or pp, just to show in a comment */ 115 1.1 mrg struct rawfactor_t { 116 1.1 mrg int divisor; 117 1.1 mrg int multiplicity; 118 1.1 mrg }; 119 1.1 mrg struct rawfactor_t *rawfactor; 120 1.1 mrg int nrawfactor; 121 1.1 mrg 122 1.1 mrg /* factors of 2^mod34_bits-1 or pp and associated data, after combining etc */ 123 1.1 mrg struct factor_t { 124 1.1 mrg int divisor; 125 1.1 mrg mpz_t inverse; /* 1/divisor mod 2^mod_bits */ 126 1.1 mrg mpz_t mask; /* indicating squares mod divisor */ 127 1.1 mrg double fraction; /* squares/total */ 128 1.1 mrg }; 129 1.1 mrg struct factor_t *factor; 130 1.1 mrg int nfactor; /* entries in use in factor array */ 131 1.1 mrg int factor_alloc; /* entries allocated to factor array */ 132 1.1 mrg 133 1.1 mrg 134 1.1 mrg int 135 1.1 mrg f_cmp_divisor (const void *parg, const void *qarg) 136 1.1 mrg { 137 1.1 mrg const struct factor_t *p, *q; 138 1.1.1.3 mrg p = (const struct factor_t *) parg; 139 1.1.1.3 mrg q = (const struct factor_t *) qarg; 140 1.1 mrg if (p->divisor > q->divisor) 141 1.1 mrg return 1; 142 1.1 mrg else if (p->divisor < q->divisor) 143 1.1 mrg return -1; 144 1.1 mrg else 145 1.1 mrg return 0; 146 1.1 mrg } 147 1.1 mrg 148 1.1 mrg int 149 1.1 mrg f_cmp_fraction (const void *parg, const void *qarg) 150 1.1 mrg { 151 1.1 mrg const struct factor_t *p, *q; 152 1.1.1.3 mrg p = (const struct factor_t *) parg; 153 1.1.1.3 mrg q = (const struct factor_t *) qarg; 154 1.1 mrg if (p->fraction > q->fraction) 155 1.1 mrg return 1; 156 1.1 mrg else if (p->fraction < q->fraction) 157 1.1 mrg return -1; 158 1.1 mrg else 159 1.1 mrg return 0; 160 1.1 mrg } 161 1.1 mrg 162 1.1 mrg /* Remove array[idx] by copying the remainder down, and adjust narray 163 1.1 mrg accordingly. */ 164 1.1 mrg #define COLLAPSE_ELEMENT(array, idx, narray) \ 165 1.1 mrg do { \ 166 1.1.1.2 mrg memmove (&(array)[idx], \ 167 1.1.1.2 mrg &(array)[idx+1], \ 168 1.1.1.2 mrg ((narray)-((idx)+1)) * sizeof (array[0])); \ 169 1.1 mrg (narray)--; \ 170 1.1 mrg } while (0) 171 1.1 mrg 172 1.1 mrg 173 1.1 mrg /* return n*2^p mod m */ 174 1.1 mrg int 175 1.1 mrg mul_2exp_mod (int n, int p, int m) 176 1.1 mrg { 177 1.1.1.3 mrg while (--p >= 0) 178 1.1 mrg n = (2 * n) % m; 179 1.1 mrg return n; 180 1.1 mrg } 181 1.1 mrg 182 1.1 mrg /* return -n mod m */ 183 1.1 mrg int 184 1.1 mrg neg_mod (int n, int m) 185 1.1 mrg { 186 1.1.1.2 mrg assert (n >= 0 && n < m); 187 1.1 mrg return (n == 0 ? 0 : m-n); 188 1.1 mrg } 189 1.1 mrg 190 1.1 mrg /* Set "mask" to a value such that "mask & (1<<idx)" is non-zero if 191 1.1 mrg "-(idx<<mod_bits)" can be a square modulo m. */ 192 1.1 mrg void 193 1.1 mrg square_mask (mpz_t mask, int m) 194 1.1 mrg { 195 1.1 mrg int p, i, r, idx; 196 1.1 mrg 197 1.1 mrg p = mul_2exp_mod (1, mod_bits, m); 198 1.1 mrg p = neg_mod (p, m); 199 1.1 mrg 200 1.1 mrg mpz_set_ui (mask, 0L); 201 1.1 mrg for (i = 0; i < m; i++) 202 1.1 mrg { 203 1.1 mrg r = (i * i) % m; 204 1.1 mrg idx = (r * p) % m; 205 1.1 mrg mpz_setbit (mask, (unsigned long) idx); 206 1.1 mrg } 207 1.1 mrg } 208 1.1 mrg 209 1.1 mrg void 210 1.1 mrg generate_sq_res_0x100 (int limb_bits) 211 1.1 mrg { 212 1.1 mrg int i, res; 213 1.1 mrg 214 1.1 mrg nsq_res_0x100 = (0x100 + limb_bits - 1) / limb_bits; 215 1.1.1.3 mrg sq_res_0x100 = (mpz_t *) xmalloc (nsq_res_0x100 * sizeof (*sq_res_0x100)); 216 1.1 mrg 217 1.1 mrg for (i = 0; i < nsq_res_0x100; i++) 218 1.1 mrg mpz_init_set_ui (sq_res_0x100[i], 0L); 219 1.1 mrg 220 1.1 mrg for (i = 0; i < 0x100; i++) 221 1.1 mrg { 222 1.1 mrg res = (i * i) % 0x100; 223 1.1 mrg mpz_setbit (sq_res_0x100[res / limb_bits], 224 1.1 mrg (unsigned long) (res % limb_bits)); 225 1.1 mrg } 226 1.1 mrg 227 1.1 mrg sq_res_0x100_num = 0; 228 1.1 mrg for (i = 0; i < nsq_res_0x100; i++) 229 1.1 mrg sq_res_0x100_num += mpz_popcount (sq_res_0x100[i]); 230 1.1 mrg sq_res_0x100_fraction = (double) sq_res_0x100_num / 256.0; 231 1.1 mrg } 232 1.1 mrg 233 1.1 mrg void 234 1.1 mrg generate_mod (int limb_bits, int nail_bits) 235 1.1 mrg { 236 1.1 mrg int numb_bits = limb_bits - nail_bits; 237 1.1 mrg int i, divisor; 238 1.1 mrg 239 1.1 mrg mpz_init_set_ui (pp, 0L); 240 1.1 mrg mpz_init_set_ui (pp_norm, 0L); 241 1.1 mrg mpz_init_set_ui (pp_inverted, 0L); 242 1.1 mrg 243 1.1 mrg /* no more than limb_bits many factors in a one limb modulus (and of 244 1.1 mrg course in reality nothing like that many) */ 245 1.1 mrg factor_alloc = limb_bits; 246 1.1.1.3 mrg factor = (struct factor_t *) xmalloc (factor_alloc * sizeof (*factor)); 247 1.1.1.3 mrg rawfactor = (struct rawfactor_t *) xmalloc (factor_alloc * sizeof (*rawfactor)); 248 1.1 mrg 249 1.1 mrg if (numb_bits % 4 != 0) 250 1.1 mrg { 251 1.1 mrg strcpy (mod34_excuse, "GMP_NUMB_BITS % 4 != 0"); 252 1.1 mrg goto use_pp; 253 1.1 mrg } 254 1.1 mrg 255 1.1 mrg max_divisor = 2*limb_bits; 256 1.1 mrg max_divisor_bits = log2_ceil (max_divisor); 257 1.1 mrg 258 1.1 mrg if (numb_bits / 4 < max_divisor_bits) 259 1.1 mrg { 260 1.1 mrg /* Wind back to one limb worth of max_divisor, if that will let us use 261 1.1 mrg mpn_mod_34lsub1. */ 262 1.1 mrg max_divisor = limb_bits; 263 1.1 mrg max_divisor_bits = log2_ceil (max_divisor); 264 1.1 mrg 265 1.1 mrg if (numb_bits / 4 < max_divisor_bits) 266 1.1 mrg { 267 1.1 mrg strcpy (mod34_excuse, "GMP_NUMB_BITS / 4 too small"); 268 1.1 mrg goto use_pp; 269 1.1 mrg } 270 1.1 mrg } 271 1.1 mrg 272 1.1 mrg { 273 1.1 mrg /* Can use mpn_mod_34lsub1, find small factors of 2^mod34_bits-1. */ 274 1.1 mrg mpz_t m, q, r; 275 1.1 mrg int multiplicity; 276 1.1 mrg 277 1.1 mrg mod34_bits = (numb_bits / 4) * 3; 278 1.1 mrg 279 1.1 mrg /* mpn_mod_34lsub1 returns a full limb value, PERFSQR_MOD_34 folds it at 280 1.1 mrg the mod34_bits mark, adding the two halves for a remainder of at most 281 1.1 mrg mod34_bits+1 many bits */ 282 1.1 mrg mod_bits = mod34_bits + 1; 283 1.1 mrg 284 1.1 mrg mpz_init_set_ui (m, 1L); 285 1.1 mrg mpz_mul_2exp (m, m, mod34_bits); 286 1.1 mrg mpz_sub_ui (m, m, 1L); 287 1.1 mrg 288 1.1 mrg mpz_init (q); 289 1.1 mrg mpz_init (r); 290 1.1 mrg 291 1.1.1.3 mrg for (i = 3; i <= max_divisor; i+=2) 292 1.1 mrg { 293 1.1 mrg if (! isprime (i)) 294 1.1 mrg continue; 295 1.1 mrg 296 1.1 mrg mpz_tdiv_qr_ui (q, r, m, (unsigned long) i); 297 1.1 mrg if (mpz_sgn (r) != 0) 298 1.1 mrg continue; 299 1.1 mrg 300 1.1 mrg /* if a repeated prime is found it's used as an i^n in one factor */ 301 1.1 mrg divisor = 1; 302 1.1 mrg multiplicity = 0; 303 1.1 mrg do 304 1.1 mrg { 305 1.1 mrg if (divisor > max_divisor / i) 306 1.1 mrg break; 307 1.1 mrg multiplicity++; 308 1.1 mrg mpz_set (m, q); 309 1.1 mrg mpz_tdiv_qr_ui (q, r, m, (unsigned long) i); 310 1.1 mrg } 311 1.1 mrg while (mpz_sgn (r) == 0); 312 1.1 mrg 313 1.1.1.2 mrg assert (nrawfactor < factor_alloc); 314 1.1 mrg rawfactor[nrawfactor].divisor = i; 315 1.1 mrg rawfactor[nrawfactor].multiplicity = multiplicity; 316 1.1 mrg nrawfactor++; 317 1.1 mrg } 318 1.1 mrg 319 1.1 mrg mpz_clear (m); 320 1.1 mrg mpz_clear (q); 321 1.1 mrg mpz_clear (r); 322 1.1 mrg } 323 1.1 mrg 324 1.1 mrg if (nrawfactor <= 2) 325 1.1 mrg { 326 1.1 mrg mpz_t new_pp; 327 1.1 mrg 328 1.1 mrg sprintf (mod34_excuse, "only %d small factor%s", 329 1.1 mrg nrawfactor, nrawfactor == 1 ? "" : "s"); 330 1.1 mrg 331 1.1 mrg use_pp: 332 1.1 mrg /* reset to two limbs of max_divisor, in case the mpn_mod_34lsub1 code 333 1.1 mrg tried with just one */ 334 1.1 mrg max_divisor = 2*limb_bits; 335 1.1 mrg max_divisor_bits = log2_ceil (max_divisor); 336 1.1 mrg 337 1.1 mrg mpz_init (new_pp); 338 1.1 mrg nrawfactor = 0; 339 1.1 mrg mod_bits = MIN (numb_bits, limb_bits - max_divisor_bits); 340 1.1 mrg 341 1.1 mrg /* one copy of each small prime */ 342 1.1 mrg mpz_set_ui (pp, 1L); 343 1.1.1.3 mrg for (i = 3; i <= max_divisor; i+=2) 344 1.1 mrg { 345 1.1 mrg if (! isprime (i)) 346 1.1 mrg continue; 347 1.1 mrg 348 1.1 mrg mpz_mul_ui (new_pp, pp, (unsigned long) i); 349 1.1 mrg if (mpz_sizeinbase (new_pp, 2) > mod_bits) 350 1.1 mrg break; 351 1.1 mrg mpz_set (pp, new_pp); 352 1.1 mrg 353 1.1.1.2 mrg assert (nrawfactor < factor_alloc); 354 1.1 mrg rawfactor[nrawfactor].divisor = i; 355 1.1 mrg rawfactor[nrawfactor].multiplicity = 1; 356 1.1 mrg nrawfactor++; 357 1.1 mrg } 358 1.1 mrg 359 1.1 mrg /* Plus an extra copy of one or more of the primes selected, if that 360 1.1 mrg still fits in max_divisor and the total in mod_bits. Usually only 361 1.1 mrg 3 or 5 will be candidates */ 362 1.1 mrg for (i = nrawfactor-1; i >= 0; i--) 363 1.1 mrg { 364 1.1 mrg if (rawfactor[i].divisor > max_divisor / rawfactor[i].divisor) 365 1.1 mrg continue; 366 1.1 mrg mpz_mul_ui (new_pp, pp, (unsigned long) rawfactor[i].divisor); 367 1.1 mrg if (mpz_sizeinbase (new_pp, 2) > mod_bits) 368 1.1 mrg continue; 369 1.1 mrg mpz_set (pp, new_pp); 370 1.1 mrg 371 1.1 mrg rawfactor[i].multiplicity++; 372 1.1 mrg } 373 1.1 mrg 374 1.1 mrg mod_bits = mpz_sizeinbase (pp, 2); 375 1.1 mrg 376 1.1 mrg mpz_set (pp_norm, pp); 377 1.1 mrg while (mpz_sizeinbase (pp_norm, 2) < numb_bits) 378 1.1 mrg mpz_add (pp_norm, pp_norm, pp_norm); 379 1.1 mrg 380 1.1 mrg mpz_preinv_invert (pp_inverted, pp_norm, numb_bits); 381 1.1 mrg 382 1.1 mrg mpz_clear (new_pp); 383 1.1 mrg } 384 1.1 mrg 385 1.1 mrg /* start the factor array */ 386 1.1 mrg for (i = 0; i < nrawfactor; i++) 387 1.1 mrg { 388 1.1 mrg int j; 389 1.1.1.2 mrg assert (nfactor < factor_alloc); 390 1.1 mrg factor[nfactor].divisor = 1; 391 1.1 mrg for (j = 0; j < rawfactor[i].multiplicity; j++) 392 1.1 mrg factor[nfactor].divisor *= rawfactor[i].divisor; 393 1.1 mrg nfactor++; 394 1.1 mrg } 395 1.1 mrg 396 1.1 mrg combine: 397 1.1 mrg /* Combine entries in the factor array. Combine the smallest entry with 398 1.1 mrg the biggest one that will fit with it (ie. under max_divisor), then 399 1.1 mrg repeat that with the new smallest entry. */ 400 1.1 mrg qsort (factor, nfactor, sizeof (factor[0]), f_cmp_divisor); 401 1.1 mrg for (i = nfactor-1; i >= 1; i--) 402 1.1 mrg { 403 1.1 mrg if (factor[i].divisor <= max_divisor / factor[0].divisor) 404 1.1 mrg { 405 1.1 mrg factor[0].divisor *= factor[i].divisor; 406 1.1 mrg COLLAPSE_ELEMENT (factor, i, nfactor); 407 1.1 mrg goto combine; 408 1.1 mrg } 409 1.1 mrg } 410 1.1 mrg 411 1.1 mrg total_fraction = 1.0; 412 1.1 mrg for (i = 0; i < nfactor; i++) 413 1.1 mrg { 414 1.1 mrg mpz_init (factor[i].inverse); 415 1.1 mrg mpz_invert_ui_2exp (factor[i].inverse, 416 1.1 mrg (unsigned long) factor[i].divisor, 417 1.1 mrg (unsigned long) mod_bits); 418 1.1 mrg 419 1.1 mrg mpz_init (factor[i].mask); 420 1.1 mrg square_mask (factor[i].mask, factor[i].divisor); 421 1.1 mrg 422 1.1 mrg /* fraction of possible squares */ 423 1.1 mrg factor[i].fraction = (double) mpz_popcount (factor[i].mask) 424 1.1 mrg / factor[i].divisor; 425 1.1 mrg 426 1.1 mrg /* total fraction of possible squares */ 427 1.1 mrg total_fraction *= factor[i].fraction; 428 1.1 mrg } 429 1.1 mrg 430 1.1 mrg /* best tests first (ie. smallest fraction) */ 431 1.1 mrg qsort (factor, nfactor, sizeof (factor[0]), f_cmp_fraction); 432 1.1 mrg } 433 1.1 mrg 434 1.1 mrg void 435 1.1 mrg print (int limb_bits, int nail_bits) 436 1.1 mrg { 437 1.1 mrg int i; 438 1.1 mrg mpz_t mhi, mlo; 439 1.1 mrg 440 1.1 mrg printf ("/* This file generated by gen-psqr.c - DO NOT EDIT. */\n"); 441 1.1 mrg printf ("\n"); 442 1.1 mrg 443 1.1 mrg printf ("#if GMP_LIMB_BITS != %d || GMP_NAIL_BITS != %d\n", 444 1.1 mrg limb_bits, nail_bits); 445 1.1 mrg printf ("Error, error, this data is for %d bit limb and %d bit nail\n", 446 1.1 mrg limb_bits, nail_bits); 447 1.1 mrg printf ("#endif\n"); 448 1.1 mrg printf ("\n"); 449 1.1 mrg 450 1.1 mrg printf ("/* Non-zero bit indicates a quadratic residue mod 0x100.\n"); 451 1.1 mrg printf (" This test identifies %.2f%% as non-squares (%d/256). */\n", 452 1.1 mrg (1.0 - sq_res_0x100_fraction) * 100.0, 453 1.1 mrg 0x100 - sq_res_0x100_num); 454 1.1 mrg printf ("static const mp_limb_t\n"); 455 1.1 mrg printf ("sq_res_0x100[%d] = {\n", nsq_res_0x100); 456 1.1 mrg for (i = 0; i < nsq_res_0x100; i++) 457 1.1 mrg { 458 1.1 mrg printf (" CNST_LIMB(0x"); 459 1.1 mrg mpz_out_str (stdout, 16, sq_res_0x100[i]); 460 1.1 mrg printf ("),\n"); 461 1.1 mrg } 462 1.1 mrg printf ("};\n"); 463 1.1 mrg printf ("\n"); 464 1.1 mrg 465 1.1 mrg if (mpz_sgn (pp) != 0) 466 1.1 mrg { 467 1.1 mrg printf ("/* mpn_mod_34lsub1 not used due to %s */\n", mod34_excuse); 468 1.1 mrg printf ("/* PERFSQR_PP = "); 469 1.1 mrg } 470 1.1 mrg else 471 1.1 mrg printf ("/* 2^%d-1 = ", mod34_bits); 472 1.1 mrg for (i = 0; i < nrawfactor; i++) 473 1.1 mrg { 474 1.1 mrg if (i != 0) 475 1.1 mrg printf (" * "); 476 1.1 mrg printf ("%d", rawfactor[i].divisor); 477 1.1 mrg if (rawfactor[i].multiplicity != 1) 478 1.1 mrg printf ("^%d", rawfactor[i].multiplicity); 479 1.1 mrg } 480 1.1 mrg printf (" %s*/\n", mpz_sgn (pp) == 0 ? "... " : ""); 481 1.1 mrg 482 1.1 mrg printf ("#define PERFSQR_MOD_BITS %d\n", mod_bits); 483 1.1 mrg if (mpz_sgn (pp) != 0) 484 1.1 mrg { 485 1.1 mrg printf ("#define PERFSQR_PP CNST_LIMB(0x"); 486 1.1 mrg mpz_out_str (stdout, 16, pp); 487 1.1 mrg printf (")\n"); 488 1.1 mrg printf ("#define PERFSQR_PP_NORM CNST_LIMB(0x"); 489 1.1 mrg mpz_out_str (stdout, 16, pp_norm); 490 1.1 mrg printf (")\n"); 491 1.1 mrg printf ("#define PERFSQR_PP_INVERTED CNST_LIMB(0x"); 492 1.1 mrg mpz_out_str (stdout, 16, pp_inverted); 493 1.1 mrg printf (")\n"); 494 1.1 mrg } 495 1.1 mrg printf ("\n"); 496 1.1 mrg 497 1.1 mrg mpz_init (mhi); 498 1.1 mrg mpz_init (mlo); 499 1.1 mrg 500 1.1 mrg printf ("/* This test identifies %.2f%% as non-squares. */\n", 501 1.1 mrg (1.0 - total_fraction) * 100.0); 502 1.1 mrg printf ("#define PERFSQR_MOD_TEST(up, usize) \\\n"); 503 1.1 mrg printf (" do { \\\n"); 504 1.1 mrg printf (" mp_limb_t r; \\\n"); 505 1.1 mrg if (mpz_sgn (pp) != 0) 506 1.1 mrg printf (" PERFSQR_MOD_PP (r, up, usize); \\\n"); 507 1.1 mrg else 508 1.1 mrg printf (" PERFSQR_MOD_34 (r, up, usize); \\\n"); 509 1.1 mrg 510 1.1 mrg for (i = 0; i < nfactor; i++) 511 1.1 mrg { 512 1.1 mrg printf (" \\\n"); 513 1.1 mrg printf (" /* %5.2f%% */ \\\n", 514 1.1 mrg (1.0 - factor[i].fraction) * 100.0); 515 1.1 mrg 516 1.1 mrg printf (" PERFSQR_MOD_%d (r, CNST_LIMB(%2d), CNST_LIMB(0x", 517 1.1 mrg factor[i].divisor <= limb_bits ? 1 : 2, 518 1.1 mrg factor[i].divisor); 519 1.1 mrg mpz_out_str (stdout, 16, factor[i].inverse); 520 1.1 mrg printf ("), \\\n"); 521 1.1 mrg printf (" CNST_LIMB(0x"); 522 1.1 mrg 523 1.1 mrg if ( factor[i].divisor <= limb_bits) 524 1.1 mrg { 525 1.1 mrg mpz_out_str (stdout, 16, factor[i].mask); 526 1.1 mrg } 527 1.1 mrg else 528 1.1 mrg { 529 1.1 mrg mpz_tdiv_r_2exp (mlo, factor[i].mask, (unsigned long) limb_bits); 530 1.1 mrg mpz_tdiv_q_2exp (mhi, factor[i].mask, (unsigned long) limb_bits); 531 1.1 mrg mpz_out_str (stdout, 16, mhi); 532 1.1 mrg printf ("), CNST_LIMB(0x"); 533 1.1 mrg mpz_out_str (stdout, 16, mlo); 534 1.1 mrg } 535 1.1 mrg printf (")); \\\n"); 536 1.1 mrg } 537 1.1 mrg 538 1.1 mrg printf (" } while (0)\n"); 539 1.1 mrg printf ("\n"); 540 1.1 mrg 541 1.1 mrg printf ("/* Grand total sq_res_0x100 and PERFSQR_MOD_TEST, %.2f%% non-squares. */\n", 542 1.1 mrg (1.0 - (total_fraction * 44.0/256.0)) * 100.0); 543 1.1 mrg printf ("\n"); 544 1.1 mrg 545 1.1 mrg printf ("/* helper for tests/mpz/t-perfsqr.c */\n"); 546 1.1 mrg printf ("#define PERFSQR_DIVISORS { 256,"); 547 1.1 mrg for (i = 0; i < nfactor; i++) 548 1.1 mrg printf (" %d,", factor[i].divisor); 549 1.1 mrg printf (" }\n"); 550 1.1 mrg 551 1.1 mrg 552 1.1 mrg mpz_clear (mhi); 553 1.1 mrg mpz_clear (mlo); 554 1.1 mrg } 555 1.1 mrg 556 1.1 mrg int 557 1.1 mrg main (int argc, char *argv[]) 558 1.1 mrg { 559 1.1 mrg int limb_bits, nail_bits; 560 1.1 mrg 561 1.1 mrg if (argc != 3) 562 1.1 mrg { 563 1.1 mrg fprintf (stderr, "Usage: gen-psqr <limbbits> <nailbits>\n"); 564 1.1 mrg exit (1); 565 1.1 mrg } 566 1.1 mrg 567 1.1 mrg limb_bits = atoi (argv[1]); 568 1.1 mrg nail_bits = atoi (argv[2]); 569 1.1 mrg 570 1.1 mrg if (limb_bits <= 0 571 1.1 mrg || nail_bits < 0 572 1.1 mrg || nail_bits >= limb_bits) 573 1.1 mrg { 574 1.1 mrg fprintf (stderr, "Invalid limb/nail bits: %d %d\n", 575 1.1 mrg limb_bits, nail_bits); 576 1.1 mrg exit (1); 577 1.1 mrg } 578 1.1 mrg 579 1.1 mrg generate_sq_res_0x100 (limb_bits); 580 1.1 mrg generate_mod (limb_bits, nail_bits); 581 1.1 mrg 582 1.1 mrg print (limb_bits, nail_bits); 583 1.1 mrg 584 1.1 mrg return 0; 585 1.1 mrg } 586