Home | History | Annotate | Line # | Download | only in dist
      1      1.1  mrg /* Generate perfect square testing data.
      2      1.1  mrg 
      3  1.1.1.3  mrg Copyright 2002-2004, 2012, 2014 Free Software Foundation, Inc.
      4      1.1  mrg 
      5      1.1  mrg This file is part of the GNU MP Library.
      6      1.1  mrg 
      7      1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
      8  1.1.1.3  mrg it under the terms of either:
      9  1.1.1.3  mrg 
     10  1.1.1.3  mrg   * the GNU Lesser General Public License as published by the Free
     11  1.1.1.3  mrg     Software Foundation; either version 3 of the License, or (at your
     12  1.1.1.3  mrg     option) any later version.
     13  1.1.1.3  mrg 
     14  1.1.1.3  mrg or
     15  1.1.1.3  mrg 
     16  1.1.1.3  mrg   * the GNU General Public License as published by the Free Software
     17  1.1.1.3  mrg     Foundation; either version 2 of the License, or (at your option) any
     18  1.1.1.3  mrg     later version.
     19  1.1.1.3  mrg 
     20  1.1.1.3  mrg or both in parallel, as here.
     21      1.1  mrg 
     22      1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
     23      1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     24  1.1.1.3  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     25  1.1.1.3  mrg for more details.
     26      1.1  mrg 
     27  1.1.1.3  mrg You should have received copies of the GNU General Public License and the
     28  1.1.1.3  mrg GNU Lesser General Public License along with the GNU MP Library.  If not,
     29  1.1.1.3  mrg see https://www.gnu.org/licenses/.  */
     30      1.1  mrg 
     31      1.1  mrg #include <stdio.h>
     32      1.1  mrg #include <stdlib.h>
     33      1.1  mrg 
     34  1.1.1.2  mrg #include "bootstrap.c"
     35      1.1  mrg 
     36      1.1  mrg 
     37      1.1  mrg /* The aim of this program is to choose either mpn_mod_34lsub1 or mpn_mod_1
     38      1.1  mrg    (plus a PERFSQR_PP modulus), and generate tables indicating quadratic
     39      1.1  mrg    residues and non-residues modulo small factors of that modulus.
     40      1.1  mrg 
     41      1.1  mrg    For the usual 32 or 64 bit cases mpn_mod_34lsub1 gets used.  That
     42      1.1  mrg    function exists specifically because 2^24-1 and 2^48-1 have nice sets of
     43      1.1  mrg    prime factors.  For other limb sizes it's considered, but if it doesn't
     44      1.1  mrg    have good factors then mpn_mod_1 will be used instead.
     45      1.1  mrg 
     46      1.1  mrg    When mpn_mod_1 is used, the modulus PERFSQR_PP is created from a
     47      1.1  mrg    selection of small primes, chosen to fill PERFSQR_MOD_BITS of a limb,
     48      1.1  mrg    with that bit count chosen so (2*GMP_LIMB_BITS)*2^PERFSQR_MOD_BITS <=
     49      1.1  mrg    GMP_LIMB_MAX, allowing PERFSQR_MOD_IDX in mpn/generic/perfsqr.c to do its
     50      1.1  mrg    calculation within a single limb.
     51      1.1  mrg 
     52      1.1  mrg    In either case primes can be combined to make divisors.  The table data
     53      1.1  mrg    then effectively indicates remainders which are quadratic residues mod
     54      1.1  mrg    all the primes.  This sort of combining reduces the number of steps
     55      1.1  mrg    needed after mpn_mod_34lsub1 or mpn_mod_1, saving code size and time.
     56      1.1  mrg    Nothing is gained or lost in terms of detections, the same total fraction
     57      1.1  mrg    of non-residues will be identified.
     58      1.1  mrg 
     59      1.1  mrg    Nothing particularly sophisticated is attempted for combining factors to
     60      1.1  mrg    make divisors.  This is probably a kind of knapsack problem so it'd be
     61      1.1  mrg    too hard to attempt anything completely general.  For the usual 32 and 64
     62      1.1  mrg    bit limbs we get a good enough result just pairing the biggest and
     63      1.1  mrg    smallest which fit together, repeatedly.
     64      1.1  mrg 
     65      1.1  mrg    Another aim is to get powerful combinations, ie. divisors which identify
     66      1.1  mrg    biggest fraction of non-residues, and have those run first.  Again for
     67      1.1  mrg    the usual 32 and 64 bits it seems good enough just to pair for big
     68      1.1  mrg    divisors then sort according to the resulting fraction of non-residues
     69      1.1  mrg    identified.
     70      1.1  mrg 
     71      1.1  mrg    Also in this program, a table sq_res_0x100 of residues modulo 256 is
     72      1.1  mrg    generated.  This simply fills bits into limbs of the appropriate
     73      1.1  mrg    build-time GMP_LIMB_BITS each.
     74      1.1  mrg 
     75      1.1  mrg */
     76      1.1  mrg 
     77      1.1  mrg 
     78      1.1  mrg /* Normally we aren't using const in gen*.c programs, so as not to have to
     79      1.1  mrg    bother figuring out if it works, but using it with f_cmp_divisor and
     80      1.1  mrg    f_cmp_fraction avoids warnings from the qsort calls. */
     81      1.1  mrg 
     82      1.1  mrg /* Same tests as gmp.h. */
     83      1.1  mrg #if  defined (__STDC__)                                 \
     84      1.1  mrg   || defined (__cplusplus)                              \
     85      1.1  mrg   || defined (_AIX)                                     \
     86      1.1  mrg   || defined (__DECC)                                   \
     87      1.1  mrg   || (defined (__mips) && defined (_SYSTYPE_SVR4))      \
     88      1.1  mrg   || defined (_MSC_VER)                                 \
     89      1.1  mrg   || defined (_WIN32)
     90      1.1  mrg #define HAVE_CONST        1
     91      1.1  mrg #endif
     92      1.1  mrg 
     93      1.1  mrg #if ! HAVE_CONST
     94      1.1  mrg #define const
     95      1.1  mrg #endif
     96      1.1  mrg 
     97      1.1  mrg 
     98      1.1  mrg mpz_t  *sq_res_0x100;          /* table of limbs */
     99      1.1  mrg int    nsq_res_0x100;          /* elements in sq_res_0x100 array */
    100      1.1  mrg int    sq_res_0x100_num;       /* squares in sq_res_0x100 */
    101      1.1  mrg double sq_res_0x100_fraction;  /* sq_res_0x100_num / 256 */
    102      1.1  mrg 
    103      1.1  mrg int     mod34_bits;        /* 3*GMP_NUMB_BITS/4 */
    104      1.1  mrg int     mod_bits;          /* bits from PERFSQR_MOD_34 or MOD_PP */
    105      1.1  mrg int     max_divisor;       /* all divisors <= max_divisor */
    106      1.1  mrg int     max_divisor_bits;  /* ceil(log2(max_divisor)) */
    107      1.1  mrg double  total_fraction;    /* of squares */
    108      1.1  mrg mpz_t   pp;                /* product of primes, or 0 if mod_34lsub1 used */
    109      1.1  mrg mpz_t   pp_norm;           /* pp shifted so NUMB high bit set */
    110      1.1  mrg mpz_t   pp_inverted;       /* invert_limb style inverse */
    111      1.1  mrg mpz_t   mod_mask;          /* 2^mod_bits-1 */
    112      1.1  mrg char    mod34_excuse[128]; /* why mod_34lsub1 not used (if it's not) */
    113      1.1  mrg 
    114      1.1  mrg /* raw list of divisors of 2^mod34_bits-1 or pp, just to show in a comment */
    115      1.1  mrg struct rawfactor_t {
    116      1.1  mrg   int     divisor;
    117      1.1  mrg   int     multiplicity;
    118      1.1  mrg };
    119      1.1  mrg struct rawfactor_t  *rawfactor;
    120      1.1  mrg int                 nrawfactor;
    121      1.1  mrg 
    122      1.1  mrg /* factors of 2^mod34_bits-1 or pp and associated data, after combining etc */
    123      1.1  mrg struct factor_t {
    124      1.1  mrg   int     divisor;
    125      1.1  mrg   mpz_t   inverse;   /* 1/divisor mod 2^mod_bits */
    126      1.1  mrg   mpz_t   mask;      /* indicating squares mod divisor */
    127      1.1  mrg   double  fraction;  /* squares/total */
    128      1.1  mrg };
    129      1.1  mrg struct factor_t  *factor;
    130      1.1  mrg int              nfactor;       /* entries in use in factor array */
    131      1.1  mrg int              factor_alloc;  /* entries allocated to factor array */
    132      1.1  mrg 
    133      1.1  mrg 
    134      1.1  mrg int
    135      1.1  mrg f_cmp_divisor (const void *parg, const void *qarg)
    136      1.1  mrg {
    137      1.1  mrg   const struct factor_t *p, *q;
    138  1.1.1.3  mrg   p = (const struct factor_t *) parg;
    139  1.1.1.3  mrg   q = (const struct factor_t *) qarg;
    140      1.1  mrg   if (p->divisor > q->divisor)
    141      1.1  mrg     return 1;
    142      1.1  mrg   else if (p->divisor < q->divisor)
    143      1.1  mrg     return -1;
    144      1.1  mrg   else
    145      1.1  mrg     return 0;
    146      1.1  mrg }
    147      1.1  mrg 
    148      1.1  mrg int
    149      1.1  mrg f_cmp_fraction (const void *parg, const void *qarg)
    150      1.1  mrg {
    151      1.1  mrg   const struct factor_t *p, *q;
    152  1.1.1.3  mrg   p = (const struct factor_t *) parg;
    153  1.1.1.3  mrg   q = (const struct factor_t *) qarg;
    154      1.1  mrg   if (p->fraction > q->fraction)
    155      1.1  mrg     return 1;
    156      1.1  mrg   else if (p->fraction < q->fraction)
    157      1.1  mrg     return -1;
    158      1.1  mrg   else
    159      1.1  mrg     return 0;
    160      1.1  mrg }
    161      1.1  mrg 
    162      1.1  mrg /* Remove array[idx] by copying the remainder down, and adjust narray
    163      1.1  mrg    accordingly.  */
    164      1.1  mrg #define COLLAPSE_ELEMENT(array, idx, narray)                    \
    165      1.1  mrg   do {                                                          \
    166  1.1.1.2  mrg     memmove (&(array)[idx],					\
    167  1.1.1.2  mrg 	     &(array)[idx+1],					\
    168  1.1.1.2  mrg 	     ((narray)-((idx)+1)) * sizeof (array[0]));		\
    169      1.1  mrg     (narray)--;                                                 \
    170      1.1  mrg   } while (0)
    171      1.1  mrg 
    172      1.1  mrg 
    173      1.1  mrg /* return n*2^p mod m */
    174      1.1  mrg int
    175      1.1  mrg mul_2exp_mod (int n, int p, int m)
    176      1.1  mrg {
    177  1.1.1.3  mrg   while (--p >= 0)
    178      1.1  mrg     n = (2 * n) % m;
    179      1.1  mrg   return n;
    180      1.1  mrg }
    181      1.1  mrg 
    182      1.1  mrg /* return -n mod m */
    183      1.1  mrg int
    184      1.1  mrg neg_mod (int n, int m)
    185      1.1  mrg {
    186  1.1.1.2  mrg   assert (n >= 0 && n < m);
    187      1.1  mrg   return (n == 0 ? 0 : m-n);
    188      1.1  mrg }
    189      1.1  mrg 
    190      1.1  mrg /* Set "mask" to a value such that "mask & (1<<idx)" is non-zero if
    191      1.1  mrg    "-(idx<<mod_bits)" can be a square modulo m.  */
    192      1.1  mrg void
    193      1.1  mrg square_mask (mpz_t mask, int m)
    194      1.1  mrg {
    195      1.1  mrg   int    p, i, r, idx;
    196      1.1  mrg 
    197      1.1  mrg   p = mul_2exp_mod (1, mod_bits, m);
    198      1.1  mrg   p = neg_mod (p, m);
    199      1.1  mrg 
    200      1.1  mrg   mpz_set_ui (mask, 0L);
    201      1.1  mrg   for (i = 0; i < m; i++)
    202      1.1  mrg     {
    203      1.1  mrg       r = (i * i) % m;
    204      1.1  mrg       idx = (r * p) % m;
    205      1.1  mrg       mpz_setbit (mask, (unsigned long) idx);
    206      1.1  mrg     }
    207      1.1  mrg }
    208      1.1  mrg 
    209      1.1  mrg void
    210      1.1  mrg generate_sq_res_0x100 (int limb_bits)
    211      1.1  mrg {
    212      1.1  mrg   int  i, res;
    213      1.1  mrg 
    214      1.1  mrg   nsq_res_0x100 = (0x100 + limb_bits - 1) / limb_bits;
    215  1.1.1.3  mrg   sq_res_0x100 = (mpz_t *) xmalloc (nsq_res_0x100 * sizeof (*sq_res_0x100));
    216      1.1  mrg 
    217      1.1  mrg   for (i = 0; i < nsq_res_0x100; i++)
    218      1.1  mrg     mpz_init_set_ui (sq_res_0x100[i], 0L);
    219      1.1  mrg 
    220      1.1  mrg   for (i = 0; i < 0x100; i++)
    221      1.1  mrg     {
    222      1.1  mrg       res = (i * i) % 0x100;
    223      1.1  mrg       mpz_setbit (sq_res_0x100[res / limb_bits],
    224      1.1  mrg                   (unsigned long) (res % limb_bits));
    225      1.1  mrg     }
    226      1.1  mrg 
    227      1.1  mrg   sq_res_0x100_num = 0;
    228      1.1  mrg   for (i = 0; i < nsq_res_0x100; i++)
    229      1.1  mrg     sq_res_0x100_num += mpz_popcount (sq_res_0x100[i]);
    230      1.1  mrg   sq_res_0x100_fraction = (double) sq_res_0x100_num / 256.0;
    231      1.1  mrg }
    232      1.1  mrg 
    233      1.1  mrg void
    234      1.1  mrg generate_mod (int limb_bits, int nail_bits)
    235      1.1  mrg {
    236      1.1  mrg   int    numb_bits = limb_bits - nail_bits;
    237      1.1  mrg   int    i, divisor;
    238      1.1  mrg 
    239      1.1  mrg   mpz_init_set_ui (pp, 0L);
    240      1.1  mrg   mpz_init_set_ui (pp_norm, 0L);
    241      1.1  mrg   mpz_init_set_ui (pp_inverted, 0L);
    242      1.1  mrg 
    243      1.1  mrg   /* no more than limb_bits many factors in a one limb modulus (and of
    244      1.1  mrg      course in reality nothing like that many) */
    245      1.1  mrg   factor_alloc = limb_bits;
    246  1.1.1.3  mrg   factor = (struct factor_t *) xmalloc (factor_alloc * sizeof (*factor));
    247  1.1.1.3  mrg   rawfactor = (struct rawfactor_t *) xmalloc (factor_alloc * sizeof (*rawfactor));
    248      1.1  mrg 
    249      1.1  mrg   if (numb_bits % 4 != 0)
    250      1.1  mrg     {
    251      1.1  mrg       strcpy (mod34_excuse, "GMP_NUMB_BITS % 4 != 0");
    252      1.1  mrg       goto use_pp;
    253      1.1  mrg     }
    254      1.1  mrg 
    255      1.1  mrg   max_divisor = 2*limb_bits;
    256      1.1  mrg   max_divisor_bits = log2_ceil (max_divisor);
    257      1.1  mrg 
    258      1.1  mrg   if (numb_bits / 4 < max_divisor_bits)
    259      1.1  mrg     {
    260      1.1  mrg       /* Wind back to one limb worth of max_divisor, if that will let us use
    261      1.1  mrg          mpn_mod_34lsub1.  */
    262      1.1  mrg       max_divisor = limb_bits;
    263      1.1  mrg       max_divisor_bits = log2_ceil (max_divisor);
    264      1.1  mrg 
    265      1.1  mrg       if (numb_bits / 4 < max_divisor_bits)
    266      1.1  mrg         {
    267      1.1  mrg           strcpy (mod34_excuse, "GMP_NUMB_BITS / 4 too small");
    268      1.1  mrg           goto use_pp;
    269      1.1  mrg         }
    270      1.1  mrg     }
    271      1.1  mrg 
    272      1.1  mrg   {
    273      1.1  mrg     /* Can use mpn_mod_34lsub1, find small factors of 2^mod34_bits-1. */
    274      1.1  mrg     mpz_t  m, q, r;
    275      1.1  mrg     int    multiplicity;
    276      1.1  mrg 
    277      1.1  mrg     mod34_bits = (numb_bits / 4) * 3;
    278      1.1  mrg 
    279      1.1  mrg     /* mpn_mod_34lsub1 returns a full limb value, PERFSQR_MOD_34 folds it at
    280      1.1  mrg        the mod34_bits mark, adding the two halves for a remainder of at most
    281      1.1  mrg        mod34_bits+1 many bits */
    282      1.1  mrg     mod_bits = mod34_bits + 1;
    283      1.1  mrg 
    284      1.1  mrg     mpz_init_set_ui (m, 1L);
    285      1.1  mrg     mpz_mul_2exp (m, m, mod34_bits);
    286      1.1  mrg     mpz_sub_ui (m, m, 1L);
    287      1.1  mrg 
    288      1.1  mrg     mpz_init (q);
    289      1.1  mrg     mpz_init (r);
    290      1.1  mrg 
    291  1.1.1.3  mrg     for (i = 3; i <= max_divisor; i+=2)
    292      1.1  mrg       {
    293      1.1  mrg         if (! isprime (i))
    294      1.1  mrg           continue;
    295      1.1  mrg 
    296      1.1  mrg         mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
    297      1.1  mrg         if (mpz_sgn (r) != 0)
    298      1.1  mrg           continue;
    299      1.1  mrg 
    300      1.1  mrg         /* if a repeated prime is found it's used as an i^n in one factor */
    301      1.1  mrg         divisor = 1;
    302      1.1  mrg         multiplicity = 0;
    303      1.1  mrg         do
    304      1.1  mrg           {
    305      1.1  mrg             if (divisor > max_divisor / i)
    306      1.1  mrg               break;
    307      1.1  mrg             multiplicity++;
    308      1.1  mrg             mpz_set (m, q);
    309      1.1  mrg             mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
    310      1.1  mrg           }
    311      1.1  mrg         while (mpz_sgn (r) == 0);
    312      1.1  mrg 
    313  1.1.1.2  mrg         assert (nrawfactor < factor_alloc);
    314      1.1  mrg         rawfactor[nrawfactor].divisor = i;
    315      1.1  mrg         rawfactor[nrawfactor].multiplicity = multiplicity;
    316      1.1  mrg         nrawfactor++;
    317      1.1  mrg       }
    318      1.1  mrg 
    319      1.1  mrg     mpz_clear (m);
    320      1.1  mrg     mpz_clear (q);
    321      1.1  mrg     mpz_clear (r);
    322      1.1  mrg   }
    323      1.1  mrg 
    324      1.1  mrg   if (nrawfactor <= 2)
    325      1.1  mrg     {
    326      1.1  mrg       mpz_t  new_pp;
    327      1.1  mrg 
    328      1.1  mrg       sprintf (mod34_excuse, "only %d small factor%s",
    329      1.1  mrg                nrawfactor, nrawfactor == 1 ? "" : "s");
    330      1.1  mrg 
    331      1.1  mrg     use_pp:
    332      1.1  mrg       /* reset to two limbs of max_divisor, in case the mpn_mod_34lsub1 code
    333      1.1  mrg          tried with just one */
    334      1.1  mrg       max_divisor = 2*limb_bits;
    335      1.1  mrg       max_divisor_bits = log2_ceil (max_divisor);
    336      1.1  mrg 
    337      1.1  mrg       mpz_init (new_pp);
    338      1.1  mrg       nrawfactor = 0;
    339      1.1  mrg       mod_bits = MIN (numb_bits, limb_bits - max_divisor_bits);
    340      1.1  mrg 
    341      1.1  mrg       /* one copy of each small prime */
    342      1.1  mrg       mpz_set_ui (pp, 1L);
    343  1.1.1.3  mrg       for (i = 3; i <= max_divisor; i+=2)
    344      1.1  mrg         {
    345      1.1  mrg           if (! isprime (i))
    346      1.1  mrg             continue;
    347      1.1  mrg 
    348      1.1  mrg           mpz_mul_ui (new_pp, pp, (unsigned long) i);
    349      1.1  mrg           if (mpz_sizeinbase (new_pp, 2) > mod_bits)
    350      1.1  mrg             break;
    351      1.1  mrg           mpz_set (pp, new_pp);
    352      1.1  mrg 
    353  1.1.1.2  mrg           assert (nrawfactor < factor_alloc);
    354      1.1  mrg           rawfactor[nrawfactor].divisor = i;
    355      1.1  mrg           rawfactor[nrawfactor].multiplicity = 1;
    356      1.1  mrg           nrawfactor++;
    357      1.1  mrg         }
    358      1.1  mrg 
    359      1.1  mrg       /* Plus an extra copy of one or more of the primes selected, if that
    360      1.1  mrg          still fits in max_divisor and the total in mod_bits.  Usually only
    361      1.1  mrg          3 or 5 will be candidates */
    362      1.1  mrg       for (i = nrawfactor-1; i >= 0; i--)
    363      1.1  mrg         {
    364      1.1  mrg           if (rawfactor[i].divisor > max_divisor / rawfactor[i].divisor)
    365      1.1  mrg             continue;
    366      1.1  mrg           mpz_mul_ui (new_pp, pp, (unsigned long) rawfactor[i].divisor);
    367      1.1  mrg           if (mpz_sizeinbase (new_pp, 2) > mod_bits)
    368      1.1  mrg             continue;
    369      1.1  mrg           mpz_set (pp, new_pp);
    370      1.1  mrg 
    371      1.1  mrg           rawfactor[i].multiplicity++;
    372      1.1  mrg         }
    373      1.1  mrg 
    374      1.1  mrg       mod_bits = mpz_sizeinbase (pp, 2);
    375      1.1  mrg 
    376      1.1  mrg       mpz_set (pp_norm, pp);
    377      1.1  mrg       while (mpz_sizeinbase (pp_norm, 2) < numb_bits)
    378      1.1  mrg         mpz_add (pp_norm, pp_norm, pp_norm);
    379      1.1  mrg 
    380      1.1  mrg       mpz_preinv_invert (pp_inverted, pp_norm, numb_bits);
    381      1.1  mrg 
    382      1.1  mrg       mpz_clear (new_pp);
    383      1.1  mrg     }
    384      1.1  mrg 
    385      1.1  mrg   /* start the factor array */
    386      1.1  mrg   for (i = 0; i < nrawfactor; i++)
    387      1.1  mrg     {
    388      1.1  mrg       int  j;
    389  1.1.1.2  mrg       assert (nfactor < factor_alloc);
    390      1.1  mrg       factor[nfactor].divisor = 1;
    391      1.1  mrg       for (j = 0; j < rawfactor[i].multiplicity; j++)
    392      1.1  mrg         factor[nfactor].divisor *= rawfactor[i].divisor;
    393      1.1  mrg       nfactor++;
    394      1.1  mrg     }
    395      1.1  mrg 
    396      1.1  mrg  combine:
    397      1.1  mrg   /* Combine entries in the factor array.  Combine the smallest entry with
    398      1.1  mrg      the biggest one that will fit with it (ie. under max_divisor), then
    399      1.1  mrg      repeat that with the new smallest entry. */
    400      1.1  mrg   qsort (factor, nfactor, sizeof (factor[0]), f_cmp_divisor);
    401      1.1  mrg   for (i = nfactor-1; i >= 1; i--)
    402      1.1  mrg     {
    403      1.1  mrg       if (factor[i].divisor <= max_divisor / factor[0].divisor)
    404      1.1  mrg         {
    405      1.1  mrg           factor[0].divisor *= factor[i].divisor;
    406      1.1  mrg           COLLAPSE_ELEMENT (factor, i, nfactor);
    407      1.1  mrg           goto combine;
    408      1.1  mrg         }
    409      1.1  mrg     }
    410      1.1  mrg 
    411      1.1  mrg   total_fraction = 1.0;
    412      1.1  mrg   for (i = 0; i < nfactor; i++)
    413      1.1  mrg     {
    414      1.1  mrg       mpz_init (factor[i].inverse);
    415      1.1  mrg       mpz_invert_ui_2exp (factor[i].inverse,
    416      1.1  mrg                           (unsigned long) factor[i].divisor,
    417      1.1  mrg                           (unsigned long) mod_bits);
    418      1.1  mrg 
    419      1.1  mrg       mpz_init (factor[i].mask);
    420      1.1  mrg       square_mask (factor[i].mask, factor[i].divisor);
    421      1.1  mrg 
    422      1.1  mrg       /* fraction of possible squares */
    423      1.1  mrg       factor[i].fraction = (double) mpz_popcount (factor[i].mask)
    424      1.1  mrg         / factor[i].divisor;
    425      1.1  mrg 
    426      1.1  mrg       /* total fraction of possible squares */
    427      1.1  mrg       total_fraction *= factor[i].fraction;
    428      1.1  mrg     }
    429      1.1  mrg 
    430      1.1  mrg   /* best tests first (ie. smallest fraction) */
    431      1.1  mrg   qsort (factor, nfactor, sizeof (factor[0]), f_cmp_fraction);
    432      1.1  mrg }
    433      1.1  mrg 
    434      1.1  mrg void
    435      1.1  mrg print (int limb_bits, int nail_bits)
    436      1.1  mrg {
    437      1.1  mrg   int    i;
    438      1.1  mrg   mpz_t  mhi, mlo;
    439      1.1  mrg 
    440      1.1  mrg   printf ("/* This file generated by gen-psqr.c - DO NOT EDIT. */\n");
    441      1.1  mrg   printf ("\n");
    442      1.1  mrg 
    443      1.1  mrg   printf ("#if GMP_LIMB_BITS != %d || GMP_NAIL_BITS != %d\n",
    444      1.1  mrg           limb_bits, nail_bits);
    445      1.1  mrg   printf ("Error, error, this data is for %d bit limb and %d bit nail\n",
    446      1.1  mrg           limb_bits, nail_bits);
    447      1.1  mrg   printf ("#endif\n");
    448      1.1  mrg   printf ("\n");
    449      1.1  mrg 
    450      1.1  mrg   printf ("/* Non-zero bit indicates a quadratic residue mod 0x100.\n");
    451      1.1  mrg   printf ("   This test identifies %.2f%% as non-squares (%d/256). */\n",
    452      1.1  mrg           (1.0 - sq_res_0x100_fraction) * 100.0,
    453      1.1  mrg           0x100 - sq_res_0x100_num);
    454      1.1  mrg   printf ("static const mp_limb_t\n");
    455      1.1  mrg   printf ("sq_res_0x100[%d] = {\n", nsq_res_0x100);
    456      1.1  mrg   for (i = 0; i < nsq_res_0x100; i++)
    457      1.1  mrg     {
    458      1.1  mrg       printf ("  CNST_LIMB(0x");
    459      1.1  mrg       mpz_out_str (stdout, 16, sq_res_0x100[i]);
    460      1.1  mrg       printf ("),\n");
    461      1.1  mrg     }
    462      1.1  mrg   printf ("};\n");
    463      1.1  mrg   printf ("\n");
    464      1.1  mrg 
    465      1.1  mrg   if (mpz_sgn (pp) != 0)
    466      1.1  mrg     {
    467      1.1  mrg       printf ("/* mpn_mod_34lsub1 not used due to %s */\n", mod34_excuse);
    468      1.1  mrg       printf ("/* PERFSQR_PP = ");
    469      1.1  mrg     }
    470      1.1  mrg   else
    471      1.1  mrg     printf ("/* 2^%d-1 = ", mod34_bits);
    472      1.1  mrg   for (i = 0; i < nrawfactor; i++)
    473      1.1  mrg     {
    474      1.1  mrg       if (i != 0)
    475      1.1  mrg         printf (" * ");
    476      1.1  mrg       printf ("%d", rawfactor[i].divisor);
    477      1.1  mrg       if (rawfactor[i].multiplicity != 1)
    478      1.1  mrg         printf ("^%d", rawfactor[i].multiplicity);
    479      1.1  mrg     }
    480      1.1  mrg   printf (" %s*/\n", mpz_sgn (pp) == 0 ? "... " : "");
    481      1.1  mrg 
    482      1.1  mrg   printf ("#define PERFSQR_MOD_BITS  %d\n", mod_bits);
    483      1.1  mrg   if (mpz_sgn (pp) != 0)
    484      1.1  mrg     {
    485      1.1  mrg       printf ("#define PERFSQR_PP            CNST_LIMB(0x");
    486      1.1  mrg       mpz_out_str (stdout, 16, pp);
    487      1.1  mrg       printf (")\n");
    488      1.1  mrg       printf ("#define PERFSQR_PP_NORM       CNST_LIMB(0x");
    489      1.1  mrg       mpz_out_str (stdout, 16, pp_norm);
    490      1.1  mrg       printf (")\n");
    491      1.1  mrg       printf ("#define PERFSQR_PP_INVERTED   CNST_LIMB(0x");
    492      1.1  mrg       mpz_out_str (stdout, 16, pp_inverted);
    493      1.1  mrg       printf (")\n");
    494      1.1  mrg     }
    495      1.1  mrg   printf ("\n");
    496      1.1  mrg 
    497      1.1  mrg   mpz_init (mhi);
    498      1.1  mrg   mpz_init (mlo);
    499      1.1  mrg 
    500      1.1  mrg   printf ("/* This test identifies %.2f%% as non-squares. */\n",
    501      1.1  mrg           (1.0 - total_fraction) * 100.0);
    502      1.1  mrg   printf ("#define PERFSQR_MOD_TEST(up, usize) \\\n");
    503      1.1  mrg   printf ("  do {                              \\\n");
    504      1.1  mrg   printf ("    mp_limb_t  r;                   \\\n");
    505      1.1  mrg   if (mpz_sgn (pp) != 0)
    506      1.1  mrg     printf ("    PERFSQR_MOD_PP (r, up, usize);  \\\n");
    507      1.1  mrg   else
    508      1.1  mrg     printf ("    PERFSQR_MOD_34 (r, up, usize);  \\\n");
    509      1.1  mrg 
    510      1.1  mrg   for (i = 0; i < nfactor; i++)
    511      1.1  mrg     {
    512      1.1  mrg       printf ("                                    \\\n");
    513      1.1  mrg       printf ("    /* %5.2f%% */                    \\\n",
    514      1.1  mrg               (1.0 - factor[i].fraction) * 100.0);
    515      1.1  mrg 
    516      1.1  mrg       printf ("    PERFSQR_MOD_%d (r, CNST_LIMB(%2d), CNST_LIMB(0x",
    517      1.1  mrg               factor[i].divisor <= limb_bits ? 1 : 2,
    518      1.1  mrg               factor[i].divisor);
    519      1.1  mrg       mpz_out_str (stdout, 16, factor[i].inverse);
    520      1.1  mrg       printf ("), \\\n");
    521      1.1  mrg       printf ("                   CNST_LIMB(0x");
    522      1.1  mrg 
    523      1.1  mrg       if ( factor[i].divisor <= limb_bits)
    524      1.1  mrg         {
    525      1.1  mrg           mpz_out_str (stdout, 16, factor[i].mask);
    526      1.1  mrg         }
    527      1.1  mrg       else
    528      1.1  mrg         {
    529      1.1  mrg           mpz_tdiv_r_2exp (mlo, factor[i].mask, (unsigned long) limb_bits);
    530      1.1  mrg           mpz_tdiv_q_2exp (mhi, factor[i].mask, (unsigned long) limb_bits);
    531      1.1  mrg           mpz_out_str (stdout, 16, mhi);
    532      1.1  mrg           printf ("), CNST_LIMB(0x");
    533      1.1  mrg           mpz_out_str (stdout, 16, mlo);
    534      1.1  mrg         }
    535      1.1  mrg       printf (")); \\\n");
    536      1.1  mrg     }
    537      1.1  mrg 
    538      1.1  mrg   printf ("  } while (0)\n");
    539      1.1  mrg   printf ("\n");
    540      1.1  mrg 
    541      1.1  mrg   printf ("/* Grand total sq_res_0x100 and PERFSQR_MOD_TEST, %.2f%% non-squares. */\n",
    542      1.1  mrg           (1.0 - (total_fraction * 44.0/256.0)) * 100.0);
    543      1.1  mrg   printf ("\n");
    544      1.1  mrg 
    545      1.1  mrg   printf ("/* helper for tests/mpz/t-perfsqr.c */\n");
    546      1.1  mrg   printf ("#define PERFSQR_DIVISORS  { 256,");
    547      1.1  mrg   for (i = 0; i < nfactor; i++)
    548      1.1  mrg       printf (" %d,", factor[i].divisor);
    549      1.1  mrg   printf (" }\n");
    550      1.1  mrg 
    551      1.1  mrg 
    552      1.1  mrg   mpz_clear (mhi);
    553      1.1  mrg   mpz_clear (mlo);
    554      1.1  mrg }
    555      1.1  mrg 
    556      1.1  mrg int
    557      1.1  mrg main (int argc, char *argv[])
    558      1.1  mrg {
    559      1.1  mrg   int  limb_bits, nail_bits;
    560      1.1  mrg 
    561      1.1  mrg   if (argc != 3)
    562      1.1  mrg     {
    563      1.1  mrg       fprintf (stderr, "Usage: gen-psqr <limbbits> <nailbits>\n");
    564      1.1  mrg       exit (1);
    565      1.1  mrg     }
    566      1.1  mrg 
    567      1.1  mrg   limb_bits = atoi (argv[1]);
    568      1.1  mrg   nail_bits = atoi (argv[2]);
    569      1.1  mrg 
    570      1.1  mrg   if (limb_bits <= 0
    571      1.1  mrg       || nail_bits < 0
    572      1.1  mrg       || nail_bits >= limb_bits)
    573      1.1  mrg     {
    574      1.1  mrg       fprintf (stderr, "Invalid limb/nail bits: %d %d\n",
    575      1.1  mrg                limb_bits, nail_bits);
    576      1.1  mrg       exit (1);
    577      1.1  mrg     }
    578      1.1  mrg 
    579      1.1  mrg   generate_sq_res_0x100 (limb_bits);
    580      1.1  mrg   generate_mod (limb_bits, nail_bits);
    581      1.1  mrg 
    582      1.1  mrg   print (limb_bits, nail_bits);
    583      1.1  mrg 
    584      1.1  mrg   return 0;
    585      1.1  mrg }
    586