Home | History | Annotate | Line # | Download | only in dist
      1 /* Generate perfect square testing data.
      2 
      3 Copyright 2002-2004, 2012, 2014 Free Software Foundation, Inc.
      4 
      5 This file is part of the GNU MP Library.
      6 
      7 The GNU MP Library is free software; you can redistribute it and/or modify
      8 it under the terms of either:
      9 
     10   * the GNU Lesser General Public License as published by the Free
     11     Software Foundation; either version 3 of the License, or (at your
     12     option) any later version.
     13 
     14 or
     15 
     16   * the GNU General Public License as published by the Free Software
     17     Foundation; either version 2 of the License, or (at your option) any
     18     later version.
     19 
     20 or both in parallel, as here.
     21 
     22 The GNU MP Library is distributed in the hope that it will be useful, but
     23 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     24 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     25 for more details.
     26 
     27 You should have received copies of the GNU General Public License and the
     28 GNU Lesser General Public License along with the GNU MP Library.  If not,
     29 see https://www.gnu.org/licenses/.  */
     30 
     31 #include <stdio.h>
     32 #include <stdlib.h>
     33 
     34 #include "bootstrap.c"
     35 
     36 
     37 /* The aim of this program is to choose either mpn_mod_34lsub1 or mpn_mod_1
     38    (plus a PERFSQR_PP modulus), and generate tables indicating quadratic
     39    residues and non-residues modulo small factors of that modulus.
     40 
     41    For the usual 32 or 64 bit cases mpn_mod_34lsub1 gets used.  That
     42    function exists specifically because 2^24-1 and 2^48-1 have nice sets of
     43    prime factors.  For other limb sizes it's considered, but if it doesn't
     44    have good factors then mpn_mod_1 will be used instead.
     45 
     46    When mpn_mod_1 is used, the modulus PERFSQR_PP is created from a
     47    selection of small primes, chosen to fill PERFSQR_MOD_BITS of a limb,
     48    with that bit count chosen so (2*GMP_LIMB_BITS)*2^PERFSQR_MOD_BITS <=
     49    GMP_LIMB_MAX, allowing PERFSQR_MOD_IDX in mpn/generic/perfsqr.c to do its
     50    calculation within a single limb.
     51 
     52    In either case primes can be combined to make divisors.  The table data
     53    then effectively indicates remainders which are quadratic residues mod
     54    all the primes.  This sort of combining reduces the number of steps
     55    needed after mpn_mod_34lsub1 or mpn_mod_1, saving code size and time.
     56    Nothing is gained or lost in terms of detections, the same total fraction
     57    of non-residues will be identified.
     58 
     59    Nothing particularly sophisticated is attempted for combining factors to
     60    make divisors.  This is probably a kind of knapsack problem so it'd be
     61    too hard to attempt anything completely general.  For the usual 32 and 64
     62    bit limbs we get a good enough result just pairing the biggest and
     63    smallest which fit together, repeatedly.
     64 
     65    Another aim is to get powerful combinations, ie. divisors which identify
     66    biggest fraction of non-residues, and have those run first.  Again for
     67    the usual 32 and 64 bits it seems good enough just to pair for big
     68    divisors then sort according to the resulting fraction of non-residues
     69    identified.
     70 
     71    Also in this program, a table sq_res_0x100 of residues modulo 256 is
     72    generated.  This simply fills bits into limbs of the appropriate
     73    build-time GMP_LIMB_BITS each.
     74 
     75 */
     76 
     77 
     78 /* Normally we aren't using const in gen*.c programs, so as not to have to
     79    bother figuring out if it works, but using it with f_cmp_divisor and
     80    f_cmp_fraction avoids warnings from the qsort calls. */
     81 
     82 /* Same tests as gmp.h. */
     83 #if  defined (__STDC__)                                 \
     84   || defined (__cplusplus)                              \
     85   || defined (_AIX)                                     \
     86   || defined (__DECC)                                   \
     87   || (defined (__mips) && defined (_SYSTYPE_SVR4))      \
     88   || defined (_MSC_VER)                                 \
     89   || defined (_WIN32)
     90 #define HAVE_CONST        1
     91 #endif
     92 
     93 #if ! HAVE_CONST
     94 #define const
     95 #endif
     96 
     97 
     98 mpz_t  *sq_res_0x100;          /* table of limbs */
     99 int    nsq_res_0x100;          /* elements in sq_res_0x100 array */
    100 int    sq_res_0x100_num;       /* squares in sq_res_0x100 */
    101 double sq_res_0x100_fraction;  /* sq_res_0x100_num / 256 */
    102 
    103 int     mod34_bits;        /* 3*GMP_NUMB_BITS/4 */
    104 int     mod_bits;          /* bits from PERFSQR_MOD_34 or MOD_PP */
    105 int     max_divisor;       /* all divisors <= max_divisor */
    106 int     max_divisor_bits;  /* ceil(log2(max_divisor)) */
    107 double  total_fraction;    /* of squares */
    108 mpz_t   pp;                /* product of primes, or 0 if mod_34lsub1 used */
    109 mpz_t   pp_norm;           /* pp shifted so NUMB high bit set */
    110 mpz_t   pp_inverted;       /* invert_limb style inverse */
    111 mpz_t   mod_mask;          /* 2^mod_bits-1 */
    112 char    mod34_excuse[128]; /* why mod_34lsub1 not used (if it's not) */
    113 
    114 /* raw list of divisors of 2^mod34_bits-1 or pp, just to show in a comment */
    115 struct rawfactor_t {
    116   int     divisor;
    117   int     multiplicity;
    118 };
    119 struct rawfactor_t  *rawfactor;
    120 int                 nrawfactor;
    121 
    122 /* factors of 2^mod34_bits-1 or pp and associated data, after combining etc */
    123 struct factor_t {
    124   int     divisor;
    125   mpz_t   inverse;   /* 1/divisor mod 2^mod_bits */
    126   mpz_t   mask;      /* indicating squares mod divisor */
    127   double  fraction;  /* squares/total */
    128 };
    129 struct factor_t  *factor;
    130 int              nfactor;       /* entries in use in factor array */
    131 int              factor_alloc;  /* entries allocated to factor array */
    132 
    133 
    134 int
    135 f_cmp_divisor (const void *parg, const void *qarg)
    136 {
    137   const struct factor_t *p, *q;
    138   p = (const struct factor_t *) parg;
    139   q = (const struct factor_t *) qarg;
    140   if (p->divisor > q->divisor)
    141     return 1;
    142   else if (p->divisor < q->divisor)
    143     return -1;
    144   else
    145     return 0;
    146 }
    147 
    148 int
    149 f_cmp_fraction (const void *parg, const void *qarg)
    150 {
    151   const struct factor_t *p, *q;
    152   p = (const struct factor_t *) parg;
    153   q = (const struct factor_t *) qarg;
    154   if (p->fraction > q->fraction)
    155     return 1;
    156   else if (p->fraction < q->fraction)
    157     return -1;
    158   else
    159     return 0;
    160 }
    161 
    162 /* Remove array[idx] by copying the remainder down, and adjust narray
    163    accordingly.  */
    164 #define COLLAPSE_ELEMENT(array, idx, narray)                    \
    165   do {                                                          \
    166     memmove (&(array)[idx],					\
    167 	     &(array)[idx+1],					\
    168 	     ((narray)-((idx)+1)) * sizeof (array[0]));		\
    169     (narray)--;                                                 \
    170   } while (0)
    171 
    172 
    173 /* return n*2^p mod m */
    174 int
    175 mul_2exp_mod (int n, int p, int m)
    176 {
    177   while (--p >= 0)
    178     n = (2 * n) % m;
    179   return n;
    180 }
    181 
    182 /* return -n mod m */
    183 int
    184 neg_mod (int n, int m)
    185 {
    186   assert (n >= 0 && n < m);
    187   return (n == 0 ? 0 : m-n);
    188 }
    189 
    190 /* Set "mask" to a value such that "mask & (1<<idx)" is non-zero if
    191    "-(idx<<mod_bits)" can be a square modulo m.  */
    192 void
    193 square_mask (mpz_t mask, int m)
    194 {
    195   int    p, i, r, idx;
    196 
    197   p = mul_2exp_mod (1, mod_bits, m);
    198   p = neg_mod (p, m);
    199 
    200   mpz_set_ui (mask, 0L);
    201   for (i = 0; i < m; i++)
    202     {
    203       r = (i * i) % m;
    204       idx = (r * p) % m;
    205       mpz_setbit (mask, (unsigned long) idx);
    206     }
    207 }
    208 
    209 void
    210 generate_sq_res_0x100 (int limb_bits)
    211 {
    212   int  i, res;
    213 
    214   nsq_res_0x100 = (0x100 + limb_bits - 1) / limb_bits;
    215   sq_res_0x100 = (mpz_t *) xmalloc (nsq_res_0x100 * sizeof (*sq_res_0x100));
    216 
    217   for (i = 0; i < nsq_res_0x100; i++)
    218     mpz_init_set_ui (sq_res_0x100[i], 0L);
    219 
    220   for (i = 0; i < 0x100; i++)
    221     {
    222       res = (i * i) % 0x100;
    223       mpz_setbit (sq_res_0x100[res / limb_bits],
    224                   (unsigned long) (res % limb_bits));
    225     }
    226 
    227   sq_res_0x100_num = 0;
    228   for (i = 0; i < nsq_res_0x100; i++)
    229     sq_res_0x100_num += mpz_popcount (sq_res_0x100[i]);
    230   sq_res_0x100_fraction = (double) sq_res_0x100_num / 256.0;
    231 }
    232 
    233 void
    234 generate_mod (int limb_bits, int nail_bits)
    235 {
    236   int    numb_bits = limb_bits - nail_bits;
    237   int    i, divisor;
    238 
    239   mpz_init_set_ui (pp, 0L);
    240   mpz_init_set_ui (pp_norm, 0L);
    241   mpz_init_set_ui (pp_inverted, 0L);
    242 
    243   /* no more than limb_bits many factors in a one limb modulus (and of
    244      course in reality nothing like that many) */
    245   factor_alloc = limb_bits;
    246   factor = (struct factor_t *) xmalloc (factor_alloc * sizeof (*factor));
    247   rawfactor = (struct rawfactor_t *) xmalloc (factor_alloc * sizeof (*rawfactor));
    248 
    249   if (numb_bits % 4 != 0)
    250     {
    251       strcpy (mod34_excuse, "GMP_NUMB_BITS % 4 != 0");
    252       goto use_pp;
    253     }
    254 
    255   max_divisor = 2*limb_bits;
    256   max_divisor_bits = log2_ceil (max_divisor);
    257 
    258   if (numb_bits / 4 < max_divisor_bits)
    259     {
    260       /* Wind back to one limb worth of max_divisor, if that will let us use
    261          mpn_mod_34lsub1.  */
    262       max_divisor = limb_bits;
    263       max_divisor_bits = log2_ceil (max_divisor);
    264 
    265       if (numb_bits / 4 < max_divisor_bits)
    266         {
    267           strcpy (mod34_excuse, "GMP_NUMB_BITS / 4 too small");
    268           goto use_pp;
    269         }
    270     }
    271 
    272   {
    273     /* Can use mpn_mod_34lsub1, find small factors of 2^mod34_bits-1. */
    274     mpz_t  m, q, r;
    275     int    multiplicity;
    276 
    277     mod34_bits = (numb_bits / 4) * 3;
    278 
    279     /* mpn_mod_34lsub1 returns a full limb value, PERFSQR_MOD_34 folds it at
    280        the mod34_bits mark, adding the two halves for a remainder of at most
    281        mod34_bits+1 many bits */
    282     mod_bits = mod34_bits + 1;
    283 
    284     mpz_init_set_ui (m, 1L);
    285     mpz_mul_2exp (m, m, mod34_bits);
    286     mpz_sub_ui (m, m, 1L);
    287 
    288     mpz_init (q);
    289     mpz_init (r);
    290 
    291     for (i = 3; i <= max_divisor; i+=2)
    292       {
    293         if (! isprime (i))
    294           continue;
    295 
    296         mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
    297         if (mpz_sgn (r) != 0)
    298           continue;
    299 
    300         /* if a repeated prime is found it's used as an i^n in one factor */
    301         divisor = 1;
    302         multiplicity = 0;
    303         do
    304           {
    305             if (divisor > max_divisor / i)
    306               break;
    307             multiplicity++;
    308             mpz_set (m, q);
    309             mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
    310           }
    311         while (mpz_sgn (r) == 0);
    312 
    313         assert (nrawfactor < factor_alloc);
    314         rawfactor[nrawfactor].divisor = i;
    315         rawfactor[nrawfactor].multiplicity = multiplicity;
    316         nrawfactor++;
    317       }
    318 
    319     mpz_clear (m);
    320     mpz_clear (q);
    321     mpz_clear (r);
    322   }
    323 
    324   if (nrawfactor <= 2)
    325     {
    326       mpz_t  new_pp;
    327 
    328       sprintf (mod34_excuse, "only %d small factor%s",
    329                nrawfactor, nrawfactor == 1 ? "" : "s");
    330 
    331     use_pp:
    332       /* reset to two limbs of max_divisor, in case the mpn_mod_34lsub1 code
    333          tried with just one */
    334       max_divisor = 2*limb_bits;
    335       max_divisor_bits = log2_ceil (max_divisor);
    336 
    337       mpz_init (new_pp);
    338       nrawfactor = 0;
    339       mod_bits = MIN (numb_bits, limb_bits - max_divisor_bits);
    340 
    341       /* one copy of each small prime */
    342       mpz_set_ui (pp, 1L);
    343       for (i = 3; i <= max_divisor; i+=2)
    344         {
    345           if (! isprime (i))
    346             continue;
    347 
    348           mpz_mul_ui (new_pp, pp, (unsigned long) i);
    349           if (mpz_sizeinbase (new_pp, 2) > mod_bits)
    350             break;
    351           mpz_set (pp, new_pp);
    352 
    353           assert (nrawfactor < factor_alloc);
    354           rawfactor[nrawfactor].divisor = i;
    355           rawfactor[nrawfactor].multiplicity = 1;
    356           nrawfactor++;
    357         }
    358 
    359       /* Plus an extra copy of one or more of the primes selected, if that
    360          still fits in max_divisor and the total in mod_bits.  Usually only
    361          3 or 5 will be candidates */
    362       for (i = nrawfactor-1; i >= 0; i--)
    363         {
    364           if (rawfactor[i].divisor > max_divisor / rawfactor[i].divisor)
    365             continue;
    366           mpz_mul_ui (new_pp, pp, (unsigned long) rawfactor[i].divisor);
    367           if (mpz_sizeinbase (new_pp, 2) > mod_bits)
    368             continue;
    369           mpz_set (pp, new_pp);
    370 
    371           rawfactor[i].multiplicity++;
    372         }
    373 
    374       mod_bits = mpz_sizeinbase (pp, 2);
    375 
    376       mpz_set (pp_norm, pp);
    377       while (mpz_sizeinbase (pp_norm, 2) < numb_bits)
    378         mpz_add (pp_norm, pp_norm, pp_norm);
    379 
    380       mpz_preinv_invert (pp_inverted, pp_norm, numb_bits);
    381 
    382       mpz_clear (new_pp);
    383     }
    384 
    385   /* start the factor array */
    386   for (i = 0; i < nrawfactor; i++)
    387     {
    388       int  j;
    389       assert (nfactor < factor_alloc);
    390       factor[nfactor].divisor = 1;
    391       for (j = 0; j < rawfactor[i].multiplicity; j++)
    392         factor[nfactor].divisor *= rawfactor[i].divisor;
    393       nfactor++;
    394     }
    395 
    396  combine:
    397   /* Combine entries in the factor array.  Combine the smallest entry with
    398      the biggest one that will fit with it (ie. under max_divisor), then
    399      repeat that with the new smallest entry. */
    400   qsort (factor, nfactor, sizeof (factor[0]), f_cmp_divisor);
    401   for (i = nfactor-1; i >= 1; i--)
    402     {
    403       if (factor[i].divisor <= max_divisor / factor[0].divisor)
    404         {
    405           factor[0].divisor *= factor[i].divisor;
    406           COLLAPSE_ELEMENT (factor, i, nfactor);
    407           goto combine;
    408         }
    409     }
    410 
    411   total_fraction = 1.0;
    412   for (i = 0; i < nfactor; i++)
    413     {
    414       mpz_init (factor[i].inverse);
    415       mpz_invert_ui_2exp (factor[i].inverse,
    416                           (unsigned long) factor[i].divisor,
    417                           (unsigned long) mod_bits);
    418 
    419       mpz_init (factor[i].mask);
    420       square_mask (factor[i].mask, factor[i].divisor);
    421 
    422       /* fraction of possible squares */
    423       factor[i].fraction = (double) mpz_popcount (factor[i].mask)
    424         / factor[i].divisor;
    425 
    426       /* total fraction of possible squares */
    427       total_fraction *= factor[i].fraction;
    428     }
    429 
    430   /* best tests first (ie. smallest fraction) */
    431   qsort (factor, nfactor, sizeof (factor[0]), f_cmp_fraction);
    432 }
    433 
    434 void
    435 print (int limb_bits, int nail_bits)
    436 {
    437   int    i;
    438   mpz_t  mhi, mlo;
    439 
    440   printf ("/* This file generated by gen-psqr.c - DO NOT EDIT. */\n");
    441   printf ("\n");
    442 
    443   printf ("#if GMP_LIMB_BITS != %d || GMP_NAIL_BITS != %d\n",
    444           limb_bits, nail_bits);
    445   printf ("Error, error, this data is for %d bit limb and %d bit nail\n",
    446           limb_bits, nail_bits);
    447   printf ("#endif\n");
    448   printf ("\n");
    449 
    450   printf ("/* Non-zero bit indicates a quadratic residue mod 0x100.\n");
    451   printf ("   This test identifies %.2f%% as non-squares (%d/256). */\n",
    452           (1.0 - sq_res_0x100_fraction) * 100.0,
    453           0x100 - sq_res_0x100_num);
    454   printf ("static const mp_limb_t\n");
    455   printf ("sq_res_0x100[%d] = {\n", nsq_res_0x100);
    456   for (i = 0; i < nsq_res_0x100; i++)
    457     {
    458       printf ("  CNST_LIMB(0x");
    459       mpz_out_str (stdout, 16, sq_res_0x100[i]);
    460       printf ("),\n");
    461     }
    462   printf ("};\n");
    463   printf ("\n");
    464 
    465   if (mpz_sgn (pp) != 0)
    466     {
    467       printf ("/* mpn_mod_34lsub1 not used due to %s */\n", mod34_excuse);
    468       printf ("/* PERFSQR_PP = ");
    469     }
    470   else
    471     printf ("/* 2^%d-1 = ", mod34_bits);
    472   for (i = 0; i < nrawfactor; i++)
    473     {
    474       if (i != 0)
    475         printf (" * ");
    476       printf ("%d", rawfactor[i].divisor);
    477       if (rawfactor[i].multiplicity != 1)
    478         printf ("^%d", rawfactor[i].multiplicity);
    479     }
    480   printf (" %s*/\n", mpz_sgn (pp) == 0 ? "... " : "");
    481 
    482   printf ("#define PERFSQR_MOD_BITS  %d\n", mod_bits);
    483   if (mpz_sgn (pp) != 0)
    484     {
    485       printf ("#define PERFSQR_PP            CNST_LIMB(0x");
    486       mpz_out_str (stdout, 16, pp);
    487       printf (")\n");
    488       printf ("#define PERFSQR_PP_NORM       CNST_LIMB(0x");
    489       mpz_out_str (stdout, 16, pp_norm);
    490       printf (")\n");
    491       printf ("#define PERFSQR_PP_INVERTED   CNST_LIMB(0x");
    492       mpz_out_str (stdout, 16, pp_inverted);
    493       printf (")\n");
    494     }
    495   printf ("\n");
    496 
    497   mpz_init (mhi);
    498   mpz_init (mlo);
    499 
    500   printf ("/* This test identifies %.2f%% as non-squares. */\n",
    501           (1.0 - total_fraction) * 100.0);
    502   printf ("#define PERFSQR_MOD_TEST(up, usize) \\\n");
    503   printf ("  do {                              \\\n");
    504   printf ("    mp_limb_t  r;                   \\\n");
    505   if (mpz_sgn (pp) != 0)
    506     printf ("    PERFSQR_MOD_PP (r, up, usize);  \\\n");
    507   else
    508     printf ("    PERFSQR_MOD_34 (r, up, usize);  \\\n");
    509 
    510   for (i = 0; i < nfactor; i++)
    511     {
    512       printf ("                                    \\\n");
    513       printf ("    /* %5.2f%% */                    \\\n",
    514               (1.0 - factor[i].fraction) * 100.0);
    515 
    516       printf ("    PERFSQR_MOD_%d (r, CNST_LIMB(%2d), CNST_LIMB(0x",
    517               factor[i].divisor <= limb_bits ? 1 : 2,
    518               factor[i].divisor);
    519       mpz_out_str (stdout, 16, factor[i].inverse);
    520       printf ("), \\\n");
    521       printf ("                   CNST_LIMB(0x");
    522 
    523       if ( factor[i].divisor <= limb_bits)
    524         {
    525           mpz_out_str (stdout, 16, factor[i].mask);
    526         }
    527       else
    528         {
    529           mpz_tdiv_r_2exp (mlo, factor[i].mask, (unsigned long) limb_bits);
    530           mpz_tdiv_q_2exp (mhi, factor[i].mask, (unsigned long) limb_bits);
    531           mpz_out_str (stdout, 16, mhi);
    532           printf ("), CNST_LIMB(0x");
    533           mpz_out_str (stdout, 16, mlo);
    534         }
    535       printf (")); \\\n");
    536     }
    537 
    538   printf ("  } while (0)\n");
    539   printf ("\n");
    540 
    541   printf ("/* Grand total sq_res_0x100 and PERFSQR_MOD_TEST, %.2f%% non-squares. */\n",
    542           (1.0 - (total_fraction * 44.0/256.0)) * 100.0);
    543   printf ("\n");
    544 
    545   printf ("/* helper for tests/mpz/t-perfsqr.c */\n");
    546   printf ("#define PERFSQR_DIVISORS  { 256,");
    547   for (i = 0; i < nfactor; i++)
    548       printf (" %d,", factor[i].divisor);
    549   printf (" }\n");
    550 
    551 
    552   mpz_clear (mhi);
    553   mpz_clear (mlo);
    554 }
    555 
    556 int
    557 main (int argc, char *argv[])
    558 {
    559   int  limb_bits, nail_bits;
    560 
    561   if (argc != 3)
    562     {
    563       fprintf (stderr, "Usage: gen-psqr <limbbits> <nailbits>\n");
    564       exit (1);
    565     }
    566 
    567   limb_bits = atoi (argv[1]);
    568   nail_bits = atoi (argv[2]);
    569 
    570   if (limb_bits <= 0
    571       || nail_bits < 0
    572       || nail_bits >= limb_bits)
    573     {
    574       fprintf (stderr, "Invalid limb/nail bits: %d %d\n",
    575                limb_bits, nail_bits);
    576       exit (1);
    577     }
    578 
    579   generate_sq_res_0x100 (limb_bits);
    580   generate_mod (limb_bits, nail_bits);
    581 
    582   print (limb_bits, nail_bits);
    583 
    584   return 0;
    585 }
    586