Home | History | Annotate | Line # | Download | only in tests
tnrandom_chisq.c revision 1.1.1.1
      1 /* Chi-squared test for mpfr_nrandom
      2 
      3 Copyright 2011-2018 Free Software Foundation, Inc.
      4 Contributed by Charles Karney <charles (at) karney.com>, SRI International.
      5 
      6 This file is part of the GNU MPFR Library.
      7 
      8 The GNU MPFR Library is free software; you can redistribute it and/or modify
      9 it under the terms of the GNU Lesser General Public License as published by
     10 the Free Software Foundation; either version 3 of the License, or (at your
     11 option) any later version.
     12 
     13 The GNU MPFR Library is distributed in the hope that it will be useful, but
     14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     16 License for more details.
     17 
     18 You should have received a copy of the GNU Lesser General Public License
     19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
     20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
     21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
     22 
     23 #include "mpfr-test.h"
     24 
     25 /* Return Phi(x) = erf(x / sqrt(2)) / 2, the cumulative probability function
     26  * for the normal distribution.  We only take differences of this function so
     27  * the offset doesn't matter; here Phi(0) = 0. */
     28 static void
     29 normal_cumulative (mpfr_t z, mpfr_t x, mpfr_rnd_t rnd)
     30 {
     31   mpfr_sqrt_ui (z, 2, rnd);
     32   mpfr_div (z, x, z, rnd);
     33   mpfr_erf (z, z, rnd);
     34   mpfr_div_ui (z, z, 2, rnd);
     35 }
     36 
     37 /* Given nu and chisqp, compute probability that chisq > chisqp.  This uses,
     38  * A&S 26.4.16,
     39  *
     40  * Q(nu,chisqp) =
     41  *     erfc( (3/2)*sqrt(nu) * ( cbrt(chisqp/nu) - 1 + 2/(9*nu) ) ) / 2
     42  *
     43  * which is valid for nu > 30.  This is the basis for the formula in Knuth,
     44  * TAOCP, Vol 2, 3.3.1, Table 1.  It more accurate than the similar formula,
     45  * DLMF 8.11.10. */
     46 static void
     47 chisq_prob (mpfr_t q, long nu, mpfr_t chisqp)
     48 {
     49   mpfr_t t;
     50   mpfr_rnd_t rnd;
     51 
     52   rnd = MPFR_RNDN;  /* This uses an approx formula.  Might as well use RNDN. */
     53   mpfr_init2 (t, mpfr_get_prec (q));
     54 
     55   mpfr_div_si (q, chisqp, nu, rnd); /* chisqp/nu */
     56   mpfr_cbrt (q, q, rnd);            /* (chisqp/nu)^(1/3) */
     57   mpfr_sub_ui (q, q, 1, rnd);       /* (chisqp/nu)^(1/3) - 1 */
     58   mpfr_set_ui (t, 2, rnd);
     59   mpfr_div_si (t, t, 9*nu, rnd); /* 2/(9*nu) */
     60   mpfr_add (q, q, t, rnd);       /* (chisqp/nu)^(1/3) - 1 + 2/(9*nu) */
     61   mpfr_sqrt_ui (t, nu, rnd);     /* sqrt(nu) */
     62   mpfr_mul_d (t, t, 1.5, rnd);   /* (3/2)*sqrt(nu) */
     63   mpfr_mul (q, q, t, rnd);       /* arg to erfc */
     64   mpfr_erfc (q, q, rnd);         /* erfc(...) */
     65   mpfr_div_ui (q, q, 2, rnd);    /* erfc(...)/2 */
     66 
     67   mpfr_clear (t);
     68 }
     69 
     70 /* The continuous chi-squared test on with a set of bins of equal width.
     71  *
     72  * A single precision is picked for sampling and the chi-squared calculation.
     73  * This should picked high enough so that binning in test doesn't need to be
     74  * accurately aligned with possible values of the deviates.  Also we need the
     75  * precision big enough that chi-squared calculation itself is reliable.
     76  *
     77  * There's no particular benefit is testing with at very higher precisions;
     78  * because of the way tnrandom samples, this just adds additional barely
     79  * significant random bits to the deviates.  So this chi-squared test with
     80  * continuous equal width bins isn't a good tool for finding problems here.
     81  *
     82  * The testing of low precision normal deviates is done by
     83  * test_nrandom_chisq_disc. */
     84 static double
     85 test_nrandom_chisq_cont (long num, mpfr_prec_t prec, int nu,
     86                          double xmin, double xmax, int verbose)
     87 {
     88   mpfr_t x, a, b, dx, z, pa, pb, ps, t;
     89   long *counts;
     90   int i, inexact;
     91   long k;
     92   mpfr_rnd_t rnd, rndd;
     93   double Q, chisq;
     94 
     95   rnd = MPFR_RNDN;              /* For chi-squared calculation */
     96   rndd = MPFR_RNDD;             /* For sampling and figuring the bins */
     97   mpfr_inits2 (prec, x, a, b, dx, z, pa, pb, ps, t, (mpfr_ptr) 0);
     98 
     99   counts = (long *) tests_allocate ((nu + 1) * sizeof (long));
    100   for (i = 0; i <= nu; i++)
    101     counts[i] = 0;
    102 
    103   /* a and b are bounds of nu equally spaced bins.  Set dx = (b-a)/nu */
    104   mpfr_set_d (a, xmin, rnd);
    105   mpfr_set_d (b, xmax, rnd);
    106 
    107   mpfr_sub (dx, b, a, rnd);
    108   mpfr_div_si (dx, dx, nu, rnd);
    109 
    110   for (k = 0; k < num; ++k)
    111     {
    112       inexact = mpfr_nrandom (x, RANDS, rndd);
    113       if (inexact == 0)
    114         {
    115           /* one call in the loop pretended to return an exact number! */
    116           printf ("Error: mpfr_nrandom() returns a zero ternary value.\n");
    117           exit (1);
    118         }
    119       mpfr_sub (x, x, a, rndd);
    120       mpfr_div (x, x, dx, rndd);
    121       i = mpfr_get_si (x, rndd);
    122       ++counts[i >= 0 && i < nu ? i : nu];
    123     }
    124 
    125   mpfr_set (x, a, rnd);
    126   normal_cumulative (pa, x, rnd);
    127   mpfr_add_ui (ps, pa, 1, rnd);
    128   mpfr_set_zero (t, 1);
    129   for (i = 0; i <= nu; ++i)
    130     {
    131       if (i < nu)
    132         {
    133           mpfr_add (x, x, dx, rnd);
    134           normal_cumulative (pb, x, rnd);
    135           mpfr_sub (pa, pb, pa, rnd); /* prob for this bin */
    136         }
    137       else
    138         mpfr_sub (pa, ps, pa, rnd); /* prob for last bin, i = nu */
    139 
    140       /* Compute z = counts[i] - num * p; t += z * z / (num * p) */
    141       mpfr_mul_ui (pa, pa, num, rnd);
    142       mpfr_ui_sub (z, counts[i], pa, rnd);
    143       mpfr_sqr (z, z, rnd);
    144       mpfr_div (z, z, pa, rnd);
    145       mpfr_add (t, t, z, rnd);
    146       mpfr_swap (pa, pb);       /* i.e., pa = pb */
    147     }
    148 
    149   chisq = mpfr_get_d (t, rnd);
    150   chisq_prob (t, nu, t);
    151   Q = mpfr_get_d (t, rnd);
    152   if (verbose)
    153     {
    154       printf ("num = %ld, equal bins in [%.2f, %.2f], nu = %d: chisq = %.2f\n",
    155               num, xmin, xmax, nu, chisq);
    156       if (Q < 0.05)
    157         printf ("    WARNING: probability (less than 5%%) = %.2e\n", Q);
    158     }
    159 
    160   tests_free (counts, (nu + 1) * sizeof (long));
    161   mpfr_clears (x, a, b, dx, z, pa, pb, ps, t, (mpfr_ptr) 0);
    162   return Q;
    163 }
    164 
    165 /* Return a sequential number for a positive low-precision x.  x is altered by
    166  * this fuction.  low precision means prec = 2, 3, or 4.  High values of
    167  * precision will result in integer overflow. */
    168 static long
    169 sequential (mpfr_t x)
    170 {
    171   long expt, prec;
    172 
    173   prec = mpfr_get_prec (x);
    174   expt =  mpfr_get_exp (x);
    175   mpfr_mul_2si (x, x, prec - expt, MPFR_RNDN);
    176 
    177   return expt * (1 << (prec - 1)) + mpfr_get_si (x, MPFR_RNDN);
    178 }
    179 
    180 /* The chi-squared test on low precision normal deviates.  wprec is the working
    181  * precision for the chi-squared calculation.  prec is the precision for the
    182  * sampling; choose this in [2,5].  The bins consist of all the possible
    183  * deviate values in the range [xmin, xmax] coupled with the value of inexact.
    184  * Thus with prec = 2, the bins are
    185  *   ...
    186  *   (7/16, 1/2)  x = 1/2, inexact = +1
    187  *   (1/2 , 5/8)  x = 1/2, inexact = -1
    188  *   (5/8 , 3/4)  x = 3/4, inexact = +1
    189  *   (3/4 , 7/8)  x = 3/4, inexact = -1
    190  *   (7/8 , 1  )  x = 1  , inexact = +1
    191  *   (1   , 5/4)  x = 1  , inexact = -1
    192  *   (5/4 , 3/2)  x = 3/2, inexact = +1
    193  *   (3/2 , 7/4)  x = 3/2, inexact = -1
    194  *   ...
    195  * In addition, two bins are allocated for [0,xmin) and (xmax,inf).
    196  *
    197  * This test is applied to the absolute values of the deviates.  The sign is
    198  * tested by test_nrandom_chisq_cont.  In any case, the way the sign is
    199  * assigned in mpfr_nrandom is trivial.  In addition, the sampling is with
    200  * MPFR_RNDN.  This is the rounding mode which elicits the most information.
    201  * trandom_deviate includes checks on the consistency of the results extracted
    202  * from a random_deviate with other rounding modes.  */
    203 static double
    204 test_nrandom_chisq_disc (long num, mpfr_prec_t wprec, int prec,
    205                          double xmin, double xmax, int verbose)
    206 {
    207   mpfr_t x, v, pa, pb, z, t;
    208   mpfr_rnd_t rnd;
    209   int i, inexact, nu;
    210   long *counts;
    211   long k, seqmin, seqmax, seq;
    212   double Q, chisq;
    213 
    214   rnd = MPFR_RNDN;
    215   mpfr_init2 (x, prec);
    216   mpfr_init2 (v, prec+1);
    217   mpfr_inits2 (wprec, pa, pb, z, t, (mpfr_ptr) 0);
    218 
    219   mpfr_set_d (x, xmin, rnd);
    220   xmin = mpfr_get_d (x, rnd);
    221   mpfr_set (v, x, rnd);
    222   seqmin = sequential (x);
    223   mpfr_set_d (x, xmax, rnd);
    224   xmax = mpfr_get_d (x, rnd);
    225   seqmax = sequential (x);
    226 
    227   /* Two bins for each sequential number (for inexact = +/- 1), plus 1 for u <
    228    * umin and 1 for u > umax, minus 1 for degrees of freedom */
    229   nu = 2 * (seqmax - seqmin + 1) + 2 - 1;
    230   counts = (long *) tests_allocate ((nu + 1) * sizeof (long));
    231   for (i = 0; i <= nu; i++)
    232     counts[i] = 0;
    233 
    234   for (k = 0; k < num; ++k)
    235     {
    236       inexact = mpfr_nrandom (x, RANDS, rnd);
    237       if (mpfr_signbit (x))
    238         {
    239           inexact = -inexact;
    240           mpfr_setsign (x, x, 0, rnd);
    241         }
    242       /* Don't call sequential with small args to avoid undefined behavior with
    243        * zero and possibility of overflow. */
    244       seq = mpfr_greaterequal_p (x, v) ? sequential (x) : seqmin - 1;
    245       ++counts[seq < seqmin ? 0 :
    246                seq <= seqmax ? 2 * (seq - seqmin) + 1 + (inexact > 0 ? 0 : 1) :
    247                nu];
    248     }
    249 
    250   mpfr_set_zero (v, 1);
    251   normal_cumulative (pa, v, rnd);
    252   /* Cycle through all the bin boundaries using mpfr_nextabove at precision
    253    * prec + 1 starting at mpfr_nextbelow (xmin) */
    254   mpfr_set_d (x, xmin, rnd);
    255   mpfr_set (v, x, rnd);
    256   mpfr_nextbelow (v);
    257   mpfr_nextbelow (v);
    258   mpfr_set_zero (t, 1);
    259   for (i = 0; i <= nu; ++i)
    260     {
    261       if (i < nu)
    262         mpfr_nextabove (v);
    263       else
    264         mpfr_set_inf (v, 1);
    265       normal_cumulative (pb, v, rnd);
    266       mpfr_sub (pa, pb, pa, rnd);
    267 
    268       /* Compute z = counts[i] - num * p; t += z * z / (num * p).  2*num to
    269        * account for the fact the p needs to be doubled since we are
    270        * considering only the absolute value of the deviates. */
    271       mpfr_mul_ui (pa, pa, 2*num, rnd);
    272       mpfr_ui_sub (z, counts[i], pa, rnd);
    273       mpfr_sqr (z, z, rnd);
    274       mpfr_div (z, z, pa, rnd);
    275       mpfr_add (t, t, z, rnd);
    276       mpfr_swap (pa, pb);       /* i.e., pa = pb */
    277     }
    278 
    279   chisq = mpfr_get_d (t, rnd);
    280   chisq_prob (t, nu, t);
    281   Q = mpfr_get_d (t, rnd);
    282   if (verbose)
    283     {
    284       printf ("num = %ld, discrete (prec = %d) bins in [%.6f, %.2f], "
    285               "nu = %d: chisq = %.2f\n", num, prec, xmin, xmax, nu, chisq);
    286       if (Q < 0.05)
    287         printf ("    WARNING: probability (less than 5%%) = %.2e\n", Q);
    288     }
    289 
    290   tests_free (counts, (nu + 1) * sizeof (long));
    291   mpfr_clears (x, v, pa, pb, z, t, (mpfr_ptr) 0);
    292   return Q;
    293 }
    294 
    295 static void
    296 run_chisq (double (*f)(long, mpfr_prec_t, int, double, double, int),
    297            long num, mpfr_prec_t prec, int bin,
    298            double xmin, double xmax, int verbose)
    299 {
    300   double Q, Qcum, Qbad, Qthresh;
    301   int i;
    302 
    303   Qcum = 1;
    304   Qbad = 1.e-9;
    305   Qthresh = 0.01;
    306   for (i = 0; i < 3; ++i)
    307     {
    308       Q = (*f)(num, prec, bin, xmin, xmax, verbose);
    309       Qcum *= Q;
    310       if (Q > Qthresh)
    311         return;
    312       else if (Q < Qbad)
    313         {
    314           printf ("Error: mpfr_nrandom chi-squared failure "
    315                   "(prob = %.2e)\n", Q);
    316           exit (1);
    317         }
    318       num *= 10;
    319       Qthresh /= 10;
    320     }
    321   if (Qcum < Qbad)              /* Presumably this is true */
    322     {
    323       printf ("Error: mpfr_nrandom combined chi-squared failure "
    324               "(prob = %.2e)\n", Qcum);
    325       exit (1);
    326     }
    327 }
    328 
    329 int
    330 main (int argc, char *argv[])
    331 {
    332   long nbtests;
    333   int verbose;
    334 
    335   tests_start_mpfr ();
    336 
    337   verbose = 0;
    338   nbtests = 100000;
    339   if (argc > 1)
    340     {
    341       long a = atol (argv[1]);
    342       verbose = 1;
    343       if (a != 0)
    344         nbtests = a;
    345     }
    346 
    347   run_chisq (test_nrandom_chisq_cont, nbtests, 64, 60, -4, 4, verbose);
    348   run_chisq (test_nrandom_chisq_disc, nbtests, 64, 2, 0.0005, 3, verbose);
    349   run_chisq (test_nrandom_chisq_disc, nbtests, 64, 3, 0.002, 4, verbose);
    350   run_chisq (test_nrandom_chisq_disc, nbtests, 64, 4, 0.004, 4, verbose);
    351 
    352   tests_end_mpfr ();
    353   return 0;
    354 }
    355