dist/tests/tnrandom_chisq.c

    1.1  mrg /* Chi-squared test for mpfr_nrandom
    1.1  mrg
1.1.1.3  mrg Copyright 2011-2023 Free Software Foundation, Inc.
    1.1  mrg Contributed by Charles Karney <charles (at) karney.com>, SRI International.
    1.1  mrg
    1.1  mrg This file is part of the GNU MPFR Library.
    1.1  mrg
    1.1  mrg The GNU MPFR Library is free software; you can redistribute it and/or modify
    1.1  mrg it under the terms of the GNU Lesser General Public License as published by
    1.1  mrg the Free Software Foundation; either version 3 of the License, or (at your
    1.1  mrg option) any later version.
    1.1  mrg
    1.1  mrg The GNU MPFR Library is distributed in the hope that it will be useful, but
    1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    1.1  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
    1.1  mrg License for more details.
    1.1  mrg
    1.1  mrg You should have received a copy of the GNU Lesser General Public License
    1.1  mrg along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
1.1.1.2  mrg https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
    1.1  mrg 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
    1.1  mrg
    1.1  mrg #include "mpfr-test.h"
    1.1  mrg
    1.1  mrg /* Set z to Phi(x) = erf(x / sqrt(2)) / 2, the cumulative probability function
    1.1  mrg  * for the normal distribution.  We only take differences of this function so
    1.1  mrg  * the offset doesn't matter; here Phi(0) = 0.  Every step is rounded with rnd. */
    1.1  mrg static void
1.1.1.3  mrg normal_cumulative (mpfr_ptr z, mpfr_ptr x, mpfr_rnd_t rnd)
    1.1  mrg {
    1.1  mrg   mpfr_sqrt_ui (z, 2, rnd);     /* z = sqrt(2); z is overwritten before x is read, so z must not alias x */
    1.1  mrg   mpfr_div (z, x, z, rnd);      /* z = x / sqrt(2) */
    1.1  mrg   mpfr_erf (z, z, rnd);         /* z = erf(x / sqrt(2)) */
    1.1  mrg   mpfr_div_ui (z, z, 2, rnd);   /* z = erf(x / sqrt(2)) / 2 */
    1.1  mrg }
    1.1  mrg
    1.1  mrg /* Given nu and chisqp, store in q the probability that chisq > chisqp.  This
    1.1  mrg  * uses A&S 26.4.16,
    1.1  mrg  *
    1.1  mrg  * Q(nu,chisqp) =
    1.1  mrg  *     erfc( (3/2)*sqrt(nu) * ( cbrt(chisqp/nu) - 1 + 2/(9*nu) ) ) / 2
    1.1  mrg  *
    1.1  mrg  * which is valid for nu > 30.  This is the basis for the formula in Knuth,
    1.1  mrg  * TAOCP, Vol 2, 3.3.1, Table 1.  It is more accurate than the similar formula,
    1.1  mrg  * DLMF 8.11.10. */
    1.1  mrg static void
1.1.1.3  mrg chisq_prob (mpfr_ptr q, long nu, mpfr_ptr chisqp)
    1.1  mrg {
    1.1  mrg   mpfr_t t;
    1.1  mrg   mpfr_rnd_t rnd;
    1.1  mrg
    1.1  mrg   rnd = MPFR_RNDN;  /* This uses an approx formula.  Might as well use RNDN. */
    1.1  mrg   mpfr_init2 (t, mpfr_get_prec (q));   /* scratch value at the same precision as q */
    1.1  mrg
    1.1  mrg   mpfr_div_si (q, chisqp, nu, rnd); /* chisqp/nu; chisqp is not read again, so q may alias chisqp */
    1.1  mrg   mpfr_cbrt (q, q, rnd);            /* (chisqp/nu)^(1/3) */
    1.1  mrg   mpfr_sub_ui (q, q, 1, rnd);       /* (chisqp/nu)^(1/3) - 1 */
    1.1  mrg   mpfr_set_ui (t, 2, rnd);
    1.1  mrg   mpfr_div_si (t, t, 9*nu, rnd); /* 2/(9*nu) */
    1.1  mrg   mpfr_add (q, q, t, rnd);       /* (chisqp/nu)^(1/3) - 1 + 2/(9*nu) */
    1.1  mrg   mpfr_sqrt_ui (t, nu, rnd);     /* sqrt(nu) */
    1.1  mrg   mpfr_mul_d (t, t, 1.5, rnd);   /* (3/2)*sqrt(nu) */
    1.1  mrg   mpfr_mul (q, q, t, rnd);       /* arg to erfc */
    1.1  mrg   mpfr_erfc (q, q, rnd);         /* erfc(...) */
    1.1  mrg   mpfr_div_ui (q, q, 2, rnd);    /* erfc(...)/2 */
    1.1  mrg
    1.1  mrg   mpfr_clear (t);
    1.1  mrg }
    1.1  mrg
    1.1  mrg /* The continuous chi-squared test with a set of bins of equal width.
    1.1  mrg  *
    1.1  mrg  * A single precision is picked for sampling and the chi-squared calculation.
    1.1  mrg  * This should be picked high enough so that binning in the test doesn't need to
    1.1  mrg  * be accurately aligned with possible values of the deviates.  Also we need the
    1.1  mrg  * precision big enough that the chi-squared calculation itself is reliable.
    1.1  mrg  *
    1.1  mrg  * There's no particular benefit in testing at very high precisions;
    1.1  mrg  * because of the way tnrandom samples, this just adds additional barely
    1.1  mrg  * significant random bits to the deviates.  So this chi-squared test with
    1.1  mrg  * continuous equal width bins isn't a good tool for finding problems here.
    1.1  mrg  *
    1.1  mrg  * The testing of low precision normal deviates is done by
    1.1  mrg  * test_nrandom_chisq_disc. */
    1.1  mrg static double                  /* returns Q, the chisq_prob value for the computed chi-squared sum */
    1.1  mrg test_nrandom_chisq_cont (long num, mpfr_prec_t prec, int nu,        /* num deviates drawn at precision prec; nu equal-width bins */
    1.1  mrg                          double xmin, double xmax, int verbose)   /* bins span xmin..xmax; nonzero verbose prints a summary */
    1.1  mrg {
    1.1  mrg   mpfr_t x, a, b, dx, z, pa, pb, ps, t;
    1.1  mrg   long *counts;
    1.1  mrg   int i, inexact;
    1.1  mrg   long k;
    1.1  mrg   mpfr_rnd_t rnd, rndd;
    1.1  mrg   double Q, chisq;
    1.1  mrg
    1.1  mrg   rnd = MPFR_RNDN;              /* For chi-squared calculation */
    1.1  mrg   rndd = MPFR_RNDD;             /* For sampling and figuring the bins */
    1.1  mrg   mpfr_inits2 (prec, x, a, b, dx, z, pa, pb, ps, t, (mpfr_ptr) 0);
    1.1  mrg
    1.1  mrg   counts = (long *) tests_allocate ((nu + 1) * sizeof (long));   /* nu bins plus one catch-all slot at index nu */
    1.1  mrg   for (i = 0; i <= nu; i++)
    1.1  mrg     counts[i] = 0;
    1.1  mrg
    1.1  mrg   /* a and b are bounds of nu equally spaced bins.  Set dx = (b-a)/nu */
    1.1  mrg   mpfr_set_d (a, xmin, rnd);
    1.1  mrg   mpfr_set_d (b, xmax, rnd);
    1.1  mrg
    1.1  mrg   mpfr_sub (dx, b, a, rnd);
    1.1  mrg   mpfr_div_si (dx, dx, nu, rnd);
    1.1  mrg
    1.1  mrg   for (k = 0; k < num; ++k)
    1.1  mrg     {
    1.1  mrg       inexact = mpfr_nrandom (x, RANDS, rndd);
    1.1  mrg       if (inexact == 0)
    1.1  mrg         {
    1.1  mrg           /* one call in the loop pretended to return an exact number! */
    1.1  mrg           printf ("Error: mpfr_nrandom() returns a zero ternary value.\n");
    1.1  mrg           exit (1);
    1.1  mrg         }
    1.1  mrg       mpfr_sub (x, x, a, rndd);         /* offset of the deviate from the lower bound */
    1.1  mrg       mpfr_div (x, x, dx, rndd);        /* offset measured in bin widths */
    1.1  mrg       i = mpfr_get_si (x, rndd);        /* rounded toward -infinity: the bin index */
    1.1  mrg       ++counts[i >= 0 && i < nu ? i : nu];      /* anything outside the nu bins is tallied in counts[nu] */
    1.1  mrg     }
    1.1  mrg
    1.1  mrg   mpfr_set (x, a, rnd);                 /* x walks the bin boundaries, starting at a */
    1.1  mrg   normal_cumulative (pa, x, rnd);       /* pa = Phi(a) */
    1.1  mrg   mpfr_add_ui (ps, pa, 1, rnd);         /* ps = Phi(a) + 1; Phi spans a total of 1, so ps - Phi(b) is the mass outside [a, b] */
    1.1  mrg   mpfr_set_zero (t, 1);                 /* t accumulates the chi-squared sum */
    1.1  mrg   for (i = 0; i <= nu; ++i)
    1.1  mrg     {
    1.1  mrg       if (i < nu)
    1.1  mrg         {
    1.1  mrg           mpfr_add (x, x, dx, rnd);     /* upper boundary of bin i */
    1.1  mrg           normal_cumulative (pb, x, rnd);
    1.1  mrg           mpfr_sub (pa, pb, pa, rnd); /* prob for this bin */
    1.1  mrg         }
    1.1  mrg       else
    1.1  mrg         mpfr_sub (pa, ps, pa, rnd); /* prob for last bin, i = nu */
    1.1  mrg
    1.1  mrg       /* Compute z = counts[i] - num * p; t += z * z / (num * p) */
    1.1  mrg       mpfr_mul_ui (pa, pa, num, rnd);           /* pa = expected count num * p */
    1.1  mrg       mpfr_ui_sub (z, counts[i], pa, rnd);
    1.1  mrg       mpfr_sqr (z, z, rnd);
    1.1  mrg       mpfr_div (z, z, pa, rnd);
    1.1  mrg       mpfr_add (t, t, z, rnd);
    1.1  mrg       mpfr_swap (pa, pb);       /* i.e., pa = pb, Phi at the boundary just reached; the old pa is discarded */
    1.1  mrg     }
    1.1  mrg
    1.1  mrg   chisq = mpfr_get_d (t, rnd);          /* keep the statistic for the verbose report */
    1.1  mrg   chisq_prob (t, nu, t);                /* t is replaced in place by the probability */
    1.1  mrg   Q = mpfr_get_d (t, rnd);
    1.1  mrg   if (verbose)
    1.1  mrg     {
    1.1  mrg       printf ("num = %ld, equal bins in [%.2f, %.2f], nu = %d: chisq = %.2f\n",
    1.1  mrg               num, xmin, xmax, nu, chisq);
    1.1  mrg       if (Q < 0.05)
    1.1  mrg         printf ("    WARNING: probability (less than 5%%) = %.2e\n", Q);
    1.1  mrg     }
    1.1  mrg
    1.1  mrg   tests_free (counts, (nu + 1) * sizeof (long));
    1.1  mrg   mpfr_clears (x, a, b, dx, z, pa, pb, ps, t, (mpfr_ptr) 0);
    1.1  mrg   return Q;
    1.1  mrg }
    1.1  mrg
    1.1  mrg /* Return a sequential number for a positive low-precision x.  x is altered by
1.1.1.2  mrg  * this function.  Low precision means prec = 2, 3, or 4.  High values of
    1.1  mrg  * precision will result in integer overflow. */
    1.1  mrg static long
1.1.1.3  mrg sequential (mpfr_ptr x)
    1.1  mrg {
    1.1  mrg   long expt, prec;
    1.1  mrg
    1.1  mrg   prec = mpfr_get_prec (x);
    1.1  mrg   expt =  mpfr_get_exp (x);
    1.1  mrg   mpfr_mul_2si (x, x, prec - expt, MPFR_RNDN);  /* x *= 2^(prec - expt); this is the step that alters the caller's x */
    1.1  mrg
    1.1  mrg   return expt * (1 << (prec - 1)) + mpfr_get_si (x, MPFR_RNDN);  /* exponent times 2^(prec-1), plus the scaled value of x */
    1.1  mrg }
    1.1  mrg
    1.1  mrg /* The chi-squared test on low precision normal deviates.  wprec is the working
    1.1  mrg  * precision for the chi-squared calculation.  prec is the precision for the
    1.1  mrg  * sampling; choose this in [2,5].  The bins consist of all the possible
    1.1  mrg  * deviate values in the range [xmin, xmax] coupled with the value of inexact.
    1.1  mrg  * Thus with prec = 2, the bins are
    1.1  mrg  *   ...
    1.1  mrg  *   (7/16, 1/2)  x = 1/2, inexact = +1
    1.1  mrg  *   (1/2 , 5/8)  x = 1/2, inexact = -1
    1.1  mrg  *   (5/8 , 3/4)  x = 3/4, inexact = +1
    1.1  mrg  *   (3/4 , 7/8)  x = 3/4, inexact = -1
    1.1  mrg  *   (7/8 , 1  )  x = 1  , inexact = +1
    1.1  mrg  *   (1   , 5/4)  x = 1  , inexact = -1
    1.1  mrg  *   (5/4 , 3/2)  x = 3/2, inexact = +1
    1.1  mrg  *   (3/2 , 7/4)  x = 3/2, inexact = -1
    1.1  mrg  *   ...
    1.1  mrg  * In addition, two bins are allocated for [0,xmin) and (xmax,inf).
    1.1  mrg  *
    1.1  mrg  * This test is applied to the absolute values of the deviates.  The sign is
    1.1  mrg  * tested by test_nrandom_chisq_cont.  In any case, the way the sign is
    1.1  mrg  * assigned in mpfr_nrandom is trivial.  In addition, the sampling is with
    1.1  mrg  * MPFR_RNDN.  This is the rounding mode which elicits the most information.
    1.1  mrg  * trandom_deviate includes checks on the consistency of the results extracted
    1.1  mrg  * from a random_deviate with other rounding modes.  */
    1.1  mrg static double                  /* returns Q, the chisq_prob value for the computed chi-squared sum */
    1.1  mrg test_nrandom_chisq_disc (long num, mpfr_prec_t wprec, int prec,     /* num deviates drawn at precision prec; wprec for the statistics */
    1.1  mrg                          double xmin, double xmax, int verbose)   /* xmin and xmax are first rounded to prec bits; nonzero verbose prints a summary */
    1.1  mrg {
    1.1  mrg   mpfr_t x, v, pa, pb, z, t;
    1.1  mrg   mpfr_rnd_t rnd;
    1.1  mrg   int i, inexact, nu;
    1.1  mrg   long *counts;
    1.1  mrg   long k, seqmin, seqmax, seq;
    1.1  mrg   double Q, chisq;
    1.1  mrg
    1.1  mrg   rnd = MPFR_RNDN;
    1.1  mrg   mpfr_init2 (x, prec);                 /* x holds the deviates at the sampling precision */
    1.1  mrg   mpfr_init2 (v, prec+1);               /* v has one extra bit; it later steps through the bin boundaries */
    1.1  mrg   mpfr_inits2 (wprec, pa, pb, z, t, (mpfr_ptr) 0);
    1.1  mrg
    1.1  mrg   mpfr_set_d (x, xmin, rnd);
    1.1  mrg   xmin = mpfr_get_d (x, rnd);           /* replace xmin by its value rounded to prec bits */
    1.1  mrg   mpfr_set (v, x, rnd);                 /* v = rounded xmin, the threshold used in the sampling loop */
    1.1  mrg   seqmin = sequential (x);              /* note: sequential alters x */
    1.1  mrg   mpfr_set_d (x, xmax, rnd);
    1.1  mrg   xmax = mpfr_get_d (x, rnd);           /* replace xmax by its value rounded to prec bits */
    1.1  mrg   seqmax = sequential (x);
    1.1  mrg
    1.1  mrg   /* Two bins for each sequential number (for inexact = +/- 1), plus 1 for x <
    1.1  mrg    * xmin and 1 for x > xmax, minus 1 for degrees of freedom */
    1.1  mrg   nu = 2 * (seqmax - seqmin + 1) + 2 - 1;
    1.1  mrg   counts = (long *) tests_allocate ((nu + 1) * sizeof (long));   /* nu + 1 bins in total, indices 0..nu */
    1.1  mrg   for (i = 0; i <= nu; i++)
    1.1  mrg     counts[i] = 0;
    1.1  mrg
    1.1  mrg   for (k = 0; k < num; ++k)
    1.1  mrg     {
    1.1  mrg       inexact = mpfr_nrandom (x, RANDS, rnd);
    1.1  mrg       if (mpfr_signbit (x))
    1.1  mrg         {
    1.1  mrg           inexact = -inexact;           /* flip the ternary value together with the sign so it refers to |x| */
    1.1  mrg           mpfr_setsign (x, x, 0, rnd);  /* x = |x| */
    1.1  mrg         }
    1.1  mrg       /* Don't call sequential with small args to avoid undefined behavior with
    1.1  mrg        * zero and possibility of overflow. */
    1.1  mrg       seq = mpfr_greaterequal_p (x, v) ? sequential (x) : seqmin - 1;
    1.1  mrg       ++counts[seq < seqmin ? 0 :               /* below xmin: first bin */
    1.1  mrg                seq <= seqmax ? 2 * (seq - seqmin) + 1 + (inexact > 0 ? 0 : 1) :         /* two bins per value; inexact > 0 selects the first of the pair */
    1.1  mrg                nu];                             /* above xmax: last bin */
    1.1  mrg     }
    1.1  mrg
    1.1  mrg   mpfr_set_zero (v, 1);
    1.1  mrg   normal_cumulative (pa, v, rnd);       /* pa = Phi(0), the lower edge of the first bin */
    1.1  mrg   /* Cycle through all the bin boundaries using mpfr_nextabove at precision
    1.1  mrg    * prec + 1 starting at mpfr_nextbelow (xmin) */
    1.1  mrg   mpfr_set_d (x, xmin, rnd);
    1.1  mrg   mpfr_set (v, x, rnd);
    1.1  mrg   mpfr_nextbelow (v);
    1.1  mrg   mpfr_nextbelow (v);                   /* two steps below xmin, so the first mpfr_nextabove in the loop lands one step below xmin */
    1.1  mrg   mpfr_set_zero (t, 1);                 /* t accumulates the chi-squared sum */
    1.1  mrg   for (i = 0; i <= nu; ++i)
    1.1  mrg     {
    1.1  mrg       if (i < nu)
    1.1  mrg         mpfr_nextabove (v);             /* upper boundary of bin i */
    1.1  mrg       else
    1.1  mrg         mpfr_set_inf (v, 1);            /* the last bin extends to +infinity */
    1.1  mrg       normal_cumulative (pb, v, rnd);
    1.1  mrg       mpfr_sub (pa, pb, pa, rnd);       /* prob for this bin on the positive half only */
    1.1  mrg
    1.1  mrg       /* Compute z = counts[i] - num * p; t += z * z / (num * p).  2*num to
    1.1  mrg        * account for the fact the p needs to be doubled since we are
    1.1  mrg        * considering only the absolute value of the deviates. */
    1.1  mrg       mpfr_mul_ui (pa, pa, 2*num, rnd);
    1.1  mrg       mpfr_ui_sub (z, counts[i], pa, rnd);
    1.1  mrg       mpfr_sqr (z, z, rnd);
    1.1  mrg       mpfr_div (z, z, pa, rnd);
    1.1  mrg       mpfr_add (t, t, z, rnd);
    1.1  mrg       mpfr_swap (pa, pb);       /* i.e., pa = pb, Phi at the boundary just reached; the old pa is discarded */
    1.1  mrg     }
    1.1  mrg
    1.1  mrg   chisq = mpfr_get_d (t, rnd);          /* keep the statistic for the verbose report */
    1.1  mrg   chisq_prob (t, nu, t);                /* t is replaced in place by the probability */
    1.1  mrg   Q = mpfr_get_d (t, rnd);
    1.1  mrg   if (verbose)
    1.1  mrg     {
    1.1  mrg       printf ("num = %ld, discrete (prec = %d) bins in [%.6f, %.2f], "
    1.1  mrg               "nu = %d: chisq = %.2f\n", num, prec, xmin, xmax, nu, chisq);
    1.1  mrg       if (Q < 0.05)
    1.1  mrg         printf ("    WARNING: probability (less than 5%%) = %.2e\n", Q);
    1.1  mrg     }
    1.1  mrg
    1.1  mrg   tests_free (counts, (nu + 1) * sizeof (long));
    1.1  mrg   mpfr_clears (x, v, pa, pb, z, t, (mpfr_ptr) 0);
    1.1  mrg   return Q;
    1.1  mrg }
    1.1  mrg
    1.1  mrg static void                    /* Run one chi-squared test up to three times; exit (1) if the outcome is too improbable. */
    1.1  mrg run_chisq (double (*f)(long, mpfr_prec_t, int, double, double, int),        /* f: the test to run; it returns a probability Q */
    1.1  mrg            long num, mpfr_prec_t prec, int bin,                 /* passed through as the first three arguments of f */
    1.1  mrg            double xmin, double xmax, int verbose)               /* passed through as the last three arguments of f */
    1.1  mrg {
    1.1  mrg   double Q, Qcum, Qbad, Qthresh;
    1.1  mrg   int i;
    1.1  mrg
    1.1  mrg   Qcum = 1;                     /* running product of the Q values seen so far */
    1.1  mrg   Qbad = 1.e-9;                 /* at or above this a result is not an outright failure */
    1.1  mrg   Qthresh = 0.01;               /* above this a single round is accepted as a pass */
    1.1  mrg   for (i = 0; i < 3; ++i)
    1.1  mrg     {
    1.1  mrg       Q = (*f)(num, prec, bin, xmin, xmax, verbose);
    1.1  mrg       Qcum *= Q;
    1.1  mrg       if (Q > Qthresh)
    1.1  mrg         return;                 /* plausible result: the test passes */
    1.1  mrg       else if (Q < Qbad)
    1.1  mrg         {
    1.1  mrg           printf ("Error: mpfr_nrandom chi-squared failure "
    1.1  mrg                   "(prob = %.2e)\n", Q);
    1.1  mrg           exit (1);
    1.1  mrg         }
    1.1  mrg       num *= 10;                /* inconclusive: retry with ten times as many samples ... */
    1.1  mrg       Qthresh /= 10;            /* ... and a pass threshold ten times smaller */
    1.1  mrg     }
    1.1  mrg   if (Qcum < Qbad)              /* Presumably this is true: three rounds with Q <= 0.01, 0.001, 0.0001 bound the product by about 1e-9 */
    1.1  mrg     {
    1.1  mrg       printf ("Error: mpfr_nrandom combined chi-squared failure "
    1.1  mrg               "(prob = %.2e)\n", Qcum);
    1.1  mrg       exit (1);
    1.1  mrg     }
    1.1  mrg }
    1.1  mrg
    1.1  mrg int                            /* Test driver: one continuous test, then discrete tests at prec = 2, 3 and 4. */
    1.1  mrg main (int argc, char *argv[])  /* optional argv[1]: sample count; giving any argument also turns on verbose output */
    1.1  mrg {
    1.1  mrg   long nbtests;
    1.1  mrg   int verbose;
    1.1  mrg
    1.1  mrg   tests_start_mpfr ();
    1.1  mrg
    1.1  mrg   verbose = 0;
    1.1  mrg   nbtests = 100000;             /* default number of deviates for the first round of each test */
    1.1  mrg   if (argc > 1)
    1.1  mrg     {
    1.1  mrg       long a = atol (argv[1]);
    1.1  mrg       verbose = 1;
    1.1  mrg       if (a != 0)               /* an argument that converts to 0 keeps the default count */
    1.1  mrg         nbtests = a;
    1.1  mrg     }
    1.1  mrg
    1.1  mrg   run_chisq (test_nrandom_chisq_cont, nbtests, 64, 60, -4, 4, verbose);         /* precision 64, 60 equal bins on [-4, 4] */
    1.1  mrg   run_chisq (test_nrandom_chisq_disc, nbtests, 64, 2, 0.0005, 3, verbose);      /* working precision 64, deviates at prec = 2 */
    1.1  mrg   run_chisq (test_nrandom_chisq_disc, nbtests, 64, 3, 0.002, 4, verbose);       /* working precision 64, deviates at prec = 3 */
    1.1  mrg   run_chisq (test_nrandom_chisq_disc, nbtests, 64, 4, 0.004, 4, verbose);       /* working precision 64, deviates at prec = 4 */
    1.1  mrg
    1.1  mrg   tests_end_mpfr ();
    1.1  mrg   return 0;
    1.1  mrg }