Home | History | Annotate | Line # | Download | only in intrinsics
random.c revision 1.1.1.4
      1 /* Implementation of the RANDOM intrinsics
      2    Copyright (C) 2002-2024 Free Software Foundation, Inc.
      3    Contributed by Lars Segerlund <seger (at) linuxmail.org>,
      4    Steve Kargl and Janne Blomqvist.
      5 
      6 This file is part of the GNU Fortran runtime library (libgfortran).
      7 
      8 Libgfortran is free software; you can redistribute it and/or
      9 modify it under the terms of the GNU General Public
     10 License as published by the Free Software Foundation; either
     11 version 3 of the License, or (at your option) any later version.
     12 
     13 Ligbfortran is distributed in the hope that it will be useful,
     14 but WITHOUT ANY WARRANTY; without even the implied warranty of
     15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16 GNU General Public License for more details.
     17 
     18 Under Section 7 of GPL version 3, you are granted additional
     19 permissions described in the GCC Runtime Library Exception, version
     20 3.1, as published by the Free Software Foundation.
     21 
     22 You should have received a copy of the GNU General Public License and
     23 a copy of the GCC Runtime Library Exception along with this program;
     24 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     25 <http://www.gnu.org/licenses/>.  */
     26 
     27 /* For rand_s.  */
     28 #define _CRT_RAND_S
     29 
     30 #include "libgfortran.h"
     31 #include <gthr.h>
     32 #include <string.h>
     33 
     34 #ifdef HAVE_UNISTD_H
     35 #include <unistd.h>
     36 #endif
     37 #include <sys/stat.h>
     38 #include <fcntl.h>
     39 #include "time_1.h"
     40 #ifdef HAVE_SYS_RANDOM_H
     41 #include <sys/random.h>
     42 #endif
     43 
     44 #ifdef __MINGW32__
     45 #define HAVE_GETPID 1
     46 #include <process.h>
     47 #include <_mingw.h> /* For __MINGW64_VERSION_MAJOR  */
     48 #endif
     49 
     50 extern void random_r4 (GFC_REAL_4 *);
     51 iexport_proto(random_r4);
     52 
     53 extern void random_r8 (GFC_REAL_8 *);
     54 iexport_proto(random_r8);
     55 
     56 extern void arandom_r4 (gfc_array_r4 *);
     57 export_proto(arandom_r4);
     58 
     59 extern void arandom_r8 (gfc_array_r8 *);
     60 export_proto(arandom_r8);
     61 
     62 #ifdef HAVE_GFC_REAL_10
     63 
     64 extern void random_r10 (GFC_REAL_10 *);
     65 iexport_proto(random_r10);
     66 
     67 extern void arandom_r10 (gfc_array_r10 *);
     68 export_proto(arandom_r10);
     69 
     70 #endif
     71 
     72 #ifdef HAVE_GFC_REAL_16
     73 
     74 extern void random_r16 (GFC_REAL_16 *);
     75 iexport_proto(random_r16);
     76 
     77 extern void arandom_r16 (gfc_array_r16 *);
     78 export_proto(arandom_r16);
     79 
     80 #endif
     81 
     82 #ifdef HAVE_GFC_REAL_17
     83 
     84 extern void random_r17 (GFC_REAL_17 *);
     85 iexport_proto(random_r17);
     86 
     87 extern void arandom_r17 (gfc_array_r17 *);
     88 export_proto(arandom_r17);
     89 
     90 #endif
     91 
     92 #ifdef __GTHREAD_MUTEX_INIT
     93 static __gthread_mutex_t random_lock = __GTHREAD_MUTEX_INIT;
     94 #else
     95 static __gthread_mutex_t random_lock;
     96 #endif
     97 
     98 /* Helper routines to map a GFC_UINTEGER_* to the corresponding
     99    GFC_REAL_* types in the range of [0,1).  If GFC_REAL_*_RADIX are 2
    100    or 16, respectively, we mask off the bits that don't fit into the
    101    correct GFC_REAL_*, convert to the real type, then multiply by the
    102    correct offset.  */
    103 
    104 
    105 static void
    106 rnumber_4 (GFC_REAL_4 *f, GFC_UINTEGER_4 v)
    107 {
    108   GFC_UINTEGER_4 mask;
    109 #if GFC_REAL_4_RADIX == 2
    110   mask = ~ (GFC_UINTEGER_4) 0u << (32 - GFC_REAL_4_DIGITS);
    111 #elif GFC_REAL_4_RADIX == 16
    112   mask = ~ (GFC_UINTEGER_4) 0u << ((8 - GFC_REAL_4_DIGITS) * 4);
    113 #else
    114 #error "GFC_REAL_4_RADIX has unknown value"
    115 #endif
    116   v = v & mask;
    117   *f = (GFC_REAL_4) v * GFC_REAL_4_LITERAL(0x1.p-32);
    118 }
    119 
    120 static void
    121 rnumber_8 (GFC_REAL_8 *f, GFC_UINTEGER_8 v)
    122 {
    123   GFC_UINTEGER_8 mask;
    124 #if GFC_REAL_8_RADIX == 2
    125   mask = ~ (GFC_UINTEGER_8) 0u << (64 - GFC_REAL_8_DIGITS);
    126 #elif GFC_REAL_8_RADIX == 16
    127   mask = ~ (GFC_UINTEGER_8) 0u << (16 - GFC_REAL_8_DIGITS) * 4);
    128 #else
    129 #error "GFC_REAL_8_RADIX has unknown value"
    130 #endif
    131   v = v & mask;
    132   *f = (GFC_REAL_8) v * GFC_REAL_8_LITERAL(0x1.p-64);
    133 }
    134 
    135 #ifdef HAVE_GFC_REAL_10
    136 
    137 static void
    138 rnumber_10 (GFC_REAL_10 *f, GFC_UINTEGER_8 v)
    139 {
    140   GFC_UINTEGER_8 mask;
    141 #if GFC_REAL_10_RADIX == 2
    142   mask = ~ (GFC_UINTEGER_8) 0u << (64 - GFC_REAL_10_DIGITS);
    143 #elif GFC_REAL_10_RADIX == 16
    144   mask = ~ (GFC_UINTEGER_10) 0u << ((16 - GFC_REAL_10_DIGITS) * 4);
    145 #else
    146 #error "GFC_REAL_10_RADIX has unknown value"
    147 #endif
    148   v = v & mask;
    149   *f = (GFC_REAL_10) v * GFC_REAL_10_LITERAL(0x1.p-64);
    150 }
    151 #endif
    152 
    153 #ifdef HAVE_GFC_REAL_16
    154 
    155 /* For REAL(KIND=16), we only need to mask off the lower bits.  */
    156 
    157 static void
    158 rnumber_16 (GFC_REAL_16 *f, GFC_UINTEGER_8 v1, GFC_UINTEGER_8 v2)
    159 {
    160   GFC_UINTEGER_8 mask;
    161 #if GFC_REAL_16_RADIX == 2
    162   mask = ~ (GFC_UINTEGER_8) 0u << (128 - GFC_REAL_16_DIGITS);
    163 #elif GFC_REAL_16_RADIX == 16
    164   mask = ~ (GFC_UINTEGER_8) 0u << ((32 - GFC_REAL_16_DIGITS) * 4);
    165 #else
    166 #error "GFC_REAL_16_RADIX has unknown value"
    167 #endif
    168   v2 = v2 & mask;
    169   *f = (GFC_REAL_16) v1 * GFC_REAL_16_LITERAL(0x1.p-64)
    170     + (GFC_REAL_16) v2 * GFC_REAL_16_LITERAL(0x1.p-128);
    171 }
    172 #endif
    173 
    174 #ifdef HAVE_GFC_REAL_17
    175 
    176 /* For REAL(KIND=16), we only need to mask off the lower bits.  */
    177 
    178 static void
    179 rnumber_17 (GFC_REAL_17 *f, GFC_UINTEGER_8 v1, GFC_UINTEGER_8 v2)
    180 {
    181   GFC_UINTEGER_8 mask;
    182 #if GFC_REAL_17_RADIX == 2
    183   mask = ~ (GFC_UINTEGER_8) 0u << (128 - GFC_REAL_17_DIGITS);
    184 #elif GFC_REAL_17_RADIX == 16
    185   mask = ~ (GFC_UINTEGER_8) 0u << ((32 - GFC_REAL_17_DIGITS) * 4);
    186 #else
    187 #error "GFC_REAL_17_RADIX has unknown value"
    188 #endif
    189   v2 = v2 & mask;
    190   *f = (GFC_REAL_17) v1 * GFC_REAL_17_LITERAL(0x1.p-64)
    191     + (GFC_REAL_17) v2 * GFC_REAL_17_LITERAL(0x1.p-128);
    192 }
    193 #endif
    194 
    195 
    196 /*
    197 
    198    We use the xoshiro256** generator, a fast high-quality generator
    199    that:
    200 
    201    - passes TestU1 without any failures
    202 
    203    - provides a "jump" function making it easy to provide many
    204      independent parallel streams.
    205 
    206    - Long period of 2**256 - 1
    207 
    208    A description can be found at
    209 
    210    http://prng.di.unimi.it/
    211 
    212    or
    213 
    214    https://arxiv.org/abs/1805.01407
    215 
    216    The paper includes public domain source code which is the basis for
    217    the implementation below.
    218 
    219 */
    220 typedef struct
    221 {
    222   bool init;
    223   uint64_t s[4];
    224 }
    225 prng_state;
    226 
    227 
    228 /* master_state is the only variable protected by random_lock.  */
    229 static prng_state master_state = { .init = false, .s = {
    230     0xad63fa1ed3b55f36ULL, 0xd94473e78978b497ULL, 0xbc60592a98172477ULL,
    231     0xa3de7c6e81265301ULL }
    232 };
    233 
    234 
    235 static __gthread_key_t rand_state_key;
    236 
    237 static prng_state*
    238 get_rand_state (void)
    239 {
    240   /* For single threaded apps.  */
    241   static prng_state rand_state;
    242 
    243   if (__gthread_active_p ())
    244     {
    245       void* p = __gthread_getspecific (rand_state_key);
    246       if (!p)
    247 	{
    248 	  p = xcalloc (1, sizeof (prng_state));
    249 	  __gthread_setspecific (rand_state_key, p);
    250 	}
    251       return p;
    252     }
    253   else
    254     return &rand_state;
    255 }
    256 
    257 static inline uint64_t
    258 rotl (const uint64_t x, int k)
    259 {
    260 	return (x << k) | (x >> (64 - k));
    261 }
    262 
    263 
    264 static uint64_t
    265 prng_next (prng_state* rs)
    266 {
    267   const uint64_t result = rotl(rs->s[1] * 5, 7) * 9;
    268 
    269   const uint64_t t = rs->s[1] << 17;
    270 
    271   rs->s[2] ^= rs->s[0];
    272   rs->s[3] ^= rs->s[1];
    273   rs->s[1] ^= rs->s[2];
    274   rs->s[0] ^= rs->s[3];
    275 
    276   rs->s[2] ^= t;
    277 
    278   rs->s[3] = rotl(rs->s[3], 45);
    279 
    280   return result;
    281 }
    282 
    283 
    284 /* This is the jump function for the generator. It is equivalent to
    285    2^128 calls to prng_next(); it can be used to generate 2^128
    286    non-overlapping subsequences for parallel computations. */
    287 
    288 static void
    289 jump (prng_state* rs)
    290 {
    291   static const uint64_t JUMP[] = { 0x180ec6d33cfd0aba, 0xd5a61266f0c9392c, 0xa9582618e03fc9aa, 0x39abdc4529b1661c };
    292 
    293   uint64_t s0 = 0;
    294   uint64_t s1 = 0;
    295   uint64_t s2 = 0;
    296   uint64_t s3 = 0;
    297   for(size_t i = 0; i < sizeof JUMP / sizeof *JUMP; i++)
    298     for(int b = 0; b < 64; b++) {
    299       if (JUMP[i] & UINT64_C(1) << b) {
    300 	s0 ^= rs->s[0];
    301 	s1 ^= rs->s[1];
    302 	s2 ^= rs->s[2];
    303 	s3 ^= rs->s[3];
    304       }
    305       prng_next (rs);
    306     }
    307 
    308   rs->s[0] = s0;
    309   rs->s[1] = s1;
    310   rs->s[2] = s2;
    311   rs->s[3] = s3;
    312 }
    313 
    314 
    315 /* Splitmix64 recommended by xoshiro author for initializing.  After
    316    getting one uint64_t value from the OS, this is used to fill in the
    317    rest of the xoshiro state.  */
    318 
    319 static uint64_t
    320 splitmix64 (uint64_t x)
    321 {
    322   uint64_t z = (x += 0x9e3779b97f4a7c15);
    323   z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
    324   z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
    325   return z ^ (z >> 31);
    326 }
    327 
    328 
    329 /* Get some bytes from the operating system in order to seed
    330    the PRNG.  */
    331 
    332 static int
    333 getosrandom (void *buf, size_t buflen)
    334 {
    335   /* rand_s is available in MinGW-w64 but not plain MinGW.  */
    336 #if defined(__MINGW64_VERSION_MAJOR)
    337   unsigned int* b = buf;
    338   for (size_t i = 0; i < buflen / sizeof (unsigned int); i++)
    339     rand_s (&b[i]);
    340   return buflen;
    341 #else
    342 #ifdef HAVE_GETENTROPY
    343   if (getentropy (buf, buflen) == 0)
    344     return buflen;
    345 #endif
    346   int flags = O_RDONLY;
    347 #ifdef O_CLOEXEC
    348   flags |= O_CLOEXEC;
    349 #endif
    350   int fd = open("/dev/urandom", flags);
    351   if (fd != -1)
    352     {
    353       int res = read(fd, buf, buflen);
    354       close (fd);
    355       return res;
    356     }
    357   uint64_t seed = 0x047f7684e9fc949dULL;
    358   time_t secs;
    359   long usecs;
    360   if (gf_gettime (&secs, &usecs) == 0)
    361     {
    362       seed ^= secs;
    363       seed ^= usecs;
    364     }
    365 #ifdef HAVE_GETPID
    366   pid_t pid = getpid();
    367   seed ^= pid;
    368 #endif
    369   size_t size = buflen < sizeof (uint64_t) ? buflen : sizeof (uint64_t);
    370   memcpy (buf, &seed, size);
    371   return size;
    372 #endif /* __MINGW64_VERSION_MAJOR  */
    373 }
    374 
    375 
    376 /* Initialize the random number generator for the current thread,
    377    using the master state and the number of times we must jump.  */
    378 
    379 static void
    380 init_rand_state (prng_state* rs, const bool locked)
    381 {
    382   if (!locked)
    383     __gthread_mutex_lock (&random_lock);
    384   if (!master_state.init)
    385     {
    386       uint64_t os_seed;
    387       getosrandom (&os_seed, sizeof (os_seed));
    388       for (uint64_t i = 0; i < sizeof (master_state.s) / sizeof (uint64_t); i++)
    389 	{
    390           os_seed = splitmix64 (os_seed);
    391           master_state.s[i] = os_seed;
    392         }
    393       master_state.init = true;
    394     }
    395   memcpy (&rs->s, master_state.s, sizeof (master_state.s));
    396   jump (&master_state);
    397   if (!locked)
    398     __gthread_mutex_unlock (&random_lock);
    399   rs->init = true;
    400 }
    401 
    402 
    403 /*  This function produces a REAL(4) value from the uniform distribution
    404     with range [0,1).  */
    405 
    406 void
    407 random_r4 (GFC_REAL_4 *x)
    408 {
    409   prng_state* rs = get_rand_state();
    410 
    411   if (unlikely (!rs->init))
    412     init_rand_state (rs, false);
    413   uint64_t r = prng_next (rs);
    414   /* Take the higher bits, ensuring that a stream of real(4), real(8),
    415      and real(10) will be identical (except for precision).  */
    416   uint32_t high = (uint32_t) (r >> 32);
    417   rnumber_4 (x, high);
    418 }
    419 iexport(random_r4);
    420 
    421 /*  This function produces a REAL(8) value from the uniform distribution
    422     with range [0,1).  */
    423 
    424 void
    425 random_r8 (GFC_REAL_8 *x)
    426 {
    427   GFC_UINTEGER_8 r;
    428   prng_state* rs = get_rand_state();
    429 
    430   if (unlikely (!rs->init))
    431     init_rand_state (rs, false);
    432   r = prng_next (rs);
    433   rnumber_8 (x, r);
    434 }
    435 iexport(random_r8);
    436 
    437 #ifdef HAVE_GFC_REAL_10
    438 
    439 /*  This function produces a REAL(10) value from the uniform distribution
    440     with range [0,1).  */
    441 
    442 void
    443 random_r10 (GFC_REAL_10 *x)
    444 {
    445   GFC_UINTEGER_8 r;
    446   prng_state* rs = get_rand_state();
    447 
    448   if (unlikely (!rs->init))
    449     init_rand_state (rs, false);
    450   r = prng_next (rs);
    451   rnumber_10 (x, r);
    452 }
    453 iexport(random_r10);
    454 
    455 #endif
    456 
    457 /*  This function produces a REAL(16) value from the uniform distribution
    458     with range [0,1).  */
    459 
    460 #ifdef HAVE_GFC_REAL_16
    461 
    462 void
    463 random_r16 (GFC_REAL_16 *x)
    464 {
    465   GFC_UINTEGER_8 r1, r2;
    466   prng_state* rs = get_rand_state();
    467 
    468   if (unlikely (!rs->init))
    469     init_rand_state (rs, false);
    470   r1 = prng_next (rs);
    471   r2 = prng_next (rs);
    472   rnumber_16 (x, r1, r2);
    473 }
    474 iexport(random_r16);
    475 
    476 
    477 #endif
    478 
    479 /*  This function produces a REAL(16) value from the uniform distribution
    480     with range [0,1).  */
    481 
    482 #ifdef HAVE_GFC_REAL_17
    483 
    484 void
    485 random_r17 (GFC_REAL_17 *x)
    486 {
    487   GFC_UINTEGER_8 r1, r2;
    488   prng_state* rs = get_rand_state();
    489 
    490   if (unlikely (!rs->init))
    491     init_rand_state (rs, false);
    492   r1 = prng_next (rs);
    493   r2 = prng_next (rs);
    494   rnumber_17 (x, r1, r2);
    495 }
    496 iexport(random_r17);
    497 
    498 
    499 #endif
    500 
    501 /*  This function fills a REAL(4) array with values from the uniform
    502     distribution with range [0,1).  */
    503 
    504 void
    505 arandom_r4 (gfc_array_r4 *x)
    506 {
    507   index_type count[GFC_MAX_DIMENSIONS];
    508   index_type extent[GFC_MAX_DIMENSIONS];
    509   index_type stride[GFC_MAX_DIMENSIONS];
    510   index_type stride0;
    511   index_type dim;
    512   GFC_REAL_4 *dest;
    513   prng_state* rs = get_rand_state();
    514 
    515   dest = x->base_addr;
    516 
    517   dim = GFC_DESCRIPTOR_RANK (x);
    518 
    519   for (index_type n = 0; n < dim; n++)
    520     {
    521       count[n] = 0;
    522       stride[n] = GFC_DESCRIPTOR_STRIDE(x,n);
    523       extent[n] = GFC_DESCRIPTOR_EXTENT(x,n);
    524       if (extent[n] <= 0)
    525         return;
    526     }
    527 
    528   stride0 = stride[0];
    529 
    530   if (unlikely (!rs->init))
    531     init_rand_state (rs, false);
    532 
    533   while (dest)
    534     {
    535       /* random_r4 (dest);  */
    536       uint64_t r = prng_next (rs);
    537       uint32_t high = (uint32_t) (r >> 32);
    538       rnumber_4 (dest, high);
    539 
    540       /* Advance to the next element.  */
    541       dest += stride0;
    542       count[0]++;
    543       /* Advance to the next source element.  */
    544       index_type n = 0;
    545       while (count[n] == extent[n])
    546         {
    547           /* When we get to the end of a dimension, reset it and increment
    548              the next dimension.  */
    549           count[n] = 0;
    550           /* We could precalculate these products, but this is a less
    551              frequently used path so probably not worth it.  */
    552           dest -= stride[n] * extent[n];
    553           n++;
    554           if (n == dim)
    555             {
    556               dest = NULL;
    557               break;
    558             }
    559           else
    560             {
    561               count[n]++;
    562               dest += stride[n];
    563             }
    564         }
    565     }
    566 }
    567 
    568 /*  This function fills a REAL(8) array with values from the uniform
    569     distribution with range [0,1).  */
    570 
    571 void
    572 arandom_r8 (gfc_array_r8 *x)
    573 {
    574   index_type count[GFC_MAX_DIMENSIONS];
    575   index_type extent[GFC_MAX_DIMENSIONS];
    576   index_type stride[GFC_MAX_DIMENSIONS];
    577   index_type stride0;
    578   index_type dim;
    579   GFC_REAL_8 *dest;
    580   prng_state* rs = get_rand_state();
    581 
    582   dest = x->base_addr;
    583 
    584   dim = GFC_DESCRIPTOR_RANK (x);
    585 
    586   for (index_type n = 0; n < dim; n++)
    587     {
    588       count[n] = 0;
    589       stride[n] = GFC_DESCRIPTOR_STRIDE(x,n);
    590       extent[n] = GFC_DESCRIPTOR_EXTENT(x,n);
    591       if (extent[n] <= 0)
    592         return;
    593     }
    594 
    595   stride0 = stride[0];
    596 
    597   if (unlikely (!rs->init))
    598     init_rand_state (rs, false);
    599 
    600   while (dest)
    601     {
    602       /* random_r8 (dest);  */
    603       uint64_t r = prng_next (rs);
    604       rnumber_8 (dest, r);
    605 
    606       /* Advance to the next element.  */
    607       dest += stride0;
    608       count[0]++;
    609       /* Advance to the next source element.  */
    610       index_type n = 0;
    611       while (count[n] == extent[n])
    612         {
    613           /* When we get to the end of a dimension, reset it and increment
    614              the next dimension.  */
    615           count[n] = 0;
    616           /* We could precalculate these products, but this is a less
    617              frequently used path so probably not worth it.  */
    618           dest -= stride[n] * extent[n];
    619           n++;
    620           if (n == dim)
    621             {
    622               dest = NULL;
    623               break;
    624             }
    625           else
    626             {
    627               count[n]++;
    628               dest += stride[n];
    629             }
    630         }
    631     }
    632 }
    633 
    634 #ifdef HAVE_GFC_REAL_10
    635 
    636 /*  This function fills a REAL(10) array with values from the uniform
    637     distribution with range [0,1).  */
    638 
    639 void
    640 arandom_r10 (gfc_array_r10 *x)
    641 {
    642   index_type count[GFC_MAX_DIMENSIONS];
    643   index_type extent[GFC_MAX_DIMENSIONS];
    644   index_type stride[GFC_MAX_DIMENSIONS];
    645   index_type stride0;
    646   index_type dim;
    647   GFC_REAL_10 *dest;
    648   prng_state* rs = get_rand_state();
    649 
    650   dest = x->base_addr;
    651 
    652   dim = GFC_DESCRIPTOR_RANK (x);
    653 
    654   for (index_type n = 0; n < dim; n++)
    655     {
    656       count[n] = 0;
    657       stride[n] = GFC_DESCRIPTOR_STRIDE(x,n);
    658       extent[n] = GFC_DESCRIPTOR_EXTENT(x,n);
    659       if (extent[n] <= 0)
    660         return;
    661     }
    662 
    663   stride0 = stride[0];
    664 
    665   if (unlikely (!rs->init))
    666     init_rand_state (rs, false);
    667 
    668   while (dest)
    669     {
    670       /* random_r10 (dest);  */
    671       uint64_t r = prng_next (rs);
    672       rnumber_10 (dest, r);
    673 
    674       /* Advance to the next element.  */
    675       dest += stride0;
    676       count[0]++;
    677       /* Advance to the next source element.  */
    678       index_type n = 0;
    679       while (count[n] == extent[n])
    680         {
    681           /* When we get to the end of a dimension, reset it and increment
    682              the next dimension.  */
    683           count[n] = 0;
    684           /* We could precalculate these products, but this is a less
    685              frequently used path so probably not worth it.  */
    686           dest -= stride[n] * extent[n];
    687           n++;
    688           if (n == dim)
    689             {
    690               dest = NULL;
    691               break;
    692             }
    693           else
    694             {
    695               count[n]++;
    696               dest += stride[n];
    697             }
    698         }
    699     }
    700 }
    701 
    702 #endif
    703 
    704 #ifdef HAVE_GFC_REAL_16
    705 
    706 /*  This function fills a REAL(16) array with values from the uniform
    707     distribution with range [0,1).  */
    708 
    709 void
    710 arandom_r16 (gfc_array_r16 *x)
    711 {
    712   index_type count[GFC_MAX_DIMENSIONS];
    713   index_type extent[GFC_MAX_DIMENSIONS];
    714   index_type stride[GFC_MAX_DIMENSIONS];
    715   index_type stride0;
    716   index_type dim;
    717   GFC_REAL_16 *dest;
    718   prng_state* rs = get_rand_state();
    719 
    720   dest = x->base_addr;
    721 
    722   dim = GFC_DESCRIPTOR_RANK (x);
    723 
    724   for (index_type n = 0; n < dim; n++)
    725     {
    726       count[n] = 0;
    727       stride[n] = GFC_DESCRIPTOR_STRIDE(x,n);
    728       extent[n] = GFC_DESCRIPTOR_EXTENT(x,n);
    729       if (extent[n] <= 0)
    730         return;
    731     }
    732 
    733   stride0 = stride[0];
    734 
    735   if (unlikely (!rs->init))
    736     init_rand_state (rs, false);
    737 
    738   while (dest)
    739     {
    740       /* random_r16 (dest);  */
    741       uint64_t r1 = prng_next (rs);
    742       uint64_t r2 = prng_next (rs);
    743       rnumber_16 (dest, r1, r2);
    744 
    745       /* Advance to the next element.  */
    746       dest += stride0;
    747       count[0]++;
    748       /* Advance to the next source element.  */
    749       index_type n = 0;
    750       while (count[n] == extent[n])
    751         {
    752           /* When we get to the end of a dimension, reset it and increment
    753              the next dimension.  */
    754           count[n] = 0;
    755           /* We could precalculate these products, but this is a less
    756              frequently used path so probably not worth it.  */
    757           dest -= stride[n] * extent[n];
    758           n++;
    759           if (n == dim)
    760             {
    761               dest = NULL;
    762               break;
    763             }
    764           else
    765             {
    766               count[n]++;
    767               dest += stride[n];
    768             }
    769         }
    770     }
    771 }
    772 
    773 #endif
    774 
    775 #ifdef HAVE_GFC_REAL_17
    776 
    777 /*  This function fills a REAL(16) array with values from the uniform
    778     distribution with range [0,1).  */
    779 
    780 void
    781 arandom_r17 (gfc_array_r17 *x)
    782 {
    783   index_type count[GFC_MAX_DIMENSIONS];
    784   index_type extent[GFC_MAX_DIMENSIONS];
    785   index_type stride[GFC_MAX_DIMENSIONS];
    786   index_type stride0;
    787   index_type dim;
    788   GFC_REAL_17 *dest;
    789   prng_state* rs = get_rand_state();
    790 
    791   dest = x->base_addr;
    792 
    793   dim = GFC_DESCRIPTOR_RANK (x);
    794 
    795   for (index_type n = 0; n < dim; n++)
    796     {
    797       count[n] = 0;
    798       stride[n] = GFC_DESCRIPTOR_STRIDE(x,n);
    799       extent[n] = GFC_DESCRIPTOR_EXTENT(x,n);
    800       if (extent[n] <= 0)
    801         return;
    802     }
    803 
    804   stride0 = stride[0];
    805 
    806   if (unlikely (!rs->init))
    807     init_rand_state (rs, false);
    808 
    809   while (dest)
    810     {
    811       /* random_r17 (dest);  */
    812       uint64_t r1 = prng_next (rs);
    813       uint64_t r2 = prng_next (rs);
    814       rnumber_17 (dest, r1, r2);
    815 
    816       /* Advance to the next element.  */
    817       dest += stride0;
    818       count[0]++;
    819       /* Advance to the next source element.  */
    820       index_type n = 0;
    821       while (count[n] == extent[n])
    822         {
    823           /* When we get to the end of a dimension, reset it and increment
    824              the next dimension.  */
    825           count[n] = 0;
    826           /* We could precalculate these products, but this is a less
    827              frequently used path so probably not worth it.  */
    828           dest -= stride[n] * extent[n];
    829           n++;
    830           if (n == dim)
    831             {
    832               dest = NULL;
    833               break;
    834             }
    835           else
    836             {
    837               count[n]++;
    838               dest += stride[n];
    839             }
    840         }
    841     }
    842 }
    843 
    844 #endif
    845 
    846 
    847 /* Number of elements in master_state array.  */
    848 #define SZU64 (sizeof (master_state.s) / sizeof (uint64_t))
    849 
    850 /* Equivalent number of elements in an array of GFC_INTEGER_{4,8}.  */
    851 #define SZ_IN_INT_4 (SZU64 * (sizeof (uint64_t) / sizeof (GFC_INTEGER_4)))
    852 #define SZ_IN_INT_8 (SZU64 * (sizeof (uint64_t) / sizeof (GFC_INTEGER_8)))
    853 
    854 /* Keys for scrambling the seed in order to avoid poor seeds.  */
    855 
    856 static const uint64_t xor_keys[] = {
    857   0xbd0c5b6e50c2df49ULL, 0xd46061cd46e1df38ULL, 0xbb4f4d4ed6103544ULL,
    858   0x114a583d0756ad39ULL
    859 };
    860 
    861 
    862 /* Since a XOR cipher is symmetric, we need only one routine, and we
    863    can use it both for encryption and decryption.  */
    864 
    865 static void
    866 scramble_seed (uint64_t *dest, const uint64_t *src)
    867 {
    868   for (size_t i = 0; i < SZU64; i++)
    869     dest[i] = src[i] ^ xor_keys[i];
    870 }
    871 
    872 
    873 /* random_seed is used to seed the PRNG with either a default
    874    set of seeds or user specified set of seeds.  random_seed
    875    must be called with no argument or exactly one argument.  */
    876 
    877 void
    878 random_seed_i4 (GFC_INTEGER_4 *size, gfc_array_i4 *put, gfc_array_i4 *get)
    879 {
    880   uint64_t seed[SZU64];
    881 
    882   /* Check that we only have one argument present.  */
    883   if ((size ? 1 : 0) + (put ? 1 : 0) + (get ? 1 : 0) > 1)
    884     runtime_error ("RANDOM_SEED should have at most one argument present.");
    885 
    886   if (size != NULL)
    887     *size = SZ_IN_INT_4;
    888 
    889   prng_state* rs = get_rand_state();
    890 
    891   /* Return the seed to GET data.  */
    892   if (get != NULL)
    893     {
    894       /* If the rank of the array is not 1, abort.  */
    895       if (GFC_DESCRIPTOR_RANK (get) != 1)
    896 	runtime_error ("Array rank of GET is not 1.");
    897 
    898       /* If the array is too small, abort.  */
    899       if (GFC_DESCRIPTOR_EXTENT(get,0) < (index_type) SZ_IN_INT_4)
    900 	runtime_error ("Array size of GET is too small.");
    901 
    902       if (!rs->init)
    903 	init_rand_state (rs, false);
    904 
    905       /* Unscramble the seed.  */
    906       scramble_seed (seed, rs->s);
    907 
    908       /*  Then copy it back to the user variable.  */
    909       for (size_t i = 0; i < SZ_IN_INT_4 ; i++)
    910 	memcpy (&(get->base_addr[(SZ_IN_INT_4 - 1 - i) *
    911 				 GFC_DESCRIPTOR_STRIDE(get,0)]),
    912 		(unsigned char*) seed + i * sizeof(GFC_UINTEGER_4),
    913                sizeof(GFC_UINTEGER_4));
    914     }
    915 
    916   else
    917     {
    918   __gthread_mutex_lock (&random_lock);
    919 
    920   /* From the standard: "If no argument is present, the processor assigns
    921      a processor-dependent value to the seed."  */
    922   if (size == NULL && put == NULL && get == NULL)
    923     {
    924       master_state.init = false;
    925       init_rand_state (rs, true);
    926     }
    927 
    928   if (put != NULL)
    929     {
    930       /* If the rank of the array is not 1, abort.  */
    931       if (GFC_DESCRIPTOR_RANK (put) != 1)
    932         runtime_error ("Array rank of PUT is not 1.");
    933 
    934       /* If the array is too small, abort.  */
    935       if (GFC_DESCRIPTOR_EXTENT(put,0) < (index_type) SZ_IN_INT_4)
    936         runtime_error ("Array size of PUT is too small.");
    937 
    938       /*  We copy the seed given by the user.  */
    939       for (size_t i = 0; i < SZ_IN_INT_4; i++)
    940 	memcpy ((unsigned char*) seed + i * sizeof(GFC_UINTEGER_4),
    941 		&(put->base_addr[(SZ_IN_INT_4 - 1 - i) *
    942 				 GFC_DESCRIPTOR_STRIDE(put,0)]),
    943 		sizeof(GFC_UINTEGER_4));
    944 
    945       /* We put it after scrambling the bytes, to paper around users who
    946 	 provide seeds with quality only in the lower or upper part.  */
    947       scramble_seed (master_state.s, seed);
    948       master_state.init = true;
    949       init_rand_state (rs, true);
    950     }
    951 
    952   __gthread_mutex_unlock (&random_lock);
    953     }
    954 }
    955 iexport(random_seed_i4);
    956 
    957 
    958 void
    959 random_seed_i8 (GFC_INTEGER_8 *size, gfc_array_i8 *put, gfc_array_i8 *get)
    960 {
    961   uint64_t seed[SZU64];
    962 
    963   /* Check that we only have one argument present.  */
    964   if ((size ? 1 : 0) + (put ? 1 : 0) + (get ? 1 : 0) > 1)
    965     runtime_error ("RANDOM_SEED should have at most one argument present.");
    966 
    967   if (size != NULL)
    968     *size = SZ_IN_INT_8;
    969 
    970   prng_state* rs = get_rand_state();
    971 
    972   /* Return the seed to GET data.  */
    973   if (get != NULL)
    974     {
    975       /* If the rank of the array is not 1, abort.  */
    976       if (GFC_DESCRIPTOR_RANK (get) != 1)
    977 	runtime_error ("Array rank of GET is not 1.");
    978 
    979       /* If the array is too small, abort.  */
    980       if (GFC_DESCRIPTOR_EXTENT(get,0) < (index_type) SZ_IN_INT_8)
    981 	runtime_error ("Array size of GET is too small.");
    982 
    983       if (!rs->init)
    984 	init_rand_state (rs, false);
    985 
    986       /* Unscramble the seed.  */
    987       scramble_seed (seed, rs->s);
    988 
    989       /*  This code now should do correct strides.  */
    990       for (size_t i = 0; i < SZ_IN_INT_8; i++)
    991 	memcpy (&(get->base_addr[i * GFC_DESCRIPTOR_STRIDE(get,0)]), &seed[i],
    992 		sizeof (GFC_UINTEGER_8));
    993     }
    994 
    995   else
    996     {
    997   __gthread_mutex_lock (&random_lock);
    998 
    999   /* From the standard: "If no argument is present, the processor assigns
   1000      a processor-dependent value to the seed."  */
   1001   if (size == NULL && put == NULL && get == NULL)
   1002     {
   1003       master_state.init = false;
   1004       init_rand_state (rs, true);
   1005     }
   1006 
   1007   if (put != NULL)
   1008     {
   1009       /* If the rank of the array is not 1, abort.  */
   1010       if (GFC_DESCRIPTOR_RANK (put) != 1)
   1011         runtime_error ("Array rank of PUT is not 1.");
   1012 
   1013       /* If the array is too small, abort.  */
   1014       if (GFC_DESCRIPTOR_EXTENT(put,0) < (index_type) SZ_IN_INT_8)
   1015         runtime_error ("Array size of PUT is too small.");
   1016 
   1017       /*  This code now should do correct strides.  */
   1018       for (size_t i = 0; i < SZ_IN_INT_8; i++)
   1019 	memcpy (&seed[i], &(put->base_addr[i * GFC_DESCRIPTOR_STRIDE(put,0)]),
   1020 		sizeof (GFC_UINTEGER_8));
   1021 
   1022       scramble_seed (master_state.s, seed);
   1023       master_state.init = true;
   1024       init_rand_state (rs, true);
   1025      }
   1026 
   1027 
   1028   __gthread_mutex_unlock (&random_lock);
   1029     }
   1030 }
   1031 iexport(random_seed_i8);
   1032 
   1033 
   1034 #if !defined __GTHREAD_MUTEX_INIT || defined __GTHREADS
   1035 static void __attribute__((constructor))
   1036 constructor_random (void)
   1037 {
   1038 #ifndef __GTHREAD_MUTEX_INIT
   1039   __GTHREAD_MUTEX_INIT_FUNCTION (&random_lock);
   1040 #endif
   1041   if (__gthread_active_p ())
   1042     __gthread_key_create (&rand_state_key, &free);
   1043 }
   1044 #endif
   1045 
   1046 #ifdef __GTHREADS
   1047 static void __attribute__((destructor))
   1048 destructor_random (void)
   1049 {
   1050   if (__gthread_active_p ())
   1051     __gthread_key_delete (rand_state_key);
   1052 }
   1053 #endif
   1054