1/* 2 * Copyright © 2012 Siarhei Siamashka <siarhei.siamashka@gmail.com> 3 * 4 * Based on the public domain implementation of small noncryptographic PRNG 5 * authored by Bob Jenkins: http://burtleburtle.net/bob/rand/smallprng.html 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 * and/or sell copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the next 15 * paragraph) shall be included in all copies or substantial portions of the 16 * Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 * DEALINGS IN THE SOFTWARE. 25 */ 26 27#include "utils.h" 28#include "utils-prng.h" 29 30#if defined(HAVE_GCC_VECTOR_EXTENSIONS) && defined(__SSE2__) 31#include <xmmintrin.h> 32#endif 33 34void smallprng_srand_r (smallprng_t *x, uint32_t seed) 35{ 36 uint32_t i; 37 x->a = 0xf1ea5eed, x->b = x->c = x->d = seed; 38 for (i = 0; i < 20; ++i) 39 smallprng_rand_r (x); 40} 41 42/* 43 * Set a 32-bit seed for PRNG 44 * 45 * LCG is used here for generating independent seeds for different 46 * smallprng instances (in the case if smallprng is also used for 47 * generating these seeds, "Big Crush" test from TestU01 detects 48 * some problems in the glued 'prng_rand_128_r' output data). 49 * Actually we might be even better using some cryptographic 50 * hash for this purpose, but LCG seems to be also enough for 51 * passing "Big Crush". 52 */ 53void prng_srand_r (prng_t *x, uint32_t seed) 54{ 55#ifdef HAVE_GCC_VECTOR_EXTENSIONS 56 int i; 57 prng_rand_128_data_t dummy; 58 smallprng_srand_r (&x->p0, seed); 59 x->a[0] = x->a[1] = x->a[2] = x->a[3] = 0xf1ea5eed; 60 x->b[0] = x->c[0] = x->d[0] = (seed = seed * 1103515245 + 12345); 61 x->b[1] = x->c[1] = x->d[1] = (seed = seed * 1103515245 + 12345); 62 x->b[2] = x->c[2] = x->d[2] = (seed = seed * 1103515245 + 12345); 63 x->b[3] = x->c[3] = x->d[3] = (seed = seed * 1103515245 + 12345); 64 for (i = 0; i < 20; ++i) 65 prng_rand_128_r (x, &dummy); 66#else 67 smallprng_srand_r (&x->p0, seed); 68 smallprng_srand_r (&x->p1, (seed = seed * 1103515245 + 12345)); 69 smallprng_srand_r (&x->p2, (seed = seed * 1103515245 + 12345)); 70 smallprng_srand_r (&x->p3, (seed = seed * 1103515245 + 12345)); 71 smallprng_srand_r (&x->p4, (seed = seed * 1103515245 + 12345)); 72#endif 73} 74 75static force_inline void 76store_rand_128_data (void *addr, prng_rand_128_data_t *d, int aligned) 77{ 78#ifdef HAVE_GCC_VECTOR_EXTENSIONS 79 if (aligned) 80 { 81 *(uint8x16 *)addr = d->vb; 82 return; 83 } 84 else 85 { 86#ifdef __SSE2__ 87 /* workaround for http://gcc.gnu.org/PR55614 */ 88 _mm_storeu_si128 (addr, _mm_loadu_si128 ((__m128i *)d)); 89 return; 90#endif 91 } 92#endif 93 /* we could try something better for unaligned writes (packed attribute), 94 * but GCC is not very reliable: http://gcc.gnu.org/PR55454 */ 95 memcpy (addr, d, 16); 96} 97 98/* 99 * Helper function and the actual code for "prng_randmemset_r" function 100 */ 101static force_inline void 102randmemset_internal (prng_t *prng, 103 uint8_t *buf, 104 size_t size, 105 prng_randmemset_flags_t flags, 106 int aligned) 107{ 108 prng_t local_prng = *prng; 109 prng_rand_128_data_t randdata; 110 size_t i; 111 112 while (size >= 16) 113 { 114 prng_rand_128_data_t t; 115 if (flags == 0) 116 { 117 prng_rand_128_r (&local_prng, &randdata); 118 } 119 else 120 { 121 prng_rand_128_r (&local_prng, &t); 122 prng_rand_128_r (&local_prng, &randdata); 123#ifdef HAVE_GCC_VECTOR_EXTENSIONS 124 if (flags & RANDMEMSET_MORE_FF) 125 { 126 const uint8x16 const_C0 = 127 { 128 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 129 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0 130 }; 131 randdata.vb |= (t.vb >= const_C0); 132 } 133 if (flags & RANDMEMSET_MORE_00) 134 { 135 const uint8x16 const_40 = 136 { 137 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 138 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40 139 }; 140 randdata.vb &= (t.vb >= const_40); 141 } 142 if (flags & RANDMEMSET_MORE_FFFFFFFF) 143 { 144 const uint32x4 const_C0000000 = 145 { 146 0xC0000000, 0xC0000000, 0xC0000000, 0xC0000000 147 }; 148 randdata.vw |= ((t.vw << 30) >= const_C0000000); 149 } 150 if (flags & RANDMEMSET_MORE_00000000) 151 { 152 const uint32x4 const_40000000 = 153 { 154 0x40000000, 0x40000000, 0x40000000, 0x40000000 155 }; 156 randdata.vw &= ((t.vw << 30) >= const_40000000); 157 } 158#else 159 #define PROCESS_ONE_LANE(i) \ 160 if (flags & RANDMEMSET_MORE_FF) \ 161 { \ 162 uint32_t mask_ff = (t.w[i] & (t.w[i] << 1)) & 0x80808080; \ 163 mask_ff |= mask_ff >> 1; \ 164 mask_ff |= mask_ff >> 2; \ 165 mask_ff |= mask_ff >> 4; \ 166 randdata.w[i] |= mask_ff; \ 167 } \ 168 if (flags & RANDMEMSET_MORE_00) \ 169 { \ 170 uint32_t mask_00 = (t.w[i] | (t.w[i] << 1)) & 0x80808080; \ 171 mask_00 |= mask_00 >> 1; \ 172 mask_00 |= mask_00 >> 2; \ 173 mask_00 |= mask_00 >> 4; \ 174 randdata.w[i] &= mask_00; \ 175 } \ 176 if (flags & RANDMEMSET_MORE_FFFFFFFF) \ 177 { \ 178 int32_t mask_ff = ((t.w[i] << 30) & (t.w[i] << 31)) & \ 179 0x80000000; \ 180 randdata.w[i] |= mask_ff >> 31; \ 181 } \ 182 if (flags & RANDMEMSET_MORE_00000000) \ 183 { \ 184 int32_t mask_00 = ((t.w[i] << 30) | (t.w[i] << 31)) & \ 185 0x80000000; \ 186 randdata.w[i] &= mask_00 >> 31; \ 187 } 188 189 PROCESS_ONE_LANE (0) 190 PROCESS_ONE_LANE (1) 191 PROCESS_ONE_LANE (2) 192 PROCESS_ONE_LANE (3) 193#endif 194 } 195 if (is_little_endian ()) 196 { 197 store_rand_128_data (buf, &randdata, aligned); 198 buf += 16; 199 } 200 else 201 { 202 203#ifndef __has_builtin 204#define __has_builtin(x) 0 205#endif 206 207#ifdef HAVE_GCC_VECTOR_EXTENSIONS 208# if __has_builtin(__builtin_shufflevector) 209 randdata.vb = 210 __builtin_shufflevector (randdata.vb, randdata.vb, 211 3, 2, 1, 0, 7, 6 , 5, 4, 212 11, 10, 9, 8, 15, 14, 13, 12); 213# else 214 static const uint8x16 bswap_shufflemask = 215 { 216 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 217 }; 218 randdata.vb = __builtin_shuffle (randdata.vb, bswap_shufflemask); 219# endif 220 221 store_rand_128_data (buf, &randdata, aligned); 222 buf += 16; 223#else 224 uint8_t t1, t2, t3, t4; 225 #define STORE_ONE_LANE(i) \ 226 t1 = randdata.b[i * 4 + 3]; \ 227 t2 = randdata.b[i * 4 + 2]; \ 228 t3 = randdata.b[i * 4 + 1]; \ 229 t4 = randdata.b[i * 4 + 0]; \ 230 *buf++ = t1; \ 231 *buf++ = t2; \ 232 *buf++ = t3; \ 233 *buf++ = t4; 234 235 STORE_ONE_LANE (0) 236 STORE_ONE_LANE (1) 237 STORE_ONE_LANE (2) 238 STORE_ONE_LANE (3) 239#endif 240 } 241 size -= 16; 242 } 243 i = 0; 244 while (i < size) 245 { 246 uint8_t randbyte = prng_rand_r (&local_prng) & 0xFF; 247 if (flags != 0) 248 { 249 uint8_t t = prng_rand_r (&local_prng) & 0xFF; 250 if ((flags & RANDMEMSET_MORE_FF) && (t >= 0xC0)) 251 randbyte = 0xFF; 252 if ((flags & RANDMEMSET_MORE_00) && (t < 0x40)) 253 randbyte = 0x00; 254 if (i % 4 == 0 && i + 4 <= size) 255 { 256 t = prng_rand_r (&local_prng) & 0xFF; 257 if ((flags & RANDMEMSET_MORE_FFFFFFFF) && (t >= 0xC0)) 258 { 259 memset(&buf[i], 0xFF, 4); 260 i += 4; 261 continue; 262 } 263 if ((flags & RANDMEMSET_MORE_00000000) && (t < 0x40)) 264 { 265 memset(&buf[i], 0x00, 4); 266 i += 4; 267 continue; 268 } 269 } 270 } 271 buf[i] = randbyte; 272 i++; 273 } 274 *prng = local_prng; 275} 276 277/* 278 * Fill memory buffer with random data. Flags argument may be used 279 * to tweak some statistics properties: 280 * RANDMEMSET_MORE_00 - set ~25% of bytes to 0x00 281 * RANDMEMSET_MORE_FF - set ~25% of bytes to 0xFF 282 * RANDMEMSET_MORE_00000000 - ~25% chance for 00000000 4-byte clusters 283 * RANDMEMSET_MORE_FFFFFFFF - ~25% chance for FFFFFFFF 4-byte clusters 284 */ 285void prng_randmemset_r (prng_t *prng, 286 void *voidbuf, 287 size_t size, 288 prng_randmemset_flags_t flags) 289{ 290 uint8_t *buf = (uint8_t *)voidbuf; 291 if ((uintptr_t)buf & 15) 292 { 293 /* unaligned buffer */ 294 if (flags == 0) 295 randmemset_internal (prng, buf, size, 0, 0); 296 else if (flags == RANDMEMSET_MORE_00_AND_FF) 297 randmemset_internal (prng, buf, size, RANDMEMSET_MORE_00_AND_FF, 0); 298 else 299 randmemset_internal (prng, buf, size, flags, 0); 300 } 301 else 302 { 303 /* aligned buffer */ 304 if (flags == 0) 305 randmemset_internal (prng, buf, size, 0, 1); 306 else if (flags == RANDMEMSET_MORE_00_AND_FF) 307 randmemset_internal (prng, buf, size, RANDMEMSET_MORE_00_AND_FF, 1); 308 else 309 randmemset_internal (prng, buf, size, flags, 1); 310 } 311} 312