Home | History | Annotate | Line # | Download | only in tests
      1 /*
      2  * Copyright (c) Meta Platforms, Inc. and affiliates.
      3  * All rights reserved.
      4  *
      5  * This source code is licensed under both the BSD-style license (found in the
      6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
      7  * in the COPYING file in the root directory of this source tree).
      8  * You may select, at your option, one of the above-listed licenses.
      9  */
     10 
     11 #include "seqgen.h"
     12 #include "mem.h"
     13 #include <string.h>
     14 
     15 #define MIN(a, b)  ((a) < (b) ? (a) : (b))
     16 
     17 static const size_t kMatchBytes = 128;
     18 
     19 #define SEQ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
     20 static BYTE SEQ_randByte(unsigned* src)
     21 {
     22     static const U32 prime1 = 2654435761U;
     23     static const U32 prime2 = 2246822519U;
     24     U32 rand32 = *src;
     25     rand32 *= prime1;
     26     rand32 ^= prime2;
     27     rand32  = SEQ_rotl32(rand32, 13);
     28     *src = rand32;
     29     return (BYTE)(rand32 >> 5);
     30 }
     31 
     32 SEQ_stream SEQ_initStream(unsigned seed)
     33 {
     34     SEQ_stream stream;
     35     stream.state = 0;
     36     XXH64_reset(&stream.xxh, 0);
     37     stream.seed = seed;
     38     return stream;
     39 }
     40 
     41 /* Generates a single guard byte, then match length + 1 of a different byte,
     42  * then another guard byte.
     43  */
     44 static size_t SEQ_gen_matchLength(SEQ_stream* stream, unsigned value,
     45                                   SEQ_outBuffer* out)
     46 {
     47     typedef enum {
     48         ml_first_byte = 0,
     49         ml_match_bytes,
     50         ml_last_byte,
     51     } ml_state;
     52     BYTE* const ostart = (BYTE*)out->dst;
     53     BYTE* const oend = ostart + out->size;
     54     BYTE* op = ostart + out->pos;
     55 
     56     switch ((ml_state)stream->state) {
     57     case ml_first_byte:
     58         /* Generate a single byte and pick a different byte for the match */
     59         if (op >= oend) {
     60             stream->bytesLeft = 1;
     61             break;
     62         }
     63         *op = SEQ_randByte(&stream->seed) & 0xFF;
     64         do {
     65             stream->saved = SEQ_randByte(&stream->seed) & 0xFF;
     66         } while (*op == stream->saved);
     67         ++op;
     68         /* State transition */
     69         stream->state = ml_match_bytes;
     70         stream->bytesLeft = value + 1;
     71     /* fall-through */
     72     case ml_match_bytes: {
     73         /* Copy matchLength + 1 bytes to the output buffer */
     74         size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
     75         if (setLength > 0) {
     76             memset(op, stream->saved, setLength);
     77             op += setLength;
     78             stream->bytesLeft -= setLength;
     79         }
     80         if (stream->bytesLeft > 0)
     81             break;
     82         /* State transition */
     83         stream->state = ml_last_byte;
     84     }
     85     /* fall-through */
     86     case ml_last_byte:
     87         /* Generate a single byte and pick a different byte for the match */
     88         if (op >= oend) {
     89             stream->bytesLeft = 1;
     90             break;
     91         }
     92         do {
     93             *op = SEQ_randByte(&stream->seed) & 0xFF;
     94         } while (*op == stream->saved);
     95         ++op;
     96         /* State transition */
     97     /* fall-through */
     98     default:
     99         stream->state = 0;
    100         stream->bytesLeft = 0;
    101         break;
    102     }
    103     XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
    104     out->pos = op - ostart;
    105     return stream->bytesLeft;
    106 }
    107 
    108 /* Saves the current seed then generates kMatchBytes random bytes >= 128.
    109  * Generates literal length - kMatchBytes random bytes < 128.
    110  * Generates another kMatchBytes using the saved seed to generate a match.
    111  * This way the match is easy to find for the compressors.
    112  */
    113 static size_t SEQ_gen_litLength(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
    114 {
    115     typedef enum {
    116         ll_start = 0,
    117         ll_run_bytes,
    118         ll_literals,
    119         ll_run_match,
    120     } ll_state;
    121     BYTE* const ostart = (BYTE*)out->dst;
    122     BYTE* const oend = ostart + out->size;
    123     BYTE* op = ostart + out->pos;
    124 
    125     switch ((ll_state)stream->state) {
    126     case ll_start:
    127         stream->state = ll_run_bytes;
    128         stream->saved = stream->seed;
    129         stream->bytesLeft = MIN(kMatchBytes, value);
    130     /* fall-through */
    131     case ll_run_bytes:
    132         while (stream->bytesLeft > 0 && op < oend) {
    133             *op++ = SEQ_randByte(&stream->seed) | 0x80;
    134             --stream->bytesLeft;
    135         }
    136         if (stream->bytesLeft > 0)
    137             break;
    138         /* State transition */
    139         stream->state = ll_literals;
    140         stream->bytesLeft = value - MIN(kMatchBytes, value);
    141     /* fall-through */
    142     case ll_literals:
    143         while (stream->bytesLeft > 0 && op < oend) {
    144             *op++ = SEQ_randByte(&stream->seed) & 0x7F;
    145             --stream->bytesLeft;
    146         }
    147         if (stream->bytesLeft > 0)
    148             break;
    149         /* State transition */
    150         stream->state = ll_run_match;
    151         stream->bytesLeft = MIN(kMatchBytes, value);
    152     /* fall-through */
    153     case ll_run_match: {
    154         while (stream->bytesLeft > 0 && op < oend) {
    155             *op++ = SEQ_randByte(&stream->saved) | 0x80;
    156             --stream->bytesLeft;
    157         }
    158         if (stream->bytesLeft > 0)
    159             break;
    160     }
    161     /* fall-through */
    162     default:
    163         stream->state = 0;
    164         stream->bytesLeft = 0;
    165         break;
    166     }
    167     XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
    168     out->pos = op - ostart;
    169     return stream->bytesLeft;
    170 }
    171 
    172 /* Saves the current seed then generates kMatchBytes random bytes >= 128.
    173  * Generates offset - kMatchBytes of zeros to get a large offset without
    174  * polluting the hash tables.
    175  * Generates another kMatchBytes using the saved seed to generate a with the
    176  * required offset.
    177  */
    178 static size_t SEQ_gen_offset(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
    179 {
    180     typedef enum {
    181         of_start = 0,
    182         of_run_bytes,
    183         of_offset,
    184         of_run_match,
    185     } of_state;
    186     BYTE* const ostart = (BYTE*)out->dst;
    187     BYTE* const oend = ostart + out->size;
    188     BYTE* op = ostart + out->pos;
    189 
    190     switch ((of_state)stream->state) {
    191     case of_start:
    192         stream->state = of_run_bytes;
    193         stream->saved = stream->seed;
    194         stream->bytesLeft = MIN(value, kMatchBytes);
    195     /* fall-through */
    196     case of_run_bytes: {
    197         while (stream->bytesLeft > 0 && op < oend) {
    198             *op++ = SEQ_randByte(&stream->seed) | 0x80;
    199             --stream->bytesLeft;
    200         }
    201         if (stream->bytesLeft > 0)
    202             break;
    203         /* State transition */
    204         stream->state = of_offset;
    205         stream->bytesLeft = value - MIN(value, kMatchBytes);
    206     }
    207     /* fall-through */
    208     case of_offset: {
    209         /* Copy matchLength + 1 bytes to the output buffer */
    210         size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
    211         if (setLength > 0) {
    212             memset(op, 0, setLength);
    213             op += setLength;
    214             stream->bytesLeft -= setLength;
    215         }
    216         if (stream->bytesLeft > 0)
    217             break;
    218         /* State transition */
    219         stream->state = of_run_match;
    220         stream->bytesLeft = MIN(value, kMatchBytes);
    221     }
    222     /* fall-through */
    223     case of_run_match: {
    224         while (stream->bytesLeft > 0 && op < oend) {
    225             *op++ = SEQ_randByte(&stream->saved) | 0x80;
    226             --stream->bytesLeft;
    227         }
    228         if (stream->bytesLeft > 0)
    229             break;
    230     }
    231     /* fall-through */
    232     default:
    233         stream->state = 0;
    234         stream->bytesLeft = 0;
    235         break;
    236     }
    237     XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
    238     out->pos = op - ostart;
    239     return stream->bytesLeft;
    240 }
    241 
    242 /* Returns the number of bytes left to generate.
    243  * Must pass the same type/value until it returns 0.
    244  */
    245 size_t SEQ_gen(SEQ_stream* stream, SEQ_gen_type type, unsigned value, SEQ_outBuffer* out)
    246 {
    247     switch (type) {
    248         case SEQ_gen_ml: return SEQ_gen_matchLength(stream, value, out);
    249         case SEQ_gen_ll: return SEQ_gen_litLength(stream, value, out);
    250         case SEQ_gen_of: return SEQ_gen_offset(stream, value, out);
    251         case SEQ_gen_max: /* fall-through */
    252         default: return 0;
    253     }
    254 }
    255 
    256 /* Returns the xxhash of the data produced so far */
    257 XXH64_hash_t SEQ_digest(SEQ_stream const* stream)
    258 {
    259     return XXH64_digest(&stream->xxh);
    260 }
    261