Home | History | Annotate | Line # | Download | only in tests
      1 /*
      2  * Copyright (c) Meta Platforms, Inc. and affiliates.
      3  * All rights reserved.
      4  *
      5  * This source code is licensed under both the BSD-style license (found in the
      6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
      7  * in the COPYING file in the root directory of this source tree).
      8  * You may select, at your option, one of the above-listed licenses.
      9  */
     10 
     11 #include <limits.h>
     12 #include <math.h>
     13 #include <stddef.h>
     14 #include <stdio.h>
     15 #include <stdlib.h>
     16 #include <string.h>
     17 #include <time.h>  /* time(), for seed random initialization */
     18 
     19 #include "util.h"
     20 #include "timefn.h"   /* UTIL_clockSpanMicro, SEC_TO_MICRO, UTIL_TIME_INITIALIZER */
     21 #include "zstd.h"
     22 #include "zstd_internal.h"
     23 #include "mem.h"
     24 #define ZDICT_STATIC_LINKING_ONLY
     25 #include "zdict.h"
     26 
     27 /* Direct access to internal compression functions is required */
     28 #include "compress/zstd_compress.c" /* ZSTD_resetSeqStore, ZSTD_storeSeq, *_TO_OFFBASE, HIST_countFast_wksp, HIST_isError */
     29 #include "decompress/zstd_decompress_block.h" /* ZSTD_decompressBlock_deprecated */
     30 
     31 #define XXH_STATIC_LINKING_ONLY
     32 #include "xxhash.h"     /* XXH64 */
     33 
     34 #if !(defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */))
     35 # define inline  /* disable */
     36 #endif
     37 
     38 /*-************************************
     39 *  DISPLAY Macros
     40 **************************************/
     41 #define DISPLAY(...)          fprintf(stderr, __VA_ARGS__)
     42 #define DISPLAYLEVEL(l, ...)  if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
     43 static U32 g_displayLevel = 2;
     44 
     45 #define DISPLAYUPDATE(...)                                                     \
     46     do {                                                                       \
     47         if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) ||           \
     48             (g_displayLevel >= 4)) {                                           \
     49             g_displayClock = UTIL_getTime();                                   \
     50             DISPLAY(__VA_ARGS__);                                              \
     51             if (g_displayLevel >= 4) fflush(stderr);                           \
     52         }                                                                      \
     53     } while (0)
     54 
     55 static const U64 g_refreshRate = SEC_TO_MICRO / 6;
     56 static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
     57 
     58 #define CHECKERR(code)                                                         \
     59     do {                                                                       \
     60         if (ZSTD_isError(code)) {                                              \
     61             DISPLAY("Error occurred while generating data: %s\n",              \
     62                     ZSTD_getErrorName(code));                                  \
     63             exit(1);                                                           \
     64         }                                                                      \
     65     } while (0)
     66 
     67 
     68 /*-*******************************************************
     69 *  Random function
     70 *********************************************************/
     71 static U32 RAND(U32* src)
     72 {
     73 #define RAND_rotl32(x,r) ((x << r) | (x >> (32 - r)))
     74     static const U32 prime1 = 2654435761U;
     75     static const U32 prime2 = 2246822519U;
     76     U32 rand32 = *src;
     77     rand32 *= prime1;
     78     rand32 += prime2;
     79     rand32  = RAND_rotl32(rand32, 13);
     80     *src = rand32;
     81     return RAND_rotl32(rand32, 27);
     82 #undef RAND_rotl32
     83 }
     84 
     85 #define DISTSIZE (8192)
     86 
     87 /* Write `size` bytes into `ptr`, all of which are less than or equal to `maxSymb` */
     88 static void RAND_bufferMaxSymb(U32* seed, void* ptr, size_t size, int maxSymb)
     89 {
     90     size_t i;
     91     BYTE* op = ptr;
     92 
     93     for (i = 0; i < size; i++) {
     94         op[i] = (BYTE) (RAND(seed) % (maxSymb + 1));
     95     }
     96 }
     97 
     98 /* Write `size` random bytes into `ptr` */
     99 static void RAND_buffer(U32* seed, void* ptr, size_t size)
    100 {
    101     size_t i;
    102     BYTE* op = ptr;
    103 
    104     for (i = 0; i + 4 <= size; i += 4) {
    105         MEM_writeLE32(op + i, RAND(seed));
    106     }
    107     for (; i < size; i++) {
    108         op[i] = RAND(seed) & 0xff;
    109     }
    110 }
    111 
    112 /* Write `size` bytes into `ptr` following the distribution `dist` */
    113 static void RAND_bufferDist(U32* seed, BYTE* dist, void* ptr, size_t size)
    114 {
    115     size_t i;
    116     BYTE* op = ptr;
    117 
    118     for (i = 0; i < size; i++) {
    119         op[i] = dist[RAND(seed) % DISTSIZE];
    120     }
    121 }
    122 
    123 /* Generate a random distribution where the frequency of each symbol follows a
    124  * geometric distribution defined by `weight`
    125  * `dist` should have size at least `DISTSIZE` */
    126 static void RAND_genDist(U32* seed, BYTE* dist, double weight)
    127 {
    128     size_t i = 0;
    129     size_t statesLeft = DISTSIZE;
    130     BYTE symb = (BYTE) (RAND(seed) % 256);
    131     BYTE step = (BYTE) ((RAND(seed) % 256) | 1); /* force it to be odd so it's relatively prime to 256 */
    132 
    133     while (i < DISTSIZE) {
    134         size_t states = ((size_t)(weight * (double)statesLeft)) + 1;
    135         size_t j;
    136         for (j = 0; j < states && i < DISTSIZE; j++, i++) {
    137             dist[i] = symb;
    138         }
    139 
    140         symb += step;
    141         statesLeft -= states;
    142     }
    143 }
    144 
    145 /* Generates a random number in the range [min, max) */
    146 static inline U32 RAND_range(U32* seed, U32 min, U32 max)
    147 {
    148     return (RAND(seed) % (max-min)) + min;
    149 }
    150 
/* Round a non-negative floating-point value to the nearest U32.
 * The argument is parenthesized so that compound expressions expand safely. */
#define ROUND(x) ((U32)((x) + 0.5))
    152 
    153 /* Generates a random number in an exponential distribution with mean `mean` */
    154 static double RAND_exp(U32* seed, double mean)
    155 {
    156     double const u = RAND(seed) / (double) UINT_MAX;
    157     return log(1-u) * (-mean);
    158 }
    159 
    160 /*-*******************************************************
    161 *  Constants and Structs
    162 *********************************************************/
/* Display names for the three block kinds.
 * NOTE(review): presumably indexed by blockType_e - confirm at the use sites. */
    163 const char* BLOCK_TYPES[] = {"raw", "rle", "compressed"};
    164 
/* Upper bound (as a log2 and as a byte count) used to size the static
 * content and frame buffers below. */
    165 #define MAX_DECOMPRESSED_SIZE_LOG 20
    166 #define MAX_DECOMPRESSED_SIZE (1ULL << MAX_DECOMPRESSED_SIZE_LOG)
    167 
/* writeFrameHeader() draws the window exponent from [0, MAX_WINDOW_LOG - 10). */
    168 #define MAX_WINDOW_LOG 22 /* Recommended support is 8MB, so limit to 4MB + mantissa */
    169 
/* Shortest match length, and the resulting ceiling on sequences per block
 * (ZSTD_BLOCKSIZE_MAX / MIN_SEQ_LEN, rounded up). */
    170 #define MIN_SEQ_LEN (3)
    171 #define MAX_NB_SEQ ((ZSTD_BLOCKSIZE_MAX + MIN_SEQ_LEN - 1) / MIN_SEQ_LEN)
    172 
/* Fallback path-length limit when the platform does not define MAX_PATH. */
    173 #ifndef MAX_PATH
    174     #ifdef PATH_MAX
    175         #define MAX_PATH PATH_MAX
    176     #else
    177         #define MAX_PATH 256
    178     #endif
    179 #endif
    180 
/* Static scratch buffers. LITERAL_BUFFER holds the literals of the block
 * currently being generated (filled by the writeLiteralsBlock* functions).
 * NOTE(review): CONTENT_BUFFER / FRAME_BUFFER are presumably the decompressed
 * content and the generated frame - confirm where frame_t is initialised. */
    181 BYTE CONTENT_BUFFER[MAX_DECOMPRESSED_SIZE];
    182 BYTE FRAME_BUFFER[MAX_DECOMPRESSED_SIZE * 2];
    183 BYTE LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX];
    184 
/* Backing storage wired into the sequence store by initSeqStore(). */
    185 SeqDef SEQUENCE_BUFFER[MAX_NB_SEQ];
    186 BYTE SEQUENCE_LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX]; /* storeSeq expects a place to copy literals to */
    187 BYTE SEQUENCE_LLCODE[ZSTD_BLOCKSIZE_MAX];
    188 BYTE SEQUENCE_MLCODE[ZSTD_BLOCKSIZE_MAX];
    189 BYTE SEQUENCE_OFCODE[ZSTD_BLOCKSIZE_MAX];
    190 
/* Shared workspace handed to the HIST_* and HUF_* calls in writeHufHeader(). */
    191 U64 WKSP[HUF_WORKSPACE_SIZE_U64];
    192 
/* Frame-level parameters picked by writeFrameHeader() and stored in the
 * `header` field of frame_t. */
    193 typedef struct {
    194     size_t contentSize; /* 0 means unknown (unless contentSize == windowSize == 0) */
    195     unsigned windowSize; /* contentSize >= windowSize means single segment */
    196 } frameHeader_t;
    197 
    198 /* For repeat modes */
/* State carried from one compressed block to the next so that a later block
 * can reuse ("repeat") what an earlier block established. */
    199 typedef struct {
    /* repeat-offset history read by generateSequences() when it emits a repcode */
    200     U32 rep[ZSTD_REP_NUM];
    201 
    /* set to 1 once a Huffman table has been written; a repeat-stats literals
     * block is only attempted when this is non-zero */
    202     int hufInit;
    203     /* the distribution used in the previous block for repeat mode */
    204     BYTE hufDist[DISTSIZE];
    /* Huffman compression table built by writeHufHeader() */
    205     HUF_CElt hufTable [HUF_CTABLE_SIZE_ST(255)];
    206 
    /* NOTE(review): presumably the FSE counterpart of hufInit and the tables
     * reused for sequence repeat modes - their use lies outside this chunk */
    207     int fseInit;
    208     FSE_CTable offcodeCTable  [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
    209     FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
    210     FSE_CTable litlengthCTable  [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
    211 
    212     /* Symbols that were present in the previous distribution, for use with
    213      * set_repeat */
    /* NOTE(review): the sizes 36 / 29 / 53 look like per-alphabet symbol
     * counts - confirm against the zstd constants */
    214     BYTE litlengthSymbolSet[36];
    215     BYTE offsetSymbolSet[29];
    216     BYTE matchlengthSymbolSet[53];
    217 } cblockStats_t;
    218 
/* Everything describing the frame currently being generated. */
    219 typedef struct {
    /* output side: `data` is the write cursor that the write* functions
     * advance; `dataEnd` bounds the space available to the Huffman encoder.
     * NOTE(review): dataStart is presumably the first byte of the frame -
     * it is not referenced in this chunk */
    220     void* data;
    221     void* dataStart;
    222     void* dataEnd;
    223 
    /* decompressed-content side: `src` is the position reached so far and
     * `srcStart` the beginning of the content (src == srcStart means nothing
     * has been generated yet).
     * NOTE(review): srcEnd is not referenced in this chunk */
    224     void* src;
    225     void* srcStart;
    226     void* srcEnd;
    227 
    /* parameters written by writeFrameHeader() */
    228     frameHeader_t header;
    229 
    /* repeat-mode state of the block being built */
    230     cblockStats_t stats;
    231     cblockStats_t oldStats; /* so they can be rolled back if uncompressible */
    232 } frame_t;
    233 
/* Dictionary settings, passed by value to the generator functions. */
    234 typedef struct {
    /* non-zero: the frame header carries a 4-byte dictionary ID and match
     * offsets may reach back into the dictionary */
    235     int useDict;
    /* ID written in the frame header when useDict is set */
    236     U32 dictID;
    /* bounds how far a match offset may reach before the start of the content */
    237     size_t dictContentSize;
    /* NOTE(review): presumably the dictionary bytes - not referenced in this chunk */
    238     BYTE* dictContent;
    239 } dictInfo;
    240 
/* Kind of output the tool produces. */
    241 typedef enum {
    242   gt_frame = 0,  /* generate frames */
    243   gt_block,      /* generate compressed blocks without block/frame headers */
    244 } genType_e;
    245 
    246 #ifndef MIN
    247     #define MIN(a, b) ((a) < (b) ? (a) : (b))
    248 #endif
    249 
/* Literals-section encodings that can be forced through opts.literalType.
 * writeLiteralsBlockSimple() treats value 0 as raw and any other value as
 * RLE; lt_compressed is routed to writeLiteralsBlockCompressed(). */
    250 typedef enum {
    251   lt_raw,
    252   lt_rle,
    253   lt_compressed,
    254 } literalType_e;
    255 
    256 /*-*******************************************************
    257 *  Global variables (set from command line)
    258 *********************************************************/
    259 U32 g_maxDecompressedSizeLog = MAX_DECOMPRESSED_SIZE_LOG;  /* <= 20 */
    260 U32 g_maxBlockSize = ZSTD_BLOCKSIZE_MAX;                       /* <= 128 KB */
    261 
    262 /*-*******************************************************
    263 *  Generator Functions
    264 *********************************************************/
    265 
/* Generation options. The pointer fields are NULL when nothing is forced and
 * the generator picks at random. */
    266 struct {
    267     int contentSize; /* force the content size to be present */
    268     blockType_e *blockType; /* force specific block type */
    269     literalType_e *literalType; /* force specific literals type */
    270     int frame_header_only; /* generate only frame header */
    271     int no_magic; /* do not generate magic number */
    272 } opts;
    273 
    274 /* Generate and write a random frame header */
    275 static void writeFrameHeader(U32* seed, frame_t* frame, dictInfo info)
    276 {
    277     BYTE* const op = frame->data;
    278     size_t pos = 0;
    279     frameHeader_t fh;
    280 
    281     BYTE windowByte = 0;
    282 
    283     int singleSegment = 0;
    284     int contentSizeFlag = 0;
    285     int fcsCode = 0;
    286 
    287     memset(&fh, 0, sizeof(fh));
    288 
    289     /* generate window size */
    290     {
    291         /* Follow window algorithm from specification */
    292         int const exponent = RAND(seed) % (MAX_WINDOW_LOG - 10);
    293         int const mantissa = RAND(seed) % 8;
    294         windowByte = (BYTE) ((exponent << 3) | mantissa);
    295         fh.windowSize = (1U << (exponent + 10));
    296         fh.windowSize += fh.windowSize / 8 * mantissa;
    297     }
    298 
    299     {
    300         /* Generate random content size */
    301         int force_block_type = opts.blockType != NULL;
    302         size_t highBit;
    303         if (RAND(seed) & 7 && g_maxDecompressedSizeLog > 7) {
    304             /* do content of at least 128 bytes */
    305             highBit = 1ULL << RAND_range(seed, 7, g_maxDecompressedSizeLog);
    306         } else if (force_block_type) {
    307             if ((RAND(seed) & 3) || (*(opts.blockType) == bt_rle)) {
    308                 /* do small content */
    309                 highBit = 1ULL << RAND_range(seed, 0, MIN(7, 1U << g_maxDecompressedSizeLog));
    310             } else {
    311                 /* 0 size frame */
    312                 highBit = 0;
    313             }
    314         } else if (RAND(seed) & 3) {
    315             /* do small content */
    316             highBit = 1ULL << RAND_range(seed, 0, MIN(7, 1U << g_maxDecompressedSizeLog));
    317         } else {
    318             /* 0 size frame */
    319             highBit = 0;
    320         }
    321         fh.contentSize = highBit ? highBit + (RAND(seed) % highBit) : 0;
    322 
    323         /* provide size sometimes */
    324         contentSizeFlag = opts.contentSize | (RAND(seed) & 1);
    325 
    326         if (contentSizeFlag && (fh.contentSize == 0 || !(RAND(seed) & 7))) {
    327             /* do single segment sometimes */
    328             fh.windowSize = (U32) fh.contentSize;
    329             singleSegment = 1;
    330         }
    331     }
    332 
    333     if (contentSizeFlag) {
    334         /* Determine how large fcs field has to be */
    335         int minFcsCode = (fh.contentSize >= 256) +
    336                                (fh.contentSize >= 65536 + 256) +
    337                                (fh.contentSize > 0xFFFFFFFFU);
    338         if (!singleSegment && !minFcsCode) {
    339             minFcsCode = 1;
    340         }
    341         fcsCode = minFcsCode + (RAND(seed) % (4 - minFcsCode));
    342         if (fcsCode == 1 && fh.contentSize < 256) fcsCode++;
    343     }
    344 
    345     /* write out the header */
    346     if (!opts.no_magic) {
    347         MEM_writeLE32(op + pos, ZSTD_MAGICNUMBER);
    348         pos += 4;
    349     }
    350 
    351     {
    352         /*
    353          * fcsCode: 2-bit flag specifying how many bytes used to represent Frame_Content_Size (bits 7-6)
    354          * singleSegment: 1-bit flag describing if data must be regenerated within a single continuous memory segment. (bit 5)
    355          * contentChecksumFlag: 1-bit flag that is set if frame includes checksum at the end -- set to 1 below (bit 2)
    356          * dictBits: 2-bit flag describing how many bytes Dictionary_ID uses -- set to 3 (bits 1-0)
    357          * For more information: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_header
    358          */
    359         int const dictBits = info.useDict ? 3 : 0;
    360         BYTE const frameHeaderDescriptor =
    361                 (BYTE) ((fcsCode << 6) | (singleSegment << 5) | (1 << 2) | dictBits);
    362         op[pos++] = frameHeaderDescriptor;
    363     }
    364 
    365     if (!singleSegment) {
    366         op[pos++] = windowByte;
    367     }
    368     if (info.useDict) {
    369         MEM_writeLE32(op + pos, (U32) info.dictID);
    370         pos += 4;
    371     }
    372     if (contentSizeFlag) {
    373         switch (fcsCode) {
    374         default: /* Impossible */
    375         case 0: op[pos++] = (BYTE) fh.contentSize; break;
    376         case 1: MEM_writeLE16(op + pos, (U16) (fh.contentSize - 256)); pos += 2; break;
    377         case 2: MEM_writeLE32(op + pos, (U32) fh.contentSize); pos += 4; break;
    378         case 3: MEM_writeLE64(op + pos, (U64) fh.contentSize); pos += 8; break;
    379         }
    380     }
    381 
    382     DISPLAYLEVEL(3, " frame content size:\t%u\n", (unsigned)fh.contentSize);
    383     DISPLAYLEVEL(3, " frame window size:\t%u\n", fh.windowSize);
    384     DISPLAYLEVEL(3, " content size flag:\t%d\n", contentSizeFlag);
    385     DISPLAYLEVEL(3, " single segment flag:\t%d\n", singleSegment);
    386 
    387     frame->data = op + pos;
    388     frame->header = fh;
    389 }
    390 
    391 /* Write a literal block in either raw or RLE form, return the literals size */
    392 static size_t writeLiteralsBlockSimple(U32* seed, frame_t* frame, size_t contentSize)
    393 {
    394     int force_literal_type = opts.literalType != NULL;
    395     int const type = (force_literal_type) ? *(opts.literalType) : RAND(seed) % 2;
    396 
    397     BYTE* op = (BYTE*)frame->data;
    398     int const sizeFormatDesc = RAND(seed) % 8;
    399     size_t litSize;
    400     size_t maxLitSize = MIN(contentSize, g_maxBlockSize);
    401 
    402     if (sizeFormatDesc == 0) {
    403         /* Size_FormatDesc = ?0 */
    404         maxLitSize = MIN(maxLitSize, 31);
    405     } else if (sizeFormatDesc <= 4) {
    406         /* Size_FormatDesc = 01 */
    407         maxLitSize = MIN(maxLitSize, 4095);
    408     } else {
    409         /* Size_Format = 11 */
    410         maxLitSize = MIN(maxLitSize, 1048575);
    411     }
    412 
    413     litSize = RAND(seed) % (maxLitSize + 1);
    414     if (frame->src == frame->srcStart && litSize == 0) {
    415         litSize = 1; /* no empty literals if there's nothing preceding this block */
    416     }
    417     if (litSize + 3 > contentSize) {
    418         litSize = contentSize; /* no matches shorter than 3 are allowed */
    419     }
    420     /* use smallest size format that fits */
    421     if (litSize < 32) {
    422         op[0] = (type | (0 << 2) | (litSize << 3)) & 0xff;
    423         op += 1;
    424     } else if (litSize < 4096) {
    425         op[0] = (type | (1 << 2) | (litSize << 4)) & 0xff;
    426         op[1] = (litSize >> 4) & 0xff;
    427         op += 2;
    428     } else {
    429         op[0] = (type | (3 << 2) | (litSize << 4)) & 0xff;
    430         op[1] = (litSize >> 4) & 0xff;
    431         op[2] = (litSize >> 12) & 0xff;
    432         op += 3;
    433     }
    434 
    435     if (type == 0) {
    436         /* Raw literals */
    437         DISPLAYLEVEL(4, "   raw literals\n");
    438 
    439         RAND_buffer(seed, LITERAL_BUFFER, litSize);
    440         memcpy(op, LITERAL_BUFFER, litSize);
    441         op += litSize;
    442     } else {
    443         /* RLE literals */
    444         BYTE const symb = (BYTE) (RAND(seed) % 256);
    445 
    446         DISPLAYLEVEL(4, "   rle literals: 0x%02x\n", (unsigned)symb);
    447 
    448         memset(LITERAL_BUFFER, symb, litSize);
    449         op[0] = symb;
    450         op++;
    451     }
    452 
    453     frame->data = op;
    454 
    455     return litSize;
    456 }
    457 
    458 /* Generate a Huffman header for the given source */
    459 static size_t writeHufHeader(U32* seed, HUF_CElt* hufTable, void* dst, size_t dstSize,
    460                                  const void* src, size_t srcSize)
    461 {
    462     BYTE* const ostart = (BYTE*)dst;
    463     BYTE* op = ostart;
    464 
    465     unsigned huffLog = 11;
    466     unsigned maxSymbolValue = 255;
    467 
    468     unsigned count[HUF_SYMBOLVALUE_MAX+1];
    469 
    470     /* Scan input and build symbol stats */
    471     {   size_t const largest = HIST_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, WKSP, sizeof(WKSP));
    472         assert(!HIST_isError(largest));
    473         if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 0; }   /* single symbol, rle */
    474         if (largest <= (srcSize >> 7)+1) return 0;   /* Fast heuristic : not compressible enough */
    475     }
    476 
    477     /* Build Huffman Tree */
    478     /* Max Huffman log is 11, min is highbit(maxSymbolValue)+1 */
    479     huffLog = RAND_range(seed, ZSTD_highbit32(maxSymbolValue)+1, huffLog+1);
    480     DISPLAYLEVEL(6, "     huffman log: %u\n", huffLog);
    481     {   size_t const maxBits = HUF_buildCTable_wksp (hufTable, count, maxSymbolValue, huffLog, WKSP, sizeof(WKSP));
    482         CHECKERR(maxBits);
    483         huffLog = (U32)maxBits;
    484     }
    485 
    486     /* Write table description header */
    487     {   size_t const hSize = HUF_writeCTable_wksp (op, dstSize, hufTable, maxSymbolValue, huffLog, WKSP, sizeof(WKSP));
    488         if (hSize + 12 >= srcSize) return 0;   /* not useful to try compression */
    489         op += hSize;
    490     }
    491 
    492     return op - ostart;
    493 }
    494 
    495 /* Write a Huffman coded literals block and return the literals size */
    496 static size_t writeLiteralsBlockCompressed(U32* seed, frame_t* frame, size_t contentSize)
    497 {
    498     BYTE* origop = (BYTE*)frame->data;
    499     BYTE* opend = (BYTE*)frame->dataEnd;
    500     BYTE* op;
    501     BYTE* const ostart = origop;
    502     int const sizeFormat = RAND(seed) % 4;
    503     size_t litSize;
    504     size_t hufHeaderSize = 0;
    505     size_t compressedSize = 0;
    506     size_t maxLitSize = MIN(contentSize-3, g_maxBlockSize);
    507 
    508     SymbolEncodingType_e hType;
    509 
    510     if (contentSize < 64) {
    511         /* make sure we get reasonably-sized literals for compression */
    512         return ERROR(GENERIC);
    513     }
    514 
    515     DISPLAYLEVEL(4, "   compressed literals\n");
    516 
    517     switch (sizeFormat) {
    518     case 0: /* fall through, size is the same as case 1 */
    519     case 1:
    520         maxLitSize = MIN(maxLitSize, 1023);
    521         origop += 3;
    522         break;
    523     case 2:
    524         maxLitSize = MIN(maxLitSize, 16383);
    525         origop += 4;
    526         break;
    527     case 3:
    528         maxLitSize = MIN(maxLitSize, 262143);
    529         origop += 5;
    530         break;
    531     default:; /* impossible */
    532     }
    533 
    534     do {
    535         op = origop;
    536         do {
    537             litSize = RAND(seed) % (maxLitSize + 1);
    538         } while (litSize < 32); /* avoid small literal sizes */
    539         if (litSize + 3 > contentSize) {
    540             litSize = contentSize; /* no matches shorter than 3 are allowed */
    541         }
    542 
    543         /* most of the time generate a new distribution */
    544         if ((RAND(seed) & 3) || !frame->stats.hufInit) {
    545             do {
    546                 if (RAND(seed) & 3) {
    547                     /* add 10 to ensure some compressibility */
    548                     double const weight = ((RAND(seed) % 90) + 10) / 100.0;
    549 
    550                     DISPLAYLEVEL(5, "    distribution weight: %d%%\n",
    551                                  (int)(weight * 100));
    552 
    553                     RAND_genDist(seed, frame->stats.hufDist, weight);
    554                 } else {
    555                     /* sometimes do restricted range literals to force
    556                      * non-huffman headers */
    557                     DISPLAYLEVEL(5, "    small range literals\n");
    558                     RAND_bufferMaxSymb(seed, frame->stats.hufDist, DISTSIZE,
    559                                        15);
    560                 }
    561                 RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER,
    562                                 litSize);
    563 
    564                 /* generate the header from the distribution instead of the
    565                  * actual data to avoid bugs with symbols that were in the
    566                  * distribution but never showed up in the output */
    567                 hufHeaderSize = writeHufHeader(
    568                         seed, frame->stats.hufTable, op, opend - op,
    569                         frame->stats.hufDist, DISTSIZE);
    570                 CHECKERR(hufHeaderSize);
    571                 /* repeat until a valid header is written */
    572             } while (hufHeaderSize == 0);
    573             op += hufHeaderSize;
    574             hType = set_compressed;
    575 
    576             frame->stats.hufInit = 1;
    577         } else {
    578             /* repeat the distribution/table from last time */
    579             DISPLAYLEVEL(5, "    huffman repeat stats\n");
    580             RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER,
    581                             litSize);
    582             hufHeaderSize = 0;
    583             hType = set_repeat;
    584         }
    585 
    586         do {
    587             compressedSize =
    588                     sizeFormat == 0
    589                             ? HUF_compress1X_usingCTable(
    590                                       op, opend - op, LITERAL_BUFFER, litSize,
    591                                       frame->stats.hufTable, /* flags */ 0)
    592                             : HUF_compress4X_usingCTable(
    593                                       op, opend - op, LITERAL_BUFFER, litSize,
    594                                       frame->stats.hufTable, /* flags */ 0);
    595             CHECKERR(compressedSize);
    596             /* this only occurs when it could not compress or similar */
    597         } while (compressedSize <= 0);
    598 
    599         op += compressedSize;
    600 
    601         compressedSize += hufHeaderSize;
    602         DISPLAYLEVEL(5, "    regenerated size: %u\n", (unsigned)litSize);
    603         DISPLAYLEVEL(5, "    compressed size: %u\n", (unsigned)compressedSize);
    604         if (compressedSize >= litSize) {
    605             DISPLAYLEVEL(5, "     trying again\n");
    606             /* if we have to try again, reset the stats so we don't accidentally
    607              * try to repeat a distribution we just made */
    608             frame->stats = frame->oldStats;
    609         } else {
    610             break;
    611         }
    612     } while (1);
    613 
    614     /* write header */
    615     switch (sizeFormat) {
    616     case 0: /* fall through, size is the same as case 1 */
    617     case 1: {
    618         U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
    619                            ((U32)compressedSize << 14);
    620         MEM_writeLE24(ostart, header);
    621         break;
    622     }
    623     case 2: {
    624         U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
    625                            ((U32)compressedSize << 18);
    626         MEM_writeLE32(ostart, header);
    627         break;
    628     }
    629     case 3: {
    630         U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
    631                            ((U32)compressedSize << 22);
    632         MEM_writeLE32(ostart, header);
    633         ostart[4] = (BYTE)(compressedSize >> 10);
    634         break;
    635     }
    636     default:; /* impossible */
    637     }
    638 
    639     frame->data = op;
    640     return litSize;
    641 }
    642 
    643 static size_t writeLiteralsBlock(U32* seed, frame_t* frame, size_t contentSize)
    644 {
    645     int select_compressed = 0;
    646     if (opts.literalType) {
    647         select_compressed = *(opts.literalType) == lt_compressed;
    648     } else {
    649         /* only do compressed for larger segments to avoid compressibility issues */
    650         select_compressed = RAND(seed) & 7 && contentSize >= 64;
    651     }
    652 
    653     if (select_compressed) {
    654         return writeLiteralsBlockCompressed(seed, frame, contentSize);
    655     } else {
    656         return writeLiteralsBlockSimple(seed, frame, contentSize);
    657     }
    658 }
    659 
    660 static inline void initSeqStore(SeqStore_t *seqStore) {
    661     seqStore->maxNbSeq = MAX_NB_SEQ;
    662     seqStore->maxNbLit = ZSTD_BLOCKSIZE_MAX;
    663     seqStore->sequencesStart = SEQUENCE_BUFFER;
    664     seqStore->litStart = SEQUENCE_LITERAL_BUFFER;
    665     seqStore->llCode = SEQUENCE_LLCODE;
    666     seqStore->mlCode = SEQUENCE_MLCODE;
    667     seqStore->ofCode = SEQUENCE_OFCODE;
    668 
    669     ZSTD_resetSeqStore(seqStore);
    670 }
    671 
    672 /* Randomly generate sequence commands */
    673 static U32
    674 generateSequences(U32* seed, frame_t* frame, SeqStore_t* seqStore,
    675                   size_t contentSize, size_t literalsSize, dictInfo info)
    676 {
    677     /* The total length of all the matches */
    678     size_t const remainingMatch = contentSize - literalsSize;
    679     size_t excessMatch = 0;
    680     U32 numSequences = 0;
    681     U32 i;
    682 
    683     const BYTE* literals = LITERAL_BUFFER;
    684     BYTE* srcPtr = frame->src;
    685 
    686     if (literalsSize != contentSize) {
    687         /* each match must be at least MIN_SEQ_LEN, so this is the maximum
    688          * number of sequences we can have */
    689         U32 const maxSequences = (U32)remainingMatch / MIN_SEQ_LEN;
    690         numSequences = (RAND(seed) % maxSequences) + 1;
    691 
    692         /* the extra match lengths we have to allocate to each sequence */
    693         excessMatch = remainingMatch - numSequences * MIN_SEQ_LEN;
    694     }
    695 
    696     DISPLAYLEVEL(5, "    total match lengths: %u\n", (unsigned)remainingMatch);
    697     for (i = 0; i < numSequences; i++) {
    698         /* Generate match and literal lengths by exponential distribution to
    699          * ensure nice numbers */
    700         U32 matchLen =
    701                 MIN_SEQ_LEN +
    702                 ROUND(RAND_exp(seed, (double)excessMatch / (double)(numSequences - i)));
    703         U32 literalLen =
    704                 (RAND(seed) & 7)
    705                         ? ROUND(RAND_exp(seed,
    706                                          (double)literalsSize /
    707                                                  (double)(numSequences - i)))
    708                         : 0;
    709         /* actual offset, code to send, and point to copy up to when shifting
    710          * codes in the repeat offsets history */
    711         U32 offset, offBase, repIndex;
    712 
    713         /* bounds checks */
    714         matchLen = (U32) MIN(matchLen, excessMatch + MIN_SEQ_LEN);
    715         literalLen = MIN(literalLen, (U32) literalsSize);
    716         if (i == 0 && srcPtr == frame->srcStart && literalLen == 0) literalLen = 1;
    717         if (i + 1 == numSequences) matchLen = MIN_SEQ_LEN + (U32) excessMatch;
    718 
    719         memcpy(srcPtr, literals, literalLen);
    720         srcPtr += literalLen;
    721         do {
    722             if (RAND(seed) & 7) {
    723                 /* do a normal offset */
    724                 U32 const dataDecompressed = (U32)((BYTE*)srcPtr-(BYTE*)frame->srcStart);
    725                 offset = (RAND(seed) %
    726                           MIN(frame->header.windowSize,
    727                               (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) +
    728                          1;
    729                 if (info.useDict && (RAND(seed) & 1) && i + 1 != numSequences && dataDecompressed < frame->header.windowSize) {
    730                     /* need to occasionally generate offsets that go past the start */
    731                     /* including i+1 != numSequences because the last sequences has to adhere to predetermined contentSize */
    732                     U32 lenPastStart = (RAND(seed) % info.dictContentSize) + 1;
    733                     offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart)+lenPastStart;
    734                     if (offset > frame->header.windowSize) {
    735                         if (lenPastStart < MIN_SEQ_LEN) {
    736                             /* when offset > windowSize, matchLen bound by end of dictionary (lenPastStart) */
    737                             /* this also means that lenPastStart must be greater than MIN_SEQ_LEN */
    738                             /* make sure lenPastStart does not go past dictionary start though */
    739                             lenPastStart = MIN(lenPastStart+MIN_SEQ_LEN, (U32)info.dictContentSize);
    740                             offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) + lenPastStart;
    741                         }
    742                         {   U32 const matchLenBound = MIN(frame->header.windowSize, lenPastStart);
    743                             matchLen = MIN(matchLen, matchLenBound);
    744                         }
    745                     }
    746                 }
    747                 offBase = OFFSET_TO_OFFBASE(offset);
    748                 repIndex = 2;
    749             } else {
    750                 /* do a repeat offset */
    751                 U32 const randomRepIndex = RAND(seed) % 3;
    752                 offBase = REPCODE_TO_OFFBASE(randomRepIndex + 1);  /* expects values between 1 & 3 */
    753                 if (literalLen > 0) {
    754                     offset = frame->stats.rep[randomRepIndex];
    755                     repIndex = randomRepIndex;
    756                 } else {
    757                     /* special case : literalLen == 0 */
    758                     offset = randomRepIndex == 2 ? frame->stats.rep[0] - 1
    759                                            : frame->stats.rep[randomRepIndex + 1];
    760                     repIndex = MIN(2, randomRepIndex + 1);
    761                 }
    762             }
    763         } while (((!info.useDict) && (offset > (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) || offset == 0);
    764 
    765         {   BYTE* const dictEnd = ZSTD_maybeNullPtrAdd(info.dictContent, info.dictContentSize);
    766             size_t j;
    767             for (j = 0; j < matchLen; j++) {
    768                 if ((U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) < offset) {
    769                     /* copy from dictionary instead of literals */
    770                     size_t const dictOffset = offset - (srcPtr - (BYTE*)frame->srcStart);
    771                     *srcPtr = *(dictEnd - dictOffset);
    772                 }
    773                 else {
    774                     *srcPtr = *(srcPtr-offset);
    775                 }
    776                 srcPtr++;
    777         }   }
    778 
    779         {   int r;
    780             for (r = repIndex; r > 0; r--) {
    781                 frame->stats.rep[r] = frame->stats.rep[r - 1];
    782             }
    783             frame->stats.rep[0] = offset;
    784         }
    785 
    786         DISPLAYLEVEL(6, "      LL: %5u OF: %5u ML: %5u",
    787                     (unsigned)literalLen, (unsigned)offset, (unsigned)matchLen);
    788         DISPLAYLEVEL(7, " srcPos: %8u seqNb: %3u",
    789                      (unsigned)((BYTE*)srcPtr - (BYTE*)frame->srcStart), (unsigned)i);
    790         DISPLAYLEVEL(6, "\n");
    791         if (OFFBASE_IS_REPCODE(offBase)) {  /* expects sumtype numeric representation of ZSTD_storeSeq() */
    792             DISPLAYLEVEL(7, "        repeat offset: %d\n", (int)repIndex);
    793         }
    794         /* use libzstd sequence handling */
    795         ZSTD_storeSeq(seqStore, literalLen, literals, literals + literalLen,
    796                       offBase, matchLen);
    797 
    798         literalsSize -= literalLen;
    799         excessMatch -= (matchLen - MIN_SEQ_LEN);
    800         literals += literalLen;
    801     }
    802 
    803     memcpy(srcPtr, literals, literalsSize);
    804     srcPtr += literalsSize;
    805     DISPLAYLEVEL(6, "      excess literals: %5u ", (unsigned)literalsSize);
    806     DISPLAYLEVEL(7, "srcPos: %8u ", (unsigned)((BYTE*)srcPtr - (BYTE*)frame->srcStart));
    807     DISPLAYLEVEL(6, "\n");
    808 
    809     return numSequences;
    810 }
    811 
/* initSymbolSet() :
 * Rebuilds `set` as a membership table for `symbols[0..len)` :
 * after the call, set[s] is 1 when `s` occurs in `symbols` and 0 otherwise,
 * for every s in [0, maxSymbolValue].
 * `set` must provide at least maxSymbolValue+1 bytes.
 * Symbols larger than `maxSymbolValue` are skipped, so nothing is ever written
 * outside of the range that was just cleared. */
static void initSymbolSet(const BYTE* symbols, size_t len, BYTE* set, BYTE maxSymbolValue)
{
    size_t i;

    memset(set, 0, (size_t)maxSymbolValue+1);

    for (i = 0; i < len; i++) {
        /* out-of-range symbols cannot be represented in the table : ignore them */
        if (symbols[i] > maxSymbolValue) continue;
        set[symbols[i]] = 1;
    }
}
    822 
/* isSymbolSubset() :
 * Tells whether every entry of `symbols[0..len)` is both <= maxSymbolValue
 * and flagged (non-zero) in the membership table `set`.
 * @return : 1 if all symbols qualify (including when len==0), 0 otherwise. */
static int isSymbolSubset(const BYTE* symbols, size_t len, const BYTE* set, BYTE maxSymbolValue)
{
    size_t pos = 0;

    while (pos < len) {
        BYTE const symbol = symbols[pos++];
        /* range test comes first, so `set` is never read past maxSymbolValue */
        if (symbol > maxSymbolValue) return 0;
        if (set[symbol] == 0) return 0;
    }
    return 1;
}
    834 
    835 static size_t writeSequences(U32* seed, frame_t* frame, SeqStore_t* seqStorePtr,
    836                              size_t nbSeq)
    837 {
    838     /* This code is mostly copied from ZSTD_compressSequences in zstd_compress.c */
    839     unsigned count[MaxSeq+1];
    840     S16 norm[MaxSeq+1];
    841     FSE_CTable* CTable_LitLength = frame->stats.litlengthCTable;
    842     FSE_CTable* CTable_OffsetBits = frame->stats.offcodeCTable;
    843     FSE_CTable* CTable_MatchLength = frame->stats.matchlengthCTable;
    844     U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
    845     const SeqDef* const sequences = seqStorePtr->sequencesStart;
    846     const BYTE* const ofCodeTable = seqStorePtr->ofCode;
    847     const BYTE* const llCodeTable = seqStorePtr->llCode;
    848     const BYTE* const mlCodeTable = seqStorePtr->mlCode;
    849     BYTE* const oend = (BYTE*)frame->dataEnd;
    850     BYTE* op = (BYTE*)frame->data;
    851     BYTE* seqHead;
    852     BYTE scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE(MaxSeq, MaxFSELog)];
    853 
    854     /* literals compressing block removed so that can be done separately */
    855 
    856     /* Sequences Header */
    857     if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall);
    858     if (nbSeq < 128) *op++ = (BYTE)nbSeq;
    859     else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
    860     else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
    861 
    862     if (nbSeq==0) {
    863         frame->data = op;
    864         return 0;
    865     }
    866 
    867     /* seqHead : flags for FSE encoding type */
    868     seqHead = op++;
    869 
    870     /* convert length/distances into codes */
    871     ZSTD_seqToCodes(seqStorePtr);
    872 
    873     /* CTable for Literal Lengths */
    874     {   unsigned max = MaxLL;
    875         size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP, sizeof(WKSP));   /* cannot fail */
    876         assert(!HIST_isError(mostFrequent));
    877         if (frame->stats.fseInit && !(RAND(seed) & 3) &&
    878                    isSymbolSubset(llCodeTable, nbSeq,
    879                                   frame->stats.litlengthSymbolSet, 35)) {
    880             /* maybe do repeat mode if we're allowed to */
    881             LLtype = set_repeat;
    882         } else if (mostFrequent == nbSeq) {
    883             /* do RLE if we have the chance */
    884             *op++ = llCodeTable[0];
    885             FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
    886             LLtype = set_rle;
    887         } else if (!(RAND(seed) & 3)) {
    888             /* maybe use the default distribution */
    889             CHECKERR(FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)));
    890             LLtype = set_basic;
    891         } else {
    892             /* fall back on a full table */
    893             size_t nbSeq_1 = nbSeq;
    894             const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
    895             if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
    896             FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048);
    897             { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
    898               if (FSE_isError(NCountSize)) return ERROR(GENERIC);
    899               op += NCountSize; }
    900             CHECKERR(FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)));
    901             LLtype = set_compressed;
    902     }   }
    903 
    904     /* CTable for Offsets */
    905     /* see Literal Lengths for descriptions of mode choices */
    906     {   unsigned max = MaxOff;
    907         size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP, sizeof(WKSP));   /* cannot fail */
    908         assert(!HIST_isError(mostFrequent));
    909         if (frame->stats.fseInit && !(RAND(seed) & 3) &&
    910                    isSymbolSubset(ofCodeTable, nbSeq,
    911                                   frame->stats.offsetSymbolSet, 28)) {
    912             Offtype = set_repeat;
    913         } else if (mostFrequent == nbSeq) {
    914             *op++ = ofCodeTable[0];
    915             FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
    916             Offtype = set_rle;
    917         } else if (!(RAND(seed) & 3)) {
    918             FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, DefaultMaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
    919             Offtype = set_basic;
    920         } else {
    921             size_t nbSeq_1 = nbSeq;
    922             const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
    923             if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
    924             FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048);
    925             { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
    926               if (FSE_isError(NCountSize)) return ERROR(GENERIC);
    927               op += NCountSize; }
    928             FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
    929             Offtype = set_compressed;
    930     }   }
    931 
    932     /* CTable for MatchLengths */
    933     /* see Literal Lengths for descriptions of mode choices */
    934     {   unsigned max = MaxML;
    935         size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP, sizeof(WKSP));   /* cannot fail */
    936         assert(!HIST_isError(mostFrequent));
    937         if (frame->stats.fseInit && !(RAND(seed) & 3) &&
    938                    isSymbolSubset(mlCodeTable, nbSeq,
    939                                   frame->stats.matchlengthSymbolSet, 52)) {
    940             MLtype = set_repeat;
    941         } else if (mostFrequent == nbSeq) {
    942             *op++ = *mlCodeTable;
    943             FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
    944             MLtype = set_rle;
    945         } else if (!(RAND(seed) & 3)) {
    946             /* sometimes do default distribution */
    947             FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
    948             MLtype = set_basic;
    949         } else {
    950             /* fall back on table */
    951             size_t nbSeq_1 = nbSeq;
    952             const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
    953             if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
    954             FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048);
    955             { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
    956               if (FSE_isError(NCountSize)) return ERROR(GENERIC);
    957               op += NCountSize; }
    958             FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
    959             MLtype = set_compressed;
    960     }   }
    961     frame->stats.fseInit = 1;
    962     initSymbolSet(llCodeTable, nbSeq, frame->stats.litlengthSymbolSet, 35);
    963     initSymbolSet(ofCodeTable, nbSeq, frame->stats.offsetSymbolSet, 28);
    964     initSymbolSet(mlCodeTable, nbSeq, frame->stats.matchlengthSymbolSet, 52);
    965 
    966     DISPLAYLEVEL(5, "    LL type: %d OF type: %d ML type: %d\n", (unsigned)LLtype, (unsigned)Offtype, (unsigned)MLtype);
    967 
    968     *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
    969 
    970     /* Encoding Sequences */
    971     {   BIT_CStream_t blockStream;
    972         FSE_CState_t  stateMatchLength;
    973         FSE_CState_t  stateOffsetBits;
    974         FSE_CState_t  stateLitLength;
    975 
    976         RETURN_ERROR_IF(
    977             ERR_isError(BIT_initCStream(&blockStream, op, oend-op)),
    978             dstSize_tooSmall, "not enough space remaining");
    979 
    980         /* first symbols */
    981         FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
    982         FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
    983         FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
    984         BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
    985         if (MEM_32bits()) BIT_flushBits(&blockStream);
    986         BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]);
    987         if (MEM_32bits()) BIT_flushBits(&blockStream);
    988         BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]);
    989         BIT_flushBits(&blockStream);
    990 
    991         {   size_t n;
    992             for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
    993                 BYTE const llCode = llCodeTable[n];
    994                 BYTE const ofCode = ofCodeTable[n];
    995                 BYTE const mlCode = mlCodeTable[n];
    996                 U32  const llBits = LL_bits[llCode];
    997                 U32  const ofBits = ofCode;                                     /* 32b*/  /* 64b*/
    998                 U32  const mlBits = ML_bits[mlCode];
    999                                                                                 /* (7)*/  /* (7)*/
   1000                 FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
   1001                 FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
   1002                 if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
   1003                 FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
   1004                 if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
   1005                     BIT_flushBits(&blockStream);                                /* (7)*/
   1006                 BIT_addBits(&blockStream, sequences[n].litLength, llBits);
   1007                 if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
   1008                 BIT_addBits(&blockStream, sequences[n].mlBase, mlBits);
   1009                 if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
   1010                 BIT_addBits(&blockStream, sequences[n].offBase, ofBits);         /* 31 */
   1011                 BIT_flushBits(&blockStream);                                    /* (7)*/
   1012         }   }
   1013 
   1014         FSE_flushCState(&blockStream, &stateMatchLength);
   1015         FSE_flushCState(&blockStream, &stateOffsetBits);
   1016         FSE_flushCState(&blockStream, &stateLitLength);
   1017 
   1018         {   size_t const streamSize = BIT_closeCStream(&blockStream);
   1019             if (streamSize==0) return ERROR(dstSize_tooSmall);   /* not enough space */
   1020             op += streamSize;
   1021     }   }
   1022 
   1023     frame->data = op;
   1024 
   1025     return 0;
   1026 }
   1027 
   1028 static size_t writeSequencesBlock(U32* seed, frame_t* frame, size_t contentSize,
   1029                                   size_t literalsSize, dictInfo info)
   1030 {
   1031     SeqStore_t seqStore;
   1032     size_t numSequences;
   1033 
   1034 
   1035     initSeqStore(&seqStore);
   1036 
   1037     /* randomly generate sequences */
   1038     numSequences = generateSequences(seed, frame, &seqStore, contentSize, literalsSize, info);
   1039     /* write them out to the frame data */
   1040     CHECKERR(writeSequences(seed, frame, &seqStore, numSequences));
   1041 
   1042     return numSequences;
   1043 }
   1044 
   1045 static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize, dictInfo info)
   1046 {
   1047     BYTE* const blockStart = (BYTE*)frame->data;
   1048     size_t literalsSize;
   1049     size_t nbSeq;
   1050 
   1051     DISPLAYLEVEL(4, "  compressed block:\n");
   1052 
   1053     literalsSize = writeLiteralsBlock(seed, frame, contentSize);
   1054 
   1055     DISPLAYLEVEL(4, "   literals size: %u\n", (unsigned)literalsSize);
   1056 
   1057     nbSeq = writeSequencesBlock(seed, frame, contentSize, literalsSize, info);
   1058 
   1059     DISPLAYLEVEL(4, "   number of sequences: %u\n", (unsigned)nbSeq);
   1060 
   1061     return (BYTE*)frame->data - blockStart;
   1062 }
   1063 
   1064 static void writeBlock(U32* seed, frame_t* frame, size_t contentSize,
   1065                        int lastBlock, dictInfo info)
   1066 {
   1067     int force_block_type = opts.blockType != NULL;
   1068     int const blockTypeDesc = (force_block_type) ? *(opts.blockType) : RAND(seed) % 8;
   1069     size_t blockSize;
   1070     int blockType;
   1071 
   1072     BYTE *const header = (BYTE*)frame->data;
   1073     BYTE *op = header + 3;
   1074 
   1075     DISPLAYLEVEL(4, " block:\n");
   1076     DISPLAYLEVEL(4, "  block content size: %u\n", (unsigned)contentSize);
   1077     DISPLAYLEVEL(4, "  last block: %s\n", lastBlock ? "yes" : "no");
   1078 
   1079     if (blockTypeDesc == 0) {
   1080         /* Raw data frame */
   1081 
   1082         RAND_buffer(seed, frame->src, contentSize);
   1083         memcpy(op, frame->src, contentSize);
   1084 
   1085         op += contentSize;
   1086         blockType = 0;
   1087         blockSize = contentSize;
   1088     } else if (blockTypeDesc == 1 && frame->header.contentSize > 0) {
   1089         /* RLE (Don't create RLE block if frame content is 0 since block size of 1 may exceed max block size)*/
   1090         BYTE const symbol = RAND(seed) & 0xff;
   1091 
   1092         op[0] = symbol;
   1093         memset(frame->src, symbol, contentSize);
   1094 
   1095         op++;
   1096         blockType = 1;
   1097         blockSize = contentSize;
   1098     } else {
   1099         /* compressed, most common */
   1100         size_t compressedSize;
   1101         blockType = 2;
   1102 
   1103         frame->oldStats = frame->stats;
   1104 
   1105         frame->data = op;
   1106         compressedSize = writeCompressedBlock(seed, frame, contentSize, info);
   1107         if (compressedSize >= contentSize && !force_block_type) {   /* compressed block must be strictly smaller than uncompressed one */
   1108             blockType = 0;
   1109             memcpy(op, frame->src, contentSize);
   1110 
   1111             op += contentSize;
   1112             blockSize = contentSize; /* fall back on raw block if data doesn't
   1113                                         compress */
   1114 
   1115             frame->stats = frame->oldStats; /* don't update the stats */
   1116         } else {
   1117             op += compressedSize;
   1118             blockSize = compressedSize;
   1119         }
   1120     }
   1121     frame->src = (BYTE*)frame->src + contentSize;
   1122 
   1123     DISPLAYLEVEL(4, "  block type: %s\n", BLOCK_TYPES[blockType]);
   1124     DISPLAYLEVEL(4, "  block size field: %u\n", (unsigned)blockSize);
   1125 
   1126     header[0] = (BYTE) ((lastBlock | (blockType << 1) | (blockSize << 3)) & 0xff);
   1127     MEM_writeLE16(header + 1, (U16) (blockSize >> 5));
   1128 
   1129     frame->data = op;
   1130 }
   1131 
   1132 static void writeBlocks(U32* seed, frame_t* frame, dictInfo info)
   1133 {
   1134     size_t contentLeft = frame->header.contentSize;
   1135     size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize);
   1136     while (1) {
   1137         /* 1 in 4 chance of ending frame */
   1138         int const lastBlock = contentLeft > maxBlockSize ? 0 : !(RAND(seed) & 3);
   1139         size_t blockContentSize;
   1140         if (lastBlock) {
   1141             blockContentSize = contentLeft;
   1142         } else {
   1143             if (contentLeft > 0 && (RAND(seed) & 7)) {
   1144                 /* some variable size block */
   1145                 blockContentSize = RAND(seed) % (MIN(maxBlockSize, contentLeft)+1);
   1146             } else if (contentLeft > maxBlockSize && (RAND(seed) & 1)) {
   1147                 /* some full size block */
   1148                 blockContentSize = maxBlockSize;
   1149             } else {
   1150                 /* some empty block */
   1151                 blockContentSize = 0;
   1152             }
   1153         }
   1154 
   1155         writeBlock(seed, frame, blockContentSize, lastBlock, info);
   1156 
   1157         contentLeft -= blockContentSize;
   1158         if (lastBlock) break;
   1159     }
   1160 }
   1161 
   1162 static void writeChecksum(frame_t* frame)
   1163 {
   1164     /* write checksum so implementations can verify their output */
   1165     U64 digest = XXH64(frame->srcStart, (BYTE*)frame->src-(BYTE*)frame->srcStart, 0);
   1166     DISPLAYLEVEL(3, "  checksum: %08x\n", (unsigned)digest);
   1167     MEM_writeLE32(frame->data, (U32)digest);
   1168     frame->data = (BYTE*)frame->data + 4;
   1169 }
   1170 
   1171 static void outputBuffer(const void* buf, size_t size, const char* const path)
   1172 {
   1173     /* write data out to file */
   1174     const BYTE* ip = (const BYTE*)buf;
   1175     FILE* out;
   1176     if (path) {
   1177         out = fopen(path, "wb");
   1178     } else {
   1179         out = stdout;
   1180     }
   1181     if (!out) {
   1182         fprintf(stderr, "Failed to open file at %s: ", path);
   1183         perror(NULL);
   1184         exit(1);
   1185     }
   1186 
   1187     {   size_t fsize = size;
   1188         size_t written = 0;
   1189         while (written < fsize) {
   1190             written += fwrite(ip + written, 1, fsize - written, out);
   1191             if (ferror(out)) {
   1192                 fprintf(stderr, "Failed to write to file at %s: ", path);
   1193                 perror(NULL);
   1194                 exit(1);
   1195             }
   1196         }
   1197     }
   1198 
   1199     if (path) {
   1200         fclose(out);
   1201     }
   1202 }
   1203 
   1204 static void initFrame(frame_t* fr)
   1205 {
   1206     memset(fr, 0, sizeof(*fr));
   1207     fr->data = fr->dataStart = FRAME_BUFFER;
   1208     fr->dataEnd = FRAME_BUFFER + sizeof(FRAME_BUFFER);
   1209     fr->src = fr->srcStart = CONTENT_BUFFER;
   1210     fr->srcEnd = CONTENT_BUFFER + sizeof(CONTENT_BUFFER);
   1211 
   1212     /* init repeat codes */
   1213     fr->stats.rep[0] = 1;
   1214     fr->stats.rep[1] = 4;
   1215     fr->stats.rep[2] = 8;
   1216 }
   1217 
   1218 /**
   1219  * Generated a single zstd compressed block with no block/frame header.
   1220  * Returns the final seed.
   1221  */
   1222 static U32 generateCompressedBlock(U32 seed, frame_t* frame, dictInfo info)
   1223 {
   1224     size_t blockContentSize;
   1225     int blockWritten = 0;
   1226     BYTE* op;
   1227     DISPLAYLEVEL(4, "block seed: %u\n", (unsigned)seed);
   1228     initFrame(frame);
   1229     op = (BYTE*)frame->data;
   1230 
   1231     while (!blockWritten) {
   1232         size_t cSize;
   1233         /* generate window size */
   1234         {   int const exponent = RAND(&seed) % (MAX_WINDOW_LOG - 10);
   1235             int const mantissa = RAND(&seed) % 8;
   1236             frame->header.windowSize = (1U << (exponent + 10));
   1237             frame->header.windowSize += (frame->header.windowSize / 8) * mantissa;
   1238         }
   1239 
   1240         /* generate content size */
   1241         {   size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize);
   1242             if (RAND(&seed) & 15) {
   1243                 /* some full size blocks */
   1244                 blockContentSize = maxBlockSize;
   1245             } else if (RAND(&seed) & 7 && g_maxBlockSize >= (1U << 7)) {
   1246                 /* some small blocks <= 128 bytes*/
   1247                 blockContentSize = RAND(&seed) % (1U << 7);
   1248             } else {
   1249                 /* some variable size blocks */
   1250                 blockContentSize = RAND(&seed) % maxBlockSize;
   1251             }
   1252         }
   1253 
   1254         /* try generating a compressed block */
   1255         frame->oldStats = frame->stats;
   1256         frame->data = op;
   1257         cSize = writeCompressedBlock(&seed, frame, blockContentSize, info);
   1258         if (cSize >= blockContentSize) {  /* compressed size must be strictly smaller than decompressed size : https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#blocks */
   1259             /* data doesn't compress -- try again */
   1260             frame->stats = frame->oldStats; /* don't update the stats */
   1261             DISPLAYLEVEL(5, "   can't compress block : try again \n");
   1262         } else {
   1263             blockWritten = 1;
   1264             DISPLAYLEVEL(4, "   block size: %u \n", (unsigned)cSize);
   1265             frame->src = (BYTE*)frame->src + blockContentSize;
   1266         }
   1267     }
   1268     return seed;
   1269 }
   1270 
   1271 /* Return the final seed */
   1272 static U32 generateFrame(U32 seed, frame_t* fr, dictInfo info)
   1273 {
   1274     /* generate a complete frame */
   1275     DISPLAYLEVEL(3, "frame seed: %u\n", (unsigned)seed);
   1276     initFrame(fr);
   1277 
   1278 
   1279     writeFrameHeader(&seed, fr, info);
   1280     if (opts.frame_header_only)
   1281         return seed;
   1282 
   1283     writeBlocks(&seed, fr, info);
   1284     writeChecksum(fr);
   1285 
   1286     return seed;
   1287 }
   1288 
   1289 /*_*******************************************************
   1290 *  Dictionary Helper Functions
   1291 *********************************************************/
   1292 /* returns 0 if successful, otherwise returns 1 upon error */
   1293 static int genRandomDict(U32 dictID, U32 seed, size_t dictSize, BYTE* fullDict)
   1294 {
   1295     /* allocate space for samples */
   1296     int ret = 0;
   1297     unsigned const numSamples = 4;
   1298     size_t sampleSizes[4];
   1299     BYTE* const samples = malloc(5000*sizeof(BYTE));
   1300     if (samples == NULL) {
   1301         DISPLAY("Error: could not allocate space for samples\n");
   1302         return 1;
   1303     }
   1304 
   1305     /* generate samples */
   1306     {   unsigned literalValue = 1;
   1307         unsigned samplesPos = 0;
   1308         size_t currSize = 1;
   1309         while (literalValue <= 4) {
   1310             sampleSizes[literalValue - 1] = currSize;
   1311             {   size_t k;
   1312                 for (k = 0; k < currSize; k++) {
   1313                     *(samples + (samplesPos++)) = (BYTE)literalValue;
   1314             }   }
   1315             literalValue++;
   1316             currSize *= 16;
   1317     }   }
   1318 
   1319     {   size_t dictWriteSize = 0;
   1320         ZDICT_params_t zdictParams;
   1321         size_t const headerSize = MAX(dictSize/4, 256);
   1322         size_t const dictContentSize = dictSize - headerSize;
   1323         BYTE* const dictContent = fullDict + headerSize;
   1324         if (dictContentSize < ZDICT_CONTENTSIZE_MIN || dictSize < ZDICT_DICTSIZE_MIN) {
   1325             DISPLAY("Error: dictionary size is too small\n");
   1326             ret = 1;
   1327             goto exitGenRandomDict;
   1328         }
   1329 
   1330         /* init dictionary params */
   1331         memset(&zdictParams, 0, sizeof(zdictParams));
   1332         zdictParams.dictID = dictID;
   1333         zdictParams.notificationLevel = 1;
   1334 
   1335         /* fill in dictionary content */
   1336         RAND_buffer(&seed, (void*)dictContent, dictContentSize);
   1337 
   1338         /* finalize dictionary with random samples */
   1339         dictWriteSize = ZDICT_finalizeDictionary(fullDict, dictSize,
   1340                                     dictContent, dictContentSize,
   1341                                     samples, sampleSizes, numSamples,
   1342                                     zdictParams);
   1343 
   1344         if (ZDICT_isError(dictWriteSize)) {
   1345             DISPLAY("Could not finalize dictionary: %s\n", ZDICT_getErrorName(dictWriteSize));
   1346             ret = 1;
   1347         }
   1348     }
   1349 
   1350 exitGenRandomDict:
   1351     free(samples);
   1352     return ret;
   1353 }
   1354 
   1355 static dictInfo initDictInfo(int useDict, size_t dictContentSize, BYTE* dictContent, U32 dictID){
   1356     /* allocate space statically */
   1357     dictInfo dictOp;
   1358     memset(&dictOp, 0, sizeof(dictOp));
   1359     dictOp.useDict = useDict;
   1360     dictOp.dictContentSize = dictContentSize;
   1361     dictOp.dictContent = dictContent;
   1362     dictOp.dictID = dictID;
   1363     return dictOp;
   1364 }
   1365 
   1366 /*-*******************************************************
   1367 *  Test Mode
   1368 *********************************************************/
   1369 
        /* Destination buffer of MAX_DECOMPRESSED_SIZE bytes, shared by the test
         * decoders : testDecodeSimple() decodes into it directly, and
         * testDecodeStreaming() uses it as its streaming output area. */
   1370 BYTE DECOMPRESSED_BUFFER[MAX_DECOMPRESSED_SIZE];
   1371 
   1372 static size_t testDecodeSimple(frame_t* fr)
   1373 {
   1374     /* test decoding the generated data with the simple API */
   1375     size_t const ret = ZSTD_decompress(DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
   1376                            fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart);
   1377 
   1378     if (ZSTD_isError(ret)) return ret;
   1379 
   1380     if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart,
   1381                (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) {
   1382         return ERROR(corruption_detected);
   1383     }
   1384 
   1385     return ret;
   1386 }
   1387 
   1388 static size_t testDecodeStreaming(frame_t* fr)
   1389 {
   1390     /* test decoding the generated data with the streaming API */
   1391     ZSTD_DStream* zd = ZSTD_createDStream();
   1392     ZSTD_inBuffer in;
   1393     ZSTD_outBuffer out;
   1394     size_t ret;
   1395 
   1396     if (!zd) return ERROR(memory_allocation);
   1397 
   1398     in.src = fr->dataStart;
   1399     in.pos = 0;
   1400     in.size = (BYTE*)fr->data - (BYTE*)fr->dataStart;
   1401 
   1402     out.dst = DECOMPRESSED_BUFFER;
   1403     out.pos = 0;
   1404     out.size = ZSTD_DStreamOutSize();
   1405 
   1406     ZSTD_initDStream(zd);
   1407     while (1) {
   1408         ret = ZSTD_decompressStream(zd, &out, &in);
   1409         if (ZSTD_isError(ret)) goto cleanup; /* error */
   1410         if (ret == 0) break; /* frame is done */
   1411 
   1412         /* force decoding to be done in chunks */
   1413         out.size += MIN(ZSTD_DStreamOutSize(), MAX_DECOMPRESSED_SIZE - out.size);
   1414     }
   1415 
   1416     ret = out.pos;
   1417 
   1418     if (memcmp(out.dst, fr->srcStart, out.pos) != 0) {
   1419         return ERROR(corruption_detected);
   1420     }
   1421 
   1422 cleanup:
   1423     ZSTD_freeDStream(zd);
   1424     return ret;
   1425 }
   1426 
   1427 static size_t testDecodeWithDict(U32 seed, genType_e genType)
   1428 {
   1429     /* create variables */
   1430     size_t const dictSize = RAND(&seed) % (10 << 20) + ZDICT_DICTSIZE_MIN + ZDICT_CONTENTSIZE_MIN;
   1431     U32 const dictID = RAND(&seed);
   1432     size_t errorDetected = 0;
   1433     BYTE* const fullDict = malloc(dictSize);
   1434     if (fullDict == NULL) {
   1435         return ERROR(GENERIC);
   1436     }
   1437 
   1438     /* generate random dictionary */
   1439     if (genRandomDict(dictID, seed, dictSize, fullDict)) {  /* return 0 on success */
   1440         errorDetected = ERROR(GENERIC);
   1441         goto dictTestCleanup;
   1442     }
   1443 
   1444 
   1445     {   frame_t fr;
   1446         dictInfo info;
   1447         ZSTD_DCtx* const dctx = ZSTD_createDCtx();
   1448         size_t ret;
   1449 
   1450         /* get dict info */
   1451         {   size_t const headerSize = MAX(dictSize/4, 256);
   1452             size_t const dictContentSize = dictSize-headerSize;
   1453             BYTE* const dictContent = fullDict+headerSize;
   1454             info = initDictInfo(1, dictContentSize, dictContent, dictID);
   1455         }
   1456 
   1457         /* manually decompress and check difference */
   1458         if (genType == gt_frame) {
   1459             /* Test frame */
   1460             generateFrame(seed, &fr, info);
   1461             ret = ZSTD_decompress_usingDict(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
   1462                                             fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart,
   1463                                             fullDict, dictSize);
   1464         } else {
   1465             /* Test block */
   1466             generateCompressedBlock(seed, &fr, info);
   1467             ret = ZSTD_decompressBegin_usingDict(dctx, fullDict, dictSize);
   1468             if (ZSTD_isError(ret)) {
   1469                 errorDetected = ret;
   1470                 ZSTD_freeDCtx(dctx);
   1471                 goto dictTestCleanup;
   1472             }
   1473             ret = ZSTD_decompressBlock_deprecated(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
   1474                                        fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart);
   1475         }
   1476         ZSTD_freeDCtx(dctx);
   1477 
   1478         if (ZSTD_isError(ret)) {
   1479             errorDetected = ret;
   1480             goto dictTestCleanup;
   1481         }
   1482 
   1483         if (memcmp(DECOMPRESSED_BUFFER, fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart) != 0) {
   1484             errorDetected = ERROR(corruption_detected);
   1485             goto dictTestCleanup;
   1486         }
   1487     }
   1488 
   1489 dictTestCleanup:
   1490     free(fullDict);
   1491     return errorDetected;
   1492 }
   1493 
   1494 static size_t testDecodeRawBlock(frame_t* fr)
   1495 {
   1496     ZSTD_DCtx* dctx = ZSTD_createDCtx();
   1497     size_t ret = ZSTD_decompressBegin(dctx);
   1498     if (ZSTD_isError(ret)) return ret;
   1499 
   1500     ret = ZSTD_decompressBlock_deprecated(
   1501             dctx,
   1502             DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
   1503             fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart);
   1504     ZSTD_freeDCtx(dctx);
   1505     if (ZSTD_isError(ret)) return ret;
   1506 
   1507     if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart,
   1508                (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) {
   1509         return ERROR(corruption_detected);
   1510     }
   1511 
   1512     return ret;
   1513 }
   1514 
   1515 static int runBlockTest(U32* seed)
   1516 {
   1517     frame_t fr;
   1518     U32 const seedCopy = *seed;
   1519     {   dictInfo const info = initDictInfo(0, 0, NULL, 0);
   1520         *seed = generateCompressedBlock(*seed, &fr, info);
   1521     }
   1522 
   1523     {   size_t const r = testDecodeRawBlock(&fr);
   1524         if (ZSTD_isError(r)) {
   1525             DISPLAY("Error in block mode on test seed %u: %s\n",
   1526                     (unsigned)seedCopy, ZSTD_getErrorName(r));
   1527             return 1;
   1528         }
   1529     }
   1530 
   1531     {   size_t const r = testDecodeWithDict(*seed, gt_block);
   1532         if (ZSTD_isError(r)) {
   1533             DISPLAY("Error in block mode with dictionary on test seed %u: %s\n",
   1534                     (unsigned)seedCopy, ZSTD_getErrorName(r));
   1535             return 1;
   1536         }
   1537     }
   1538     return 0;
   1539 }
   1540 
   1541 static int runFrameTest(U32* seed)
   1542 {
   1543     frame_t fr;
   1544     U32 const seedCopy = *seed;
   1545     {   dictInfo const info = initDictInfo(0, 0, NULL, 0);
   1546         *seed = generateFrame(*seed, &fr, info);
   1547     }
   1548 
   1549     {   size_t const r = testDecodeSimple(&fr);
   1550         if (ZSTD_isError(r)) {
   1551             DISPLAY("Error in simple mode on test seed %u: %s\n",
   1552                     (unsigned)seedCopy, ZSTD_getErrorName(r));
   1553             return 1;
   1554         }
   1555     }
   1556     {   size_t const r = testDecodeStreaming(&fr);
   1557         if (ZSTD_isError(r)) {
   1558             DISPLAY("Error in streaming mode on test seed %u: %s\n",
   1559                     (unsigned)seedCopy, ZSTD_getErrorName(r));
   1560             return 1;
   1561         }
   1562     }
   1563     {   size_t const r = testDecodeWithDict(*seed, gt_frame);  /* avoid big dictionaries */
   1564         if (ZSTD_isError(r)) {
   1565             DISPLAY("Error in dictionary mode on test seed %u: %s\n",
   1566                     (unsigned)seedCopy, ZSTD_getErrorName(r));
   1567             return 1;
   1568         }
   1569     }
   1570     return 0;
   1571 }
   1572 
   1573 static int runTestMode(U32 seed, unsigned numFiles, unsigned const testDurationS,
   1574                        genType_e genType)
   1575 {
   1576     unsigned fnum;
   1577 
   1578     UTIL_time_t const startClock = UTIL_getTime();
   1579     U64 const maxClockSpan = testDurationS * SEC_TO_MICRO;
   1580 
   1581     if (numFiles == 0 && !testDurationS) numFiles = 1;
   1582 
   1583     DISPLAY("seed: %u\n", (unsigned)seed);
   1584 
   1585     for (fnum = 0; fnum < numFiles || UTIL_clockSpanMicro(startClock) < maxClockSpan; fnum++) {
   1586         if (fnum < numFiles)
   1587             DISPLAYUPDATE("\r%u/%u        ", fnum, numFiles);
   1588         else
   1589             DISPLAYUPDATE("\r%u           ", fnum);
   1590 
   1591         {   int const ret = (genType == gt_frame) ?
   1592                             runFrameTest(&seed) :
   1593                             runBlockTest(&seed);
   1594             if (ret) return ret;
   1595         }
   1596     }
   1597 
   1598     DISPLAY("\r%u tests completed: ", fnum);
   1599     DISPLAY("OK\n");
   1600 
   1601     return 0;
   1602 }
   1603 
   1604 /*-*******************************************************
   1605 *  File I/O
   1606 *********************************************************/
   1607 
   1608 static int generateFile(U32 seed, const char* const path,
   1609                         const char* const origPath, genType_e genType)
   1610 {
   1611     frame_t fr;
   1612 
   1613     DISPLAY("seed: %u\n", (unsigned)seed);
   1614 
   1615     {   dictInfo const info = initDictInfo(0, 0, NULL, 0);
   1616         if (genType == gt_frame) {
   1617             generateFrame(seed, &fr, info);
   1618         } else {
   1619             generateCompressedBlock(seed, &fr, info);
   1620         }
   1621     }
   1622     outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path);
   1623     if (origPath) {
   1624         outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath);
   1625     }
   1626     return 0;
   1627 }
   1628 
   1629 static int generateCorpus(U32 seed, unsigned numFiles, const char* const path,
   1630                           const char* const origPath, genType_e genType)
   1631 {
   1632     char outPath[MAX_PATH];
   1633     unsigned fnum;
   1634 
   1635     DISPLAY("seed: %u\n", (unsigned)seed);
   1636 
   1637     for (fnum = 0; fnum < numFiles; fnum++) {
   1638         frame_t fr;
   1639 
   1640         DISPLAYUPDATE("\r%u/%u        ", fnum, numFiles);
   1641 
   1642         {   dictInfo const info = initDictInfo(0, 0, NULL, 0);
   1643             if (genType == gt_frame) {
   1644                 seed = generateFrame(seed, &fr, info);
   1645             } else {
   1646                 seed = generateCompressedBlock(seed, &fr, info);
   1647             }
   1648         }
   1649 
   1650         if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) {
   1651             DISPLAY("Error: path too long\n");
   1652             return 1;
   1653         }
   1654         outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath);
   1655 
   1656         if (origPath) {
   1657             if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) {
   1658                 DISPLAY("Error: path too long\n");
   1659                 return 1;
   1660             }
   1661             outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath);
   1662         }
   1663     }
   1664 
   1665     DISPLAY("\r%u/%u      \n", fnum, numFiles);
   1666 
   1667     return 0;
   1668 }
   1669 
   1670 static int generateCorpusWithDict(U32 seed, unsigned numFiles, const char* const path,
   1671                                   const char* const origPath, const size_t dictSize,
   1672                                   genType_e genType)
   1673 {
   1674     char outPath[MAX_PATH];
   1675     BYTE* fullDict;
   1676     U32 const dictID = RAND(&seed);
   1677     int errorDetected = 0;
   1678 
   1679     if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) {
   1680         DISPLAY("Error: path too long\n");
   1681         return 1;
   1682     }
   1683 
   1684     /* allocate space for the dictionary */
   1685     fullDict = malloc(dictSize);
   1686     if (fullDict == NULL) {
   1687         DISPLAY("Error: could not allocate space for full dictionary.\n");
   1688         return 1;
   1689     }
   1690 
   1691     /* randomly generate the dictionary */
   1692     {   int const ret = genRandomDict(dictID, seed, dictSize, fullDict);
   1693         if (ret != 0) {
   1694             errorDetected = ret;
   1695             goto dictCleanup;
   1696         }
   1697     }
   1698 
   1699     /* write out dictionary */
   1700     if (numFiles != 0) {
   1701         if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) {
   1702             DISPLAY("Error: dictionary path too long\n");
   1703             errorDetected = 1;
   1704             goto dictCleanup;
   1705         }
   1706         outputBuffer(fullDict, dictSize, outPath);
   1707     }
   1708     else {
   1709         outputBuffer(fullDict, dictSize, "dictionary");
   1710     }
   1711 
   1712     /* generate random compressed/decompressed files */
   1713     {   unsigned fnum;
   1714         for (fnum = 0; fnum < MAX(numFiles, 1); fnum++) {
   1715             frame_t fr;
   1716             DISPLAYUPDATE("\r%u/%u        ", fnum, numFiles);
   1717             {
   1718                 size_t const headerSize = MAX(dictSize/4, 256);
   1719                 size_t const dictContentSize = dictSize-headerSize;
   1720                 BYTE* const dictContent = fullDict+headerSize;
   1721                 dictInfo const info = initDictInfo(1, dictContentSize, dictContent, dictID);
   1722                 if (genType == gt_frame) {
   1723                     seed = generateFrame(seed, &fr, info);
   1724                 } else {
   1725                     seed = generateCompressedBlock(seed, &fr, info);
   1726                 }
   1727             }
   1728 
   1729             if (numFiles != 0) {
   1730                 if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) {
   1731                     DISPLAY("Error: path too long\n");
   1732                     errorDetected = 1;
   1733                     goto dictCleanup;
   1734                 }
   1735                 outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath);
   1736 
   1737                 if (origPath) {
   1738                     if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) {
   1739                         DISPLAY("Error: path too long\n");
   1740                         errorDetected = 1;
   1741                         goto dictCleanup;
   1742                     }
   1743                     outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath);
   1744                 }
   1745             }
   1746             else {
   1747                 outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path);
   1748                 if (origPath) {
   1749                     outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath);
   1750                 }
   1751             }
   1752         }
   1753     }
   1754 
   1755 dictCleanup:
   1756     free(fullDict);
   1757     return errorDetected;
   1758 }
   1759 
   1760 
   1761 /*_*******************************************************
   1762 *  Command line
   1763 *********************************************************/
   1764 static U32 makeSeed(void)
   1765 {
   1766     U32 t = (U32) time(NULL);
   1767     return XXH32(&t, sizeof(t), 0) % 65536;
   1768 }
   1769 
/* readInt() :
 * Parse the run of decimal digits found at `*argument`.
 * `*argument` is advanced past the digits consumed; it is left untouched
 * when the string does not start with a digit.
 * @return : the parsed value, or 0 when there is no digit.
 * Note : the result wraps around if the digit string exceeds UINT_MAX. */
static unsigned readInt(const char** argument)
{
    const char* cursor = *argument;
    unsigned total = 0;

    for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
        total = total * 10 + (unsigned)(*cursor - '0');
    }

    *argument = cursor;
    return total;
}
   1780 
/* usage() :
 * Print the basic command line help on stderr.
 * The descriptions of -p, -T and -v follow what main() actually implements :
 * an output path is mandatory outside of test mode, -T counts seconds (minutes
 * with an 'm' or 'mn' suffix), and the verbosity level starts at 2. */
static void usage(const char* programName)
{
    DISPLAY( "Usage :\n");
    DISPLAY( "      %s [args]\n", programName);
    DISPLAY( "\n");
    DISPLAY( "Arguments :\n");
    DISPLAY( " -p<path> : select output path (required unless -t is used)\n");
    DISPLAY( "                in multiple files mode this should be a directory\n");
    DISPLAY( " -o<path> : select path to output original file (default:no output)\n");
    DISPLAY( "                in multiple files mode this should be a directory\n");
    DISPLAY( " -s#      : select seed (default:random based on time)\n");
    DISPLAY( " -n#      : number of files to generate (default:1)\n");
    DISPLAY( " -t       : activate test mode (test files against libzstd instead of outputting them)\n");
    DISPLAY( " -T#      : length of time to run tests for, in seconds (append 'm' or 'mn' for minutes)\n");
    DISPLAY( " -v       : increase verbosity level (default:2, max:7)\n");
    DISPLAY( " -h/H     : display help/long help and exit\n");
}
   1798 
/* advancedUsage() :
 * Print the basic help (see usage()) followed by the list of long options,
 * on stderr. Every long option recognized by main() is listed, including
 * --literal-type=, which main() only accepts together with --block-type=2. */
static void advancedUsage(const char* programName)
{
    usage(programName);
    DISPLAY( "\n");
    DISPLAY( "Advanced arguments        :\n");
    DISPLAY( " --content-size           : always include the content size in the frame header\n");
    DISPLAY( " --use-dict=#             : include a dictionary used to decompress the corpus\n");
    DISPLAY( " --gen-blocks             : generate raw compressed blocks without block/frame headers\n");
    DISPLAY( " --max-block-size-log=#   : max block size log, must be in range [2, 17]\n");
    DISPLAY( " --max-content-size-log=# : max content size log, must be <= 20\n");
    DISPLAY( "                            (this is ignored with gen-blocks)\n");
    DISPLAY( " --block-type=#           : force certain block type (raw=0, rle=1, compressed=2)\n");
    DISPLAY( " --literal-type=#         : force certain literals type (requires --block-type=2)\n");
    DISPLAY( " --frame-header-only      : dump only frame header\n");
    DISPLAY( " --no-magic               : do not add magic number\n");
}
   1814 
/*! readU32FromChar() :
 *  Parse an unsigned decimal value, optionally followed by a size suffix.
 *  Recognized suffixes : K, KB, KiB (value * 1024) and M, MB, MiB
 *  (value * 1024 * 1024).
 *  `*stringPtr` is advanced to the first character which was not consumed.
 * @return : the parsed value (0 when the string does not start with a digit).
 *  Note : the result wraps around if the value exceeds UINT_MAX. */
static unsigned readU32FromChar(const char** stringPtr)
{
    const char* p = *stringPtr;
    unsigned value = 0;

    for ( ; (*p >= '0') && (*p <= '9'); p++) {
        value = value * 10 + (unsigned)(*p - '0');
    }

    if ((*p == 'K') || (*p == 'M')) {
        unsigned const shift = (*p == 'M') ? 20 : 10;
        value <<= shift;
        p++;
        /* optional "i" and "B", as in KiB / MB */
        if (*p == 'i') p++;
        if (*p == 'B') p++;
    }

    *stringPtr = p;
    return value;
}
   1834 
/** longCommandWArg() :
 *  Test whether the string at `*stringPtr` begins with `longCommand`.
 *  On a match, `*stringPtr` is advanced to the character right after the
 *  prefix and 1 is returned.
 *  Otherwise `*stringPtr` is left unchanged and 0 is returned.
 */
static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
{
    size_t const prefixLen = strlen(longCommand);

    if (strncmp(*stringPtr, longCommand, prefixLen) != 0) {
        return 0;
    }

    *stringPtr += prefixLen;
    return 1;
}
   1847 
   1848 int main(int argc, char** argv)
   1849 {
   1850     U32 seed = 0;
   1851     int seedset = 0;
   1852     unsigned numFiles = 0;
   1853     unsigned testDuration = 0;
   1854     int testMode = 0;
   1855     const char* path = NULL;
   1856     const char* origPath = NULL;
   1857     int useDict = 0;
   1858     unsigned dictSize = (10 << 10); /* 10 kB default */
   1859     genType_e genType = gt_frame;
   1860 
   1861     int argNb;
   1862 
   1863     /* Check command line */
   1864     for (argNb=1; argNb<argc; argNb++) {
   1865         const char* argument = argv[argNb];
   1866         if(!argument) continue;   /* Protection if argument empty */
   1867 
   1868         /* Handle commands. Aggregated commands are allowed */
   1869         if (argument[0]=='-') {
   1870             argument++;
   1871             while (*argument!=0) {
   1872                 switch(*argument)
   1873                 {
   1874                 case 'h':
   1875                     usage(argv[0]);
   1876                     return 0;
   1877                 case 'H':
   1878                     advancedUsage(argv[0]);
   1879                     return 0;
   1880                 case 'v':
   1881                     argument++;
   1882                     g_displayLevel++;
   1883                     break;
   1884                 case 's':
   1885                     argument++;
   1886                     seedset=1;
   1887                     seed = readInt(&argument);
   1888                     break;
   1889                 case 'n':
   1890                     argument++;
   1891                     numFiles = readInt(&argument);
   1892                     break;
   1893                 case 'T':
   1894                     argument++;
   1895                     testDuration = readInt(&argument);
   1896                     if (*argument == 'm') {
   1897                         testDuration *= 60;
   1898                         argument++;
   1899                         if (*argument == 'n') argument++;
   1900                     }
   1901                     break;
   1902                 case 'o':
   1903                     argument++;
   1904                     origPath = argument;
   1905                     argument += strlen(argument);
   1906                     break;
   1907                 case 'p':
   1908                     argument++;
   1909                     path = argument;
   1910                     argument += strlen(argument);
   1911                     break;
   1912                 case 't':
   1913                     argument++;
   1914                     testMode = 1;
   1915                     break;
   1916                 case '-':
   1917                     argument++;
   1918                     if (strcmp(argument, "content-size") == 0) {
   1919                         opts.contentSize = 1;
   1920                     } else if (longCommandWArg(&argument, "use-dict=")) {
   1921                         dictSize = readU32FromChar(&argument);
   1922                         useDict = 1;
   1923                     } else if (strcmp(argument, "gen-blocks") == 0) {
   1924                         genType = gt_block;
   1925                     } else if (longCommandWArg(&argument, "max-block-size-log=")) {
   1926                         U32 value = readU32FromChar(&argument);
   1927                         if (value >= 2 && value <= ZSTD_BLOCKSIZE_MAX) {
   1928                             g_maxBlockSize = 1U << value;
   1929                         }
   1930                     } else if (longCommandWArg(&argument, "max-content-size-log=")) {
   1931                         U32 value = readU32FromChar(&argument);
   1932                         g_maxDecompressedSizeLog =
   1933                                 MIN(MAX_DECOMPRESSED_SIZE_LOG, value);
   1934                     } else if (longCommandWArg(&argument, "block-type=")) {
   1935                         U32 value = readU32FromChar(&argument);
   1936                         opts.blockType = malloc(sizeof(blockType_e));
   1937                         *(opts.blockType) = value;
   1938                     } else if (longCommandWArg(&argument, "literal-type=")) {
   1939                         U32 value = readU32FromChar(&argument);
   1940                         opts.literalType = malloc(sizeof(literalType_e));
   1941                         *(opts.literalType) = value;
   1942                     } else if (strcmp(argument, "frame-header-only") == 0) {
   1943                         opts.frame_header_only = 1;
   1944                     } else if (strcmp(argument, "no-magic") == 0) {
   1945                         opts.no_magic = 1;
   1946                     } else {
   1947                         advancedUsage(argv[0]);
   1948                         return 1;
   1949                     }
   1950                     argument += strlen(argument);
   1951                     break;
   1952                 default:
   1953                     usage(argv[0]);
   1954                     return 1;
   1955     }   }   }   }   /* for (argNb=1; argNb<argc; argNb++) */
   1956 
   1957     if (opts.blockType) {
   1958         if ((opts.contentSize == 0) && (*(opts.blockType) == bt_rle)) {
   1959             DISPLAY("Error: content-size has to be used together with blockType=1 (rle block)\n");
   1960             return 1;
   1961         }
   1962 
   1963         if (opts.literalType && (*(opts.blockType) != bt_compressed)) {
   1964             DISPLAY("Error: literal-type can be used only with blockType=2 (compressed block)\n");
   1965             return 1;
   1966         }
   1967     }
   1968 
   1969     if (!seedset) {
   1970         seed = makeSeed();
   1971     }
   1972 
   1973     if (testMode) {
   1974         return runTestMode(seed, numFiles, testDuration, genType);
   1975     } else {
   1976         if (testDuration) {
   1977             DISPLAY("Error: -T requires test mode (-t)\n\n");
   1978             usage(argv[0]);
   1979             return 1;
   1980         }
   1981     }
   1982 
   1983     if (!path) {
   1984         DISPLAY("Error: path is required in file generation mode\n");
   1985         usage(argv[0]);
   1986         return 1;
   1987     }
   1988 
   1989     if (numFiles == 0 && useDict == 0) {
   1990         return generateFile(seed, path, origPath, genType);
   1991     } else if (useDict == 0){
   1992         return generateCorpus(seed, numFiles, path, origPath, genType);
   1993     } else {
   1994         /* should generate files with a dictionary */
   1995         return generateCorpusWithDict(seed, numFiles, path, origPath, dictSize, genType);
   1996     }
   1997 
   1998 }
   1999