Home | History | Annotate | Line # | Download | only in tests
      1  1.1  christos /*
      2  1.1  christos  * Copyright (c) Meta Platforms, Inc. and affiliates.
      3  1.1  christos  * All rights reserved.
      4  1.1  christos  *
      5  1.1  christos  * This source code is licensed under both the BSD-style license (found in the
      6  1.1  christos  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
      7  1.1  christos  * in the COPYING file in the root directory of this source tree).
      8  1.1  christos  * You may select, at your option, one of the above-listed licenses.
      9  1.1  christos  */
     10  1.1  christos 
     11  1.1  christos #include <limits.h>
     12  1.1  christos #include <math.h>
     13  1.1  christos #include <stddef.h>
     14  1.1  christos #include <stdio.h>
     15  1.1  christos #include <stdlib.h>
     16  1.1  christos #include <string.h>
     17  1.1  christos #include <time.h>  /* time(), for seed random initialization */
     18  1.1  christos 
     19  1.1  christos #include "util.h"
     20  1.1  christos #include "timefn.h"   /* UTIL_clockSpanMicro, SEC_TO_MICRO, UTIL_TIME_INITIALIZER */
     21  1.1  christos #include "zstd.h"
     22  1.1  christos #include "zstd_internal.h"
     23  1.1  christos #include "mem.h"
     24  1.1  christos #define ZDICT_STATIC_LINKING_ONLY
     25  1.1  christos #include "zdict.h"
     26  1.1  christos 
     27  1.1  christos /* Direct access to internal compression functions is required */
     28  1.1  christos #include "compress/zstd_compress.c" /* ZSTD_resetSeqStore, ZSTD_storeSeq, *_TO_OFFBASE, HIST_countFast_wksp, HIST_isError */
     29  1.1  christos #include "decompress/zstd_decompress_block.h" /* ZSTD_decompressBlock_deprecated */
     30  1.1  christos 
     31  1.1  christos #define XXH_STATIC_LINKING_ONLY
     32  1.1  christos #include "xxhash.h"     /* XXH64 */
     33  1.1  christos 
/* `inline` is only available in C++ and in C99 or later. For any other
 * dialect, define it away so that `static inline` declarations in this file
 * still compile (as plain `static`). */
#if !(defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */))
# define inline  /* disable */
#endif
     37  1.1  christos 
     38  1.1  christos /*-************************************
     39  1.1  christos *  DISPLAY Macros
     40  1.1  christos **************************************/
     41  1.1  christos #define DISPLAY(...)          fprintf(stderr, __VA_ARGS__)
     42  1.1  christos #define DISPLAYLEVEL(l, ...)  if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
     43  1.1  christos static U32 g_displayLevel = 2;
     44  1.1  christos 
     45  1.1  christos #define DISPLAYUPDATE(...)                                                     \
     46  1.1  christos     do {                                                                       \
     47  1.1  christos         if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) ||           \
     48  1.1  christos             (g_displayLevel >= 4)) {                                           \
     49  1.1  christos             g_displayClock = UTIL_getTime();                                   \
     50  1.1  christos             DISPLAY(__VA_ARGS__);                                              \
     51  1.1  christos             if (g_displayLevel >= 4) fflush(stderr);                           \
     52  1.1  christos         }                                                                      \
     53  1.1  christos     } while (0)
     54  1.1  christos 
     55  1.1  christos static const U64 g_refreshRate = SEC_TO_MICRO / 6;
     56  1.1  christos static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
     57  1.1  christos 
/* Terminate the program with a diagnostic on stderr if `code` is a zstd
 * error code (as reported by ZSTD_isError).
 * `code` is evaluated exactly once, so an expression with side effects (for
 * example a function call) can be passed safely. */
#define CHECKERR(code)                                                         \
    do {                                                                       \
        size_t const checkerrCode_ = (code);                                   \
        if (ZSTD_isError(checkerrCode_)) {                                     \
            DISPLAY("Error occurred while generating data: %s\n",              \
                    ZSTD_getErrorName(checkerrCode_));                         \
            exit(1);                                                           \
        }                                                                      \
    } while (0)
     66  1.1  christos 
     67  1.1  christos 
     68  1.1  christos /*-*******************************************************
     69  1.1  christos *  Random function
     70  1.1  christos *********************************************************/
     71  1.1  christos static U32 RAND(U32* src)
     72  1.1  christos {
     73  1.1  christos #define RAND_rotl32(x,r) ((x << r) | (x >> (32 - r)))
     74  1.1  christos     static const U32 prime1 = 2654435761U;
     75  1.1  christos     static const U32 prime2 = 2246822519U;
     76  1.1  christos     U32 rand32 = *src;
     77  1.1  christos     rand32 *= prime1;
     78  1.1  christos     rand32 += prime2;
     79  1.1  christos     rand32  = RAND_rotl32(rand32, 13);
     80  1.1  christos     *src = rand32;
     81  1.1  christos     return RAND_rotl32(rand32, 27);
     82  1.1  christos #undef RAND_rotl32
     83  1.1  christos }
     84  1.1  christos 
     85  1.1  christos #define DISTSIZE (8192)
     86  1.1  christos 
     87  1.1  christos /* Write `size` bytes into `ptr`, all of which are less than or equal to `maxSymb` */
     88  1.1  christos static void RAND_bufferMaxSymb(U32* seed, void* ptr, size_t size, int maxSymb)
     89  1.1  christos {
     90  1.1  christos     size_t i;
     91  1.1  christos     BYTE* op = ptr;
     92  1.1  christos 
     93  1.1  christos     for (i = 0; i < size; i++) {
     94  1.1  christos         op[i] = (BYTE) (RAND(seed) % (maxSymb + 1));
     95  1.1  christos     }
     96  1.1  christos }
     97  1.1  christos 
     98  1.1  christos /* Write `size` random bytes into `ptr` */
     99  1.1  christos static void RAND_buffer(U32* seed, void* ptr, size_t size)
    100  1.1  christos {
    101  1.1  christos     size_t i;
    102  1.1  christos     BYTE* op = ptr;
    103  1.1  christos 
    104  1.1  christos     for (i = 0; i + 4 <= size; i += 4) {
    105  1.1  christos         MEM_writeLE32(op + i, RAND(seed));
    106  1.1  christos     }
    107  1.1  christos     for (; i < size; i++) {
    108  1.1  christos         op[i] = RAND(seed) & 0xff;
    109  1.1  christos     }
    110  1.1  christos }
    111  1.1  christos 
    112  1.1  christos /* Write `size` bytes into `ptr` following the distribution `dist` */
    113  1.1  christos static void RAND_bufferDist(U32* seed, BYTE* dist, void* ptr, size_t size)
    114  1.1  christos {
    115  1.1  christos     size_t i;
    116  1.1  christos     BYTE* op = ptr;
    117  1.1  christos 
    118  1.1  christos     for (i = 0; i < size; i++) {
    119  1.1  christos         op[i] = dist[RAND(seed) % DISTSIZE];
    120  1.1  christos     }
    121  1.1  christos }
    122  1.1  christos 
    123  1.1  christos /* Generate a random distribution where the frequency of each symbol follows a
    124  1.1  christos  * geometric distribution defined by `weight`
    125  1.1  christos  * `dist` should have size at least `DISTSIZE` */
    126  1.1  christos static void RAND_genDist(U32* seed, BYTE* dist, double weight)
    127  1.1  christos {
    128  1.1  christos     size_t i = 0;
    129  1.1  christos     size_t statesLeft = DISTSIZE;
    130  1.1  christos     BYTE symb = (BYTE) (RAND(seed) % 256);
    131  1.1  christos     BYTE step = (BYTE) ((RAND(seed) % 256) | 1); /* force it to be odd so it's relatively prime to 256 */
    132  1.1  christos 
    133  1.1  christos     while (i < DISTSIZE) {
    134  1.1  christos         size_t states = ((size_t)(weight * (double)statesLeft)) + 1;
    135  1.1  christos         size_t j;
    136  1.1  christos         for (j = 0; j < states && i < DISTSIZE; j++, i++) {
    137  1.1  christos             dist[i] = symb;
    138  1.1  christos         }
    139  1.1  christos 
    140  1.1  christos         symb += step;
    141  1.1  christos         statesLeft -= states;
    142  1.1  christos     }
    143  1.1  christos }
    144  1.1  christos 
    145  1.1  christos /* Generates a random number in the range [min, max) */
    146  1.1  christos static inline U32 RAND_range(U32* seed, U32 min, U32 max)
    147  1.1  christos {
    148  1.1  christos     return (RAND(seed) % (max-min)) + min;
    149  1.1  christos }
    150  1.1  christos 
    151  1.1  christos #define ROUND(x) ((U32)(x + 0.5))
    152  1.1  christos 
    153  1.1  christos /* Generates a random number in an exponential distribution with mean `mean` */
    154  1.1  christos static double RAND_exp(U32* seed, double mean)
    155  1.1  christos {
    156  1.1  christos     double const u = RAND(seed) / (double) UINT_MAX;
    157  1.1  christos     return log(1-u) * (-mean);
    158  1.1  christos }
    159  1.1  christos 
    160  1.1  christos /*-*******************************************************
    161  1.1  christos *  Constants and Structs
    162  1.1  christos *********************************************************/
/* Printable names of the block types.
 * NOTE(review): presumably indexed by the numeric block type; the use site
 * is not visible in this part of the file -- confirm. */
const char* BLOCK_TYPES[] = {"raw", "rle", "compressed"};

/* Compile-time ceiling on the content size: 2^20 bytes. It is the default
 * value of g_maxDecompressedSizeLog and sizes the static buffers below. */
#define MAX_DECOMPRESSED_SIZE_LOG 20
#define MAX_DECOMPRESSED_SIZE (1ULL << MAX_DECOMPRESSED_SIZE_LOG)

#define MAX_WINDOW_LOG 22 /* Recommended support is 8MB, so limit to 4MB + mantissa */

/* MAX_NB_SEQ is ZSTD_BLOCKSIZE_MAX divided by MIN_SEQ_LEN, rounded up; it is
 * the capacity of SEQUENCE_BUFFER. */
#define MIN_SEQ_LEN (3)
#define MAX_NB_SEQ ((ZSTD_BLOCKSIZE_MAX + MIN_SEQ_LEN - 1) / MIN_SEQ_LEN)

/* Provide MAX_PATH when the platform does not: reuse PATH_MAX if it is
 * defined, otherwise fall back to 256. */
#ifndef MAX_PATH
    #ifdef PATH_MAX
        #define MAX_PATH PATH_MAX
    #else
        #define MAX_PATH 256
    #endif
#endif

/* Statically allocated working buffers.
 * NOTE(review): CONTENT_BUFFER and FRAME_BUFFER are not referenced in the
 * visible part of the file; judging by their names and sizes they hold the
 * decompressed content and the generated frame -- confirm.
 * LITERAL_BUFFER receives the literals generated for the current block. */
BYTE CONTENT_BUFFER[MAX_DECOMPRESSED_SIZE];
BYTE FRAME_BUFFER[MAX_DECOMPRESSED_SIZE * 2];
BYTE LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX];

/* Storage for the sequences of the current block.
 * NOTE(review): presumably the backing arrays of a seqStore -- confirm. */
seqDef SEQUENCE_BUFFER[MAX_NB_SEQ];
BYTE SEQUENCE_LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX]; /* storeSeq expects a place to copy literals to */
BYTE SEQUENCE_LLCODE[ZSTD_BLOCKSIZE_MAX];
BYTE SEQUENCE_MLCODE[ZSTD_BLOCKSIZE_MAX];
BYTE SEQUENCE_OFCODE[ZSTD_BLOCKSIZE_MAX];

/* Scratch workspace handed to the HIST_*_wksp and HUF_*_wksp functions. */
U64 WKSP[HUF_WORKSPACE_SIZE_U64];
    192  1.1  christos 
/* Frame parameters chosen by writeFrameHeader() and stored in frame_t. */
typedef struct {
    size_t contentSize; /* 0 means unknown (unless contentSize == windowSize == 0) */
    unsigned windowSize; /* contentSize >= windowSize means single segment */
} frameHeader_t;
    197  1.1  christos 
/* For repeat modes: state carried from one compressed block to the next */
typedef struct {
    /* NOTE(review): presumably the repeat-offset history; not used in the
     * visible part of the file -- confirm */
    U32 rep[ZSTD_REP_NUM];

    /* non-zero once hufDist/hufTable hold a distribution and table produced
     * by an earlier literals block, i.e. once Huffman repeat mode is usable */
    int hufInit;
    /* the distribution used in the previous block for repeat mode */
    BYTE hufDist[DISTSIZE];
    /* Huffman table built from hufDist */
    HUF_CElt hufTable [HUF_CTABLE_SIZE_ST(255)];

    /* NOTE(review): presumably the FSE counterpart of hufInit, covering the
     * three tables below -- confirm */
    int fseInit;
    FSE_CTable offcodeCTable  [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
    FSE_CTable litlengthCTable  [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];

    /* Symbols that were present in the previous distribution, for use with
     * set_repeat */
    BYTE litlengthSymbolSet[36];
    BYTE offsetSymbolSet[29];
    BYTE matchlengthSymbolSet[53];
} cblockStats_t;
    218  1.1  christos 
/* Generation state of the frame currently being produced */
typedef struct {
    /* output side: `data` is the current write position (each writer stores
     * its advanced position back into it) and `dataEnd` bounds the space
     * available for writing.
     * NOTE(review): `dataStart` is presumably the beginning of the output
     * buffer -- confirm */
    void* data;
    void* dataStart;
    void* dataEnd;

    /* content side: `src == srcStart` is treated as "nothing precedes the
     * current block".
     * NOTE(review): presumably `src` is the current position in the
     * decompressed content and `srcEnd` its limit -- confirm */
    void* src;
    void* srcStart;
    void* srcEnd;

    /* parameters selected by writeFrameHeader() */
    frameHeader_t header;

    cblockStats_t stats;
    cblockStats_t oldStats; /* so they can be rolled back if uncompressible */
} frame_t;
    233  1.1  christos 
/* Dictionary settings for frame generation */
typedef struct {
    int useDict;            /* non-zero: the frame header carries a 4-byte Dictionary_ID */
    U32 dictID;             /* value written as the Dictionary_ID when useDict is set */
    /* NOTE(review): presumably the dictionary content and its size in bytes;
     * neither is used in the visible part of the file -- confirm */
    size_t dictContentSize;
    BYTE* dictContent;
} dictInfo;
    240  1.1  christos 
/* Kind of output the generator produces */
typedef enum {
  gt_frame = 0,  /* generate frames */
  gt_block,      /* generate compressed blocks without block/frame headers */
} genType_e;
    245  1.1  christos 
/* Smaller of two values, unless MIN is already provided elsewhere.
 * Each argument may be evaluated twice, so avoid side effects. */
#ifndef MIN
    #define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif

/*-*******************************************************
*  Global variables (set from command line)
*********************************************************/
/* log2 of the upper bound on the generated content size */
U32 g_maxDecompressedSizeLog = MAX_DECOMPRESSED_SIZE_LOG;  /* <= 20 */
/* upper bound on the size of a generated block, in bytes */
U32 g_maxBlockSize = ZSTD_BLOCKSIZE_MAX;                       /* <= 128 KB */

/*-*******************************************************
*  Generator Functions
*********************************************************/

/* When `contentSize` is non-zero, writeFrameHeader() always sets the content
 * size flag instead of choosing it at random. */
struct {
    int contentSize; /* force the content size to be present */
} opts; /* advanced options on generation */
    263  1.1  christos 
    264  1.1  christos /* Generate and write a random frame header */
    265  1.1  christos static void writeFrameHeader(U32* seed, frame_t* frame, dictInfo info)
    266  1.1  christos {
    267  1.1  christos     BYTE* const op = frame->data;
    268  1.1  christos     size_t pos = 0;
    269  1.1  christos     frameHeader_t fh;
    270  1.1  christos 
    271  1.1  christos     BYTE windowByte = 0;
    272  1.1  christos 
    273  1.1  christos     int singleSegment = 0;
    274  1.1  christos     int contentSizeFlag = 0;
    275  1.1  christos     int fcsCode = 0;
    276  1.1  christos 
    277  1.1  christos     memset(&fh, 0, sizeof(fh));
    278  1.1  christos 
    279  1.1  christos     /* generate window size */
    280  1.1  christos     {
    281  1.1  christos         /* Follow window algorithm from specification */
    282  1.1  christos         int const exponent = RAND(seed) % (MAX_WINDOW_LOG - 10);
    283  1.1  christos         int const mantissa = RAND(seed) % 8;
    284  1.1  christos         windowByte = (BYTE) ((exponent << 3) | mantissa);
    285  1.1  christos         fh.windowSize = (1U << (exponent + 10));
    286  1.1  christos         fh.windowSize += fh.windowSize / 8 * mantissa;
    287  1.1  christos     }
    288  1.1  christos 
    289  1.1  christos     {
    290  1.1  christos         /* Generate random content size */
    291  1.1  christos         size_t highBit;
    292  1.1  christos         if (RAND(seed) & 7 && g_maxDecompressedSizeLog > 7) {
    293  1.1  christos             /* do content of at least 128 bytes */
    294  1.1  christos             highBit = 1ULL << RAND_range(seed, 7, g_maxDecompressedSizeLog);
    295  1.1  christos         } else if (RAND(seed) & 3) {
    296  1.1  christos             /* do small content */
    297  1.1  christos             highBit = 1ULL << RAND_range(seed, 0, MIN(7, 1U << g_maxDecompressedSizeLog));
    298  1.1  christos         } else {
    299  1.1  christos             /* 0 size frame */
    300  1.1  christos             highBit = 0;
    301  1.1  christos         }
    302  1.1  christos         fh.contentSize = highBit ? highBit + (RAND(seed) % highBit) : 0;
    303  1.1  christos 
    304  1.1  christos         /* provide size sometimes */
    305  1.1  christos         contentSizeFlag = opts.contentSize | (RAND(seed) & 1);
    306  1.1  christos 
    307  1.1  christos         if (contentSizeFlag && (fh.contentSize == 0 || !(RAND(seed) & 7))) {
    308  1.1  christos             /* do single segment sometimes */
    309  1.1  christos             fh.windowSize = (U32) fh.contentSize;
    310  1.1  christos             singleSegment = 1;
    311  1.1  christos         }
    312  1.1  christos     }
    313  1.1  christos 
    314  1.1  christos     if (contentSizeFlag) {
    315  1.1  christos         /* Determine how large fcs field has to be */
    316  1.1  christos         int minFcsCode = (fh.contentSize >= 256) +
    317  1.1  christos                                (fh.contentSize >= 65536 + 256) +
    318  1.1  christos                                (fh.contentSize > 0xFFFFFFFFU);
    319  1.1  christos         if (!singleSegment && !minFcsCode) {
    320  1.1  christos             minFcsCode = 1;
    321  1.1  christos         }
    322  1.1  christos         fcsCode = minFcsCode + (RAND(seed) % (4 - minFcsCode));
    323  1.1  christos         if (fcsCode == 1 && fh.contentSize < 256) fcsCode++;
    324  1.1  christos     }
    325  1.1  christos 
    326  1.1  christos     /* write out the header */
    327  1.1  christos     MEM_writeLE32(op + pos, ZSTD_MAGICNUMBER);
    328  1.1  christos     pos += 4;
    329  1.1  christos 
    330  1.1  christos     {
    331  1.1  christos         /*
    332  1.1  christos          * fcsCode: 2-bit flag specifying how many bytes used to represent Frame_Content_Size (bits 7-6)
    333  1.1  christos          * singleSegment: 1-bit flag describing if data must be regenerated within a single continuous memory segment. (bit 5)
    334  1.1  christos          * contentChecksumFlag: 1-bit flag that is set if frame includes checksum at the end -- set to 1 below (bit 2)
    335  1.1  christos          * dictBits: 2-bit flag describing how many bytes Dictionary_ID uses -- set to 3 (bits 1-0)
    336  1.1  christos          * For more information: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_header
    337  1.1  christos          */
    338  1.1  christos         int const dictBits = info.useDict ? 3 : 0;
    339  1.1  christos         BYTE const frameHeaderDescriptor =
    340  1.1  christos                 (BYTE) ((fcsCode << 6) | (singleSegment << 5) | (1 << 2) | dictBits);
    341  1.1  christos         op[pos++] = frameHeaderDescriptor;
    342  1.1  christos     }
    343  1.1  christos 
    344  1.1  christos     if (!singleSegment) {
    345  1.1  christos         op[pos++] = windowByte;
    346  1.1  christos     }
    347  1.1  christos     if (info.useDict) {
    348  1.1  christos         MEM_writeLE32(op + pos, (U32) info.dictID);
    349  1.1  christos         pos += 4;
    350  1.1  christos     }
    351  1.1  christos     if (contentSizeFlag) {
    352  1.1  christos         switch (fcsCode) {
    353  1.1  christos         default: /* Impossible */
    354  1.1  christos         case 0: op[pos++] = (BYTE) fh.contentSize; break;
    355  1.1  christos         case 1: MEM_writeLE16(op + pos, (U16) (fh.contentSize - 256)); pos += 2; break;
    356  1.1  christos         case 2: MEM_writeLE32(op + pos, (U32) fh.contentSize); pos += 4; break;
    357  1.1  christos         case 3: MEM_writeLE64(op + pos, (U64) fh.contentSize); pos += 8; break;
    358  1.1  christos         }
    359  1.1  christos     }
    360  1.1  christos 
    361  1.1  christos     DISPLAYLEVEL(3, " frame content size:\t%u\n", (unsigned)fh.contentSize);
    362  1.1  christos     DISPLAYLEVEL(3, " frame window size:\t%u\n", fh.windowSize);
    363  1.1  christos     DISPLAYLEVEL(3, " content size flag:\t%d\n", contentSizeFlag);
    364  1.1  christos     DISPLAYLEVEL(3, " single segment flag:\t%d\n", singleSegment);
    365  1.1  christos 
    366  1.1  christos     frame->data = op + pos;
    367  1.1  christos     frame->header = fh;
    368  1.1  christos }
    369  1.1  christos 
    370  1.1  christos /* Write a literal block in either raw or RLE form, return the literals size */
    371  1.1  christos static size_t writeLiteralsBlockSimple(U32* seed, frame_t* frame, size_t contentSize)
    372  1.1  christos {
    373  1.1  christos     BYTE* op = (BYTE*)frame->data;
    374  1.1  christos     int const type = RAND(seed) % 2;
    375  1.1  christos     int const sizeFormatDesc = RAND(seed) % 8;
    376  1.1  christos     size_t litSize;
    377  1.1  christos     size_t maxLitSize = MIN(contentSize, g_maxBlockSize);
    378  1.1  christos 
    379  1.1  christos     if (sizeFormatDesc == 0) {
    380  1.1  christos         /* Size_FormatDesc = ?0 */
    381  1.1  christos         maxLitSize = MIN(maxLitSize, 31);
    382  1.1  christos     } else if (sizeFormatDesc <= 4) {
    383  1.1  christos         /* Size_FormatDesc = 01 */
    384  1.1  christos         maxLitSize = MIN(maxLitSize, 4095);
    385  1.1  christos     } else {
    386  1.1  christos         /* Size_Format = 11 */
    387  1.1  christos         maxLitSize = MIN(maxLitSize, 1048575);
    388  1.1  christos     }
    389  1.1  christos 
    390  1.1  christos     litSize = RAND(seed) % (maxLitSize + 1);
    391  1.1  christos     if (frame->src == frame->srcStart && litSize == 0) {
    392  1.1  christos         litSize = 1; /* no empty literals if there's nothing preceding this block */
    393  1.1  christos     }
    394  1.1  christos     if (litSize + 3 > contentSize) {
    395  1.1  christos         litSize = contentSize; /* no matches shorter than 3 are allowed */
    396  1.1  christos     }
    397  1.1  christos     /* use smallest size format that fits */
    398  1.1  christos     if (litSize < 32) {
    399  1.1  christos         op[0] = (type | (0 << 2) | (litSize << 3)) & 0xff;
    400  1.1  christos         op += 1;
    401  1.1  christos     } else if (litSize < 4096) {
    402  1.1  christos         op[0] = (type | (1 << 2) | (litSize << 4)) & 0xff;
    403  1.1  christos         op[1] = (litSize >> 4) & 0xff;
    404  1.1  christos         op += 2;
    405  1.1  christos     } else {
    406  1.1  christos         op[0] = (type | (3 << 2) | (litSize << 4)) & 0xff;
    407  1.1  christos         op[1] = (litSize >> 4) & 0xff;
    408  1.1  christos         op[2] = (litSize >> 12) & 0xff;
    409  1.1  christos         op += 3;
    410  1.1  christos     }
    411  1.1  christos 
    412  1.1  christos     if (type == 0) {
    413  1.1  christos         /* Raw literals */
    414  1.1  christos         DISPLAYLEVEL(4, "   raw literals\n");
    415  1.1  christos 
    416  1.1  christos         RAND_buffer(seed, LITERAL_BUFFER, litSize);
    417  1.1  christos         memcpy(op, LITERAL_BUFFER, litSize);
    418  1.1  christos         op += litSize;
    419  1.1  christos     } else {
    420  1.1  christos         /* RLE literals */
    421  1.1  christos         BYTE const symb = (BYTE) (RAND(seed) % 256);
    422  1.1  christos 
    423  1.1  christos         DISPLAYLEVEL(4, "   rle literals: 0x%02x\n", (unsigned)symb);
    424  1.1  christos 
    425  1.1  christos         memset(LITERAL_BUFFER, symb, litSize);
    426  1.1  christos         op[0] = symb;
    427  1.1  christos         op++;
    428  1.1  christos     }
    429  1.1  christos 
    430  1.1  christos     frame->data = op;
    431  1.1  christos 
    432  1.1  christos     return litSize;
    433  1.1  christos }
    434  1.1  christos 
    435  1.1  christos /* Generate a Huffman header for the given source */
    436  1.1  christos static size_t writeHufHeader(U32* seed, HUF_CElt* hufTable, void* dst, size_t dstSize,
    437  1.1  christos                                  const void* src, size_t srcSize)
    438  1.1  christos {
    439  1.1  christos     BYTE* const ostart = (BYTE*)dst;
    440  1.1  christos     BYTE* op = ostart;
    441  1.1  christos 
    442  1.1  christos     unsigned huffLog = 11;
    443  1.1  christos     unsigned maxSymbolValue = 255;
    444  1.1  christos 
    445  1.1  christos     unsigned count[HUF_SYMBOLVALUE_MAX+1];
    446  1.1  christos 
    447  1.1  christos     /* Scan input and build symbol stats */
    448  1.1  christos     {   size_t const largest = HIST_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, WKSP, sizeof(WKSP));
    449  1.1  christos         assert(!HIST_isError(largest));
    450  1.1  christos         if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 0; }   /* single symbol, rle */
    451  1.1  christos         if (largest <= (srcSize >> 7)+1) return 0;   /* Fast heuristic : not compressible enough */
    452  1.1  christos     }
    453  1.1  christos 
    454  1.1  christos     /* Build Huffman Tree */
    455  1.1  christos     /* Max Huffman log is 11, min is highbit(maxSymbolValue)+1 */
    456  1.1  christos     huffLog = RAND_range(seed, ZSTD_highbit32(maxSymbolValue)+1, huffLog+1);
    457  1.1  christos     DISPLAYLEVEL(6, "     huffman log: %u\n", huffLog);
    458  1.1  christos     {   size_t const maxBits = HUF_buildCTable_wksp (hufTable, count, maxSymbolValue, huffLog, WKSP, sizeof(WKSP));
    459  1.1  christos         CHECKERR(maxBits);
    460  1.1  christos         huffLog = (U32)maxBits;
    461  1.1  christos     }
    462  1.1  christos 
    463  1.1  christos     /* Write table description header */
    464  1.1  christos     {   size_t const hSize = HUF_writeCTable_wksp (op, dstSize, hufTable, maxSymbolValue, huffLog, WKSP, sizeof(WKSP));
    465  1.1  christos         if (hSize + 12 >= srcSize) return 0;   /* not useful to try compression */
    466  1.1  christos         op += hSize;
    467  1.1  christos     }
    468  1.1  christos 
    469  1.1  christos     return op - ostart;
    470  1.1  christos }
    471  1.1  christos 
    472  1.1  christos /* Write a Huffman coded literals block and return the literals size */
    473  1.1  christos static size_t writeLiteralsBlockCompressed(U32* seed, frame_t* frame, size_t contentSize)
    474  1.1  christos {
    475  1.1  christos     BYTE* origop = (BYTE*)frame->data;
    476  1.1  christos     BYTE* opend = (BYTE*)frame->dataEnd;
    477  1.1  christos     BYTE* op;
    478  1.1  christos     BYTE* const ostart = origop;
    479  1.1  christos     int const sizeFormat = RAND(seed) % 4;
    480  1.1  christos     size_t litSize;
    481  1.1  christos     size_t hufHeaderSize = 0;
    482  1.1  christos     size_t compressedSize = 0;
    483  1.1  christos     size_t maxLitSize = MIN(contentSize-3, g_maxBlockSize);
    484  1.1  christos 
    485  1.1  christos     symbolEncodingType_e hType;
    486  1.1  christos 
    487  1.1  christos     if (contentSize < 64) {
    488  1.1  christos         /* make sure we get reasonably-sized literals for compression */
    489  1.1  christos         return ERROR(GENERIC);
    490  1.1  christos     }
    491  1.1  christos 
    492  1.1  christos     DISPLAYLEVEL(4, "   compressed literals\n");
    493  1.1  christos 
    494  1.1  christos     switch (sizeFormat) {
    495  1.1  christos     case 0: /* fall through, size is the same as case 1 */
    496  1.1  christos     case 1:
    497  1.1  christos         maxLitSize = MIN(maxLitSize, 1023);
    498  1.1  christos         origop += 3;
    499  1.1  christos         break;
    500  1.1  christos     case 2:
    501  1.1  christos         maxLitSize = MIN(maxLitSize, 16383);
    502  1.1  christos         origop += 4;
    503  1.1  christos         break;
    504  1.1  christos     case 3:
    505  1.1  christos         maxLitSize = MIN(maxLitSize, 262143);
    506  1.1  christos         origop += 5;
    507  1.1  christos         break;
    508  1.1  christos     default:; /* impossible */
    509  1.1  christos     }
    510  1.1  christos 
    511  1.1  christos     do {
    512  1.1  christos         op = origop;
    513  1.1  christos         do {
    514  1.1  christos             litSize = RAND(seed) % (maxLitSize + 1);
    515  1.1  christos         } while (litSize < 32); /* avoid small literal sizes */
    516  1.1  christos         if (litSize + 3 > contentSize) {
    517  1.1  christos             litSize = contentSize; /* no matches shorter than 3 are allowed */
    518  1.1  christos         }
    519  1.1  christos 
    520  1.1  christos         /* most of the time generate a new distribution */
    521  1.1  christos         if ((RAND(seed) & 3) || !frame->stats.hufInit) {
    522  1.1  christos             do {
    523  1.1  christos                 if (RAND(seed) & 3) {
    524  1.1  christos                     /* add 10 to ensure some compressibility */
    525  1.1  christos                     double const weight = ((RAND(seed) % 90) + 10) / 100.0;
    526  1.1  christos 
    527  1.1  christos                     DISPLAYLEVEL(5, "    distribution weight: %d%%\n",
    528  1.1  christos                                  (int)(weight * 100));
    529  1.1  christos 
    530  1.1  christos                     RAND_genDist(seed, frame->stats.hufDist, weight);
    531  1.1  christos                 } else {
    532  1.1  christos                     /* sometimes do restricted range literals to force
    533  1.1  christos                      * non-huffman headers */
    534  1.1  christos                     DISPLAYLEVEL(5, "    small range literals\n");
    535  1.1  christos                     RAND_bufferMaxSymb(seed, frame->stats.hufDist, DISTSIZE,
    536  1.1  christos                                        15);
    537  1.1  christos                 }
    538  1.1  christos                 RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER,
    539  1.1  christos                                 litSize);
    540  1.1  christos 
    541  1.1  christos                 /* generate the header from the distribution instead of the
    542  1.1  christos                  * actual data to avoid bugs with symbols that were in the
    543  1.1  christos                  * distribution but never showed up in the output */
    544  1.1  christos                 hufHeaderSize = writeHufHeader(
    545  1.1  christos                         seed, frame->stats.hufTable, op, opend - op,
    546  1.1  christos                         frame->stats.hufDist, DISTSIZE);
    547  1.1  christos                 CHECKERR(hufHeaderSize);
    548  1.1  christos                 /* repeat until a valid header is written */
    549  1.1  christos             } while (hufHeaderSize == 0);
    550  1.1  christos             op += hufHeaderSize;
    551  1.1  christos             hType = set_compressed;
    552  1.1  christos 
    553  1.1  christos             frame->stats.hufInit = 1;
    554  1.1  christos         } else {
    555  1.1  christos             /* repeat the distribution/table from last time */
    556  1.1  christos             DISPLAYLEVEL(5, "    huffman repeat stats\n");
    557  1.1  christos             RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER,
    558  1.1  christos                             litSize);
    559  1.1  christos             hufHeaderSize = 0;
    560  1.1  christos             hType = set_repeat;
    561  1.1  christos         }
    562  1.1  christos 
    563  1.1  christos         do {
    564  1.1  christos             compressedSize =
    565  1.1  christos                     sizeFormat == 0
    566  1.1  christos                             ? HUF_compress1X_usingCTable(
    567  1.1  christos                                       op, opend - op, LITERAL_BUFFER, litSize,
    568  1.1  christos                                       frame->stats.hufTable, /* flags */ 0)
    569  1.1  christos                             : HUF_compress4X_usingCTable(
    570  1.1  christos                                       op, opend - op, LITERAL_BUFFER, litSize,
    571  1.1  christos                                       frame->stats.hufTable, /* flags */ 0);
    572  1.1  christos             CHECKERR(compressedSize);
    573  1.1  christos             /* this only occurs when it could not compress or similar */
    574  1.1  christos         } while (compressedSize <= 0);
    575  1.1  christos 
    576  1.1  christos         op += compressedSize;
    577  1.1  christos 
    578  1.1  christos         compressedSize += hufHeaderSize;
    579  1.1  christos         DISPLAYLEVEL(5, "    regenerated size: %u\n", (unsigned)litSize);
    580  1.1  christos         DISPLAYLEVEL(5, "    compressed size: %u\n", (unsigned)compressedSize);
    581  1.1  christos         if (compressedSize >= litSize) {
    582  1.1  christos             DISPLAYLEVEL(5, "     trying again\n");
    583  1.1  christos             /* if we have to try again, reset the stats so we don't accidentally
    584  1.1  christos              * try to repeat a distribution we just made */
    585  1.1  christos             frame->stats = frame->oldStats;
    586  1.1  christos         } else {
    587  1.1  christos             break;
    588  1.1  christos         }
    589  1.1  christos     } while (1);
    590  1.1  christos 
    591  1.1  christos     /* write header */
    592  1.1  christos     switch (sizeFormat) {
    593  1.1  christos     case 0: /* fall through, size is the same as case 1 */
    594  1.1  christos     case 1: {
    595  1.1  christos         U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
    596  1.1  christos                            ((U32)compressedSize << 14);
    597  1.1  christos         MEM_writeLE24(ostart, header);
    598  1.1  christos         break;
    599  1.1  christos     }
    600  1.1  christos     case 2: {
    601  1.1  christos         U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
    602  1.1  christos                            ((U32)compressedSize << 18);
    603  1.1  christos         MEM_writeLE32(ostart, header);
    604  1.1  christos         break;
    605  1.1  christos     }
    606  1.1  christos     case 3: {
    607  1.1  christos         U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
    608  1.1  christos                            ((U32)compressedSize << 22);
    609  1.1  christos         MEM_writeLE32(ostart, header);
    610  1.1  christos         ostart[4] = (BYTE)(compressedSize >> 10);
    611  1.1  christos         break;
    612  1.1  christos     }
    613  1.1  christos     default:; /* impossible */
    614  1.1  christos     }
    615  1.1  christos 
    616  1.1  christos     frame->data = op;
    617  1.1  christos     return litSize;
    618  1.1  christos }
    619  1.1  christos 
    620  1.1  christos static size_t writeLiteralsBlock(U32* seed, frame_t* frame, size_t contentSize)
    621  1.1  christos {
                           /* Write the literals section for a block holding `contentSize` bytes
                            * of content by delegating to one of the two literals writers, and
                            * return that writer's result unchanged (the caller stores it as the
                            * literals size).
                            *
                            * The compressed writer is chosen only when the low three bits of
                            * RAND(seed) are not all zero AND contentSize >= 64; every other case
                            * uses the simple writer. RAND(seed) is the left operand of the &&,
                            * so the seed is advanced on every call regardless of contentSize. */
    622  1.1  christos     /* only do compressed for larger segments to avoid compressibility issues */
    623  1.1  christos     if (RAND(seed) & 7 && contentSize >= 64) {
    624  1.1  christos         return writeLiteralsBlockCompressed(seed, frame, contentSize);
    625  1.1  christos     } else {
    626  1.1  christos         return writeLiteralsBlockSimple(seed, frame, contentSize);
    627  1.1  christos     }
    628  1.1  christos }
    629  1.1  christos 
    630  1.1  christos static inline void initSeqStore(seqStore_t *seqStore) {
                           /* Prepare `seqStore` for a new block: set its capacity fields, point
                            * its sequence, literal and code arrays at the SEQUENCE_* buffers
                            * (defined elsewhere in this file), then let ZSTD_resetSeqStore()
                            * finish the initialisation. */
    631  1.1  christos     seqStore->maxNbSeq = MAX_NB_SEQ;
    632  1.1  christos     seqStore->maxNbLit = ZSTD_BLOCKSIZE_MAX;
    633  1.1  christos     seqStore->sequencesStart = SEQUENCE_BUFFER;
    634  1.1  christos     seqStore->litStart = SEQUENCE_LITERAL_BUFFER;
    635  1.1  christos     seqStore->llCode = SEQUENCE_LLCODE;
    636  1.1  christos     seqStore->mlCode = SEQUENCE_MLCODE;
    637  1.1  christos     seqStore->ofCode = SEQUENCE_OFCODE;
    638  1.1  christos 
                           /* must run after the pointers above are set; it is handed the
                            * fully wired-up store */
    639  1.1  christos     ZSTD_resetSeqStore(seqStore);
    640  1.1  christos }
    641  1.1  christos 
    642  1.1  christos /* Randomly generate sequence commands.
                        *
                        * Splits a block of `contentSize` bytes into `literalsSize` literal bytes
                        * (read from LITERAL_BUFFER) and contentSize - literalsSize match bytes.
                        * For every generated sequence it draws a literal length, a match length
                        * and an offset (a fresh offset or one of the three repeat offsets in
                        * frame->stats.rep), writes the literals followed by the match copy into
                        * the source buffer starting at frame->src, updates frame->stats.rep, and
                        * records the sequence in `seqStore` through ZSTD_storeSeq(). Literals
                        * that remain after the last sequence are appended at the end.
                        *
                        * When `info.useDict` is set, offsets may reach before frame->srcStart;
                        * those bytes are read from the end of info.dictContent.
                        *
                        * Returns the number of sequences generated (0 when
                        * literalsSize == contentSize).
                        *
                        * NOTE(review): when literalsSize != contentSize the code evaluates
                        * RAND(seed) % maxSequences, so it assumes
                        * contentSize - literalsSize >= MIN_SEQ_LEN (otherwise maxSequences is 0).
                        * That guarantee is not enforced here - confirm against the callers. */
    643  1.1  christos static U32
    644  1.1  christos generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
    645  1.1  christos                   size_t contentSize, size_t literalsSize, dictInfo info)
    646  1.1  christos {
    647  1.1  christos     /* The total length of all the matches */
    648  1.1  christos     size_t const remainingMatch = contentSize - literalsSize;
    649  1.1  christos     size_t excessMatch = 0;
    650  1.1  christos     U32 numSequences = 0;
    651  1.1  christos     U32 i;
    652  1.1  christos 
    653  1.1  christos     const BYTE* literals = LITERAL_BUFFER;
    654  1.1  christos     BYTE* srcPtr = frame->src;
    655  1.1  christos 
    656  1.1  christos     if (literalsSize != contentSize) {
    657  1.1  christos         /* each match must be at least MIN_SEQ_LEN, so this is the maximum
    658  1.1  christos          * number of sequences we can have */
    659  1.1  christos         U32 const maxSequences = (U32)remainingMatch / MIN_SEQ_LEN;
    660  1.1  christos         numSequences = (RAND(seed) % maxSequences) + 1;
    661  1.1  christos 
    662  1.1  christos         /* the extra match lengths we have to allocate to each sequence */
    663  1.1  christos         excessMatch = remainingMatch - numSequences * MIN_SEQ_LEN;
    664  1.1  christos     }
    665  1.1  christos 
    666  1.1  christos     DISPLAYLEVEL(5, "    total match lengths: %u\n", (unsigned)remainingMatch);
    667  1.1  christos     for (i = 0; i < numSequences; i++) {
    668  1.1  christos         /* Generate match and literal lengths by exponential distribution to
    669  1.1  christos          * ensure nice numbers */
    670  1.1  christos         U32 matchLen =
    671  1.1  christos                 MIN_SEQ_LEN +
    672  1.1  christos                 ROUND(RAND_exp(seed, (double)excessMatch / (double)(numSequences - i)));
                               /* literal length is forced to 0 when the low three bits of
                                * RAND(seed) are all zero */
    673  1.1  christos         U32 literalLen =
    674  1.1  christos                 (RAND(seed) & 7)
    675  1.1  christos                         ? ROUND(RAND_exp(seed,
    676  1.1  christos                                          (double)literalsSize /
    677  1.1  christos                                                  (double)(numSequences - i)))
    678  1.1  christos                         : 0;
    679  1.1  christos         /* actual offset, code to send, and point to copy up to when shifting
    680  1.1  christos          * codes in the repeat offsets history */
    681  1.1  christos         U32 offset, offBase, repIndex;
    682  1.1  christos 
    683  1.1  christos         /* bounds checks */
    684  1.1  christos         matchLen = (U32) MIN(matchLen, excessMatch + MIN_SEQ_LEN);
    685  1.1  christos         literalLen = MIN(literalLen, (U32) literalsSize);
                               /* at the very start of the source buffer, force one literal so
                                * that srcPtr is past srcStart before an offset is drawn (the
                                * normal-offset path below takes RAND modulo the number of bytes
                                * produced so far) */
    686  1.1  christos         if (i == 0 && srcPtr == frame->srcStart && literalLen == 0) literalLen = 1;
                               /* the last sequence absorbs all remaining excess so the match
                                * lengths add up to remainingMatch */
    687  1.1  christos         if (i + 1 == numSequences) matchLen = MIN_SEQ_LEN + (U32) excessMatch;
    688  1.1  christos 
    689  1.1  christos         memcpy(srcPtr, literals, literalLen);
    690  1.1  christos         srcPtr += literalLen;
                               /* draw offsets until one is usable: non-zero and, when no
                                * dictionary is in use, not larger than the data produced so far */
    691  1.1  christos         do {
    692  1.1  christos             if (RAND(seed) & 7) {
    693  1.1  christos                 /* do a normal offset */
    694  1.1  christos                 U32 const dataDecompressed = (U32)((BYTE*)srcPtr-(BYTE*)frame->srcStart);
    695  1.1  christos                 offset = (RAND(seed) %
    696  1.1  christos                           MIN(frame->header.windowSize,
    697  1.1  christos                               (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) +
    698  1.1  christos                          1;
    699  1.1  christos                 if (info.useDict && (RAND(seed) & 1) && i + 1 != numSequences && dataDecompressed < frame->header.windowSize) {
    700  1.1  christos                     /* need to occasionally generate offsets that go past the start */
    701  1.1  christos                     /* including i+1 != numSequences because the last sequences has to adhere to predetermined contentSize */
    702  1.1  christos                     U32 lenPastStart = (RAND(seed) % info.dictContentSize) + 1;
    703  1.1  christos                     offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart)+lenPastStart;
    704  1.1  christos                     if (offset > frame->header.windowSize) {
    705  1.1  christos                         if (lenPastStart < MIN_SEQ_LEN) {
    706  1.1  christos                             /* when offset > windowSize, matchLen bound by end of dictionary (lenPastStart) */
    707  1.1  christos                             /* this also means that lenPastStart must be greater than MIN_SEQ_LEN */
    708  1.1  christos                             /* make sure lenPastStart does not go past dictionary start though */
    709  1.1  christos                             lenPastStart = MIN(lenPastStart+MIN_SEQ_LEN, (U32)info.dictContentSize);
    710  1.1  christos                             offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) + lenPastStart;
    711  1.1  christos                         }
    712  1.1  christos                         {   U32 const matchLenBound = MIN(frame->header.windowSize, lenPastStart);
    713  1.1  christos                             matchLen = MIN(matchLen, matchLenBound);
    714  1.1  christos                         }
    715  1.1  christos                     }
    716  1.1  christos                 }
    717  1.1  christos                 offBase = OFFSET_TO_OFFBASE(offset);
                                       /* a fresh offset pushes the whole repeat history down */
    718  1.1  christos                 repIndex = 2;
    719  1.1  christos             } else {
    720  1.1  christos                 /* do a repeat offset */
    721  1.1  christos                 U32 const randomRepIndex = RAND(seed) % 3;
    722  1.1  christos                 offBase = REPCODE_TO_OFFBASE(randomRepIndex + 1);  /* expects values between 1 & 3 */
    723  1.1  christos                 if (literalLen > 0) {
    724  1.1  christos                     offset = frame->stats.rep[randomRepIndex];
    725  1.1  christos                     repIndex = randomRepIndex;
    726  1.1  christos                 } else {
    727  1.1  christos                     /* special case : literalLen == 0 */
                                           /* with no literals the repeat codes are shifted by one:
                                            * index 0 -> rep[1], index 1 -> rep[2],
                                            * index 2 -> rep[0] - 1 */
    728  1.1  christos                     offset = randomRepIndex == 2 ? frame->stats.rep[0] - 1
    729  1.1  christos                                            : frame->stats.rep[randomRepIndex + 1];
    730  1.1  christos                     repIndex = MIN(2, randomRepIndex + 1);
    731  1.1  christos                 }
    732  1.1  christos             }
    733  1.1  christos         } while (((!info.useDict) && (offset > (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) || offset == 0);
    734  1.1  christos 
                               /* materialise the match one byte at a time; source bytes that lie
                                * before srcStart are taken from the tail of the dictionary */
    735  1.1  christos         {   BYTE* const dictEnd = ZSTD_maybeNullPtrAdd(info.dictContent, info.dictContentSize);
    736  1.1  christos             size_t j;
    737  1.1  christos             for (j = 0; j < matchLen; j++) {
    738  1.1  christos                 if ((U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) < offset) {
    739  1.1  christos                     /* copy from dictionary instead of literals */
    740  1.1  christos                     size_t const dictOffset = offset - (srcPtr - (BYTE*)frame->srcStart);
    741  1.1  christos                     *srcPtr = *(dictEnd - dictOffset);
    742  1.1  christos                 }
    743  1.1  christos                 else {
    744  1.1  christos                     *srcPtr = *(srcPtr-offset);
    745  1.1  christos                 }
    746  1.1  christos                 srcPtr++;
    747  1.1  christos         }   }
    748  1.1  christos 
                               /* update the repeat-offset history: entries 0..repIndex-1 move
                                * down one slot and the offset just used becomes rep[0] */
    749  1.1  christos         {   int r;
    750  1.1  christos             for (r = repIndex; r > 0; r--) {
    751  1.1  christos                 frame->stats.rep[r] = frame->stats.rep[r - 1];
    752  1.1  christos             }
    753  1.1  christos             frame->stats.rep[0] = offset;
    754  1.1  christos         }
    755  1.1  christos 
    756  1.1  christos         DISPLAYLEVEL(6, "      LL: %5u OF: %5u ML: %5u",
    757  1.1  christos                     (unsigned)literalLen, (unsigned)offset, (unsigned)matchLen);
    758  1.1  christos         DISPLAYLEVEL(7, " srcPos: %8u seqNb: %3u",
    759  1.1  christos                      (unsigned)((BYTE*)srcPtr - (BYTE*)frame->srcStart), (unsigned)i);
    760  1.1  christos         DISPLAYLEVEL(6, "\n");
    761  1.1  christos         if (OFFBASE_IS_REPCODE(offBase)) {  /* expects sumtype numeric representation of ZSTD_storeSeq() */
    762  1.1  christos             DISPLAYLEVEL(7, "        repeat offset: %d\n", (int)repIndex);
    763  1.1  christos         }
    764  1.1  christos         /* use libzstd sequence handling */
    765  1.1  christos         ZSTD_storeSeq(seqStore, literalLen, literals, literals + literalLen,
    766  1.1  christos                       offBase, matchLen);
    767  1.1  christos 
                               /* consume what this sequence used from the remaining budgets */
    768  1.1  christos         literalsSize -= literalLen;
    769  1.1  christos         excessMatch -= (matchLen - MIN_SEQ_LEN);
    770  1.1  christos         literals += literalLen;
    771  1.1  christos     }
    772  1.1  christos 
                           /* trailing literals that were not attached to any sequence */
    773  1.1  christos     memcpy(srcPtr, literals, literalsSize);
    774  1.1  christos     srcPtr += literalsSize;
    775  1.1  christos     DISPLAYLEVEL(6, "      excess literals: %5u ", (unsigned)literalsSize);
    776  1.1  christos     DISPLAYLEVEL(7, "srcPos: %8u ", (unsigned)((BYTE*)srcPtr - (BYTE*)frame->srcStart));
    777  1.1  christos     DISPLAYLEVEL(6, "\n");
    778  1.1  christos 
    779  1.1  christos     return numSequences;
    780  1.1  christos }
    781  1.1  christos 
    782  1.1  christos static void initSymbolSet(const BYTE* symbols, size_t len, BYTE* set, BYTE maxSymbolValue)
    783  1.1  christos {
    784  1.1  christos     size_t i;
    785  1.1  christos 
    786  1.1  christos     memset(set, 0, (size_t)maxSymbolValue+1);
    787  1.1  christos 
    788  1.1  christos     for (i = 0; i < len; i++) {
    789  1.1  christos         set[symbols[i]] = 1;
    790  1.1  christos     }
    791  1.1  christos }
    792  1.1  christos 
    793  1.1  christos static int isSymbolSubset(const BYTE* symbols, size_t len, const BYTE* set, BYTE maxSymbolValue)
    794  1.1  christos {
                           /* Return 1 when every symbol in symbols[0..len) is <= maxSymbolValue
                            * and has a non-zero entry in `set`, 0 otherwise. An empty input
                            * (len == 0) yields 1. */
    795  1.1  christos     size_t i;
    796  1.1  christos 
    797  1.1  christos     for (i = 0; i < len; i++) {
                               /* the range test comes first so `set` is never indexed beyond
                                * maxSymbolValue */
    798  1.1  christos         if (symbols[i] > maxSymbolValue || !set[symbols[i]]) {
    799  1.1  christos             return 0;
    800  1.1  christos         }
    801  1.1  christos     }
    802  1.1  christos     return 1;
    803  1.1  christos }
    804  1.1  christos 
    805  1.1  christos static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
    806  1.1  christos                              size_t nbSeq)
    807  1.1  christos {
    808  1.1  christos     /* This code is mostly copied from ZSTD_compressSequences in zstd_compress.c */
    809  1.1  christos     unsigned count[MaxSeq+1];
    810  1.1  christos     S16 norm[MaxSeq+1];
    811  1.1  christos     FSE_CTable* CTable_LitLength = frame->stats.litlengthCTable;
    812  1.1  christos     FSE_CTable* CTable_OffsetBits = frame->stats.offcodeCTable;
    813  1.1  christos     FSE_CTable* CTable_MatchLength = frame->stats.matchlengthCTable;
    814  1.1  christos     U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
    815  1.1  christos     const seqDef* const sequences = seqStorePtr->sequencesStart;
    816  1.1  christos     const BYTE* const ofCodeTable = seqStorePtr->ofCode;
    817  1.1  christos     const BYTE* const llCodeTable = seqStorePtr->llCode;
    818  1.1  christos     const BYTE* const mlCodeTable = seqStorePtr->mlCode;
    819  1.1  christos     BYTE* const oend = (BYTE*)frame->dataEnd;
    820  1.1  christos     BYTE* op = (BYTE*)frame->data;
    821  1.1  christos     BYTE* seqHead;
    822  1.1  christos     BYTE scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE(MaxSeq, MaxFSELog)];
    823  1.1  christos 
    824  1.1  christos     /* literals compressing block removed so that can be done separately */
    825  1.1  christos 
    826  1.1  christos     /* Sequences Header */
    827  1.1  christos     if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall);
    828  1.1  christos     if (nbSeq < 128) *op++ = (BYTE)nbSeq;
    829  1.1  christos     else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
    830  1.1  christos     else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
    831  1.1  christos 
    832  1.1  christos     if (nbSeq==0) {
    833  1.1  christos         frame->data = op;
    834  1.1  christos         return 0;
    835  1.1  christos     }
    836  1.1  christos 
    837  1.1  christos     /* seqHead : flags for FSE encoding type */
    838  1.1  christos     seqHead = op++;
    839  1.1  christos 
    840  1.1  christos     /* convert length/distances into codes */
    841  1.1  christos     ZSTD_seqToCodes(seqStorePtr);
    842  1.1  christos 
    843  1.1  christos     /* CTable for Literal Lengths */
    844  1.1  christos     {   unsigned max = MaxLL;
    845  1.1  christos         size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP, sizeof(WKSP));   /* cannot fail */
    846  1.1  christos         assert(!HIST_isError(mostFrequent));
    847  1.1  christos         if (frame->stats.fseInit && !(RAND(seed) & 3) &&
    848  1.1  christos                    isSymbolSubset(llCodeTable, nbSeq,
    849  1.1  christos                                   frame->stats.litlengthSymbolSet, 35)) {
    850  1.1  christos             /* maybe do repeat mode if we're allowed to */
    851  1.1  christos             LLtype = set_repeat;
    852  1.1  christos         } else if (mostFrequent == nbSeq) {
    853  1.1  christos             /* do RLE if we have the chance */
    854  1.1  christos             *op++ = llCodeTable[0];
    855  1.1  christos             FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
    856  1.1  christos             LLtype = set_rle;
    857  1.1  christos         } else if (!(RAND(seed) & 3)) {
    858  1.1  christos             /* maybe use the default distribution */
    859  1.1  christos             CHECKERR(FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)));
    860  1.1  christos             LLtype = set_basic;
    861  1.1  christos         } else {
    862  1.1  christos             /* fall back on a full table */
    863  1.1  christos             size_t nbSeq_1 = nbSeq;
    864  1.1  christos             const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
    865  1.1  christos             if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
    866  1.1  christos             FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048);
    867  1.1  christos             { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
    868  1.1  christos               if (FSE_isError(NCountSize)) return ERROR(GENERIC);
    869  1.1  christos               op += NCountSize; }
    870  1.1  christos             CHECKERR(FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)));
    871  1.1  christos             LLtype = set_compressed;
    872  1.1  christos     }   }
    873  1.1  christos 
    874  1.1  christos     /* CTable for Offsets */
    875  1.1  christos     /* see Literal Lengths for descriptions of mode choices */
    876  1.1  christos     {   unsigned max = MaxOff;
    877  1.1  christos         size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP, sizeof(WKSP));   /* cannot fail */
    878  1.1  christos         assert(!HIST_isError(mostFrequent));
    879  1.1  christos         if (frame->stats.fseInit && !(RAND(seed) & 3) &&
    880  1.1  christos                    isSymbolSubset(ofCodeTable, nbSeq,
    881  1.1  christos                                   frame->stats.offsetSymbolSet, 28)) {
    882  1.1  christos             Offtype = set_repeat;
    883  1.1  christos         } else if (mostFrequent == nbSeq) {
    884  1.1  christos             *op++ = ofCodeTable[0];
    885  1.1  christos             FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
    886  1.1  christos             Offtype = set_rle;
    887  1.1  christos         } else if (!(RAND(seed) & 3)) {
    888  1.1  christos             FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, DefaultMaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
    889  1.1  christos             Offtype = set_basic;
    890  1.1  christos         } else {
    891  1.1  christos             size_t nbSeq_1 = nbSeq;
    892  1.1  christos             const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
    893  1.1  christos             if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
    894  1.1  christos             FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048);
    895  1.1  christos             { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
    896  1.1  christos               if (FSE_isError(NCountSize)) return ERROR(GENERIC);
    897  1.1  christos               op += NCountSize; }
    898  1.1  christos             FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
    899  1.1  christos             Offtype = set_compressed;
    900  1.1  christos     }   }
    901  1.1  christos 
    902  1.1  christos     /* CTable for MatchLengths */
    903  1.1  christos     /* see Literal Lengths for descriptions of mode choices */
    904  1.1  christos     {   unsigned max = MaxML;
    905  1.1  christos         size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP, sizeof(WKSP));   /* cannot fail */
    906  1.1  christos         assert(!HIST_isError(mostFrequent));
    907  1.1  christos         if (frame->stats.fseInit && !(RAND(seed) & 3) &&
    908  1.1  christos                    isSymbolSubset(mlCodeTable, nbSeq,
    909  1.1  christos                                   frame->stats.matchlengthSymbolSet, 52)) {
    910  1.1  christos             MLtype = set_repeat;
    911  1.1  christos         } else if (mostFrequent == nbSeq) {
    912  1.1  christos             *op++ = *mlCodeTable;
    913  1.1  christos             FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
    914  1.1  christos             MLtype = set_rle;
    915  1.1  christos         } else if (!(RAND(seed) & 3)) {
    916  1.1  christos             /* sometimes do default distribution */
    917  1.1  christos             FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
    918  1.1  christos             MLtype = set_basic;
    919  1.1  christos         } else {
    920  1.1  christos             /* fall back on table */
    921  1.1  christos             size_t nbSeq_1 = nbSeq;
    922  1.1  christos             const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
    923  1.1  christos             if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
    924  1.1  christos             FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048);
    925  1.1  christos             { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
    926  1.1  christos               if (FSE_isError(NCountSize)) return ERROR(GENERIC);
    927  1.1  christos               op += NCountSize; }
    928  1.1  christos             FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
    929  1.1  christos             MLtype = set_compressed;
    930  1.1  christos     }   }
    931  1.1  christos     frame->stats.fseInit = 1;
    932  1.1  christos     initSymbolSet(llCodeTable, nbSeq, frame->stats.litlengthSymbolSet, 35);
    933  1.1  christos     initSymbolSet(ofCodeTable, nbSeq, frame->stats.offsetSymbolSet, 28);
    934  1.1  christos     initSymbolSet(mlCodeTable, nbSeq, frame->stats.matchlengthSymbolSet, 52);
    935  1.1  christos 
    936  1.1  christos     DISPLAYLEVEL(5, "    LL type: %d OF type: %d ML type: %d\n", (unsigned)LLtype, (unsigned)Offtype, (unsigned)MLtype);
    937  1.1  christos 
    938  1.1  christos     *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
    939  1.1  christos 
    940  1.1  christos     /* Encoding Sequences */
    941  1.1  christos     {   BIT_CStream_t blockStream;
    942  1.1  christos         FSE_CState_t  stateMatchLength;
    943  1.1  christos         FSE_CState_t  stateOffsetBits;
    944  1.1  christos         FSE_CState_t  stateLitLength;
    945  1.1  christos 
    946  1.1  christos         RETURN_ERROR_IF(
    947  1.1  christos             ERR_isError(BIT_initCStream(&blockStream, op, oend-op)),
    948  1.1  christos             dstSize_tooSmall, "not enough space remaining");
    949  1.1  christos 
    950  1.1  christos         /* first symbols */
    951  1.1  christos         FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
    952  1.1  christos         FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
    953  1.1  christos         FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
    954  1.1  christos         BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
    955  1.1  christos         if (MEM_32bits()) BIT_flushBits(&blockStream);
    956  1.1  christos         BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]);
    957  1.1  christos         if (MEM_32bits()) BIT_flushBits(&blockStream);
    958  1.1  christos         BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]);
    959  1.1  christos         BIT_flushBits(&blockStream);
    960  1.1  christos 
    961  1.1  christos         {   size_t n;
    962  1.1  christos             for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
    963  1.1  christos                 BYTE const llCode = llCodeTable[n];
    964  1.1  christos                 BYTE const ofCode = ofCodeTable[n];
    965  1.1  christos                 BYTE const mlCode = mlCodeTable[n];
    966  1.1  christos                 U32  const llBits = LL_bits[llCode];
    967  1.1  christos                 U32  const ofBits = ofCode;                                     /* 32b*/  /* 64b*/
    968  1.1  christos                 U32  const mlBits = ML_bits[mlCode];
    969  1.1  christos                                                                                 /* (7)*/  /* (7)*/
    970  1.1  christos                 FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
    971  1.1  christos                 FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
    972  1.1  christos                 if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
    973  1.1  christos                 FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
    974  1.1  christos                 if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
    975  1.1  christos                     BIT_flushBits(&blockStream);                                /* (7)*/
    976  1.1  christos                 BIT_addBits(&blockStream, sequences[n].litLength, llBits);
    977  1.1  christos                 if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
    978  1.1  christos                 BIT_addBits(&blockStream, sequences[n].mlBase, mlBits);
    979  1.1  christos                 if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
    980  1.1  christos                 BIT_addBits(&blockStream, sequences[n].offBase, ofBits);         /* 31 */
    981  1.1  christos                 BIT_flushBits(&blockStream);                                    /* (7)*/
    982  1.1  christos         }   }
    983  1.1  christos 
    984  1.1  christos         FSE_flushCState(&blockStream, &stateMatchLength);
    985  1.1  christos         FSE_flushCState(&blockStream, &stateOffsetBits);
    986  1.1  christos         FSE_flushCState(&blockStream, &stateLitLength);
    987  1.1  christos 
    988  1.1  christos         {   size_t const streamSize = BIT_closeCStream(&blockStream);
    989  1.1  christos             if (streamSize==0) return ERROR(dstSize_tooSmall);   /* not enough space */
    990  1.1  christos             op += streamSize;
    991  1.1  christos     }   }
    992  1.1  christos 
    993  1.1  christos     frame->data = op;
    994  1.1  christos 
    995  1.1  christos     return 0;
    996  1.1  christos }
    997  1.1  christos 
    998  1.1  christos static size_t writeSequencesBlock(U32* seed, frame_t* frame, size_t contentSize,
    999  1.1  christos                                   size_t literalsSize, dictInfo info)
   1000  1.1  christos {
                           /* Produce the sequences section of a compressed block: set up a fresh
                            * seqStore, fill it with randomly generated sequences, then serialise
                            * them at frame->data. Any error code returned by writeSequences() is
                            * handed to CHECKERR. Returns the number of sequences generated. */
   1001  1.1  christos     seqStore_t seqStore;
   1002  1.1  christos     size_t numSequences;
   1003  1.1  christos 
   1004  1.1  christos 
   1005  1.1  christos     initSeqStore(&seqStore);
   1006  1.1  christos 
   1007  1.1  christos     /* randomly generate sequences */
   1008  1.1  christos     numSequences = generateSequences(seed, frame, &seqStore, contentSize, literalsSize, info);
   1009  1.1  christos     /* write them out to the frame data */
   1010  1.1  christos     CHECKERR(writeSequences(seed, frame, &seqStore, numSequences));
   1011  1.1  christos 
   1012  1.1  christos     return numSequences;
   1013  1.1  christos }
   1014  1.1  christos 
   1015  1.1  christos static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize, dictInfo info)
   1016  1.1  christos {
                           /* Write one compressed block body at frame->data: first the literals
                            * section, then the sequences section. Both helpers advance
                            * frame->data, so the return value - the distance between the final
                            * frame->data and its value on entry - is the number of bytes written
                            * for this block body. */
   1017  1.1  christos     BYTE* const blockStart = (BYTE*)frame->data;
   1018  1.1  christos     size_t literalsSize;
   1019  1.1  christos     size_t nbSeq;
   1020  1.1  christos 
   1021  1.1  christos     DISPLAYLEVEL(4, "  compressed block:\n");
   1022  1.1  christos 
   1023  1.1  christos     literalsSize = writeLiteralsBlock(seed, frame, contentSize);
   1024  1.1  christos 
   1025  1.1  christos     DISPLAYLEVEL(4, "   literals size: %u\n", (unsigned)literalsSize);
   1026  1.1  christos 
                           /* the literals size tells the sequence generator how many of the
                            * contentSize bytes are literals rather than match bytes */
   1027  1.1  christos     nbSeq = writeSequencesBlock(seed, frame, contentSize, literalsSize, info);
   1028  1.1  christos 
   1029  1.1  christos     DISPLAYLEVEL(4, "   number of sequences: %u\n", (unsigned)nbSeq);
   1030  1.1  christos 
   1031  1.1  christos     return (BYTE*)frame->data - blockStart;
   1032  1.1  christos }
   1033  1.1  christos 
   1034  1.1  christos static void writeBlock(U32* seed, frame_t* frame, size_t contentSize,
   1035  1.1  christos                        int lastBlock, dictInfo info)
   1036  1.1  christos {
   1037  1.1  christos     int const blockTypeDesc = RAND(seed) % 8;
   1038  1.1  christos     size_t blockSize;
   1039  1.1  christos     int blockType;
   1040  1.1  christos 
   1041  1.1  christos     BYTE *const header = (BYTE*)frame->data;
   1042  1.1  christos     BYTE *op = header + 3;
   1043  1.1  christos 
   1044  1.1  christos     DISPLAYLEVEL(4, " block:\n");
   1045  1.1  christos     DISPLAYLEVEL(4, "  block content size: %u\n", (unsigned)contentSize);
   1046  1.1  christos     DISPLAYLEVEL(4, "  last block: %s\n", lastBlock ? "yes" : "no");
   1047  1.1  christos 
   1048  1.1  christos     if (blockTypeDesc == 0) {
   1049  1.1  christos         /* Raw data frame */
   1050  1.1  christos 
   1051  1.1  christos         RAND_buffer(seed, frame->src, contentSize);
   1052  1.1  christos         memcpy(op, frame->src, contentSize);
   1053  1.1  christos 
   1054  1.1  christos         op += contentSize;
   1055  1.1  christos         blockType = 0;
   1056  1.1  christos         blockSize = contentSize;
   1057  1.1  christos     } else if (blockTypeDesc == 1 && frame->header.contentSize > 0) {
   1058  1.1  christos         /* RLE (Don't create RLE block if frame content is 0 since block size of 1 may exceed max block size)*/
   1059  1.1  christos         BYTE const symbol = RAND(seed) & 0xff;
   1060  1.1  christos 
   1061  1.1  christos         op[0] = symbol;
   1062  1.1  christos         memset(frame->src, symbol, contentSize);
   1063  1.1  christos 
   1064  1.1  christos         op++;
   1065  1.1  christos         blockType = 1;
   1066  1.1  christos         blockSize = contentSize;
   1067  1.1  christos     } else {
   1068  1.1  christos         /* compressed, most common */
   1069  1.1  christos         size_t compressedSize;
   1070  1.1  christos         blockType = 2;
   1071  1.1  christos 
   1072  1.1  christos         frame->oldStats = frame->stats;
   1073  1.1  christos 
   1074  1.1  christos         frame->data = op;
   1075  1.1  christos         compressedSize = writeCompressedBlock(seed, frame, contentSize, info);
   1076  1.1  christos         if (compressedSize >= contentSize) {   /* compressed block must be strictly smaller than uncompressed one */
   1077  1.1  christos             blockType = 0;
   1078  1.1  christos             memcpy(op, frame->src, contentSize);
   1079  1.1  christos 
   1080  1.1  christos             op += contentSize;
   1081  1.1  christos             blockSize = contentSize; /* fall back on raw block if data doesn't
   1082  1.1  christos                                         compress */
   1083  1.1  christos 
   1084  1.1  christos             frame->stats = frame->oldStats; /* don't update the stats */
   1085  1.1  christos         } else {
   1086  1.1  christos             op += compressedSize;
   1087  1.1  christos             blockSize = compressedSize;
   1088  1.1  christos         }
   1089  1.1  christos     }
   1090  1.1  christos     frame->src = (BYTE*)frame->src + contentSize;
   1091  1.1  christos 
   1092  1.1  christos     DISPLAYLEVEL(4, "  block type: %s\n", BLOCK_TYPES[blockType]);
   1093  1.1  christos     DISPLAYLEVEL(4, "  block size field: %u\n", (unsigned)blockSize);
   1094  1.1  christos 
   1095  1.1  christos     header[0] = (BYTE) ((lastBlock | (blockType << 1) | (blockSize << 3)) & 0xff);
   1096  1.1  christos     MEM_writeLE16(header + 1, (U16) (blockSize >> 5));
   1097  1.1  christos 
   1098  1.1  christos     frame->data = op;
   1099  1.1  christos }
   1100  1.1  christos 
   1101  1.1  christos static void writeBlocks(U32* seed, frame_t* frame, dictInfo info)
   1102  1.1  christos {
   1103  1.1  christos     size_t contentLeft = frame->header.contentSize;
   1104  1.1  christos     size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize);
   1105  1.1  christos     while (1) {
   1106  1.1  christos         /* 1 in 4 chance of ending frame */
   1107  1.1  christos         int const lastBlock = contentLeft > maxBlockSize ? 0 : !(RAND(seed) & 3);
   1108  1.1  christos         size_t blockContentSize;
   1109  1.1  christos         if (lastBlock) {
   1110  1.1  christos             blockContentSize = contentLeft;
   1111  1.1  christos         } else {
   1112  1.1  christos             if (contentLeft > 0 && (RAND(seed) & 7)) {
   1113  1.1  christos                 /* some variable size block */
   1114  1.1  christos                 blockContentSize = RAND(seed) % (MIN(maxBlockSize, contentLeft)+1);
   1115  1.1  christos             } else if (contentLeft > maxBlockSize && (RAND(seed) & 1)) {
   1116  1.1  christos                 /* some full size block */
   1117  1.1  christos                 blockContentSize = maxBlockSize;
   1118  1.1  christos             } else {
   1119  1.1  christos                 /* some empty block */
   1120  1.1  christos                 blockContentSize = 0;
   1121  1.1  christos             }
   1122  1.1  christos         }
   1123  1.1  christos 
   1124  1.1  christos         writeBlock(seed, frame, blockContentSize, lastBlock, info);
   1125  1.1  christos 
   1126  1.1  christos         contentLeft -= blockContentSize;
   1127  1.1  christos         if (lastBlock) break;
   1128  1.1  christos     }
   1129  1.1  christos }
   1130  1.1  christos 
   1131  1.1  christos static void writeChecksum(frame_t* frame)
   1132  1.1  christos {
   1133  1.1  christos     /* write checksum so implementations can verify their output */
   1134  1.1  christos     U64 digest = XXH64(frame->srcStart, (BYTE*)frame->src-(BYTE*)frame->srcStart, 0);
   1135  1.1  christos     DISPLAYLEVEL(3, "  checksum: %08x\n", (unsigned)digest);
   1136  1.1  christos     MEM_writeLE32(frame->data, (U32)digest);
   1137  1.1  christos     frame->data = (BYTE*)frame->data + 4;
   1138  1.1  christos }
   1139  1.1  christos 
   1140  1.1  christos static void outputBuffer(const void* buf, size_t size, const char* const path)
   1141  1.1  christos {
   1142  1.1  christos     /* write data out to file */
   1143  1.1  christos     const BYTE* ip = (const BYTE*)buf;
   1144  1.1  christos     FILE* out;
   1145  1.1  christos     if (path) {
   1146  1.1  christos         out = fopen(path, "wb");
   1147  1.1  christos     } else {
   1148  1.1  christos         out = stdout;
   1149  1.1  christos     }
   1150  1.1  christos     if (!out) {
   1151  1.1  christos         fprintf(stderr, "Failed to open file at %s: ", path);
   1152  1.1  christos         perror(NULL);
   1153  1.1  christos         exit(1);
   1154  1.1  christos     }
   1155  1.1  christos 
   1156  1.1  christos     {   size_t fsize = size;
   1157  1.1  christos         size_t written = 0;
   1158  1.1  christos         while (written < fsize) {
   1159  1.1  christos             written += fwrite(ip + written, 1, fsize - written, out);
   1160  1.1  christos             if (ferror(out)) {
   1161  1.1  christos                 fprintf(stderr, "Failed to write to file at %s: ", path);
   1162  1.1  christos                 perror(NULL);
   1163  1.1  christos                 exit(1);
   1164  1.1  christos             }
   1165  1.1  christos         }
   1166  1.1  christos     }
   1167  1.1  christos 
   1168  1.1  christos     if (path) {
   1169  1.1  christos         fclose(out);
   1170  1.1  christos     }
   1171  1.1  christos }
   1172  1.1  christos 
   1173  1.1  christos static void initFrame(frame_t* fr)
   1174  1.1  christos {
   1175  1.1  christos     memset(fr, 0, sizeof(*fr));
   1176  1.1  christos     fr->data = fr->dataStart = FRAME_BUFFER;
   1177  1.1  christos     fr->dataEnd = FRAME_BUFFER + sizeof(FRAME_BUFFER);
   1178  1.1  christos     fr->src = fr->srcStart = CONTENT_BUFFER;
   1179  1.1  christos     fr->srcEnd = CONTENT_BUFFER + sizeof(CONTENT_BUFFER);
   1180  1.1  christos 
   1181  1.1  christos     /* init repeat codes */
   1182  1.1  christos     fr->stats.rep[0] = 1;
   1183  1.1  christos     fr->stats.rep[1] = 4;
   1184  1.1  christos     fr->stats.rep[2] = 8;
   1185  1.1  christos }
   1186  1.1  christos 
   1187  1.1  christos /**
   1188  1.1  christos  * Generated a single zstd compressed block with no block/frame header.
   1189  1.1  christos  * Returns the final seed.
   1190  1.1  christos  */
   1191  1.1  christos static U32 generateCompressedBlock(U32 seed, frame_t* frame, dictInfo info)
   1192  1.1  christos {
   1193  1.1  christos     size_t blockContentSize;
   1194  1.1  christos     int blockWritten = 0;
   1195  1.1  christos     BYTE* op;
   1196  1.1  christos     DISPLAYLEVEL(4, "block seed: %u\n", (unsigned)seed);
   1197  1.1  christos     initFrame(frame);
   1198  1.1  christos     op = (BYTE*)frame->data;
   1199  1.1  christos 
   1200  1.1  christos     while (!blockWritten) {
   1201  1.1  christos         size_t cSize;
   1202  1.1  christos         /* generate window size */
   1203  1.1  christos         {   int const exponent = RAND(&seed) % (MAX_WINDOW_LOG - 10);
   1204  1.1  christos             int const mantissa = RAND(&seed) % 8;
   1205  1.1  christos             frame->header.windowSize = (1U << (exponent + 10));
   1206  1.1  christos             frame->header.windowSize += (frame->header.windowSize / 8) * mantissa;
   1207  1.1  christos         }
   1208  1.1  christos 
   1209  1.1  christos         /* generate content size */
   1210  1.1  christos         {   size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize);
   1211  1.1  christos             if (RAND(&seed) & 15) {
   1212  1.1  christos                 /* some full size blocks */
   1213  1.1  christos                 blockContentSize = maxBlockSize;
   1214  1.1  christos             } else if (RAND(&seed) & 7 && g_maxBlockSize >= (1U << 7)) {
   1215  1.1  christos                 /* some small blocks <= 128 bytes*/
   1216  1.1  christos                 blockContentSize = RAND(&seed) % (1U << 7);
   1217  1.1  christos             } else {
   1218  1.1  christos                 /* some variable size blocks */
   1219  1.1  christos                 blockContentSize = RAND(&seed) % maxBlockSize;
   1220  1.1  christos             }
   1221  1.1  christos         }
   1222  1.1  christos 
   1223  1.1  christos         /* try generating a compressed block */
   1224  1.1  christos         frame->oldStats = frame->stats;
   1225  1.1  christos         frame->data = op;
   1226  1.1  christos         cSize = writeCompressedBlock(&seed, frame, blockContentSize, info);
   1227  1.1  christos         if (cSize >= blockContentSize) {  /* compressed size must be strictly smaller than decompressed size : https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#blocks */
   1228  1.1  christos             /* data doesn't compress -- try again */
   1229  1.1  christos             frame->stats = frame->oldStats; /* don't update the stats */
   1230  1.1  christos             DISPLAYLEVEL(5, "   can't compress block : try again \n");
   1231  1.1  christos         } else {
   1232  1.1  christos             blockWritten = 1;
   1233  1.1  christos             DISPLAYLEVEL(4, "   block size: %u \n", (unsigned)cSize);
   1234  1.1  christos             frame->src = (BYTE*)frame->src + blockContentSize;
   1235  1.1  christos         }
   1236  1.1  christos     }
   1237  1.1  christos     return seed;
   1238  1.1  christos }
   1239  1.1  christos 
   1240  1.1  christos /* Return the final seed */
   1241  1.1  christos static U32 generateFrame(U32 seed, frame_t* fr, dictInfo info)
   1242  1.1  christos {
   1243  1.1  christos     /* generate a complete frame */
   1244  1.1  christos     DISPLAYLEVEL(3, "frame seed: %u\n", (unsigned)seed);
   1245  1.1  christos     initFrame(fr);
   1246  1.1  christos 
   1247  1.1  christos     writeFrameHeader(&seed, fr, info);
   1248  1.1  christos     writeBlocks(&seed, fr, info);
   1249  1.1  christos     writeChecksum(fr);
   1250  1.1  christos 
   1251  1.1  christos     return seed;
   1252  1.1  christos }
   1253  1.1  christos 
   1254  1.1  christos /*_*******************************************************
   1255  1.1  christos *  Dictionary Helper Functions
   1256  1.1  christos *********************************************************/
   1257  1.1  christos /* returns 0 if successful, otherwise returns 1 upon error */
   1258  1.1  christos static int genRandomDict(U32 dictID, U32 seed, size_t dictSize, BYTE* fullDict)
   1259  1.1  christos {
   1260  1.1  christos     /* allocate space for samples */
   1261  1.1  christos     int ret = 0;
   1262  1.1  christos     unsigned const numSamples = 4;
   1263  1.1  christos     size_t sampleSizes[4];
   1264  1.1  christos     BYTE* const samples = malloc(5000*sizeof(BYTE));
   1265  1.1  christos     if (samples == NULL) {
   1266  1.1  christos         DISPLAY("Error: could not allocate space for samples\n");
   1267  1.1  christos         return 1;
   1268  1.1  christos     }
   1269  1.1  christos 
   1270  1.1  christos     /* generate samples */
   1271  1.1  christos     {   unsigned literalValue = 1;
   1272  1.1  christos         unsigned samplesPos = 0;
   1273  1.1  christos         size_t currSize = 1;
   1274  1.1  christos         while (literalValue <= 4) {
   1275  1.1  christos             sampleSizes[literalValue - 1] = currSize;
   1276  1.1  christos             {   size_t k;
   1277  1.1  christos                 for (k = 0; k < currSize; k++) {
   1278  1.1  christos                     *(samples + (samplesPos++)) = (BYTE)literalValue;
   1279  1.1  christos             }   }
   1280  1.1  christos             literalValue++;
   1281  1.1  christos             currSize *= 16;
   1282  1.1  christos     }   }
   1283  1.1  christos 
   1284  1.1  christos     {   size_t dictWriteSize = 0;
   1285  1.1  christos         ZDICT_params_t zdictParams;
   1286  1.1  christos         size_t const headerSize = MAX(dictSize/4, 256);
   1287  1.1  christos         size_t const dictContentSize = dictSize - headerSize;
   1288  1.1  christos         BYTE* const dictContent = fullDict + headerSize;
   1289  1.1  christos         if (dictContentSize < ZDICT_CONTENTSIZE_MIN || dictSize < ZDICT_DICTSIZE_MIN) {
   1290  1.1  christos             DISPLAY("Error: dictionary size is too small\n");
   1291  1.1  christos             ret = 1;
   1292  1.1  christos             goto exitGenRandomDict;
   1293  1.1  christos         }
   1294  1.1  christos 
   1295  1.1  christos         /* init dictionary params */
   1296  1.1  christos         memset(&zdictParams, 0, sizeof(zdictParams));
   1297  1.1  christos         zdictParams.dictID = dictID;
   1298  1.1  christos         zdictParams.notificationLevel = 1;
   1299  1.1  christos 
   1300  1.1  christos         /* fill in dictionary content */
   1301  1.1  christos         RAND_buffer(&seed, (void*)dictContent, dictContentSize);
   1302  1.1  christos 
   1303  1.1  christos         /* finalize dictionary with random samples */
   1304  1.1  christos         dictWriteSize = ZDICT_finalizeDictionary(fullDict, dictSize,
   1305  1.1  christos                                     dictContent, dictContentSize,
   1306  1.1  christos                                     samples, sampleSizes, numSamples,
   1307  1.1  christos                                     zdictParams);
   1308  1.1  christos 
   1309  1.1  christos         if (ZDICT_isError(dictWriteSize)) {
   1310  1.1  christos             DISPLAY("Could not finalize dictionary: %s\n", ZDICT_getErrorName(dictWriteSize));
   1311  1.1  christos             ret = 1;
   1312  1.1  christos         }
   1313  1.1  christos     }
   1314  1.1  christos 
   1315  1.1  christos exitGenRandomDict:
   1316  1.1  christos     free(samples);
   1317  1.1  christos     return ret;
   1318  1.1  christos }
   1319  1.1  christos 
   1320  1.1  christos static dictInfo initDictInfo(int useDict, size_t dictContentSize, BYTE* dictContent, U32 dictID){
   1321  1.1  christos     /* allocate space statically */
   1322  1.1  christos     dictInfo dictOp;
   1323  1.1  christos     memset(&dictOp, 0, sizeof(dictOp));
   1324  1.1  christos     dictOp.useDict = useDict;
   1325  1.1  christos     dictOp.dictContentSize = dictContentSize;
   1326  1.1  christos     dictOp.dictContent = dictContent;
   1327  1.1  christos     dictOp.dictID = dictID;
   1328  1.1  christos     return dictOp;
   1329  1.1  christos }
   1330  1.1  christos 
   1331  1.1  christos /*-*******************************************************
   1332  1.1  christos *  Test Mode
   1333  1.1  christos *********************************************************/
   1334  1.1  christos 
   1335  1.1  christos BYTE DECOMPRESSED_BUFFER[MAX_DECOMPRESSED_SIZE];
   1336  1.1  christos 
   1337  1.1  christos static size_t testDecodeSimple(frame_t* fr)
   1338  1.1  christos {
   1339  1.1  christos     /* test decoding the generated data with the simple API */
   1340  1.1  christos     size_t const ret = ZSTD_decompress(DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
   1341  1.1  christos                            fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart);
   1342  1.1  christos 
   1343  1.1  christos     if (ZSTD_isError(ret)) return ret;
   1344  1.1  christos 
   1345  1.1  christos     if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart,
   1346  1.1  christos                (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) {
   1347  1.1  christos         return ERROR(corruption_detected);
   1348  1.1  christos     }
   1349  1.1  christos 
   1350  1.1  christos     return ret;
   1351  1.1  christos }
   1352  1.1  christos 
   1353  1.1  christos static size_t testDecodeStreaming(frame_t* fr)
   1354  1.1  christos {
   1355  1.1  christos     /* test decoding the generated data with the streaming API */
   1356  1.1  christos     ZSTD_DStream* zd = ZSTD_createDStream();
   1357  1.1  christos     ZSTD_inBuffer in;
   1358  1.1  christos     ZSTD_outBuffer out;
   1359  1.1  christos     size_t ret;
   1360  1.1  christos 
   1361  1.1  christos     if (!zd) return ERROR(memory_allocation);
   1362  1.1  christos 
   1363  1.1  christos     in.src = fr->dataStart;
   1364  1.1  christos     in.pos = 0;
   1365  1.1  christos     in.size = (BYTE*)fr->data - (BYTE*)fr->dataStart;
   1366  1.1  christos 
   1367  1.1  christos     out.dst = DECOMPRESSED_BUFFER;
   1368  1.1  christos     out.pos = 0;
   1369  1.1  christos     out.size = ZSTD_DStreamOutSize();
   1370  1.1  christos 
   1371  1.1  christos     ZSTD_initDStream(zd);
   1372  1.1  christos     while (1) {
   1373  1.1  christos         ret = ZSTD_decompressStream(zd, &out, &in);
   1374  1.1  christos         if (ZSTD_isError(ret)) goto cleanup; /* error */
   1375  1.1  christos         if (ret == 0) break; /* frame is done */
   1376  1.1  christos 
   1377  1.1  christos         /* force decoding to be done in chunks */
   1378  1.1  christos         out.size += MIN(ZSTD_DStreamOutSize(), MAX_DECOMPRESSED_SIZE - out.size);
   1379  1.1  christos     }
   1380  1.1  christos 
   1381  1.1  christos     ret = out.pos;
   1382  1.1  christos 
   1383  1.1  christos     if (memcmp(out.dst, fr->srcStart, out.pos) != 0) {
   1384  1.1  christos         return ERROR(corruption_detected);
   1385  1.1  christos     }
   1386  1.1  christos 
   1387  1.1  christos cleanup:
   1388  1.1  christos     ZSTD_freeDStream(zd);
   1389  1.1  christos     return ret;
   1390  1.1  christos }
   1391  1.1  christos 
   1392  1.1  christos static size_t testDecodeWithDict(U32 seed, genType_e genType)
   1393  1.1  christos {
   1394  1.1  christos     /* create variables */
   1395  1.1  christos     size_t const dictSize = RAND(&seed) % (10 << 20) + ZDICT_DICTSIZE_MIN + ZDICT_CONTENTSIZE_MIN;
   1396  1.1  christos     U32 const dictID = RAND(&seed);
   1397  1.1  christos     size_t errorDetected = 0;
   1398  1.1  christos     BYTE* const fullDict = malloc(dictSize);
   1399  1.1  christos     if (fullDict == NULL) {
   1400  1.1  christos         return ERROR(GENERIC);
   1401  1.1  christos     }
   1402  1.1  christos 
   1403  1.1  christos     /* generate random dictionary */
   1404  1.1  christos     if (genRandomDict(dictID, seed, dictSize, fullDict)) {  /* return 0 on success */
   1405  1.1  christos         errorDetected = ERROR(GENERIC);
   1406  1.1  christos         goto dictTestCleanup;
   1407  1.1  christos     }
   1408  1.1  christos 
   1409  1.1  christos 
   1410  1.1  christos     {   frame_t fr;
   1411  1.1  christos         dictInfo info;
   1412  1.1  christos         ZSTD_DCtx* const dctx = ZSTD_createDCtx();
   1413  1.1  christos         size_t ret;
   1414  1.1  christos 
   1415  1.1  christos         /* get dict info */
   1416  1.1  christos         {   size_t const headerSize = MAX(dictSize/4, 256);
   1417  1.1  christos             size_t const dictContentSize = dictSize-headerSize;
   1418  1.1  christos             BYTE* const dictContent = fullDict+headerSize;
   1419  1.1  christos             info = initDictInfo(1, dictContentSize, dictContent, dictID);
   1420  1.1  christos         }
   1421  1.1  christos 
   1422  1.1  christos         /* manually decompress and check difference */
   1423  1.1  christos         if (genType == gt_frame) {
   1424  1.1  christos             /* Test frame */
   1425  1.1  christos             generateFrame(seed, &fr, info);
   1426  1.1  christos             ret = ZSTD_decompress_usingDict(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
   1427  1.1  christos                                             fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart,
   1428  1.1  christos                                             fullDict, dictSize);
   1429  1.1  christos         } else {
   1430  1.1  christos             /* Test block */
   1431  1.1  christos             generateCompressedBlock(seed, &fr, info);
   1432  1.1  christos             ret = ZSTD_decompressBegin_usingDict(dctx, fullDict, dictSize);
   1433  1.1  christos             if (ZSTD_isError(ret)) {
   1434  1.1  christos                 errorDetected = ret;
   1435  1.1  christos                 ZSTD_freeDCtx(dctx);
   1436  1.1  christos                 goto dictTestCleanup;
   1437  1.1  christos             }
   1438  1.1  christos             ret = ZSTD_decompressBlock_deprecated(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
   1439  1.1  christos                                        fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart);
   1440  1.1  christos         }
   1441  1.1  christos         ZSTD_freeDCtx(dctx);
   1442  1.1  christos 
   1443  1.1  christos         if (ZSTD_isError(ret)) {
   1444  1.1  christos             errorDetected = ret;
   1445  1.1  christos             goto dictTestCleanup;
   1446  1.1  christos         }
   1447  1.1  christos 
   1448  1.1  christos         if (memcmp(DECOMPRESSED_BUFFER, fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart) != 0) {
   1449  1.1  christos             errorDetected = ERROR(corruption_detected);
   1450  1.1  christos             goto dictTestCleanup;
   1451  1.1  christos         }
   1452  1.1  christos     }
   1453  1.1  christos 
   1454  1.1  christos dictTestCleanup:
   1455  1.1  christos     free(fullDict);
   1456  1.1  christos     return errorDetected;
   1457  1.1  christos }
   1458  1.1  christos 
   1459  1.1  christos static size_t testDecodeRawBlock(frame_t* fr)
   1460  1.1  christos {
   1461  1.1  christos     ZSTD_DCtx* dctx = ZSTD_createDCtx();
   1462  1.1  christos     size_t ret = ZSTD_decompressBegin(dctx);
   1463  1.1  christos     if (ZSTD_isError(ret)) return ret;
   1464  1.1  christos 
   1465  1.1  christos     ret = ZSTD_decompressBlock_deprecated(
   1466  1.1  christos             dctx,
   1467  1.1  christos             DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
   1468  1.1  christos             fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart);
   1469  1.1  christos     ZSTD_freeDCtx(dctx);
   1470  1.1  christos     if (ZSTD_isError(ret)) return ret;
   1471  1.1  christos 
   1472  1.1  christos     if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart,
   1473  1.1  christos                (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) {
   1474  1.1  christos         return ERROR(corruption_detected);
   1475  1.1  christos     }
   1476  1.1  christos 
   1477  1.1  christos     return ret;
   1478  1.1  christos }
   1479  1.1  christos 
   1480  1.1  christos static int runBlockTest(U32* seed)
   1481  1.1  christos {
   1482  1.1  christos     frame_t fr;
   1483  1.1  christos     U32 const seedCopy = *seed;
   1484  1.1  christos     {   dictInfo const info = initDictInfo(0, 0, NULL, 0);
   1485  1.1  christos         *seed = generateCompressedBlock(*seed, &fr, info);
   1486  1.1  christos     }
   1487  1.1  christos 
   1488  1.1  christos     {   size_t const r = testDecodeRawBlock(&fr);
   1489  1.1  christos         if (ZSTD_isError(r)) {
   1490  1.1  christos             DISPLAY("Error in block mode on test seed %u: %s\n",
   1491  1.1  christos                     (unsigned)seedCopy, ZSTD_getErrorName(r));
   1492  1.1  christos             return 1;
   1493  1.1  christos         }
   1494  1.1  christos     }
   1495  1.1  christos 
   1496  1.1  christos     {   size_t const r = testDecodeWithDict(*seed, gt_block);
   1497  1.1  christos         if (ZSTD_isError(r)) {
   1498  1.1  christos             DISPLAY("Error in block mode with dictionary on test seed %u: %s\n",
   1499  1.1  christos                     (unsigned)seedCopy, ZSTD_getErrorName(r));
   1500  1.1  christos             return 1;
   1501  1.1  christos         }
   1502  1.1  christos     }
   1503  1.1  christos     return 0;
   1504  1.1  christos }
   1505  1.1  christos 
   1506  1.1  christos static int runFrameTest(U32* seed)
   1507  1.1  christos {
   1508  1.1  christos     frame_t fr;
   1509  1.1  christos     U32 const seedCopy = *seed;
   1510  1.1  christos     {   dictInfo const info = initDictInfo(0, 0, NULL, 0);
   1511  1.1  christos         *seed = generateFrame(*seed, &fr, info);
   1512  1.1  christos     }
   1513  1.1  christos 
   1514  1.1  christos     {   size_t const r = testDecodeSimple(&fr);
   1515  1.1  christos         if (ZSTD_isError(r)) {
   1516  1.1  christos             DISPLAY("Error in simple mode on test seed %u: %s\n",
   1517  1.1  christos                     (unsigned)seedCopy, ZSTD_getErrorName(r));
   1518  1.1  christos             return 1;
   1519  1.1  christos         }
   1520  1.1  christos     }
   1521  1.1  christos     {   size_t const r = testDecodeStreaming(&fr);
   1522  1.1  christos         if (ZSTD_isError(r)) {
   1523  1.1  christos             DISPLAY("Error in streaming mode on test seed %u: %s\n",
   1524  1.1  christos                     (unsigned)seedCopy, ZSTD_getErrorName(r));
   1525  1.1  christos             return 1;
   1526  1.1  christos         }
   1527  1.1  christos     }
   1528  1.1  christos     {   size_t const r = testDecodeWithDict(*seed, gt_frame);  /* avoid big dictionaries */
   1529  1.1  christos         if (ZSTD_isError(r)) {
   1530  1.1  christos             DISPLAY("Error in dictionary mode on test seed %u: %s\n",
   1531  1.1  christos                     (unsigned)seedCopy, ZSTD_getErrorName(r));
   1532  1.1  christos             return 1;
   1533  1.1  christos         }
   1534  1.1  christos     }
   1535  1.1  christos     return 0;
   1536  1.1  christos }
   1537  1.1  christos 
   1538  1.1  christos static int runTestMode(U32 seed, unsigned numFiles, unsigned const testDurationS,
   1539  1.1  christos                        genType_e genType)
   1540  1.1  christos {
   1541  1.1  christos     unsigned fnum;
   1542  1.1  christos 
   1543  1.1  christos     UTIL_time_t const startClock = UTIL_getTime();
   1544  1.1  christos     U64 const maxClockSpan = testDurationS * SEC_TO_MICRO;
   1545  1.1  christos 
   1546  1.1  christos     if (numFiles == 0 && !testDurationS) numFiles = 1;
   1547  1.1  christos 
   1548  1.1  christos     DISPLAY("seed: %u\n", (unsigned)seed);
   1549  1.1  christos 
   1550  1.1  christos     for (fnum = 0; fnum < numFiles || UTIL_clockSpanMicro(startClock) < maxClockSpan; fnum++) {
   1551  1.1  christos         if (fnum < numFiles)
   1552  1.1  christos             DISPLAYUPDATE("\r%u/%u        ", fnum, numFiles);
   1553  1.1  christos         else
   1554  1.1  christos             DISPLAYUPDATE("\r%u           ", fnum);
   1555  1.1  christos 
   1556  1.1  christos         {   int const ret = (genType == gt_frame) ?
   1557  1.1  christos                             runFrameTest(&seed) :
   1558  1.1  christos                             runBlockTest(&seed);
   1559  1.1  christos             if (ret) return ret;
   1560  1.1  christos         }
   1561  1.1  christos     }
   1562  1.1  christos 
   1563  1.1  christos     DISPLAY("\r%u tests completed: ", fnum);
   1564  1.1  christos     DISPLAY("OK\n");
   1565  1.1  christos 
   1566  1.1  christos     return 0;
   1567  1.1  christos }
   1568  1.1  christos 
   1569  1.1  christos /*-*******************************************************
   1570  1.1  christos *  File I/O
   1571  1.1  christos *********************************************************/
   1572  1.1  christos 
   1573  1.1  christos static int generateFile(U32 seed, const char* const path,
   1574  1.1  christos                         const char* const origPath, genType_e genType)
   1575  1.1  christos {
   1576  1.1  christos     frame_t fr;
   1577  1.1  christos 
   1578  1.1  christos     DISPLAY("seed: %u\n", (unsigned)seed);
   1579  1.1  christos 
   1580  1.1  christos     {   dictInfo const info = initDictInfo(0, 0, NULL, 0);
   1581  1.1  christos         if (genType == gt_frame) {
   1582  1.1  christos             generateFrame(seed, &fr, info);
   1583  1.1  christos         } else {
   1584  1.1  christos             generateCompressedBlock(seed, &fr, info);
   1585  1.1  christos         }
   1586  1.1  christos     }
   1587  1.1  christos     outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path);
   1588  1.1  christos     if (origPath) {
   1589  1.1  christos         outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath);
   1590  1.1  christos     }
   1591  1.1  christos     return 0;
   1592  1.1  christos }
   1593  1.1  christos 
   1594  1.1  christos static int generateCorpus(U32 seed, unsigned numFiles, const char* const path,
   1595  1.1  christos                           const char* const origPath, genType_e genType)
   1596  1.1  christos {
   1597  1.1  christos     char outPath[MAX_PATH];
   1598  1.1  christos     unsigned fnum;
   1599  1.1  christos 
   1600  1.1  christos     DISPLAY("seed: %u\n", (unsigned)seed);
   1601  1.1  christos 
   1602  1.1  christos     for (fnum = 0; fnum < numFiles; fnum++) {
   1603  1.1  christos         frame_t fr;
   1604  1.1  christos 
   1605  1.1  christos         DISPLAYUPDATE("\r%u/%u        ", fnum, numFiles);
   1606  1.1  christos 
   1607  1.1  christos         {   dictInfo const info = initDictInfo(0, 0, NULL, 0);
   1608  1.1  christos             if (genType == gt_frame) {
   1609  1.1  christos                 seed = generateFrame(seed, &fr, info);
   1610  1.1  christos             } else {
   1611  1.1  christos                 seed = generateCompressedBlock(seed, &fr, info);
   1612  1.1  christos             }
   1613  1.1  christos         }
   1614  1.1  christos 
   1615  1.1  christos         if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) {
   1616  1.1  christos             DISPLAY("Error: path too long\n");
   1617  1.1  christos             return 1;
   1618  1.1  christos         }
   1619  1.1  christos         outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath);
   1620  1.1  christos 
   1621  1.1  christos         if (origPath) {
   1622  1.1  christos             if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) {
   1623  1.1  christos                 DISPLAY("Error: path too long\n");
   1624  1.1  christos                 return 1;
   1625  1.1  christos             }
   1626  1.1  christos             outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath);
   1627  1.1  christos         }
   1628  1.1  christos     }
   1629  1.1  christos 
   1630  1.1  christos     DISPLAY("\r%u/%u      \n", fnum, numFiles);
   1631  1.1  christos 
   1632  1.1  christos     return 0;
   1633  1.1  christos }
   1634  1.1  christos 
   1635  1.1  christos static int generateCorpusWithDict(U32 seed, unsigned numFiles, const char* const path,
   1636  1.1  christos                                   const char* const origPath, const size_t dictSize,
   1637  1.1  christos                                   genType_e genType)
   1638  1.1  christos {
   1639  1.1  christos     char outPath[MAX_PATH];
   1640  1.1  christos     BYTE* fullDict;
   1641  1.1  christos     U32 const dictID = RAND(&seed);
   1642  1.1  christos     int errorDetected = 0;
   1643  1.1  christos 
   1644  1.1  christos     if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) {
   1645  1.1  christos         DISPLAY("Error: path too long\n");
   1646  1.1  christos         return 1;
   1647  1.1  christos     }
   1648  1.1  christos 
   1649  1.1  christos     /* allocate space for the dictionary */
   1650  1.1  christos     fullDict = malloc(dictSize);
   1651  1.1  christos     if (fullDict == NULL) {
   1652  1.1  christos         DISPLAY("Error: could not allocate space for full dictionary.\n");
   1653  1.1  christos         return 1;
   1654  1.1  christos     }
   1655  1.1  christos 
   1656  1.1  christos     /* randomly generate the dictionary */
   1657  1.1  christos     {   int const ret = genRandomDict(dictID, seed, dictSize, fullDict);
   1658  1.1  christos         if (ret != 0) {
   1659  1.1  christos             errorDetected = ret;
   1660  1.1  christos             goto dictCleanup;
   1661  1.1  christos         }
   1662  1.1  christos     }
   1663  1.1  christos 
   1664  1.1  christos     /* write out dictionary */
   1665  1.1  christos     if (numFiles != 0) {
   1666  1.1  christos         if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) {
   1667  1.1  christos             DISPLAY("Error: dictionary path too long\n");
   1668  1.1  christos             errorDetected = 1;
   1669  1.1  christos             goto dictCleanup;
   1670  1.1  christos         }
   1671  1.1  christos         outputBuffer(fullDict, dictSize, outPath);
   1672  1.1  christos     }
   1673  1.1  christos     else {
   1674  1.1  christos         outputBuffer(fullDict, dictSize, "dictionary");
   1675  1.1  christos     }
   1676  1.1  christos 
   1677  1.1  christos     /* generate random compressed/decompressed files */
   1678  1.1  christos     {   unsigned fnum;
   1679  1.1  christos         for (fnum = 0; fnum < MAX(numFiles, 1); fnum++) {
   1680  1.1  christos             frame_t fr;
   1681  1.1  christos             DISPLAYUPDATE("\r%u/%u        ", fnum, numFiles);
   1682  1.1  christos             {
   1683  1.1  christos                 size_t const headerSize = MAX(dictSize/4, 256);
   1684  1.1  christos                 size_t const dictContentSize = dictSize-headerSize;
   1685  1.1  christos                 BYTE* const dictContent = fullDict+headerSize;
   1686  1.1  christos                 dictInfo const info = initDictInfo(1, dictContentSize, dictContent, dictID);
   1687  1.1  christos                 if (genType == gt_frame) {
   1688  1.1  christos                     seed = generateFrame(seed, &fr, info);
   1689  1.1  christos                 } else {
   1690  1.1  christos                     seed = generateCompressedBlock(seed, &fr, info);
   1691  1.1  christos                 }
   1692  1.1  christos             }
   1693  1.1  christos 
   1694  1.1  christos             if (numFiles != 0) {
   1695  1.1  christos                 if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) {
   1696  1.1  christos                     DISPLAY("Error: path too long\n");
   1697  1.1  christos                     errorDetected = 1;
   1698  1.1  christos                     goto dictCleanup;
   1699  1.1  christos                 }
   1700  1.1  christos                 outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath);
   1701  1.1  christos 
   1702  1.1  christos                 if (origPath) {
   1703  1.1  christos                     if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) {
   1704  1.1  christos                         DISPLAY("Error: path too long\n");
   1705  1.1  christos                         errorDetected = 1;
   1706  1.1  christos                         goto dictCleanup;
   1707  1.1  christos                     }
   1708  1.1  christos                     outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath);
   1709  1.1  christos                 }
   1710  1.1  christos             }
   1711  1.1  christos             else {
   1712  1.1  christos                 outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path);
   1713  1.1  christos                 if (origPath) {
   1714  1.1  christos                     outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath);
   1715  1.1  christos                 }
   1716  1.1  christos             }
   1717  1.1  christos         }
   1718  1.1  christos     }
   1719  1.1  christos 
   1720  1.1  christos dictCleanup:
   1721  1.1  christos     free(fullDict);
   1722  1.1  christos     return errorDetected;
   1723  1.1  christos }
   1724  1.1  christos 
   1725  1.1  christos 
   1726  1.1  christos /*_*******************************************************
   1727  1.1  christos *  Command line
   1728  1.1  christos *********************************************************/
   1729  1.1  christos static U32 makeSeed(void)
   1730  1.1  christos {
   1731  1.1  christos     U32 t = (U32) time(NULL);
   1732  1.1  christos     return XXH32(&t, sizeof(t), 0) % 65536;
   1733  1.1  christos }
   1734  1.1  christos 
/** readInt() :
 *  Reads the run of decimal digits at `*argument`.
 *  `*argument` is advanced past the digits that were consumed.
 *  @return : the decimal value (0 if there is no leading digit).
 *  Note : the result silently wraps around if the value exceeds UINT_MAX */
static unsigned readInt(const char** argument)
{
    const char* cursor = *argument;
    unsigned total = 0;

    for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
        total = total * 10 + (unsigned)(*cursor - '0');
    }

    *argument = cursor;
    return total;
}
   1745  1.1  christos 
/** usage() :
 *  Prints the short help : synopsis built from `programName`,
 *  followed by the list of basic arguments. */
static void usage(const char* programName)
{
    /* one entry per output line; none contains a conversion specifier */
    static const char* const argumentLines[] = {
        " -p<path> : select output path (default:stdout)\n",
        "                in multiple files mode this should be a directory\n",
        " -o<path> : select path to output original file (default:no output)\n",
        "                in multiple files mode this should be a directory\n",
        " -s#      : select seed (default:random based on time)\n",
        " -n#      : number of files to generate (default:1)\n",
        " -t       : activate test mode (test files against libzstd instead of outputting them)\n",
        " -T#      : length of time to run tests for\n",
        " -v       : increase verbosity level (default:0, max:7)\n",
        " -h/H     : display help/long help and exit\n"
    };
    size_t const nbLines = sizeof(argumentLines) / sizeof(argumentLines[0]);
    size_t lineNb;

    DISPLAY("Usage :\n");
    DISPLAY("      %s [args]\n", programName);
    DISPLAY("\n");
    DISPLAY("Arguments :\n");
    for (lineNb = 0; lineNb < nbLines; lineNb++) {
        DISPLAY("%s", argumentLines[lineNb]);
    }
}
   1763  1.1  christos 
/** advancedUsage() :
 *  Prints the long help : everything usage() prints,
 *  followed by the list of advanced (long-form) arguments. */
static void advancedUsage(const char* programName)
{
    /* one entry per output line; none contains a conversion specifier */
    static const char* const advancedLines[] = {
        "\n",
        "Advanced arguments        :\n",
        " --content-size           : always include the content size in the frame header\n",
        " --use-dict=#             : include a dictionary used to decompress the corpus\n",
        " --gen-blocks             : generate raw compressed blocks without block/frame headers\n",
        " --max-block-size-log=#   : max block size log, must be in range [2, 17]\n",
        " --max-content-size-log=# : max content size log, must be <= 20\n",
        "                            (this is ignored with gen-blocks)\n"
    };
    size_t const nbLines = sizeof(advancedLines) / sizeof(advancedLines[0]);
    size_t lineNb;

    usage(programName);
    for (lineNb = 0; lineNb < nbLines; lineNb++) {
        DISPLAY("%s", advancedLines[lineNb]);
    }
}
   1776  1.1  christos 
/*! readU32FromChar() :
    Parses an unsigned decimal number at `*stringPtr`, optionally followed by
    a size suffix : K, KB, KiB (x1024) or M, MB, MiB (x1024x1024).
    `*stringPtr` is advanced to the first character that was not consumed.
    @return : the parsed value, with the suffix multiplier applied.
    Note : no overflow detection; the result wraps around if it exceeds UINT_MAX */
static unsigned readU32FromChar(const char** stringPtr)
{
    const char* cursor = *stringPtr;
    unsigned value = 0;
    unsigned shift = 0;

    /* decimal digits */
    for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
        value = value * 10 + (unsigned)(*cursor - '0');
    }

    /* optional binary-prefix suffix */
    switch (*cursor) {
        case 'K': shift = 10; break;
        case 'M': shift = 20; break;
        default : break;
    }
    if (shift != 0) {
        value <<= shift;
        cursor++;                       /* skip 'K' / 'M' */
        if (*cursor == 'i') cursor++;   /* skip optional 'i' */
        if (*cursor == 'B') cursor++;   /* skip optional 'B' */
    }

    *stringPtr = cursor;
    return value;
}
   1796  1.1  christos 
/** longCommandWArg() :
 *  Tests whether the text at *stringPtr starts with longCommand.
 *  On a match, *stringPtr is moved just past longCommand and 1 is returned.
 *  Otherwise *stringPtr is left untouched and 0 is returned.
 */
static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
{
    size_t const prefixLen = strlen(longCommand);

    if (strncmp(*stringPtr, longCommand, prefixLen) != 0) {
        return 0;
    }

    *stringPtr += prefixLen;
    return 1;
}
   1809  1.1  christos 
   1810  1.1  christos int main(int argc, char** argv)
   1811  1.1  christos {
   1812  1.1  christos     U32 seed = 0;
   1813  1.1  christos     int seedset = 0;
   1814  1.1  christos     unsigned numFiles = 0;
   1815  1.1  christos     unsigned testDuration = 0;
   1816  1.1  christos     int testMode = 0;
   1817  1.1  christos     const char* path = NULL;
   1818  1.1  christos     const char* origPath = NULL;
   1819  1.1  christos     int useDict = 0;
   1820  1.1  christos     unsigned dictSize = (10 << 10); /* 10 kB default */
   1821  1.1  christos     genType_e genType = gt_frame;
   1822  1.1  christos 
   1823  1.1  christos     int argNb;
   1824  1.1  christos 
   1825  1.1  christos     /* Check command line */
   1826  1.1  christos     for (argNb=1; argNb<argc; argNb++) {
   1827  1.1  christos         const char* argument = argv[argNb];
   1828  1.1  christos         if(!argument) continue;   /* Protection if argument empty */
   1829  1.1  christos 
   1830  1.1  christos         /* Handle commands. Aggregated commands are allowed */
   1831  1.1  christos         if (argument[0]=='-') {
   1832  1.1  christos             argument++;
   1833  1.1  christos             while (*argument!=0) {
   1834  1.1  christos                 switch(*argument)
   1835  1.1  christos                 {
   1836  1.1  christos                 case 'h':
   1837  1.1  christos                     usage(argv[0]);
   1838  1.1  christos                     return 0;
   1839  1.1  christos                 case 'H':
   1840  1.1  christos                     advancedUsage(argv[0]);
   1841  1.1  christos                     return 0;
   1842  1.1  christos                 case 'v':
   1843  1.1  christos                     argument++;
   1844  1.1  christos                     g_displayLevel++;
   1845  1.1  christos                     break;
   1846  1.1  christos                 case 's':
   1847  1.1  christos                     argument++;
   1848  1.1  christos                     seedset=1;
   1849  1.1  christos                     seed = readInt(&argument);
   1850  1.1  christos                     break;
   1851  1.1  christos                 case 'n':
   1852  1.1  christos                     argument++;
   1853  1.1  christos                     numFiles = readInt(&argument);
   1854  1.1  christos                     break;
   1855  1.1  christos                 case 'T':
   1856  1.1  christos                     argument++;
   1857  1.1  christos                     testDuration = readInt(&argument);
   1858  1.1  christos                     if (*argument == 'm') {
   1859  1.1  christos                         testDuration *= 60;
   1860  1.1  christos                         argument++;
   1861  1.1  christos                         if (*argument == 'n') argument++;
   1862  1.1  christos                     }
   1863  1.1  christos                     break;
   1864  1.1  christos                 case 'o':
   1865  1.1  christos                     argument++;
   1866  1.1  christos                     origPath = argument;
   1867  1.1  christos                     argument += strlen(argument);
   1868  1.1  christos                     break;
   1869  1.1  christos                 case 'p':
   1870  1.1  christos                     argument++;
   1871  1.1  christos                     path = argument;
   1872  1.1  christos                     argument += strlen(argument);
   1873  1.1  christos                     break;
   1874  1.1  christos                 case 't':
   1875  1.1  christos                     argument++;
   1876  1.1  christos                     testMode = 1;
   1877  1.1  christos                     break;
   1878  1.1  christos                 case '-':
   1879  1.1  christos                     argument++;
   1880  1.1  christos                     if (strcmp(argument, "content-size") == 0) {
   1881  1.1  christos                         opts.contentSize = 1;
   1882  1.1  christos                     } else if (longCommandWArg(&argument, "use-dict=")) {
   1883  1.1  christos                         dictSize = readU32FromChar(&argument);
   1884  1.1  christos                         useDict = 1;
   1885  1.1  christos                     } else if (strcmp(argument, "gen-blocks") == 0) {
   1886  1.1  christos                         genType = gt_block;
   1887  1.1  christos                     } else if (longCommandWArg(&argument, "max-block-size-log=")) {
   1888  1.1  christos                         U32 value = readU32FromChar(&argument);
   1889  1.1  christos                         if (value >= 2 && value <= ZSTD_BLOCKSIZE_MAX) {
   1890  1.1  christos                             g_maxBlockSize = 1U << value;
   1891  1.1  christos                         }
   1892  1.1  christos                     } else if (longCommandWArg(&argument, "max-content-size-log=")) {
   1893  1.1  christos                         U32 value = readU32FromChar(&argument);
   1894  1.1  christos                         g_maxDecompressedSizeLog =
   1895  1.1  christos                                 MIN(MAX_DECOMPRESSED_SIZE_LOG, value);
   1896  1.1  christos                     } else {
   1897  1.1  christos                         advancedUsage(argv[0]);
   1898  1.1  christos                         return 1;
   1899  1.1  christos                     }
   1900  1.1  christos                     argument += strlen(argument);
   1901  1.1  christos                     break;
   1902  1.1  christos                 default:
   1903  1.1  christos                     usage(argv[0]);
   1904  1.1  christos                     return 1;
   1905  1.1  christos     }   }   }   }   /* for (argNb=1; argNb<argc; argNb++) */
   1906  1.1  christos 
   1907  1.1  christos     if (!seedset) {
   1908  1.1  christos         seed = makeSeed();
   1909  1.1  christos     }
   1910  1.1  christos 
   1911  1.1  christos     if (testMode) {
   1912  1.1  christos         return runTestMode(seed, numFiles, testDuration, genType);
   1913  1.1  christos     } else {
   1914  1.1  christos         if (testDuration) {
   1915  1.1  christos             DISPLAY("Error: -T requires test mode (-t)\n\n");
   1916  1.1  christos             usage(argv[0]);
   1917  1.1  christos             return 1;
   1918  1.1  christos         }
   1919  1.1  christos     }
   1920  1.1  christos 
   1921  1.1  christos     if (!path) {
   1922  1.1  christos         DISPLAY("Error: path is required in file generation mode\n");
   1923  1.1  christos         usage(argv[0]);
   1924  1.1  christos         return 1;
   1925  1.1  christos     }
   1926  1.1  christos 
   1927  1.1  christos     if (numFiles == 0 && useDict == 0) {
   1928  1.1  christos         return generateFile(seed, path, origPath, genType);
   1929  1.1  christos     } else if (useDict == 0){
   1930  1.1  christos         return generateCorpus(seed, numFiles, path, origPath, genType);
   1931  1.1  christos     } else {
   1932  1.1  christos         /* should generate files with a dictionary */
   1933  1.1  christos         return generateCorpusWithDict(seed, numFiles, path, origPath, dictSize, genType);
   1934  1.1  christos     }
   1935  1.1  christos 
   1936  1.1  christos }
   1937