Home | History | Annotate | Line # | Download | only in fuzz
      1 /*
      2  * Copyright (c) Meta Platforms, Inc. and affiliates.
      3  * All rights reserved.
      4  *
      5  * This source code is licensed under both the BSD-style license (found in the
      6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
      7  * in the COPYING file in the root directory of this source tree).
      8  * You may select, at your option, one of the above-listed licenses.
      9  */
     10 
     11 /**
     12  * This fuzz target performs a zstd round-trip test by generating an arbitrary
     13  * array of sequences, generating the associated source buffer, calling
     14  * ZSTD_compressSequences(), and then decompresses and compares the result with
     15  * the original generated source buffer.
     16  */
     17 
     18 #define ZSTD_STATIC_LINKING_ONLY
     19 
     20 #include <stddef.h>
     21 #include <stdlib.h>
     22 #include <stdio.h>
     23 #include <string.h>
     24 #include <time.h>
     25 #include "fuzz_helpers.h"
     26 #include "zstd_helpers.h"
     27 #include "fuzz_data_producer.h"
     28 #include "fuzz_third_party_seq_prod.h"
     29 
     30 static ZSTD_CCtx* cctx = NULL;
     31 static ZSTD_DCtx* dctx = NULL;
     32 static void* literalsBuffer = NULL;
     33 static void* generatedSrc = NULL;
     34 static ZSTD_Sequence* generatedSequences = NULL;
     35 
     36 static void* dictBuffer = NULL;
     37 static ZSTD_CDict* cdict = NULL;
     38 static ZSTD_DDict* ddict = NULL;
     39 
     40 #define ZSTD_FUZZ_GENERATED_SRC_MAXSIZE (1 << 20) /* Allow up to 1MB generated data */
     41 #define ZSTD_FUZZ_GENERATED_LITERALS_SIZE (1 << 20) /* Fixed size 1MB literals buffer */
     42 #define ZSTD_FUZZ_MATCHLENGTH_MAXSIZE (1 << 18) /* Allow up to 256KB matches */
     43 #define ZSTD_FUZZ_GENERATED_DICT_MAXSIZE (1 << ZSTD_WINDOWLOG_MAX_32) /* Allow up to 1 << ZSTD_WINDOWLOG_MAX_32 dictionary */
     44 #define ZSTD_FUZZ_MAX_NBSEQ (1 << 17) /* Maximum of 128K sequences */
     45 
     46 /* Deterministic random number generator */
     47 #define FUZZ_RDG_rotl32(x,r) ((x << r) | (x >> (32 - r)))
     48 static uint32_t FUZZ_RDG_rand(uint32_t* src)
     49 {
     50     static const uint32_t prime1 = 2654435761U;
     51     static const uint32_t prime2 = 2246822519U;
     52     uint32_t rand32 = *src;
     53     rand32 *= prime1;
     54     rand32 ^= prime2;
     55     rand32  = FUZZ_RDG_rotl32(rand32, 13);
     56     *src = rand32;
     57     return rand32 >> 5;
     58 }
     59 
     60 /* Make a pseudorandom string - this simple function exists to avoid
     61  * taking a dependency on datagen.h to have RDG_genBuffer().
     62  */
     63 static char* generatePseudoRandomString(char* str, size_t size, FUZZ_dataProducer_t* producer) {
     64     const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJK1234567890!@#$^&*()_";
     65     uint32_t seed = FUZZ_dataProducer_uint32(producer);
     66     if (size) {
     67         for (size_t n = 0; n < size; n++) {
     68             int key = FUZZ_RDG_rand(&seed) % (int) (sizeof charset - 1);
     69             str[n] = charset[key];
     70         }
     71     }
     72     return str;
     73 }
     74 
     75 /* Returns size of source buffer */
     76 static size_t decodeSequences(void* dst, size_t nbSequences,
     77                               size_t literalsSize,
     78                               const void* dict, size_t dictSize,
     79                               ZSTD_sequenceFormat_e mode)
     80 {
     81     const uint8_t* litPtr = literalsBuffer;
     82     const uint8_t* const litBegin = literalsBuffer;
     83     const uint8_t* const litEnd = litBegin + literalsSize;
     84     const uint8_t* dictPtr = dict;
     85     uint8_t* op = dst;
     86     const uint8_t* const oend = (uint8_t*)dst + ZSTD_FUZZ_GENERATED_SRC_MAXSIZE;
     87     size_t generatedSrcBufferSize = 0;
     88     size_t bytesWritten = 0;
     89 
     90     for (size_t i = 0; i < nbSequences; ++i) {
     91         /* block boundary */
     92         if (generatedSequences[i].offset == 0)
     93             FUZZ_ASSERT(generatedSequences[i].matchLength == 0);
     94 
     95         if (litPtr + generatedSequences[i].litLength > litEnd) {
     96             litPtr = litBegin;
     97         }
     98         memcpy(op, litPtr, generatedSequences[i].litLength);
     99         bytesWritten += generatedSequences[i].litLength;
    100         op += generatedSequences[i].litLength;
    101         litPtr += generatedSequences[i].litLength;
    102 
    103         /* Copy over the match */
    104         {   size_t matchLength = generatedSequences[i].matchLength;
    105             size_t j = 0;
    106             size_t k = 0;
    107             if (dictSize != 0) {
    108                 if (generatedSequences[i].offset > bytesWritten) { /* Offset goes into the dictionary */
    109                     size_t dictOffset = generatedSequences[i].offset - bytesWritten;
    110                     size_t matchInDict = MIN(matchLength, dictOffset);
    111                     for (; k < matchInDict; ++k) {
    112                         op[k] = dictPtr[dictSize - dictOffset + k];
    113                     }
    114                     matchLength -= matchInDict;
    115                     op += matchInDict;
    116                 }
    117             }
    118             for (; j < matchLength; ++j) {
    119                 op[j] = op[(ptrdiff_t)(j - generatedSequences[i].offset)];
    120             }
    121             op += j;
    122             FUZZ_ASSERT(generatedSequences[i].matchLength == j + k);
    123             bytesWritten += generatedSequences[i].matchLength;
    124         }
    125     }
    126     generatedSrcBufferSize = bytesWritten;
    127     FUZZ_ASSERT(litPtr <= litEnd);
    128     if (mode == ZSTD_sf_noBlockDelimiters) {
    129         const uint32_t lastLLSize = (uint32_t)(litEnd - litPtr);
    130         if (lastLLSize <= oend - op) {
    131             memcpy(op, litPtr, lastLLSize);
    132             generatedSrcBufferSize += lastLLSize;
    133     }   }
    134     return generatedSrcBufferSize;
    135 }
    136 
    137 /* Returns nb sequences generated
    138  * Note : random sequences are always valid in ZSTD_sf_noBlockDelimiters mode.
    139  * However, it can fail with ZSTD_sf_explicitBlockDelimiters,
    140  * due to potential lack of space in
    141  */
    142 static size_t generateRandomSequences(FUZZ_dataProducer_t* producer,
    143                                       size_t literalsSizeLimit, size_t dictSize,
    144                                       size_t windowLog, ZSTD_sequenceFormat_e mode)
    145 {
    146     const uint32_t repCode = 0;  /* not used by sequence ingestion api */
    147     size_t windowSize = 1ULL << windowLog;
    148     size_t blockSizeMax = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
    149     uint32_t matchLengthMax = ZSTD_FUZZ_MATCHLENGTH_MAXSIZE;
    150     uint32_t bytesGenerated = 0;
    151     uint32_t nbSeqGenerated = 0;
    152     uint32_t isFirstSequence = 1;
    153     uint32_t blockSize = 0;
    154 
    155     if (mode == ZSTD_sf_explicitBlockDelimiters) {
    156         /* ensure that no sequence can be larger than one block */
    157         literalsSizeLimit = MIN(literalsSizeLimit, blockSizeMax/2);
    158         matchLengthMax = MIN(matchLengthMax, blockSizeMax/2);
    159     }
    160 
    161     while ( nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ - 3 /* extra room for explicit delimiters */
    162          && bytesGenerated < ZSTD_FUZZ_GENERATED_SRC_MAXSIZE
    163          && !FUZZ_dataProducer_empty(producer)) {
    164         uint32_t matchLength;
    165         uint32_t matchBound = matchLengthMax;
    166         uint32_t offset;
    167         uint32_t offsetBound;
    168         const uint32_t minLitLength = (isFirstSequence && (dictSize == 0));
    169         const uint32_t litLength = FUZZ_dataProducer_uint32Range(producer, minLitLength, (uint32_t)literalsSizeLimit);
    170         bytesGenerated += litLength;
    171         if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) {
    172             break;
    173         }
    174         offsetBound = (bytesGenerated > windowSize) ? windowSize : bytesGenerated + (uint32_t)dictSize;
    175         offset = FUZZ_dataProducer_uint32Range(producer, 1, offsetBound);
    176         if (dictSize > 0 && bytesGenerated <= windowSize) {
    177             /* Prevent match length from being such that it would be associated with an offset too large
    178              * from the decoder's perspective. If not possible (match would be too small),
    179              * then reduce the offset if necessary.
    180              */
    181             const size_t bytesToReachWindowSize = windowSize - bytesGenerated;
    182             if (bytesToReachWindowSize < ZSTD_MINMATCH_MIN) {
    183                 const uint32_t newOffsetBound = offsetBound > windowSize ? windowSize : offsetBound;
    184                 offset = FUZZ_dataProducer_uint32Range(producer, 1, newOffsetBound);
    185             } else {
    186                 matchBound = MIN(matchLengthMax, (uint32_t)bytesToReachWindowSize);
    187             }
    188         }
    189         matchLength = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, matchBound);
    190         bytesGenerated += matchLength;
    191         if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) {
    192             break;
    193         }
    194         {   ZSTD_Sequence seq = {offset, litLength, matchLength, repCode};
    195             const uint32_t lastLits = FUZZ_dataProducer_uint32Range(producer, 0, litLength);
    196             #define SPLITPROB 6000
    197             #define SPLITMARK 5234
    198             const int split = (FUZZ_dataProducer_uint32Range(producer, 0, SPLITPROB) == SPLITMARK);
    199             if (mode == ZSTD_sf_explicitBlockDelimiters) {
    200                 const size_t seqSize = seq.litLength + seq.matchLength;
    201                 if (blockSize + seqSize > blockSizeMax) {  /* reaching limit : must end block now */
    202                     const ZSTD_Sequence endBlock = {0, 0, 0, 0};
    203                     generatedSequences[nbSeqGenerated++] = endBlock;
    204                     blockSize = seqSize;
    205                 }
    206                 if (split) {
    207                     const ZSTD_Sequence endBlock = {0, lastLits, 0, 0};
    208                     generatedSequences[nbSeqGenerated++] = endBlock;
    209                     assert(lastLits <= seq.litLength);
    210                     seq.litLength -= lastLits;
    211                     blockSize = seqSize - lastLits;
    212                 } else {
    213                     blockSize += seqSize;
    214                 }
    215             }
    216             generatedSequences[nbSeqGenerated++] = seq;
    217             isFirstSequence = 0;
    218         }
    219     }
    220 
    221     if (mode == ZSTD_sf_explicitBlockDelimiters) {
    222         /* always end sequences with a block delimiter */
    223         const ZSTD_Sequence endBlock = {0, 0, 0, 0};
    224         assert(nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ);
    225         generatedSequences[nbSeqGenerated++] = endBlock;
    226     }
    227     return nbSeqGenerated;
    228 }
    229 
    230 static size_t roundTripTest(void* result, size_t resultCapacity,
    231                             void* compressed, size_t compressedCapacity,
    232                             const void* src, size_t srcSize,
    233                             const ZSTD_Sequence* seqs, size_t seqSize,
    234                             unsigned hasDict,
    235                             ZSTD_sequenceFormat_e mode)
    236 {
    237     size_t cSize;
    238     size_t dSize;
    239 
    240     if (hasDict) {
    241         FUZZ_ZASSERT(ZSTD_CCtx_refCDict(cctx, cdict));
    242         FUZZ_ZASSERT(ZSTD_DCtx_refDDict(dctx, ddict));
    243     }
    244 
    245     cSize = ZSTD_compressSequences(cctx, compressed, compressedCapacity,
    246                                    seqs, seqSize,
    247                                    src, srcSize);
    248     if ( (ZSTD_getErrorCode(cSize) == ZSTD_error_dstSize_tooSmall)
    249       && (mode == ZSTD_sf_explicitBlockDelimiters) ) {
    250         /* Valid scenario : in explicit delimiter mode,
    251          * it might be possible for the compressed size to outgrow dstCapacity.
    252          * In which case, it's still a valid fuzzer scenario,
    253          * but no roundtrip shall be possible */
    254         return 0;
    255     }
    256     /* round-trip */
    257     FUZZ_ZASSERT(cSize);
    258     dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize);
    259     FUZZ_ZASSERT(dSize);
    260     FUZZ_ASSERT_MSG(dSize == srcSize, "Incorrect regenerated size");
    261     FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, result, srcSize), "Corruption!");
    262     return dSize;
    263 }
    264 
    265 int LLVMFuzzerTestOneInput(const uint8_t* src, size_t size)
    266 {
    267     FUZZ_SEQ_PROD_SETUP();
    268 
    269     void* rBuf;
    270     size_t rBufSize;
    271     void* cBuf;
    272     size_t cBufSize;
    273     size_t generatedSrcSize;
    274     size_t nbSequences;
    275     size_t dictSize = 0;
    276     unsigned hasDict;
    277     unsigned wLog;
    278     int cLevel;
    279     ZSTD_sequenceFormat_e mode;
    280 
    281     FUZZ_dataProducer_t* const producer = FUZZ_dataProducer_create(src, size);
    282     FUZZ_ASSERT(producer);
    283 
    284     if (!cctx) {
    285         cctx = ZSTD_createCCtx();
    286         FUZZ_ASSERT(cctx);
    287     }
    288     if (!dctx) {
    289         dctx = ZSTD_createDCtx();
    290         FUZZ_ASSERT(dctx);
    291     }
    292 
    293     /* Generate window log first so we don't generate offsets too large */
    294     wLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
    295     cLevel = FUZZ_dataProducer_int32Range(producer, -3, 22);
    296     mode = (ZSTD_sequenceFormat_e)FUZZ_dataProducer_int32Range(producer, 0, 1);
    297 
    298     ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
    299     ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0);
    300     ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel);
    301     ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, wLog);
    302     ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, ZSTD_MINMATCH_MIN);
    303     ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);
    304     ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, mode);
    305     ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach);
    306 
    307     if (!literalsBuffer) {
    308         literalsBuffer = FUZZ_malloc(ZSTD_FUZZ_GENERATED_LITERALS_SIZE);
    309         FUZZ_ASSERT(literalsBuffer);
    310         literalsBuffer = generatePseudoRandomString(literalsBuffer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, producer);
    311     }
    312 
    313     if (!dictBuffer) { /* Generate global dictionary buffer */
    314         ZSTD_compressionParameters cParams;
    315 
    316         /* Generate a large dictionary buffer */
    317         dictBuffer = calloc(ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, 1);
    318         FUZZ_ASSERT(dictBuffer);
    319 
    320         /* Create global cdict and ddict */
    321         cParams = ZSTD_getCParams(1, ZSTD_FUZZ_GENERATED_SRC_MAXSIZE, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE);
    322         cParams.minMatch = ZSTD_MINMATCH_MIN;
    323         cParams.hashLog = ZSTD_HASHLOG_MIN;
    324         cParams.chainLog = ZSTD_CHAINLOG_MIN;
    325 
    326         cdict = ZSTD_createCDict_advanced(dictBuffer, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, ZSTD_dlm_byRef, ZSTD_dct_rawContent, cParams, ZSTD_defaultCMem);
    327         ddict = ZSTD_createDDict_advanced(dictBuffer, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, ZSTD_dlm_byRef, ZSTD_dct_rawContent, ZSTD_defaultCMem);
    328         FUZZ_ASSERT(cdict);
    329         FUZZ_ASSERT(ddict);
    330     }
    331 
    332     FUZZ_ASSERT(cdict);
    333     FUZZ_ASSERT(ddict);
    334 
    335     hasDict = FUZZ_dataProducer_uint32Range(producer, 0, 1);
    336     if (hasDict) {
    337         dictSize = ZSTD_FUZZ_GENERATED_DICT_MAXSIZE;
    338     }
    339 
    340     if (!generatedSequences) {
    341         generatedSequences = FUZZ_malloc(sizeof(ZSTD_Sequence)*ZSTD_FUZZ_MAX_NBSEQ);
    342     }
    343     if (!generatedSrc) {
    344         generatedSrc = FUZZ_malloc(ZSTD_FUZZ_GENERATED_SRC_MAXSIZE);
    345     }
    346 
    347     nbSequences = generateRandomSequences(producer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictSize, wLog, mode);
    348     generatedSrcSize = decodeSequences(generatedSrc, nbSequences, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictBuffer, dictSize, mode);
    349 
    350     /* Note : in explicit block delimiters mode,
    351      * the fuzzer might generate a lot of small blocks.
    352      * In which case, the final compressed size might be > ZSTD_compressBound().
    353      * This is still a valid scenario fuzzer though, which makes it possible to check under-sized dstCapacity.
    354      * The test just doesn't roundtrip. */
    355     cBufSize = ZSTD_compressBound(generatedSrcSize);
    356     cBuf = FUZZ_malloc(cBufSize);
    357 
    358     rBufSize = generatedSrcSize;
    359     rBuf = FUZZ_malloc(rBufSize);
    360 
    361     {   const size_t result = roundTripTest(rBuf, rBufSize,
    362                                         cBuf, cBufSize,
    363                                         generatedSrc, generatedSrcSize,
    364                                         generatedSequences, nbSequences,
    365                                         hasDict, mode);
    366         FUZZ_ASSERT(result <= generatedSrcSize);  /* can be 0 when no round-trip */
    367     }
    368 
    369     free(rBuf);
    370     free(cBuf);
    371     FUZZ_dataProducer_free(producer);
    372 #ifndef STATEFUL_FUZZING
    373     ZSTD_freeCCtx(cctx); cctx = NULL;
    374     ZSTD_freeDCtx(dctx); dctx = NULL;
    375     free(generatedSequences); generatedSequences = NULL;
    376     free(generatedSrc); generatedSrc = NULL;
    377     free(literalsBuffer); literalsBuffer = NULL;
    378 #endif
    379     FUZZ_SEQ_PROD_TEARDOWN();
    380     return 0;
    381 }
    382