Home | History | Annotate | Line # | Download | only in fuzz
      1 /*
      2  * Copyright (c) Meta Platforms, Inc. and affiliates.
      3  * All rights reserved.
      4  *
      5  * This source code is licensed under both the BSD-style license (found in the
      6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
      7  * in the COPYING file in the root directory of this source tree).
      8  * You may select, at your option, one of the above-listed licenses.
      9  */
     10 
     11 #define ZSTD_STATIC_LINKING_ONLY
     12 #define ZDICT_STATIC_LINKING_ONLY
     13 
     14 #include <string.h>
     15 
     16 #include "zstd_helpers.h"
     17 #include "fuzz_helpers.h"
     18 #include "zstd.h"
     19 #include "zdict.h"
     20 #include "sequence_producer.h"
     21 #include "fuzz_third_party_seq_prod.h"
     22 
/* Compression-level bounds exported to the fuzz targets. They are not
 * referenced anywhere in this file; presumably they delimit the levels a
 * target may pick -- confirm against the users of zstd_helpers.h. */
int const kMinClevel = -3;
int const kMaxClevel = 19;

/* State pointer handed to ZSTD_registerSequenceProducer() when the build
 * defines FUZZ_THIRD_PARTY_SEQ_PROD. It starts out NULL here. */
void* FUZZ_seqProdState = NULL;
     27 
     28 static void set(ZSTD_CCtx *cctx, ZSTD_cParameter param, int value)
     29 {
     30     FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, param, value));
     31 }
     32 
     33 static unsigned produceParamValue(unsigned min, unsigned max,
     34                                   FUZZ_dataProducer_t *producer) {
     35     return FUZZ_dataProducer_uint32Range(producer, min, max);
     36 }
     37 
     38 static void setRand(ZSTD_CCtx *cctx, ZSTD_cParameter param, unsigned min,
     39                     unsigned max, FUZZ_dataProducer_t *producer) {
     40     unsigned const value = produceParamValue(min, max, producer);
     41     set(cctx, param, value);
     42 }
     43 
     44 ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, FUZZ_dataProducer_t *producer)
     45 {
     46     /* Select compression parameters */
     47     ZSTD_compressionParameters cParams;
     48     cParams.windowLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, 15);
     49     cParams.hashLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_HASHLOG_MIN, 15);
     50     cParams.chainLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_CHAINLOG_MIN, 16);
     51     cParams.searchLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_SEARCHLOG_MIN, 9);
     52     cParams.minMatch = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN,
     53                                           ZSTD_MINMATCH_MAX);
     54     cParams.targetLength = FUZZ_dataProducer_uint32Range(producer, 0, 512);
     55     cParams.strategy = FUZZ_dataProducer_uint32Range(producer, ZSTD_STRATEGY_MIN, ZSTD_STRATEGY_MAX);
     56     return ZSTD_adjustCParams(cParams, srcSize, 0);
     57 }
     58 
     59 ZSTD_frameParameters FUZZ_randomFParams(FUZZ_dataProducer_t *producer)
     60 {
     61     /* Select frame parameters */
     62     ZSTD_frameParameters fParams;
     63     fParams.contentSizeFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1);
     64     fParams.checksumFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1);
     65     fParams.noDictIDFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1);
     66     return fParams;
     67 }
     68 
     69 ZSTD_parameters FUZZ_randomParams(size_t srcSize, FUZZ_dataProducer_t *producer)
     70 {
     71     ZSTD_parameters params;
     72     params.cParams = FUZZ_randomCParams(srcSize, producer);
     73     params.fParams = FUZZ_randomFParams(producer);
     74     return params;
     75 }
     76 
     77 static void setSequenceProducerParams(ZSTD_CCtx *cctx, FUZZ_dataProducer_t *producer) {
     78 #ifdef FUZZ_THIRD_PARTY_SEQ_PROD
     79     ZSTD_registerSequenceProducer(
     80         cctx,
     81         FUZZ_seqProdState,
     82         FUZZ_thirdPartySeqProd
     83     );
     84 #else
     85     ZSTD_registerSequenceProducer(
     86         cctx,
     87         NULL,
     88         simpleSequenceProducer
     89     );
     90 #endif
     91 
     92 #ifdef FUZZ_THIRD_PARTY_SEQ_PROD
     93     FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableSeqProducerFallback, 1));
     94 #else
     95     setRand(cctx, ZSTD_c_enableSeqProducerFallback, 0, 1, producer);
     96 #endif
     97     FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0));
     98     FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_disable));
     99 }
    100 
    101 void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer_t *producer)
    102 {
    103     ZSTD_compressionParameters cParams = FUZZ_randomCParams(srcSize, producer);
    104     set(cctx, ZSTD_c_windowLog, cParams.windowLog);
    105     set(cctx, ZSTD_c_hashLog, cParams.hashLog);
    106     set(cctx, ZSTD_c_chainLog, cParams.chainLog);
    107     set(cctx, ZSTD_c_searchLog, cParams.searchLog);
    108     set(cctx, ZSTD_c_minMatch, cParams.minMatch);
    109     set(cctx, ZSTD_c_targetLength, cParams.targetLength);
    110     set(cctx, ZSTD_c_strategy, cParams.strategy);
    111     /* Select frame parameters */
    112     setRand(cctx, ZSTD_c_contentSizeFlag, 0, 1, producer);
    113     setRand(cctx, ZSTD_c_checksumFlag, 0, 1, producer);
    114     setRand(cctx, ZSTD_c_dictIDFlag, 0, 1, producer);
    115     /* Select long distance matching parameters */
    116     setRand(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_auto, ZSTD_ps_disable, producer);
    117     setRand(cctx, ZSTD_c_ldmHashLog, ZSTD_HASHLOG_MIN, 16, producer);
    118     setRand(cctx, ZSTD_c_ldmMinMatch, ZSTD_LDM_MINMATCH_MIN,
    119             ZSTD_LDM_MINMATCH_MAX, producer);
    120     setRand(cctx, ZSTD_c_ldmBucketSizeLog, 0, ZSTD_LDM_BUCKETSIZELOG_MAX,
    121             producer);
    122     setRand(cctx, ZSTD_c_ldmHashRateLog, ZSTD_LDM_HASHRATELOG_MIN,
    123             ZSTD_LDM_HASHRATELOG_MAX, producer);
    124     /* Set misc parameters */
    125 #ifndef ZSTD_MULTITHREAD
    126     // To reproduce with or without ZSTD_MULTITHREAD, we are going to use
    127     // the same amount of entropy.
    128     unsigned const nbWorkers_value = produceParamValue(0, 2, producer);
    129     unsigned const rsyncable_value = produceParamValue(0, 1, producer);
    130     (void)nbWorkers_value;
    131     (void)rsyncable_value;
    132     set(cctx, ZSTD_c_nbWorkers, 0);
    133     set(cctx, ZSTD_c_rsyncable, 0);
    134 #else
    135     setRand(cctx, ZSTD_c_nbWorkers, 0, 2, producer);
    136     setRand(cctx, ZSTD_c_rsyncable, 0, 1, producer);
    137 #endif
    138     setRand(cctx, ZSTD_c_useRowMatchFinder, 0, 2, producer);
    139     setRand(cctx, ZSTD_c_enableDedicatedDictSearch, 0, 1, producer);
    140     setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, producer);
    141     setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, producer);
    142     setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer);
    143     setRand(cctx, ZSTD_c_useBlockSplitter, 0, 2, producer);
    144     setRand(cctx, ZSTD_c_deterministicRefPrefix, 0, 1, producer);
    145     setRand(cctx, ZSTD_c_prefetchCDictTables, 0, 2, producer);
    146     setRand(cctx, ZSTD_c_maxBlockSize, ZSTD_BLOCKSIZE_MAX_MIN, ZSTD_BLOCKSIZE_MAX, producer);
    147     setRand(cctx, ZSTD_c_validateSequences, 0, 1, producer);
    148     setRand(cctx, ZSTD_c_searchForExternalRepcodes, 0, 2, producer);
    149     if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
    150       setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer);
    151     }
    152     if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
    153       setRand(cctx, ZSTD_c_targetCBlockSize, ZSTD_TARGETCBLOCKSIZE_MIN, ZSTD_TARGETCBLOCKSIZE_MAX, producer);
    154     }
    155 
    156 #ifdef FUZZ_THIRD_PARTY_SEQ_PROD
    157     setSequenceProducerParams(cctx, producer);
    158 #else
    159     if (FUZZ_dataProducer_uint32Range(producer, 0, 10) == 1) {
    160         setSequenceProducerParams(cctx, producer);
    161     } else {
    162         ZSTD_registerSequenceProducer(cctx, NULL, NULL);
    163     }
    164 #endif
    165 }
    166 
    167 FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, FUZZ_dataProducer_t *producer)
    168 {
    169     size_t const dictSize = MAX(srcSize / 8, 1024);
    170     size_t const totalSampleSize = dictSize * 11;
    171     FUZZ_dict_t dict = { FUZZ_malloc(dictSize), dictSize };
    172     char* const samples = (char*)FUZZ_malloc(totalSampleSize);
    173     unsigned nbSamples = 100;
    174     size_t* const samplesSizes = (size_t*)FUZZ_malloc(sizeof(size_t) * nbSamples);
    175     size_t pos = 0;
    176     size_t sample = 0;
    177     ZDICT_fastCover_params_t params;
    178 
    179     for (sample = 0; sample < nbSamples; ++sample) {
    180       size_t const remaining = totalSampleSize - pos;
    181       size_t const offset = FUZZ_dataProducer_uint32Range(producer, 0, MAX(srcSize, 1) - 1);
    182       size_t const limit = MIN(srcSize - offset, remaining);
    183       size_t const toCopy = MIN(limit, remaining / (nbSamples - sample));
    184       memcpy(samples + pos, (const char*)src + offset, toCopy);
    185       pos += toCopy;
    186       samplesSizes[sample] = toCopy;
    187     }
    188     memset(samples + pos, 0, totalSampleSize - pos);
    189 
    190     memset(&params, 0, sizeof(params));
    191     params.accel = 5;
    192     params.k = 40;
    193     params.d = 8;
    194     params.f = 14;
    195     params.zParams.compressionLevel = 1;
    196     dict.size = ZDICT_trainFromBuffer_fastCover(dict.buff, dictSize,
    197         samples, samplesSizes, nbSamples, params);
    198     if (ZSTD_isError(dict.size)) {
    199         free(dict.buff);
    200         memset(&dict, 0, sizeof(dict));
    201     }
    202 
    203     free(samplesSizes);
    204     free(samples);
    205 
    206     return dict;
    207 }
    208