Home | History | Annotate | Line # | Download | only in decompress
      1 /*
      2  * Copyright (c) Meta Platforms, Inc. and affiliates.
      3  * All rights reserved.
      4  *
      5  * This source code is licensed under both the BSD-style license (found in the
      6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
      7  * in the COPYING file in the root directory of this source tree).
      8  * You may select, at your option, one of the above-listed licenses.
      9  */
     10 
     11 /* zstd_ddict.c :
     12  * concentrates all logic that needs to know the internals of ZSTD_DDict object */
     13 
     14 /*-*******************************************************
     15 *  Dependencies
     16 *********************************************************/
     17 #include "../common/allocations.h"  /* ZSTD_customMalloc, ZSTD_customFree */
     18 #include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
     19 #include "../common/cpu.h"         /* bmi2 */
     20 #include "../common/mem.h"         /* low level memory routines */
     21 #define FSE_STATIC_LINKING_ONLY
     22 #include "../common/fse.h"
     23 #include "../common/huf.h"
     24 #include "zstd_decompress_internal.h"
     25 #include "zstd_ddict.h"
     26 
     27 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
     28 #  include "../legacy/zstd_legacy.h"
     29 #endif
     30 
     31 
     32 
     33 /*-*******************************************************
     34 *  Types
     35 *********************************************************/
     36 struct ZSTD_DDict_s {
     37     void* dictBuffer;
     38     const void* dictContent;
     39     size_t dictSize;
     40     ZSTD_entropyDTables_t entropy;
     41     U32 dictID;
     42     U32 entropyPresent;
     43     ZSTD_customMem cMem;
     44 };  /* typedef'd to ZSTD_DDict within "zstd.h" */
     45 
     46 const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
     47 {
     48     assert(ddict != NULL);
     49     return ddict->dictContent;
     50 }
     51 
     52 size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
     53 {
     54     assert(ddict != NULL);
     55     return ddict->dictSize;
     56 }
     57 
     58 void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
     59 {
     60     DEBUGLOG(4, "ZSTD_copyDDictParameters");
     61     assert(dctx != NULL);
     62     assert(ddict != NULL);
     63     dctx->dictID = ddict->dictID;
     64     dctx->prefixStart = ddict->dictContent;
     65     dctx->virtualStart = ddict->dictContent;
     66     dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
     67     dctx->previousDstEnd = dctx->dictEnd;
     68 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
     69     dctx->dictContentBeginForFuzzing = dctx->prefixStart;
     70     dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
     71 #endif
     72     if (ddict->entropyPresent) {
     73         dctx->litEntropy = 1;
     74         dctx->fseEntropy = 1;
     75         dctx->LLTptr = ddict->entropy.LLTable;
     76         dctx->MLTptr = ddict->entropy.MLTable;
     77         dctx->OFTptr = ddict->entropy.OFTable;
     78         dctx->HUFptr = ddict->entropy.hufTable;
     79         dctx->entropy.rep[0] = ddict->entropy.rep[0];
     80         dctx->entropy.rep[1] = ddict->entropy.rep[1];
     81         dctx->entropy.rep[2] = ddict->entropy.rep[2];
     82     } else {
     83         dctx->litEntropy = 0;
     84         dctx->fseEntropy = 0;
     85     }
     86 }
     87 
     88 
     89 static size_t
     90 ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
     91                            ZSTD_dictContentType_e dictContentType)
     92 {
     93     ddict->dictID = 0;
     94     ddict->entropyPresent = 0;
     95     if (dictContentType == ZSTD_dct_rawContent) return 0;
     96 
     97     if (ddict->dictSize < 8) {
     98         if (dictContentType == ZSTD_dct_fullDict)
     99             return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
    100         return 0;   /* pure content mode */
    101     }
    102     {   U32 const magic = MEM_readLE32(ddict->dictContent);
    103         if (magic != ZSTD_MAGIC_DICTIONARY) {
    104             if (dictContentType == ZSTD_dct_fullDict)
    105                 return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
    106             return 0;   /* pure content mode */
    107         }
    108     }
    109     ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
    110 
    111     /* load entropy tables */
    112     RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
    113             &ddict->entropy, ddict->dictContent, ddict->dictSize)),
    114         dictionary_corrupted, "");
    115     ddict->entropyPresent = 1;
    116     return 0;
    117 }
    118 
    119 
    120 static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
    121                                       const void* dict, size_t dictSize,
    122                                       ZSTD_dictLoadMethod_e dictLoadMethod,
    123                                       ZSTD_dictContentType_e dictContentType)
    124 {
    125     if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
    126         ddict->dictBuffer = NULL;
    127         ddict->dictContent = dict;
    128         if (!dict) dictSize = 0;
    129     } else {
    130         void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem);
    131         ddict->dictBuffer = internalBuffer;
    132         ddict->dictContent = internalBuffer;
    133         if (!internalBuffer) return ERROR(memory_allocation);
    134         ZSTD_memcpy(internalBuffer, dict, dictSize);
    135     }
    136     ddict->dictSize = dictSize;
    137     ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001);  /* cover both little and big endian */
    138 
    139     /* parse dictionary content */
    140     FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
    141 
    142     return 0;
    143 }
    144 
    145 ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
    146                                       ZSTD_dictLoadMethod_e dictLoadMethod,
    147                                       ZSTD_dictContentType_e dictContentType,
    148                                       ZSTD_customMem customMem)
    149 {
    150     if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
    151 
    152     {   ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
    153         if (ddict == NULL) return NULL;
    154         ddict->cMem = customMem;
    155         {   size_t const initResult = ZSTD_initDDict_internal(ddict,
    156                                             dict, dictSize,
    157                                             dictLoadMethod, dictContentType);
    158             if (ZSTD_isError(initResult)) {
    159                 ZSTD_freeDDict(ddict);
    160                 return NULL;
    161         }   }
    162         return ddict;
    163     }
    164 }
    165 
    166 /*! ZSTD_createDDict() :
    167 *   Create a digested dictionary, to start decompression without startup delay.
    168 *   `dict` content is copied inside DDict.
    169 *   Consequently, `dict` can be released after `ZSTD_DDict` creation */
    170 ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
    171 {
    172     ZSTD_customMem const allocator = { NULL, NULL, NULL };
    173     return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
    174 }
    175 
    176 /*! ZSTD_createDDict_byReference() :
    177  *  Create a digested dictionary, to start decompression without startup delay.
    178  *  Dictionary content is simply referenced, it will be accessed during decompression.
    179  *  Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
    180 ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
    181 {
    182     ZSTD_customMem const allocator = { NULL, NULL, NULL };
    183     return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
    184 }
    185 
    186 
    187 const ZSTD_DDict* ZSTD_initStaticDDict(
    188                                 void* sBuffer, size_t sBufferSize,
    189                                 const void* dict, size_t dictSize,
    190                                 ZSTD_dictLoadMethod_e dictLoadMethod,
    191                                 ZSTD_dictContentType_e dictContentType)
    192 {
    193     size_t const neededSpace = sizeof(ZSTD_DDict)
    194                              + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
    195     ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
    196     assert(sBuffer != NULL);
    197     assert(dict != NULL);
    198     if ((size_t)sBuffer & 7) return NULL;   /* 8-aligned */
    199     if (sBufferSize < neededSpace) return NULL;
    200     if (dictLoadMethod == ZSTD_dlm_byCopy) {
    201         ZSTD_memcpy(ddict+1, dict, dictSize);  /* local copy */
    202         dict = ddict+1;
    203     }
    204     if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
    205                                               dict, dictSize,
    206                                               ZSTD_dlm_byRef, dictContentType) ))
    207         return NULL;
    208     return ddict;
    209 }
    210 
    211 
    212 size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
    213 {
    214     if (ddict==NULL) return 0;   /* support free on NULL */
    215     {   ZSTD_customMem const cMem = ddict->cMem;
    216         ZSTD_customFree(ddict->dictBuffer, cMem);
    217         ZSTD_customFree(ddict, cMem);
    218         return 0;
    219     }
    220 }
    221 
    222 /*! ZSTD_estimateDDictSize() :
    223  *  Estimate amount of memory that will be needed to create a dictionary for decompression.
    224  *  Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
    225 size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
    226 {
    227     return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
    228 }
    229 
    230 size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
    231 {
    232     if (ddict==NULL) return 0;   /* support sizeof on NULL */
    233     return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
    234 }
    235 
    236 /*! ZSTD_getDictID_fromDDict() :
    237  *  Provides the dictID of the dictionary loaded into `ddict`.
    238  *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
    239  *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
    240 unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
    241 {
    242     if (ddict==NULL) return 0;
    243     return ddict->dictID;
    244 }
    245