1 /* 2 * Copyright (c) Meta Platforms, Inc. and affiliates. 3 * All rights reserved. 4 * 5 * This source code is licensed under both the BSD-style license (found in the 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 * in the COPYING file in the root directory of this source tree). 8 * You may select, at your option, one of the above-listed licenses. 9 */ 10 11 /* This header contains definitions 12 * that shall **only** be used by modules within lib/compress. 13 */ 14 15 #ifndef ZSTD_COMPRESS_H 16 #define ZSTD_COMPRESS_H 17 18 /*-************************************* 19 * Dependencies 20 ***************************************/ 21 #include "../common/zstd_internal.h" 22 #include "zstd_cwksp.h" 23 #ifdef ZSTD_MULTITHREAD 24 # include "zstdmt_compress.h" 25 #endif 26 #include "../common/bits.h" /* ZSTD_highbit32, ZSTD_NbCommonBytes */ 27 #include "zstd_preSplit.h" /* ZSTD_SLIPBLOCK_WORKSPACESIZE */ 28 29 /*-************************************* 30 * Constants 31 ***************************************/ 32 #define kSearchStrength 8 33 #define HASH_READ_SIZE 8 34 #define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted". 35 It could be confused for a real successor at index "1", if sorted as larger than its predecessor. 36 It's not a big deal though : candidate will just be sorted again. 37 Additionally, candidate position 1 will be lost. 38 But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. 39 The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table reuse with a different strategy. 40 This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ 41 42 43 /*-************************************* 44 * Context memory management 45 ***************************************/ 46 typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e; 47 typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage; 48 49 typedef struct ZSTD_prefixDict_s { 50 const void* dict; 51 size_t dictSize; 52 ZSTD_dictContentType_e dictContentType; 53 } ZSTD_prefixDict; 54 55 typedef struct { 56 void* dictBuffer; 57 void const* dict; 58 size_t dictSize; 59 ZSTD_dictContentType_e dictContentType; 60 ZSTD_CDict* cdict; 61 } ZSTD_localDict; 62 63 typedef struct { 64 HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)]; 65 HUF_repeat repeatMode; 66 } ZSTD_hufCTables_t; 67 68 typedef struct { 69 FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; 70 FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; 71 FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; 72 FSE_repeat offcode_repeatMode; 73 FSE_repeat matchlength_repeatMode; 74 FSE_repeat litlength_repeatMode; 75 } ZSTD_fseCTables_t; 76 77 typedef struct { 78 ZSTD_hufCTables_t huf; 79 ZSTD_fseCTables_t fse; 80 } ZSTD_entropyCTables_t; 81 82 /*********************************************** 83 * Sequences * 84 ***********************************************/ 85 typedef struct SeqDef_s { 86 U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */ 87 U16 litLength; 88 U16 mlBase; /* mlBase == matchLength - MINMATCH */ 89 } SeqDef; 90 91 /* Controls whether seqStore has a single "long" litLength or matchLength. See SeqStore_t. */ 92 typedef enum { 93 ZSTD_llt_none = 0, /* no longLengthType */ 94 ZSTD_llt_literalLength = 1, /* represents a long literal */ 95 ZSTD_llt_matchLength = 2 /* represents a long match */ 96 } ZSTD_longLengthType_e; 97 98 typedef struct { 99 SeqDef* sequencesStart; 100 SeqDef* sequences; /* ptr to end of sequences */ 101 BYTE* litStart; 102 BYTE* lit; /* ptr to end of literals */ 103 BYTE* llCode; 104 BYTE* mlCode; 105 BYTE* ofCode; 106 size_t maxNbSeq; 107 size_t maxNbLit; 108 109 /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength 110 * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment 111 * the existing value of the litLength or matchLength by 0x10000. 112 */ 113 ZSTD_longLengthType_e longLengthType; 114 U32 longLengthPos; /* Index of the sequence to apply long length modification to */ 115 } SeqStore_t; 116 117 typedef struct { 118 U32 litLength; 119 U32 matchLength; 120 } ZSTD_SequenceLength; 121 122 /** 123 * Returns the ZSTD_SequenceLength for the given sequences. It handles the decoding of long sequences 124 * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength. 125 */ 126 MEM_STATIC ZSTD_SequenceLength ZSTD_getSequenceLength(SeqStore_t const* seqStore, SeqDef const* seq) 127 { 128 ZSTD_SequenceLength seqLen; 129 seqLen.litLength = seq->litLength; 130 seqLen.matchLength = seq->mlBase + MINMATCH; 131 if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { 132 if (seqStore->longLengthType == ZSTD_llt_literalLength) { 133 seqLen.litLength += 0x10000; 134 } 135 if (seqStore->longLengthType == ZSTD_llt_matchLength) { 136 seqLen.matchLength += 0x10000; 137 } 138 } 139 return seqLen; 140 } 141 142 const SeqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ 143 int ZSTD_seqToCodes(const SeqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ 144 145 146 /*********************************************** 147 * Entropy buffer statistics structs and funcs * 148 ***********************************************/ 149 /** ZSTD_hufCTablesMetadata_t : 150 * Stores Literals Block Type for a super-block in hType, and 151 * huffman tree description in hufDesBuffer. 152 * hufDesSize refers to the size of huffman tree description in bytes. 153 * This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */ 154 typedef struct { 155 SymbolEncodingType_e hType; 156 BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE]; 157 size_t hufDesSize; 158 } ZSTD_hufCTablesMetadata_t; 159 160 /** ZSTD_fseCTablesMetadata_t : 161 * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and 162 * fse tables in fseTablesBuffer. 163 * fseTablesSize refers to the size of fse tables in bytes. 164 * This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */ 165 typedef struct { 166 SymbolEncodingType_e llType; 167 SymbolEncodingType_e ofType; 168 SymbolEncodingType_e mlType; 169 BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE]; 170 size_t fseTablesSize; 171 size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */ 172 } ZSTD_fseCTablesMetadata_t; 173 174 typedef struct { 175 ZSTD_hufCTablesMetadata_t hufMetadata; 176 ZSTD_fseCTablesMetadata_t fseMetadata; 177 } ZSTD_entropyCTablesMetadata_t; 178 179 /** ZSTD_buildBlockEntropyStats() : 180 * Builds entropy for the block. 181 * @return : 0 on success or error code */ 182 size_t ZSTD_buildBlockEntropyStats( 183 const SeqStore_t* seqStorePtr, 184 const ZSTD_entropyCTables_t* prevEntropy, 185 ZSTD_entropyCTables_t* nextEntropy, 186 const ZSTD_CCtx_params* cctxParams, 187 ZSTD_entropyCTablesMetadata_t* entropyMetadata, 188 void* workspace, size_t wkspSize); 189 190 /********************************* 191 * Compression internals structs * 192 *********************************/ 193 194 typedef struct { 195 U32 off; /* Offset sumtype code for the match, using ZSTD_storeSeq() format */ 196 U32 len; /* Raw length of match */ 197 } ZSTD_match_t; 198 199 typedef struct { 200 U32 offset; /* Offset of sequence */ 201 U32 litLength; /* Length of literals prior to match */ 202 U32 matchLength; /* Raw length of match */ 203 } rawSeq; 204 205 typedef struct { 206 rawSeq* seq; /* The start of the sequences */ 207 size_t pos; /* The index in seq where reading stopped. pos <= size. */ 208 size_t posInSequence; /* The position within the sequence at seq[pos] where reading 209 stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */ 210 size_t size; /* The number of sequences. <= capacity. */ 211 size_t capacity; /* The capacity starting from `seq` pointer */ 212 } RawSeqStore_t; 213 214 UNUSED_ATTR static const RawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0}; 215 216 typedef struct { 217 int price; /* price from beginning of segment to this position */ 218 U32 off; /* offset of previous match */ 219 U32 mlen; /* length of previous match */ 220 U32 litlen; /* nb of literals since previous match */ 221 U32 rep[ZSTD_REP_NUM]; /* offset history after previous match */ 222 } ZSTD_optimal_t; 223 224 typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e; 225 226 #define ZSTD_OPT_SIZE (ZSTD_OPT_NUM+3) 227 typedef struct { 228 /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */ 229 unsigned* litFreq; /* table of literals statistics, of size 256 */ 230 unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */ 231 unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */ 232 unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */ 233 ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_SIZE */ 234 ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_SIZE */ 235 236 U32 litSum; /* nb of literals */ 237 U32 litLengthSum; /* nb of litLength codes */ 238 U32 matchLengthSum; /* nb of matchLength codes */ 239 U32 offCodeSum; /* nb of offset codes */ 240 U32 litSumBasePrice; /* to compare to log2(litfreq) */ 241 U32 litLengthSumBasePrice; /* to compare to log2(llfreq) */ 242 U32 matchLengthSumBasePrice;/* to compare to log2(mlfreq) */ 243 U32 offCodeSumBasePrice; /* to compare to log2(offreq) */ 244 ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */ 245 const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ 246 ZSTD_ParamSwitch_e literalCompressionMode; 247 } optState_t; 248 249 typedef struct { 250 ZSTD_entropyCTables_t entropy; 251 U32 rep[ZSTD_REP_NUM]; 252 } ZSTD_compressedBlockState_t; 253 254 typedef struct { 255 BYTE const* nextSrc; /* next block here to continue on current prefix */ 256 BYTE const* base; /* All regular indexes relative to this position */ 257 BYTE const* dictBase; /* extDict indexes relative to this position */ 258 U32 dictLimit; /* below that point, need extDict */ 259 U32 lowLimit; /* below that point, no more valid data */ 260 U32 nbOverflowCorrections; /* Number of times overflow correction has run since 261 * ZSTD_window_init(). Useful for debugging coredumps 262 * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY. 263 */ 264 } ZSTD_window_t; 265 266 #define ZSTD_WINDOW_START_INDEX 2 267 268 typedef struct ZSTD_MatchState_t ZSTD_MatchState_t; 269 270 #define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */ 271 272 struct ZSTD_MatchState_t { 273 ZSTD_window_t window; /* State for window round buffer management */ 274 U32 loadedDictEnd; /* index of end of dictionary, within context's referential. 275 * When loadedDictEnd != 0, a dictionary is in use, and still valid. 276 * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance. 277 * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity(). 278 * When dict referential is copied into active context (i.e. not attached), 279 * loadedDictEnd == dictSize, since referential starts from zero. 280 */ 281 U32 nextToUpdate; /* index from which to continue table update */ 282 U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ 283 284 U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/ 285 BYTE* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */ 286 U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */ 287 U64 hashSalt; /* For row-based matchFinder: salts the hash for reuse of tag table */ 288 U32 hashSaltEntropy; /* For row-based matchFinder: collects entropy for salt generation */ 289 290 U32* hashTable; 291 U32* hashTable3; 292 U32* chainTable; 293 294 int forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */ 295 296 int dedicatedDictSearch; /* Indicates whether this matchState is using the 297 * dedicated dictionary search structure. 298 */ 299 optState_t opt; /* optimal parser state */ 300 const ZSTD_MatchState_t* dictMatchState; 301 ZSTD_compressionParameters cParams; 302 const RawSeqStore_t* ldmSeqStore; 303 304 /* Controls prefetching in some dictMatchState matchfinders. 305 * This behavior is controlled from the cctx ms. 306 * This parameter has no effect in the cdict ms. */ 307 int prefetchCDictTables; 308 309 /* When == 0, lazy match finders insert every position. 310 * When != 0, lazy match finders only insert positions they search. 311 * This allows them to skip much faster over incompressible data, 312 * at a small cost to compression ratio. 313 */ 314 int lazySkipping; 315 }; 316 317 typedef struct { 318 ZSTD_compressedBlockState_t* prevCBlock; 319 ZSTD_compressedBlockState_t* nextCBlock; 320 ZSTD_MatchState_t matchState; 321 } ZSTD_blockState_t; 322 323 typedef struct { 324 U32 offset; 325 U32 checksum; 326 } ldmEntry_t; 327 328 typedef struct { 329 BYTE const* split; 330 U32 hash; 331 U32 checksum; 332 ldmEntry_t* bucket; 333 } ldmMatchCandidate_t; 334 335 #define LDM_BATCH_SIZE 64 336 337 typedef struct { 338 ZSTD_window_t window; /* State for the window round buffer management */ 339 ldmEntry_t* hashTable; 340 U32 loadedDictEnd; 341 BYTE* bucketOffsets; /* Next position in bucket to insert entry */ 342 size_t splitIndices[LDM_BATCH_SIZE]; 343 ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE]; 344 } ldmState_t; 345 346 typedef struct { 347 ZSTD_ParamSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */ 348 U32 hashLog; /* Log size of hashTable */ 349 U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ 350 U32 minMatchLength; /* Minimum match length */ 351 U32 hashRateLog; /* Log number of entries to skip */ 352 U32 windowLog; /* Window log for the LDM */ 353 } ldmParams_t; 354 355 typedef struct { 356 int collectSequences; 357 ZSTD_Sequence* seqStart; 358 size_t seqIndex; 359 size_t maxSequences; 360 } SeqCollector; 361 362 struct ZSTD_CCtx_params_s { 363 ZSTD_format_e format; 364 ZSTD_compressionParameters cParams; 365 ZSTD_frameParameters fParams; 366 367 int compressionLevel; 368 int forceWindow; /* force back-references to respect limit of 369 * 1<<wLog, even for dictionary */ 370 size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize. 371 * No target when targetCBlockSize == 0. 372 * There is no guarantee on compressed block size */ 373 int srcSizeHint; /* User's best guess of source size. 374 * Hint is not valid when srcSizeHint == 0. 375 * There is no guarantee that hint is close to actual source size */ 376 377 ZSTD_dictAttachPref_e attachDictPref; 378 ZSTD_ParamSwitch_e literalCompressionMode; 379 380 /* Multithreading: used to pass parameters to mtctx */ 381 int nbWorkers; 382 size_t jobSize; 383 int overlapLog; 384 int rsyncable; 385 386 /* Long distance matching parameters */ 387 ldmParams_t ldmParams; 388 389 /* Dedicated dict search algorithm trigger */ 390 int enableDedicatedDictSearch; 391 392 /* Input/output buffer modes */ 393 ZSTD_bufferMode_e inBufferMode; 394 ZSTD_bufferMode_e outBufferMode; 395 396 /* Sequence compression API */ 397 ZSTD_SequenceFormat_e blockDelimiters; 398 int validateSequences; 399 400 /* Block splitting 401 * @postBlockSplitter executes split analysis after sequences are produced, 402 * it's more accurate but consumes more resources. 403 * @preBlockSplitter_level splits before knowing sequences, 404 * it's more approximative but also cheaper. 405 * Valid @preBlockSplitter_level values range from 0 to 6 (included). 406 * 0 means auto, 1 means do not split, 407 * then levels are sorted in increasing cpu budget, from 2 (fastest) to 6 (slowest). 408 * Highest @preBlockSplitter_level combines well with @postBlockSplitter. 409 */ 410 ZSTD_ParamSwitch_e postBlockSplitter; 411 int preBlockSplitter_level; 412 413 /* Adjust the max block size*/ 414 size_t maxBlockSize; 415 416 /* Param for deciding whether to use row-based matchfinder */ 417 ZSTD_ParamSwitch_e useRowMatchFinder; 418 419 /* Always load a dictionary in ext-dict mode (not prefix mode)? */ 420 int deterministicRefPrefix; 421 422 /* Internal use, for createCCtxParams() and freeCCtxParams() only */ 423 ZSTD_customMem customMem; 424 425 /* Controls prefetching in some dictMatchState matchfinders */ 426 ZSTD_ParamSwitch_e prefetchCDictTables; 427 428 /* Controls whether zstd will fall back to an internal matchfinder 429 * if the external matchfinder returns an error code. */ 430 int enableMatchFinderFallback; 431 432 /* Parameters for the external sequence producer API. 433 * Users set these parameters through ZSTD_registerSequenceProducer(). 434 * It is not possible to set these parameters individually through the public API. */ 435 void* extSeqProdState; 436 ZSTD_sequenceProducer_F extSeqProdFunc; 437 438 /* Controls repcode search in external sequence parsing */ 439 ZSTD_ParamSwitch_e searchForExternalRepcodes; 440 }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ 441 442 #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2)) 443 #define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE) 444 #define TMP_WORKSPACE_SIZE (MAX(ENTROPY_WORKSPACE_SIZE, ZSTD_SLIPBLOCK_WORKSPACESIZE)) 445 446 /** 447 * Indicates whether this compression proceeds directly from user-provided 448 * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or 449 * whether the context needs to buffer the input/output (ZSTDb_buffered). 450 */ 451 typedef enum { 452 ZSTDb_not_buffered, 453 ZSTDb_buffered 454 } ZSTD_buffered_policy_e; 455 456 /** 457 * Struct that contains all elements of block splitter that should be allocated 458 * in a wksp. 459 */ 460 #define ZSTD_MAX_NB_BLOCK_SPLITS 196 461 typedef struct { 462 SeqStore_t fullSeqStoreChunk; 463 SeqStore_t firstHalfSeqStore; 464 SeqStore_t secondHalfSeqStore; 465 SeqStore_t currSeqStore; 466 SeqStore_t nextSeqStore; 467 468 U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS]; 469 ZSTD_entropyCTablesMetadata_t entropyMetadata; 470 } ZSTD_blockSplitCtx; 471 472 struct ZSTD_CCtx_s { 473 ZSTD_compressionStage_e stage; 474 int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */ 475 int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ 476 ZSTD_CCtx_params requestedParams; 477 ZSTD_CCtx_params appliedParams; 478 ZSTD_CCtx_params simpleApiParams; /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */ 479 U32 dictID; 480 size_t dictContentSize; 481 482 ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */ 483 size_t blockSizeMax; 484 unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */ 485 unsigned long long consumedSrcSize; 486 unsigned long long producedCSize; 487 XXH64_state_t xxhState; 488 ZSTD_customMem customMem; 489 ZSTD_threadPool* pool; 490 size_t staticSize; 491 SeqCollector seqCollector; 492 int isFirstBlock; 493 int initialized; 494 495 SeqStore_t seqStore; /* sequences storage ptrs */ 496 ldmState_t ldmState; /* long distance matching state */ 497 rawSeq* ldmSequences; /* Storage for the ldm output sequences */ 498 size_t maxNbLdmSequences; 499 RawSeqStore_t externSeqStore; /* Mutable reference to external sequences */ 500 ZSTD_blockState_t blockState; 501 void* tmpWorkspace; /* used as substitute of stack space - must be aligned for S64 type */ 502 size_t tmpWkspSize; 503 504 /* Whether we are streaming or not */ 505 ZSTD_buffered_policy_e bufferedPolicy; 506 507 /* streaming */ 508 char* inBuff; 509 size_t inBuffSize; 510 size_t inToCompress; 511 size_t inBuffPos; 512 size_t inBuffTarget; 513 char* outBuff; 514 size_t outBuffSize; 515 size_t outBuffContentSize; 516 size_t outBuffFlushedSize; 517 ZSTD_cStreamStage streamStage; 518 U32 frameEnded; 519 520 /* Stable in/out buffer verification */ 521 ZSTD_inBuffer expectedInBuffer; 522 size_t stableIn_notConsumed; /* nb bytes within stable input buffer that are said to be consumed but are not */ 523 size_t expectedOutBufferSize; 524 525 /* Dictionary */ 526 ZSTD_localDict localDict; 527 const ZSTD_CDict* cdict; 528 ZSTD_prefixDict prefixDict; /* single-usage dictionary */ 529 530 /* Multi-threading */ 531 #ifdef ZSTD_MULTITHREAD 532 ZSTDMT_CCtx* mtctx; 533 #endif 534 535 /* Tracing */ 536 #if ZSTD_TRACE 537 ZSTD_TraceCtx traceCtx; 538 #endif 539 540 /* Workspace for block splitter */ 541 ZSTD_blockSplitCtx blockSplitCtx; 542 543 /* Buffer for output from external sequence producer */ 544 ZSTD_Sequence* extSeqBuf; 545 size_t extSeqBufCapacity; 546 }; 547 548 typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; 549 typedef enum { ZSTD_tfp_forCCtx, ZSTD_tfp_forCDict } ZSTD_tableFillPurpose_e; 550 551 typedef enum { 552 ZSTD_noDict = 0, 553 ZSTD_extDict = 1, 554 ZSTD_dictMatchState = 2, 555 ZSTD_dedicatedDictSearch = 3 556 } ZSTD_dictMode_e; 557 558 typedef enum { 559 ZSTD_cpm_noAttachDict = 0, /* Compression with ZSTD_noDict or ZSTD_extDict. 560 * In this mode we use both the srcSize and the dictSize 561 * when selecting and adjusting parameters. 562 */ 563 ZSTD_cpm_attachDict = 1, /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch. 564 * In this mode we only take the srcSize into account when selecting 565 * and adjusting parameters. 566 */ 567 ZSTD_cpm_createCDict = 2, /* Creating a CDict. 568 * In this mode we take both the source size and the dictionary size 569 * into account when selecting and adjusting the parameters. 570 */ 571 ZSTD_cpm_unknown = 3 /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams. 572 * We don't know what these parameters are for. We default to the legacy 573 * behavior of taking both the source size and the dict size into account 574 * when selecting and adjusting parameters. 575 */ 576 } ZSTD_CParamMode_e; 577 578 typedef size_t (*ZSTD_BlockCompressor_f) ( 579 ZSTD_MatchState_t* bs, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 580 void const* src, size_t srcSize); 581 ZSTD_BlockCompressor_f ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_ParamSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode); 582 583 584 MEM_STATIC U32 ZSTD_LLcode(U32 litLength) 585 { 586 static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7, 587 8, 9, 10, 11, 12, 13, 14, 15, 588 16, 16, 17, 17, 18, 18, 19, 19, 589 20, 20, 20, 20, 21, 21, 21, 21, 590 22, 22, 22, 22, 22, 22, 22, 22, 591 23, 23, 23, 23, 23, 23, 23, 23, 592 24, 24, 24, 24, 24, 24, 24, 24, 593 24, 24, 24, 24, 24, 24, 24, 24 }; 594 static const U32 LL_deltaCode = 19; 595 return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; 596 } 597 598 /* ZSTD_MLcode() : 599 * note : mlBase = matchLength - MINMATCH; 600 * because it's the format it's stored in seqStore->sequences */ 601 MEM_STATIC U32 ZSTD_MLcode(U32 mlBase) 602 { 603 static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 604 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 605 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, 606 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 607 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 608 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 609 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 610 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; 611 static const U32 ML_deltaCode = 36; 612 return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; 613 } 614 615 /* ZSTD_cParam_withinBounds: 616 * @return 1 if value is within cParam bounds, 617 * 0 otherwise */ 618 MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) 619 { 620 ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); 621 if (ZSTD_isError(bounds.error)) return 0; 622 if (value < bounds.lowerBound) return 0; 623 if (value > bounds.upperBound) return 0; 624 return 1; 625 } 626 627 /* ZSTD_selectAddr: 628 * @return index >= lowLimit ? candidate : backup, 629 * tries to force branchless codegen. */ 630 MEM_STATIC const BYTE* 631 ZSTD_selectAddr(U32 index, U32 lowLimit, const BYTE* candidate, const BYTE* backup) 632 { 633 #if defined(__GNUC__) && defined(__x86_64__) 634 __asm__ ( 635 "cmp %1, %2\n" 636 "cmova %3, %0\n" 637 : "+r"(candidate) 638 : "r"(index), "r"(lowLimit), "r"(backup) 639 ); 640 return candidate; 641 #else 642 return index >= lowLimit ? candidate : backup; 643 #endif 644 } 645 646 /* ZSTD_noCompressBlock() : 647 * Writes uncompressed block to dst buffer from given src. 648 * Returns the size of the block */ 649 MEM_STATIC size_t 650 ZSTD_noCompressBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) 651 { 652 U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); 653 DEBUGLOG(5, "ZSTD_noCompressBlock (srcSize=%zu, dstCapacity=%zu)", srcSize, dstCapacity); 654 RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity, 655 dstSize_tooSmall, "dst buf too small for uncompressed block"); 656 MEM_writeLE24(dst, cBlockHeader24); 657 ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); 658 return ZSTD_blockHeaderSize + srcSize; 659 } 660 661 MEM_STATIC size_t 662 ZSTD_rleCompressBlock(void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock) 663 { 664 BYTE* const op = (BYTE*)dst; 665 U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3); 666 RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, ""); 667 MEM_writeLE24(op, cBlockHeader); 668 op[3] = src; 669 return 4; 670 } 671 672 673 /* ZSTD_minGain() : 674 * minimum compression required 675 * to generate a compress block or a compressed literals section. 676 * note : use same formula for both situations */ 677 MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) 678 { 679 U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6; 680 ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); 681 assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, (int)strat)); 682 return (srcSize >> minlog) + 2; 683 } 684 685 MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams) 686 { 687 switch (cctxParams->literalCompressionMode) { 688 case ZSTD_ps_enable: 689 return 0; 690 case ZSTD_ps_disable: 691 return 1; 692 default: 693 assert(0 /* impossible: pre-validated */); 694 ZSTD_FALLTHROUGH; 695 case ZSTD_ps_auto: 696 return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); 697 } 698 } 699 700 /*! ZSTD_safecopyLiterals() : 701 * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w. 702 * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single 703 * large copies. 704 */ 705 static void 706 ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) 707 { 708 assert(iend > ilimit_w); 709 if (ip <= ilimit_w) { 710 ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap); 711 op += ilimit_w - ip; 712 ip = ilimit_w; 713 } 714 while (ip < iend) *op++ = *ip++; 715 } 716 717 718 #define REPCODE1_TO_OFFBASE REPCODE_TO_OFFBASE(1) 719 #define REPCODE2_TO_OFFBASE REPCODE_TO_OFFBASE(2) 720 #define REPCODE3_TO_OFFBASE REPCODE_TO_OFFBASE(3) 721 #define REPCODE_TO_OFFBASE(r) (assert((r)>=1), assert((r)<=ZSTD_REP_NUM), (r)) /* accepts IDs 1,2,3 */ 722 #define OFFSET_TO_OFFBASE(o) (assert((o)>0), o + ZSTD_REP_NUM) 723 #define OFFBASE_IS_OFFSET(o) ((o) > ZSTD_REP_NUM) 724 #define OFFBASE_IS_REPCODE(o) ( 1 <= (o) && (o) <= ZSTD_REP_NUM) 725 #define OFFBASE_TO_OFFSET(o) (assert(OFFBASE_IS_OFFSET(o)), (o) - ZSTD_REP_NUM) 726 #define OFFBASE_TO_REPCODE(o) (assert(OFFBASE_IS_REPCODE(o)), (o)) /* returns ID 1,2,3 */ 727 728 /*! ZSTD_storeSeqOnly() : 729 * Store a sequence (litlen, litPtr, offBase and matchLength) into SeqStore_t. 730 * Literals themselves are not copied, but @litPtr is updated. 731 * @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE(). 732 * @matchLength : must be >= MINMATCH 733 */ 734 HINT_INLINE UNUSED_ATTR void 735 ZSTD_storeSeqOnly(SeqStore_t* seqStorePtr, 736 size_t litLength, 737 U32 offBase, 738 size_t matchLength) 739 { 740 assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); 741 742 /* literal Length */ 743 assert(litLength <= ZSTD_BLOCKSIZE_MAX); 744 if (UNLIKELY(litLength>0xFFFF)) { 745 assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ 746 seqStorePtr->longLengthType = ZSTD_llt_literalLength; 747 seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); 748 } 749 seqStorePtr->sequences[0].litLength = (U16)litLength; 750 751 /* match offset */ 752 seqStorePtr->sequences[0].offBase = offBase; 753 754 /* match Length */ 755 assert(matchLength <= ZSTD_BLOCKSIZE_MAX); 756 assert(matchLength >= MINMATCH); 757 { size_t const mlBase = matchLength - MINMATCH; 758 if (UNLIKELY(mlBase>0xFFFF)) { 759 assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ 760 seqStorePtr->longLengthType = ZSTD_llt_matchLength; 761 seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); 762 } 763 seqStorePtr->sequences[0].mlBase = (U16)mlBase; 764 } 765 766 seqStorePtr->sequences++; 767 } 768 769 /*! ZSTD_storeSeq() : 770 * Store a sequence (litlen, litPtr, offBase and matchLength) into SeqStore_t. 771 * @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE(). 772 * @matchLength : must be >= MINMATCH 773 * Allowed to over-read literals up to litLimit. 774 */ 775 HINT_INLINE UNUSED_ATTR void 776 ZSTD_storeSeq(SeqStore_t* seqStorePtr, 777 size_t litLength, const BYTE* literals, const BYTE* litLimit, 778 U32 offBase, 779 size_t matchLength) 780 { 781 BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH; 782 BYTE const* const litEnd = literals + litLength; 783 #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6) 784 static const BYTE* g_start = NULL; 785 if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ 786 { U32 const pos = (U32)((const BYTE*)literals - g_start); 787 DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offBase%7u", 788 pos, (U32)litLength, (U32)matchLength, (U32)offBase); 789 } 790 #endif 791 assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); 792 /* copy Literals */ 793 assert(seqStorePtr->maxNbLit <= 128 KB); 794 assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit); 795 assert(literals + litLength <= litLimit); 796 if (litEnd <= litLimit_w) { 797 /* Common case we can use wildcopy. 798 * First copy 16 bytes, because literals are likely short. 799 */ 800 ZSTD_STATIC_ASSERT(WILDCOPY_OVERLENGTH >= 16); 801 ZSTD_copy16(seqStorePtr->lit, literals); 802 if (litLength > 16) { 803 ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap); 804 } 805 } else { 806 ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w); 807 } 808 seqStorePtr->lit += litLength; 809 810 ZSTD_storeSeqOnly(seqStorePtr, litLength, offBase, matchLength); 811 } 812 813 /* ZSTD_updateRep() : 814 * updates in-place @rep (array of repeat offsets) 815 * @offBase : sum-type, using numeric representation of ZSTD_storeSeq() 816 */ 817 MEM_STATIC void 818 ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0) 819 { 820 if (OFFBASE_IS_OFFSET(offBase)) { /* full offset */ 821 rep[2] = rep[1]; 822 rep[1] = rep[0]; 823 rep[0] = OFFBASE_TO_OFFSET(offBase); 824 } else { /* repcode */ 825 U32 const repCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0; 826 if (repCode > 0) { /* note : if repCode==0, no change */ 827 U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; 828 rep[2] = (repCode >= 2) ? rep[1] : rep[2]; 829 rep[1] = rep[0]; 830 rep[0] = currentOffset; 831 } else { /* repCode == 0 */ 832 /* nothing to do */ 833 } 834 } 835 } 836 837 typedef struct repcodes_s { 838 U32 rep[3]; 839 } Repcodes_t; 840 841 MEM_STATIC Repcodes_t 842 ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0) 843 { 844 Repcodes_t newReps; 845 ZSTD_memcpy(&newReps, rep, sizeof(newReps)); 846 ZSTD_updateRep(newReps.rep, offBase, ll0); 847 return newReps; 848 } 849 850 851 /*-************************************* 852 * Match length counter 853 ***************************************/ 854 MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) 855 { 856 const BYTE* const pStart = pIn; 857 const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1); 858 859 if (pIn < pInLoopLimit) { 860 { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); 861 if (diff) return ZSTD_NbCommonBytes(diff); } 862 pIn+=sizeof(size_t); pMatch+=sizeof(size_t); 863 while (pIn < pInLoopLimit) { 864 size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); 865 if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; } 866 pIn += ZSTD_NbCommonBytes(diff); 867 return (size_t)(pIn - pStart); 868 } } 869 if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } 870 if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; } 871 if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++; 872 return (size_t)(pIn - pStart); 873 } 874 875 /** ZSTD_count_2segments() : 876 * can count match length with `ip` & `match` in 2 different segments. 877 * convention : on reaching mEnd, match count continue starting from iStart 878 */ 879 MEM_STATIC size_t 880 ZSTD_count_2segments(const BYTE* ip, const BYTE* match, 881 const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart) 882 { 883 const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd); 884 size_t const matchLength = ZSTD_count(ip, match, vEnd); 885 if (match + matchLength != mEnd) return matchLength; 886 DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength); 887 DEBUGLOG(7, "distance from match beginning to end dictionary = %i", (int)(mEnd - match)); 888 DEBUGLOG(7, "distance from current pos to end buffer = %i", (int)(iEnd - ip)); 889 DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart); 890 DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd)); 891 return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd); 892 } 893 894 895 /*-************************************* 896 * Hashes 897 ***************************************/ 898 static const U32 prime3bytes = 506832829U; 899 static U32 ZSTD_hash3(U32 u, U32 h, U32 s) { assert(h <= 32); return (((u << (32-24)) * prime3bytes) ^ s) >> (32-h) ; } 900 MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h, 0); } /* only in zstd_opt.h */ 901 MEM_STATIC size_t ZSTD_hash3PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash3(MEM_readLE32(ptr), h, s); } 902 903 static const U32 prime4bytes = 2654435761U; 904 static U32 ZSTD_hash4(U32 u, U32 h, U32 s) { assert(h <= 32); return ((u * prime4bytes) ^ s) >> (32-h) ; } 905 static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h, 0); } 906 static size_t ZSTD_hash4PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash4(MEM_readLE32(ptr), h, s); } 907 908 static const U64 prime5bytes = 889523592379ULL; 909 static size_t ZSTD_hash5(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-40)) * prime5bytes) ^ s) >> (64-h)) ; } 910 static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h, 0); } 911 static size_t ZSTD_hash5PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash5(MEM_readLE64(p), h, s); } 912 913 static const U64 prime6bytes = 227718039650203ULL; 914 static size_t ZSTD_hash6(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-48)) * prime6bytes) ^ s) >> (64-h)) ; } 915 static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h, 0); } 916 static size_t ZSTD_hash6PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash6(MEM_readLE64(p), h, s); } 917 918 static const U64 prime7bytes = 58295818150454627ULL; 919 static size_t ZSTD_hash7(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-56)) * prime7bytes) ^ s) >> (64-h)) ; } 920 static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h, 0); } 921 static size_t ZSTD_hash7PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash7(MEM_readLE64(p), h, s); } 922 923 static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; 924 static size_t ZSTD_hash8(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u) * prime8bytes) ^ s) >> (64-h)) ; } 925 static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h, 0); } 926 static size_t ZSTD_hash8PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash8(MEM_readLE64(p), h, s); } 927 928 929 MEM_STATIC FORCE_INLINE_ATTR 930 size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) 931 { 932 /* Although some of these hashes do support hBits up to 64, some do not. 933 * To be on the safe side, always avoid hBits > 32. */ 934 assert(hBits <= 32); 935 936 switch(mls) 937 { 938 default: 939 case 4: return ZSTD_hash4Ptr(p, hBits); 940 case 5: return ZSTD_hash5Ptr(p, hBits); 941 case 6: return ZSTD_hash6Ptr(p, hBits); 942 case 7: return ZSTD_hash7Ptr(p, hBits); 943 case 8: return ZSTD_hash8Ptr(p, hBits); 944 } 945 } 946 947 MEM_STATIC FORCE_INLINE_ATTR 948 size_t ZSTD_hashPtrSalted(const void* p, U32 hBits, U32 mls, const U64 hashSalt) { 949 /* Although some of these hashes do support hBits up to 64, some do not. 950 * To be on the safe side, always avoid hBits > 32. */ 951 assert(hBits <= 32); 952 953 switch(mls) 954 { 955 default: 956 case 4: return ZSTD_hash4PtrS(p, hBits, (U32)hashSalt); 957 case 5: return ZSTD_hash5PtrS(p, hBits, hashSalt); 958 case 6: return ZSTD_hash6PtrS(p, hBits, hashSalt); 959 case 7: return ZSTD_hash7PtrS(p, hBits, hashSalt); 960 case 8: return ZSTD_hash8PtrS(p, hBits, hashSalt); 961 } 962 } 963 964 965 /** ZSTD_ipow() : 966 * Return base^exponent. 967 */ 968 static U64 ZSTD_ipow(U64 base, U64 exponent) 969 { 970 U64 power = 1; 971 while (exponent) { 972 if (exponent & 1) power *= base; 973 exponent >>= 1; 974 base *= base; 975 } 976 return power; 977 } 978 979 #define ZSTD_ROLL_HASH_CHAR_OFFSET 10 980 981 /** ZSTD_rollingHash_append() : 982 * Add the buffer to the hash value. 983 */ 984 static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size) 985 { 986 BYTE const* istart = (BYTE const*)buf; 987 size_t pos; 988 for (pos = 0; pos < size; ++pos) { 989 hash *= prime8bytes; 990 hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET; 991 } 992 return hash; 993 } 994 995 /** ZSTD_rollingHash_compute() : 996 * Compute the rolling hash value of the buffer. 997 */ 998 MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size) 999 { 1000 return ZSTD_rollingHash_append(0, buf, size); 1001 } 1002 1003 /** ZSTD_rollingHash_primePower() : 1004 * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash 1005 * over a window of length bytes. 1006 */ 1007 MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length) 1008 { 1009 return ZSTD_ipow(prime8bytes, length - 1); 1010 } 1011 1012 /** ZSTD_rollingHash_rotate() : 1013 * Rotate the rolling hash by one byte. 1014 */ 1015 MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower) 1016 { 1017 hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower; 1018 hash *= prime8bytes; 1019 hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET; 1020 return hash; 1021 } 1022 1023 /*-************************************* 1024 * Round buffer management 1025 ***************************************/ 1026 /* Max @current value allowed: 1027 * In 32-bit mode: we want to avoid crossing the 2 GB limit, 1028 * reducing risks of side effects in case of signed operations on indexes. 1029 * In 64-bit mode: we want to ensure that adding the maximum job size (512 MB) 1030 * doesn't overflow U32 index capacity (4 GB) */ 1031 #define ZSTD_CURRENT_MAX (MEM_64bits() ? 3500U MB : 2000U MB) 1032 /* Maximum chunk size before overflow correction needs to be called again */ 1033 #define ZSTD_CHUNKSIZE_MAX \ 1034 ( ((U32)-1) /* Maximum ending current index */ \ 1035 - ZSTD_CURRENT_MAX) /* Maximum beginning lowLimit */ 1036 1037 /** 1038 * ZSTD_window_clear(): 1039 * Clears the window containing the history by simply setting it to empty. 1040 */ 1041 MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window) 1042 { 1043 size_t const endT = (size_t)(window->nextSrc - window->base); 1044 U32 const end = (U32)endT; 1045 1046 window->lowLimit = end; 1047 window->dictLimit = end; 1048 } 1049 1050 MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window) 1051 { 1052 return window.dictLimit == ZSTD_WINDOW_START_INDEX && 1053 window.lowLimit == ZSTD_WINDOW_START_INDEX && 1054 (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX; 1055 } 1056 1057 /** 1058 * ZSTD_window_hasExtDict(): 1059 * Returns non-zero if the window has a non-empty extDict. 1060 */ 1061 MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window) 1062 { 1063 return window.lowLimit < window.dictLimit; 1064 } 1065 1066 /** 1067 * ZSTD_matchState_dictMode(): 1068 * Inspects the provided matchState and figures out what dictMode should be 1069 * passed to the compressor. 1070 */ 1071 MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_MatchState_t *ms) 1072 { 1073 return ZSTD_window_hasExtDict(ms->window) ? 1074 ZSTD_extDict : 1075 ms->dictMatchState != NULL ? 1076 (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) : 1077 ZSTD_noDict; 1078 } 1079 1080 /* Defining this macro to non-zero tells zstd to run the overflow correction 1081 * code much more frequently. This is very inefficient, and should only be 1082 * used for tests and fuzzers. 1083 */ 1084 #ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1085 # ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION 1086 # define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1 1087 # else 1088 # define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0 1089 # endif 1090 #endif 1091 1092 /** 1093 * ZSTD_window_canOverflowCorrect(): 1094 * Returns non-zero if the indices are large enough for overflow correction 1095 * to work correctly without impacting compression ratio. 1096 */ 1097 MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window, 1098 U32 cycleLog, 1099 U32 maxDist, 1100 U32 loadedDictEnd, 1101 void const* src) 1102 { 1103 U32 const cycleSize = 1u << cycleLog; 1104 U32 const curr = (U32)((BYTE const*)src - window.base); 1105 U32 const minIndexToOverflowCorrect = cycleSize 1106 + MAX(maxDist, cycleSize) 1107 + ZSTD_WINDOW_START_INDEX; 1108 1109 /* Adjust the min index to backoff the overflow correction frequency, 1110 * so we don't waste too much CPU in overflow correction. If this 1111 * computation overflows we don't really care, we just need to make 1112 * sure it is at least minIndexToOverflowCorrect. 1113 */ 1114 U32 const adjustment = window.nbOverflowCorrections + 1; 1115 U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment, 1116 minIndexToOverflowCorrect); 1117 U32 const indexLargeEnough = curr > adjustedIndex; 1118 1119 /* Only overflow correct early if the dictionary is invalidated already, 1120 * so we don't hurt compression ratio. 1121 */ 1122 U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd; 1123 1124 return indexLargeEnough && dictionaryInvalidated; 1125 } 1126 1127 /** 1128 * ZSTD_window_needOverflowCorrection(): 1129 * Returns non-zero if the indices are getting too large and need overflow 1130 * protection. 1131 */ 1132 MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, 1133 U32 cycleLog, 1134 U32 maxDist, 1135 U32 loadedDictEnd, 1136 void const* src, 1137 void const* srcEnd) 1138 { 1139 U32 const curr = (U32)((BYTE const*)srcEnd - window.base); 1140 if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) { 1141 if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) { 1142 return 1; 1143 } 1144 } 1145 return curr > ZSTD_CURRENT_MAX; 1146 } 1147 1148 /** 1149 * ZSTD_window_correctOverflow(): 1150 * Reduces the indices to protect from index overflow. 1151 * Returns the correction made to the indices, which must be applied to every 1152 * stored index. 1153 * 1154 * The least significant cycleLog bits of the indices must remain the same, 1155 * which may be 0. Every index up to maxDist in the past must be valid. 1156 */ 1157 MEM_STATIC 1158 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 1159 U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, 1160 U32 maxDist, void const* src) 1161 { 1162 /* preemptive overflow correction: 1163 * 1. correction is large enough: 1164 * lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog 1165 * 1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog 1166 * 1167 * current - newCurrent 1168 * > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog) 1169 * > (3<<29) - (1<<chainLog) 1170 * > (3<<29) - (1<<30) (NOTE: chainLog <= 30) 1171 * > 1<<29 1172 * 1173 * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow: 1174 * After correction, current is less than (1<<chainLog + 1<<windowLog). 1175 * In 64-bit mode we are safe, because we have 64-bit ptrdiff_t. 1176 * In 32-bit mode we are safe, because (chainLog <= 29), so 1177 * ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32. 1178 * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32: 1179 * windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32. 1180 */ 1181 U32 const cycleSize = 1u << cycleLog; 1182 U32 const cycleMask = cycleSize - 1; 1183 U32 const curr = (U32)((BYTE const*)src - window->base); 1184 U32 const currentCycle = curr & cycleMask; 1185 /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */ 1186 U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX 1187 ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX) 1188 : 0; 1189 U32 const newCurrent = currentCycle 1190 + currentCycleCorrection 1191 + MAX(maxDist, cycleSize); 1192 U32 const correction = curr - newCurrent; 1193 /* maxDist must be a power of two so that: 1194 * (newCurrent & cycleMask) == (curr & cycleMask) 1195 * This is required to not corrupt the chains / binary tree. 1196 */ 1197 assert((maxDist & (maxDist - 1)) == 0); 1198 assert((curr & cycleMask) == (newCurrent & cycleMask)); 1199 assert(curr > newCurrent); 1200 if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) { 1201 /* Loose bound, should be around 1<<29 (see above) */ 1202 assert(correction > 1<<28); 1203 } 1204 1205 window->base += correction; 1206 window->dictBase += correction; 1207 if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) { 1208 window->lowLimit = ZSTD_WINDOW_START_INDEX; 1209 } else { 1210 window->lowLimit -= correction; 1211 } 1212 if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) { 1213 window->dictLimit = ZSTD_WINDOW_START_INDEX; 1214 } else { 1215 window->dictLimit -= correction; 1216 } 1217 1218 /* Ensure we can still reference the full window. */ 1219 assert(newCurrent >= maxDist); 1220 assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX); 1221 /* Ensure that lowLimit and dictLimit didn't underflow. */ 1222 assert(window->lowLimit <= newCurrent); 1223 assert(window->dictLimit <= newCurrent); 1224 1225 ++window->nbOverflowCorrections; 1226 1227 DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction, 1228 window->lowLimit); 1229 return correction; 1230 } 1231 1232 /** 1233 * ZSTD_window_enforceMaxDist(): 1234 * Updates lowLimit so that: 1235 * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd 1236 * 1237 * It ensures index is valid as long as index >= lowLimit. 1238 * This must be called before a block compression call. 1239 * 1240 * loadedDictEnd is only defined if a dictionary is in use for current compression. 1241 * As the name implies, loadedDictEnd represents the index at end of dictionary. 1242 * The value lies within context's referential, it can be directly compared to blockEndIdx. 1243 * 1244 * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0. 1245 * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit. 1246 * This is because dictionaries are allowed to be referenced fully 1247 * as long as the last byte of the dictionary is in the window. 1248 * Once input has progressed beyond window size, dictionary cannot be referenced anymore. 1249 * 1250 * In normal dict mode, the dictionary lies between lowLimit and dictLimit. 1251 * In dictMatchState mode, lowLimit and dictLimit are the same, 1252 * and the dictionary is below them. 1253 * forceWindow and dictMatchState are therefore incompatible. 1254 */ 1255 MEM_STATIC void 1256 ZSTD_window_enforceMaxDist(ZSTD_window_t* window, 1257 const void* blockEnd, 1258 U32 maxDist, 1259 U32* loadedDictEndPtr, 1260 const ZSTD_MatchState_t** dictMatchStatePtr) 1261 { 1262 U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); 1263 U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; 1264 DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", 1265 (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); 1266 1267 /* - When there is no dictionary : loadedDictEnd == 0. 1268 In which case, the test (blockEndIdx > maxDist) is merely to avoid 1269 overflowing next operation `newLowLimit = blockEndIdx - maxDist`. 1270 - When there is a standard dictionary : 1271 Index referential is copied from the dictionary, 1272 which means it starts from 0. 1273 In which case, loadedDictEnd == dictSize, 1274 and it makes sense to compare `blockEndIdx > maxDist + dictSize` 1275 since `blockEndIdx` also starts from zero. 1276 - When there is an attached dictionary : 1277 loadedDictEnd is expressed within the referential of the context, 1278 so it can be directly compared against blockEndIdx. 1279 */ 1280 if (blockEndIdx > maxDist + loadedDictEnd) { 1281 U32 const newLowLimit = blockEndIdx - maxDist; 1282 if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit; 1283 if (window->dictLimit < window->lowLimit) { 1284 DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u", 1285 (unsigned)window->dictLimit, (unsigned)window->lowLimit); 1286 window->dictLimit = window->lowLimit; 1287 } 1288 /* On reaching window size, dictionaries are invalidated */ 1289 if (loadedDictEndPtr) *loadedDictEndPtr = 0; 1290 if (dictMatchStatePtr) *dictMatchStatePtr = NULL; 1291 } 1292 } 1293 1294 /* Similar to ZSTD_window_enforceMaxDist(), 1295 * but only invalidates dictionary 1296 * when input progresses beyond window size. 1297 * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL) 1298 * loadedDictEnd uses same referential as window->base 1299 * maxDist is the window size */ 1300 MEM_STATIC void 1301 ZSTD_checkDictValidity(const ZSTD_window_t* window, 1302 const void* blockEnd, 1303 U32 maxDist, 1304 U32* loadedDictEndPtr, 1305 const ZSTD_MatchState_t** dictMatchStatePtr) 1306 { 1307 assert(loadedDictEndPtr != NULL); 1308 assert(dictMatchStatePtr != NULL); 1309 { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); 1310 U32 const loadedDictEnd = *loadedDictEndPtr; 1311 DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", 1312 (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); 1313 assert(blockEndIdx >= loadedDictEnd); 1314 1315 if (blockEndIdx > loadedDictEnd + maxDist || loadedDictEnd != window->dictLimit) { 1316 /* On reaching window size, dictionaries are invalidated. 1317 * For simplification, if window size is reached anywhere within next block, 1318 * the dictionary is invalidated for the full block. 1319 * 1320 * We also have to invalidate the dictionary if ZSTD_window_update() has detected 1321 * non-contiguous segments, which means that loadedDictEnd != window->dictLimit. 1322 * loadedDictEnd may be 0, if forceWindow is true, but in that case we never use 1323 * dictMatchState, so setting it to NULL is not a problem. 1324 */ 1325 DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)"); 1326 *loadedDictEndPtr = 0; 1327 *dictMatchStatePtr = NULL; 1328 } else { 1329 if (*loadedDictEndPtr != 0) { 1330 DEBUGLOG(6, "dictionary considered valid for current block"); 1331 } } } 1332 } 1333 1334 MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) { 1335 ZSTD_memset(window, 0, sizeof(*window)); 1336 window->base = (BYTE const*)" "; 1337 window->dictBase = (BYTE const*)" "; 1338 ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */ 1339 window->dictLimit = ZSTD_WINDOW_START_INDEX; /* start from >0, so that 1st position is valid */ 1340 window->lowLimit = ZSTD_WINDOW_START_INDEX; /* it ensures first and later CCtx usages compress the same */ 1341 window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX; /* see issue #1241 */ 1342 window->nbOverflowCorrections = 0; 1343 } 1344 1345 /** 1346 * ZSTD_window_update(): 1347 * Updates the window by appending [src, src + srcSize) to the window. 1348 * If it is not contiguous, the current prefix becomes the extDict, and we 1349 * forget about the extDict. Handles overlap of the prefix and extDict. 1350 * Returns non-zero if the segment is contiguous. 1351 */ 1352 MEM_STATIC 1353 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 1354 U32 ZSTD_window_update(ZSTD_window_t* window, 1355 const void* src, size_t srcSize, 1356 int forceNonContiguous) 1357 { 1358 BYTE const* const ip = (BYTE const*)src; 1359 U32 contiguous = 1; 1360 DEBUGLOG(5, "ZSTD_window_update"); 1361 if (srcSize == 0) 1362 return contiguous; 1363 assert(window->base != NULL); 1364 assert(window->dictBase != NULL); 1365 /* Check if blocks follow each other */ 1366 if (src != window->nextSrc || forceNonContiguous) { 1367 /* not contiguous */ 1368 size_t const distanceFromBase = (size_t)(window->nextSrc - window->base); 1369 DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit); 1370 window->lowLimit = window->dictLimit; 1371 assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */ 1372 window->dictLimit = (U32)distanceFromBase; 1373 window->dictBase = window->base; 1374 window->base = ip - distanceFromBase; 1375 /* ms->nextToUpdate = window->dictLimit; */ 1376 if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */ 1377 contiguous = 0; 1378 } 1379 window->nextSrc = ip + srcSize; 1380 /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */ 1381 if ( (ip+srcSize > window->dictBase + window->lowLimit) 1382 & (ip < window->dictBase + window->dictLimit)) { 1383 size_t const highInputIdx = (size_t)((ip + srcSize) - window->dictBase); 1384 U32 const lowLimitMax = (highInputIdx > (size_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx; 1385 assert(highInputIdx < UINT_MAX); 1386 window->lowLimit = lowLimitMax; 1387 DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit); 1388 } 1389 return contiguous; 1390 } 1391 1392 /** 1393 * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix. 1394 */ 1395 MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_MatchState_t* ms, U32 curr, unsigned windowLog) 1396 { 1397 U32 const maxDistance = 1U << windowLog; 1398 U32 const lowestValid = ms->window.lowLimit; 1399 U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; 1400 U32 const isDictionary = (ms->loadedDictEnd != 0); 1401 /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary 1402 * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't 1403 * valid for the entire block. So this check is sufficient to find the lowest valid match index. 1404 */ 1405 U32 const matchLowest = isDictionary ? lowestValid : withinWindow; 1406 return matchLowest; 1407 } 1408 1409 /** 1410 * Returns the lowest allowed match index in the prefix. 1411 */ 1412 MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_MatchState_t* ms, U32 curr, unsigned windowLog) 1413 { 1414 U32 const maxDistance = 1U << windowLog; 1415 U32 const lowestValid = ms->window.dictLimit; 1416 U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; 1417 U32 const isDictionary = (ms->loadedDictEnd != 0); 1418 /* When computing the lowest prefix index we need to take the dictionary into account to handle 1419 * the edge case where the dictionary and the source are contiguous in memory. 1420 */ 1421 U32 const matchLowest = isDictionary ? lowestValid : withinWindow; 1422 return matchLowest; 1423 } 1424 1425 /* index_safety_check: 1426 * intentional underflow : ensure repIndex isn't overlapping dict + prefix 1427 * @return 1 if values are not overlapping, 1428 * 0 otherwise */ 1429 MEM_STATIC int ZSTD_index_overlap_check(const U32 prefixLowestIndex, const U32 repIndex) { 1430 return ((U32)((prefixLowestIndex-1) - repIndex) >= 3); 1431 } 1432 1433 1434 /* debug functions */ 1435 #if (DEBUGLEVEL>=2) 1436 1437 MEM_STATIC double ZSTD_fWeight(U32 rawStat) 1438 { 1439 U32 const fp_accuracy = 8; 1440 U32 const fp_multiplier = (1 << fp_accuracy); 1441 U32 const newStat = rawStat + 1; 1442 U32 const hb = ZSTD_highbit32(newStat); 1443 U32 const BWeight = hb * fp_multiplier; 1444 U32 const FWeight = (newStat << fp_accuracy) >> hb; 1445 U32 const weight = BWeight + FWeight; 1446 assert(hb + fp_accuracy < 31); 1447 return (double)weight / fp_multiplier; 1448 } 1449 1450 /* display a table content, 1451 * listing each element, its frequency, and its predicted bit cost */ 1452 MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) 1453 { 1454 unsigned u, sum; 1455 for (u=0, sum=0; u<=max; u++) sum += table[u]; 1456 DEBUGLOG(2, "total nb elts: %u", sum); 1457 for (u=0; u<=max; u++) { 1458 DEBUGLOG(2, "%2u: %5u (%.2f)", 1459 u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) ); 1460 } 1461 } 1462 1463 #endif 1464 1465 /* Short Cache */ 1466 1467 /* Normally, zstd matchfinders follow this flow: 1468 * 1. Compute hash at ip 1469 * 2. Load index from hashTable[hash] 1470 * 3. Check if *ip == *(base + index) 1471 * In dictionary compression, loading *(base + index) is often an L2 or even L3 miss. 1472 * 1473 * Short cache is an optimization which allows us to avoid step 3 most of the time 1474 * when the data doesn't actually match. With short cache, the flow becomes: 1475 * 1. Compute (hash, currentTag) at ip. currentTag is an 8-bit independent hash at ip. 1476 * 2. Load (index, matchTag) from hashTable[hash]. See ZSTD_writeTaggedIndex to understand how this works. 1477 * 3. Only if currentTag == matchTag, check *ip == *(base + index). Otherwise, continue. 1478 * 1479 * Currently, short cache is only implemented in CDict hashtables. Thus, its use is limited to 1480 * dictMatchState matchfinders. 1481 */ 1482 #define ZSTD_SHORT_CACHE_TAG_BITS 8 1483 #define ZSTD_SHORT_CACHE_TAG_MASK ((1u << ZSTD_SHORT_CACHE_TAG_BITS) - 1) 1484 1485 /* Helper function for ZSTD_fillHashTable and ZSTD_fillDoubleHashTable. 1486 * Unpacks hashAndTag into (hash, tag), then packs (index, tag) into hashTable[hash]. */ 1487 MEM_STATIC void ZSTD_writeTaggedIndex(U32* const hashTable, size_t hashAndTag, U32 index) { 1488 size_t const hash = hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS; 1489 U32 const tag = (U32)(hashAndTag & ZSTD_SHORT_CACHE_TAG_MASK); 1490 assert(index >> (32 - ZSTD_SHORT_CACHE_TAG_BITS) == 0); 1491 hashTable[hash] = (index << ZSTD_SHORT_CACHE_TAG_BITS) | tag; 1492 } 1493 1494 /* Helper function for short cache matchfinders. 1495 * Unpacks tag1 and tag2 from lower bits of packedTag1 and packedTag2, then checks if the tags match. */ 1496 MEM_STATIC int ZSTD_comparePackedTags(size_t packedTag1, size_t packedTag2) { 1497 U32 const tag1 = packedTag1 & ZSTD_SHORT_CACHE_TAG_MASK; 1498 U32 const tag2 = packedTag2 & ZSTD_SHORT_CACHE_TAG_MASK; 1499 return tag1 == tag2; 1500 } 1501 1502 /* =============================================================== 1503 * Shared internal declarations 1504 * These prototypes may be called from sources not in lib/compress 1505 * =============================================================== */ 1506 1507 /* ZSTD_loadCEntropy() : 1508 * dict : must point at beginning of a valid zstd dictionary. 1509 * return : size of dictionary header (size of magic number + dict ID + entropy tables) 1510 * assumptions : magic number supposed already checked 1511 * and dictSize >= 8 */ 1512 size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, 1513 const void* const dict, size_t dictSize); 1514 1515 void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs); 1516 1517 typedef struct { 1518 U32 idx; /* Index in array of ZSTD_Sequence */ 1519 U32 posInSequence; /* Position within sequence at idx */ 1520 size_t posInSrc; /* Number of bytes given by sequences provided so far */ 1521 } ZSTD_SequencePosition; 1522 1523 /* for benchmark */ 1524 size_t ZSTD_convertBlockSequences(ZSTD_CCtx* cctx, 1525 const ZSTD_Sequence* const inSeqs, size_t nbSequences, 1526 int const repcodeResolution); 1527 1528 typedef struct { 1529 size_t nbSequences; 1530 size_t blockSize; 1531 size_t litSize; 1532 } BlockSummary; 1533 1534 BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs); 1535 1536 /* ============================================================== 1537 * Private declarations 1538 * These prototypes shall only be called from within lib/compress 1539 * ============================================================== */ 1540 1541 /* ZSTD_getCParamsFromCCtxParams() : 1542 * cParams are built depending on compressionLevel, src size hints, 1543 * LDM and manually set compression parameters. 1544 * Note: srcSizeHint == 0 means 0! 1545 */ 1546 ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( 1547 const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode); 1548 1549 /*! ZSTD_initCStream_internal() : 1550 * Private use only. Init streaming operation. 1551 * expects params to be valid. 1552 * must receive dict, or cdict, or none, but not both. 1553 * @return : 0, or an error code */ 1554 size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, 1555 const void* dict, size_t dictSize, 1556 const ZSTD_CDict* cdict, 1557 const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize); 1558 1559 void ZSTD_resetSeqStore(SeqStore_t* ssPtr); 1560 1561 /*! ZSTD_getCParamsFromCDict() : 1562 * as the name implies */ 1563 ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict); 1564 1565 /* ZSTD_compressBegin_advanced_internal() : 1566 * Private use only. To be called from zstdmt_compress.c. */ 1567 size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, 1568 const void* dict, size_t dictSize, 1569 ZSTD_dictContentType_e dictContentType, 1570 ZSTD_dictTableLoadMethod_e dtlm, 1571 const ZSTD_CDict* cdict, 1572 const ZSTD_CCtx_params* params, 1573 unsigned long long pledgedSrcSize); 1574 1575 /* ZSTD_compress_advanced_internal() : 1576 * Private use only. To be called from zstdmt_compress.c. */ 1577 size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx, 1578 void* dst, size_t dstCapacity, 1579 const void* src, size_t srcSize, 1580 const void* dict,size_t dictSize, 1581 const ZSTD_CCtx_params* params); 1582 1583 1584 /* ZSTD_writeLastEmptyBlock() : 1585 * output an empty Block with end-of-frame mark to complete a frame 1586 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) 1587 * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize) 1588 */ 1589 size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity); 1590 1591 1592 /* ZSTD_referenceExternalSequences() : 1593 * Must be called before starting a compression operation. 1594 * seqs must parse a prefix of the source. 1595 * This cannot be used when long range matching is enabled. 1596 * Zstd will use these sequences, and pass the literals to a secondary block 1597 * compressor. 1598 * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory 1599 * access and data corruption. 1600 */ 1601 void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq); 1602 1603 /** ZSTD_cycleLog() : 1604 * condition for correct operation : hashLog > 1 */ 1605 U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat); 1606 1607 /** ZSTD_CCtx_trace() : 1608 * Trace the end of a compression call. 1609 */ 1610 void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize); 1611 1612 /* Returns 1 if an external sequence producer is registered, otherwise returns 0. */ 1613 MEM_STATIC int ZSTD_hasExtSeqProd(const ZSTD_CCtx_params* params) { 1614 return params->extSeqProdFunc != NULL; 1615 } 1616 1617 /* =============================================================== 1618 * Deprecated definitions that are still used internally to avoid 1619 * deprecation warnings. These functions are exactly equivalent to 1620 * their public variants, but avoid the deprecation warnings. 1621 * =============================================================== */ 1622 1623 size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); 1624 1625 size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx, 1626 void* dst, size_t dstCapacity, 1627 const void* src, size_t srcSize); 1628 1629 size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx, 1630 void* dst, size_t dstCapacity, 1631 const void* src, size_t srcSize); 1632 1633 size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); 1634 1635 1636 #endif /* ZSTD_COMPRESS_H */ 1637