Home | History | Annotate | Line # | Download | only in compress
      1 /*
      2  * Copyright (c) Meta Platforms, Inc. and affiliates.
      3  * All rights reserved.
      4  *
      5  * This source code is licensed under both the BSD-style license (found in the
      6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
      7  * in the COPYING file in the root directory of this source tree).
      8  * You may select, at your option, one of the above-listed licenses.
      9  */
     10 
     11 #include "zstd_ldm.h"
     12 
     13 #include "../common/debug.h"
     14 #include "../common/xxhash.h"
     15 #include "zstd_fast.h"          /* ZSTD_fillHashTable() */
     16 #include "zstd_double_fast.h"   /* ZSTD_fillDoubleHashTable() */
     17 #include "zstd_ldm_geartab.h"
     18 
     19 #define LDM_BUCKET_SIZE_LOG 4
     20 #define LDM_MIN_MATCH_LENGTH 64
     21 #define LDM_HASH_RLOG 7
     22 
     23 typedef struct {
     24     U64 rolling;
     25     U64 stopMask;
     26 } ldmRollingHashState_t;
     27 
     28 /** ZSTD_ldm_gear_init():
     29  *
     30  * Initializes the rolling hash state such that it will honor the
     31  * settings in params. */
     32 static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params)
     33 {
     34     unsigned maxBitsInMask = MIN(params->minMatchLength, 64);
     35     unsigned hashRateLog = params->hashRateLog;
     36 
     37     state->rolling = ~(U32)0;
     38 
     39     /* The choice of the splitting criterion is subject to two conditions:
     40      *   1. it has to trigger on average every 2^(hashRateLog) bytes;
     41      *   2. ideally, it has to depend on a window of minMatchLength bytes.
     42      *
     43      * In the gear hash algorithm, bit n depends on the last n bytes;
     44      * so in order to obtain a good quality splitting criterion it is
     45      * preferable to use bits with high weight.
     46      *
     47      * To match condition 1 we use a mask with hashRateLog bits set
     48      * and, because of the previous remark, we make sure these bits
     49      * have the highest possible weight while still respecting
     50      * condition 2.
     51      */
     52     if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) {
     53         state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog);
     54     } else {
     55         /* In this degenerate case we simply honor the hash rate. */
     56         state->stopMask = ((U64)1 << hashRateLog) - 1;
     57     }
     58 }
     59 
     60 /** ZSTD_ldm_gear_reset()
     61  * Feeds [data, data + minMatchLength) into the hash without registering any
     62  * splits. This effectively resets the hash state. This is used when skipping
     63  * over data, either at the beginning of a block, or skipping sections.
     64  */
     65 static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state,
     66                                 BYTE const* data, size_t minMatchLength)
     67 {
     68     U64 hash = state->rolling;
     69     size_t n = 0;
     70 
     71 #define GEAR_ITER_ONCE() do {                                  \
     72         hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
     73         n += 1;                                                \
     74     } while (0)
     75     while (n + 3 < minMatchLength) {
     76         GEAR_ITER_ONCE();
     77         GEAR_ITER_ONCE();
     78         GEAR_ITER_ONCE();
     79         GEAR_ITER_ONCE();
     80     }
     81     while (n < minMatchLength) {
     82         GEAR_ITER_ONCE();
     83     }
     84 #undef GEAR_ITER_ONCE
     85 }
     86 
     87 /** ZSTD_ldm_gear_feed():
     88  *
     89  * Registers in the splits array all the split points found in the first
     90  * size bytes following the data pointer. This function terminates when
     91  * either all the data has been processed or LDM_BATCH_SIZE splits are
     92  * present in the splits array.
     93  *
     94  * Precondition: The splits array must not be full.
     95  * Returns: The number of bytes processed. */
     96 static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state,
     97                                  BYTE const* data, size_t size,
     98                                  size_t* splits, unsigned* numSplits)
     99 {
    100     size_t n;
    101     U64 hash, mask;
    102 
    103     hash = state->rolling;
    104     mask = state->stopMask;
    105     n = 0;
    106 
    107 #define GEAR_ITER_ONCE() do { \
    108         hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
    109         n += 1; \
    110         if (UNLIKELY((hash & mask) == 0)) { \
    111             splits[*numSplits] = n; \
    112             *numSplits += 1; \
    113             if (*numSplits == LDM_BATCH_SIZE) \
    114                 goto done; \
    115         } \
    116     } while (0)
    117 
    118     while (n + 3 < size) {
    119         GEAR_ITER_ONCE();
    120         GEAR_ITER_ONCE();
    121         GEAR_ITER_ONCE();
    122         GEAR_ITER_ONCE();
    123     }
    124     while (n < size) {
    125         GEAR_ITER_ONCE();
    126     }
    127 
    128 #undef GEAR_ITER_ONCE
    129 
    130 done:
    131     state->rolling = hash;
    132     return n;
    133 }
    134 
    135 void ZSTD_ldm_adjustParameters(ldmParams_t* params,
    136                         const ZSTD_compressionParameters* cParams)
    137 {
    138     params->windowLog = cParams->windowLog;
    139     ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
    140     DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
    141     if (params->hashRateLog == 0) {
    142         if (params->hashLog > 0) {
    143             /* if params->hashLog is set, derive hashRateLog from it */
    144             assert(params->hashLog <= ZSTD_HASHLOG_MAX);
    145             if (params->windowLog > params->hashLog) {
    146                 params->hashRateLog = params->windowLog - params->hashLog;
    147             }
    148         } else {
    149             assert(1 <= (int)cParams->strategy && (int)cParams->strategy <= 9);
    150             /* mapping from [fast, rate7] to [btultra2, rate4] */
    151             params->hashRateLog = 7 - (cParams->strategy/3);
    152         }
    153     }
    154     if (params->hashLog == 0) {
    155         params->hashLog = BOUNDED(ZSTD_HASHLOG_MIN, params->windowLog - params->hashRateLog, ZSTD_HASHLOG_MAX);
    156     }
    157     if (params->minMatchLength == 0) {
    158         params->minMatchLength = LDM_MIN_MATCH_LENGTH;
    159         if (cParams->strategy >= ZSTD_btultra)
    160             params->minMatchLength /= 2;
    161     }
    162     if (params->bucketSizeLog==0) {
    163         assert(1 <= (int)cParams->strategy && (int)cParams->strategy <= 9);
    164         params->bucketSizeLog = BOUNDED(LDM_BUCKET_SIZE_LOG, (U32)cParams->strategy, ZSTD_LDM_BUCKETSIZELOG_MAX);
    165     }
    166     params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
    167 }
    168 
    169 size_t ZSTD_ldm_getTableSize(ldmParams_t params)
    170 {
    171     size_t const ldmHSize = ((size_t)1) << params.hashLog;
    172     size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
    173     size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
    174     size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
    175                            + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
    176     return params.enableLdm == ZSTD_ps_enable ? totalSize : 0;
    177 }
    178 
    179 size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
    180 {
    181     return params.enableLdm == ZSTD_ps_enable ? (maxChunkSize / params.minMatchLength) : 0;
    182 }
    183 
    184 /** ZSTD_ldm_getBucket() :
    185  *  Returns a pointer to the start of the bucket associated with hash. */
    186 static ldmEntry_t* ZSTD_ldm_getBucket(
    187         const ldmState_t* ldmState, size_t hash, U32 const bucketSizeLog)
    188 {
    189     return ldmState->hashTable + (hash << bucketSizeLog);
    190 }
    191 
    192 /** ZSTD_ldm_insertEntry() :
    193  *  Insert the entry with corresponding hash into the hash table */
    194 static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
    195                                  size_t const hash, const ldmEntry_t entry,
    196                                  U32 const bucketSizeLog)
    197 {
    198     BYTE* const pOffset = ldmState->bucketOffsets + hash;
    199     unsigned const offset = *pOffset;
    200 
    201     *(ZSTD_ldm_getBucket(ldmState, hash, bucketSizeLog) + offset) = entry;
    202     *pOffset = (BYTE)((offset + 1) & ((1u << bucketSizeLog) - 1));
    203 
    204 }
    205 
    206 /** ZSTD_ldm_countBackwardsMatch() :
    207  *  Returns the number of bytes that match backwards before pIn and pMatch.
    208  *
    209  *  We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
    210 static size_t ZSTD_ldm_countBackwardsMatch(
    211             const BYTE* pIn, const BYTE* pAnchor,
    212             const BYTE* pMatch, const BYTE* pMatchBase)
    213 {
    214     size_t matchLength = 0;
    215     while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) {
    216         pIn--;
    217         pMatch--;
    218         matchLength++;
    219     }
    220     return matchLength;
    221 }
    222 
    223 /** ZSTD_ldm_countBackwardsMatch_2segments() :
    224  *  Returns the number of bytes that match backwards from pMatch,
    225  *  even with the backwards match spanning 2 different segments.
    226  *
    227  *  On reaching `pMatchBase`, start counting from mEnd */
    228 static size_t ZSTD_ldm_countBackwardsMatch_2segments(
    229                     const BYTE* pIn, const BYTE* pAnchor,
    230                     const BYTE* pMatch, const BYTE* pMatchBase,
    231                     const BYTE* pExtDictStart, const BYTE* pExtDictEnd)
    232 {
    233     size_t matchLength = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase);
    234     if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) {
    235         /* If backwards match is entirely in the extDict or prefix, immediately return */
    236         return matchLength;
    237     }
    238     DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength);
    239     matchLength += ZSTD_ldm_countBackwardsMatch(pIn - matchLength, pAnchor, pExtDictEnd, pExtDictStart);
    240     DEBUGLOG(7, "final backwards match length = %zu", matchLength);
    241     return matchLength;
    242 }
    243 
    244 /** ZSTD_ldm_fillFastTables() :
    245  *
    246  *  Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
    247  *  This is similar to ZSTD_loadDictionaryContent.
    248  *
    249  *  The tables for the other strategies are filled within their
    250  *  block compressors. */
    251 static size_t ZSTD_ldm_fillFastTables(ZSTD_MatchState_t* ms,
    252                                       void const* end)
    253 {
    254     const BYTE* const iend = (const BYTE*)end;
    255 
    256     switch(ms->cParams.strategy)
    257     {
    258     case ZSTD_fast:
    259         ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
    260         break;
    261 
    262     case ZSTD_dfast:
    263 #ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
    264         ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
    265 #else
    266         assert(0); /* shouldn't be called: cparams should've been adjusted. */
    267 #endif
    268         break;
    269 
    270     case ZSTD_greedy:
    271     case ZSTD_lazy:
    272     case ZSTD_lazy2:
    273     case ZSTD_btlazy2:
    274     case ZSTD_btopt:
    275     case ZSTD_btultra:
    276     case ZSTD_btultra2:
    277         break;
    278     default:
    279         assert(0);  /* not possible : not a valid strategy id */
    280     }
    281 
    282     return 0;
    283 }
    284 
    285 void ZSTD_ldm_fillHashTable(
    286             ldmState_t* ldmState, const BYTE* ip,
    287             const BYTE* iend, ldmParams_t const* params)
    288 {
    289     U32 const minMatchLength = params->minMatchLength;
    290     U32 const bucketSizeLog = params->bucketSizeLog;
    291     U32 const hBits = params->hashLog - bucketSizeLog;
    292     BYTE const* const base = ldmState->window.base;
    293     BYTE const* const istart = ip;
    294     ldmRollingHashState_t hashState;
    295     size_t* const splits = ldmState->splitIndices;
    296     unsigned numSplits;
    297 
    298     DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
    299 
    300     ZSTD_ldm_gear_init(&hashState, params);
    301     while (ip < iend) {
    302         size_t hashed;
    303         unsigned n;
    304 
    305         numSplits = 0;
    306         hashed = ZSTD_ldm_gear_feed(&hashState, ip, (size_t)(iend - ip), splits, &numSplits);
    307 
    308         for (n = 0; n < numSplits; n++) {
    309             if (ip + splits[n] >= istart + minMatchLength) {
    310                 BYTE const* const split = ip + splits[n] - minMatchLength;
    311                 U64 const xxhash = XXH64(split, minMatchLength, 0);
    312                 U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
    313                 ldmEntry_t entry;
    314 
    315                 entry.offset = (U32)(split - base);
    316                 entry.checksum = (U32)(xxhash >> 32);
    317                 ZSTD_ldm_insertEntry(ldmState, hash, entry, params->bucketSizeLog);
    318             }
    319         }
    320 
    321         ip += hashed;
    322     }
    323 }
    324 
    325 
    326 /** ZSTD_ldm_limitTableUpdate() :
    327  *
    328  *  Sets cctx->nextToUpdate to a position corresponding closer to anchor
    329  *  if it is far way
    330  *  (after a long match, only update tables a limited amount). */
    331 static void ZSTD_ldm_limitTableUpdate(ZSTD_MatchState_t* ms, const BYTE* anchor)
    332 {
    333     U32 const curr = (U32)(anchor - ms->window.base);
    334     if (curr > ms->nextToUpdate + 1024) {
    335         ms->nextToUpdate =
    336             curr - MIN(512, curr - ms->nextToUpdate - 1024);
    337     }
    338 }
    339 
    340 static
    341 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
    342 size_t ZSTD_ldm_generateSequences_internal(
    343         ldmState_t* ldmState, RawSeqStore_t* rawSeqStore,
    344         ldmParams_t const* params, void const* src, size_t srcSize)
    345 {
    346     /* LDM parameters */
    347     int const extDict = ZSTD_window_hasExtDict(ldmState->window);
    348     U32 const minMatchLength = params->minMatchLength;
    349     U32 const entsPerBucket = 1U << params->bucketSizeLog;
    350     U32 const hBits = params->hashLog - params->bucketSizeLog;
    351     /* Prefix and extDict parameters */
    352     U32 const dictLimit = ldmState->window.dictLimit;
    353     U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
    354     BYTE const* const base = ldmState->window.base;
    355     BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
    356     BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
    357     BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
    358     BYTE const* const lowPrefixPtr = base + dictLimit;
    359     /* Input bounds */
    360     BYTE const* const istart = (BYTE const*)src;
    361     BYTE const* const iend = istart + srcSize;
    362     BYTE const* const ilimit = iend - HASH_READ_SIZE;
    363     /* Input positions */
    364     BYTE const* anchor = istart;
    365     BYTE const* ip = istart;
    366     /* Rolling hash state */
    367     ldmRollingHashState_t hashState;
    368     /* Arrays for staged-processing */
    369     size_t* const splits = ldmState->splitIndices;
    370     ldmMatchCandidate_t* const candidates = ldmState->matchCandidates;
    371     unsigned numSplits;
    372 
    373     if (srcSize < minMatchLength)
    374         return iend - anchor;
    375 
    376     /* Initialize the rolling hash state with the first minMatchLength bytes */
    377     ZSTD_ldm_gear_init(&hashState, params);
    378     ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength);
    379     ip += minMatchLength;
    380 
    381     while (ip < ilimit) {
    382         size_t hashed;
    383         unsigned n;
    384 
    385         numSplits = 0;
    386         hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip,
    387                                     splits, &numSplits);
    388 
    389         for (n = 0; n < numSplits; n++) {
    390             BYTE const* const split = ip + splits[n] - minMatchLength;
    391             U64 const xxhash = XXH64(split, minMatchLength, 0);
    392             U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
    393 
    394             candidates[n].split = split;
    395             candidates[n].hash = hash;
    396             candidates[n].checksum = (U32)(xxhash >> 32);
    397             candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, params->bucketSizeLog);
    398             PREFETCH_L1(candidates[n].bucket);
    399         }
    400 
    401         for (n = 0; n < numSplits; n++) {
    402             size_t forwardMatchLength = 0, backwardMatchLength = 0,
    403                    bestMatchLength = 0, mLength;
    404             U32 offset;
    405             BYTE const* const split = candidates[n].split;
    406             U32 const checksum = candidates[n].checksum;
    407             U32 const hash = candidates[n].hash;
    408             ldmEntry_t* const bucket = candidates[n].bucket;
    409             ldmEntry_t const* cur;
    410             ldmEntry_t const* bestEntry = NULL;
    411             ldmEntry_t newEntry;
    412 
    413             newEntry.offset = (U32)(split - base);
    414             newEntry.checksum = checksum;
    415 
    416             /* If a split point would generate a sequence overlapping with
    417              * the previous one, we merely register it in the hash table and
    418              * move on */
    419             if (split < anchor) {
    420                 ZSTD_ldm_insertEntry(ldmState, hash, newEntry, params->bucketSizeLog);
    421                 continue;
    422             }
    423 
    424             for (cur = bucket; cur < bucket + entsPerBucket; cur++) {
    425                 size_t curForwardMatchLength, curBackwardMatchLength,
    426                        curTotalMatchLength;
    427                 if (cur->checksum != checksum || cur->offset <= lowestIndex) {
    428                     continue;
    429                 }
    430                 if (extDict) {
    431                     BYTE const* const curMatchBase =
    432                         cur->offset < dictLimit ? dictBase : base;
    433                     BYTE const* const pMatch = curMatchBase + cur->offset;
    434                     BYTE const* const matchEnd =
    435                         cur->offset < dictLimit ? dictEnd : iend;
    436                     BYTE const* const lowMatchPtr =
    437                         cur->offset < dictLimit ? dictStart : lowPrefixPtr;
    438                     curForwardMatchLength =
    439                         ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr);
    440                     if (curForwardMatchLength < minMatchLength) {
    441                         continue;
    442                     }
    443                     curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments(
    444                             split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd);
    445                 } else { /* !extDict */
    446                     BYTE const* const pMatch = base + cur->offset;
    447                     curForwardMatchLength = ZSTD_count(split, pMatch, iend);
    448                     if (curForwardMatchLength < minMatchLength) {
    449                         continue;
    450                     }
    451                     curBackwardMatchLength =
    452                         ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr);
    453                 }
    454                 curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength;
    455 
    456                 if (curTotalMatchLength > bestMatchLength) {
    457                     bestMatchLength = curTotalMatchLength;
    458                     forwardMatchLength = curForwardMatchLength;
    459                     backwardMatchLength = curBackwardMatchLength;
    460                     bestEntry = cur;
    461                 }
    462             }
    463 
    464             /* No match found -- insert an entry into the hash table
    465              * and process the next candidate match */
    466             if (bestEntry == NULL) {
    467                 ZSTD_ldm_insertEntry(ldmState, hash, newEntry, params->bucketSizeLog);
    468                 continue;
    469             }
    470 
    471             /* Match found */
    472             offset = (U32)(split - base) - bestEntry->offset;
    473             mLength = forwardMatchLength + backwardMatchLength;
    474             {
    475                 rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
    476 
    477                 /* Out of sequence storage */
    478                 if (rawSeqStore->size == rawSeqStore->capacity)
    479                     return ERROR(dstSize_tooSmall);
    480                 seq->litLength = (U32)(split - backwardMatchLength - anchor);
    481                 seq->matchLength = (U32)mLength;
    482                 seq->offset = offset;
    483                 rawSeqStore->size++;
    484             }
    485 
    486             /* Insert the current entry into the hash table --- it must be
    487              * done after the previous block to avoid clobbering bestEntry */
    488             ZSTD_ldm_insertEntry(ldmState, hash, newEntry, params->bucketSizeLog);
    489 
    490             anchor = split + forwardMatchLength;
    491 
    492             /* If we find a match that ends after the data that we've hashed
    493              * then we have a repeating, overlapping, pattern. E.g. all zeros.
    494              * If one repetition of the pattern matches our `stopMask` then all
    495              * repetitions will. We don't need to insert them all into out table,
    496              * only the first one. So skip over overlapping matches.
    497              * This is a major speed boost (20x) for compressing a single byte
    498              * repeated, when that byte ends up in the table.
    499              */
    500             if (anchor > ip + hashed) {
    501                 ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength);
    502                 /* Continue the outer loop at anchor (ip + hashed == anchor). */
    503                 ip = anchor - hashed;
    504                 break;
    505             }
    506         }
    507 
    508         ip += hashed;
    509     }
    510 
    511     return iend - anchor;
    512 }
    513 
    514 /*! ZSTD_ldm_reduceTable() :
    515  *  reduce table indexes by `reducerValue` */
    516 static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
    517                                  U32 const reducerValue)
    518 {
    519     U32 u;
    520     for (u = 0; u < size; u++) {
    521         if (table[u].offset < reducerValue) table[u].offset = 0;
    522         else table[u].offset -= reducerValue;
    523     }
    524 }
    525 
    526 size_t ZSTD_ldm_generateSequences(
    527         ldmState_t* ldmState, RawSeqStore_t* sequences,
    528         ldmParams_t const* params, void const* src, size_t srcSize)
    529 {
    530     U32 const maxDist = 1U << params->windowLog;
    531     BYTE const* const istart = (BYTE const*)src;
    532     BYTE const* const iend = istart + srcSize;
    533     size_t const kMaxChunkSize = 1 << 20;
    534     size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
    535     size_t chunk;
    536     size_t leftoverSize = 0;
    537 
    538     assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
    539     /* Check that ZSTD_window_update() has been called for this chunk prior
    540      * to passing it to this function.
    541      */
    542     assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
    543     /* The input could be very large (in zstdmt), so it must be broken up into
    544      * chunks to enforce the maximum distance and handle overflow correction.
    545      */
    546     assert(sequences->pos <= sequences->size);
    547     assert(sequences->size <= sequences->capacity);
    548     for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
    549         BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
    550         size_t const remaining = (size_t)(iend - chunkStart);
    551         BYTE const *const chunkEnd =
    552             (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
    553         size_t const chunkSize = chunkEnd - chunkStart;
    554         size_t newLeftoverSize;
    555         size_t const prevSize = sequences->size;
    556 
    557         assert(chunkStart < iend);
    558         /* 1. Perform overflow correction if necessary. */
    559         if (ZSTD_window_needOverflowCorrection(ldmState->window, 0, maxDist, ldmState->loadedDictEnd, chunkStart, chunkEnd)) {
    560             U32 const ldmHSize = 1U << params->hashLog;
    561             U32 const correction = ZSTD_window_correctOverflow(
    562                 &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
    563             ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
    564             /* invalidate dictionaries on overflow correction */
    565             ldmState->loadedDictEnd = 0;
    566         }
    567         /* 2. We enforce the maximum offset allowed.
    568          *
    569          * kMaxChunkSize should be small enough that we don't lose too much of
    570          * the window through early invalidation.
    571          * TODO: * Test the chunk size.
    572          *       * Try invalidation after the sequence generation and test the
    573          *         offset against maxDist directly.
    574          *
    575          * NOTE: Because of dictionaries + sequence splitting we MUST make sure
    576          * that any offset used is valid at the END of the sequence, since it may
    577          * be split into two sequences. This condition holds when using
    578          * ZSTD_window_enforceMaxDist(), but if we move to checking offsets
    579          * against maxDist directly, we'll have to carefully handle that case.
    580          */
    581         ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
    582         /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
    583         newLeftoverSize = ZSTD_ldm_generateSequences_internal(
    584             ldmState, sequences, params, chunkStart, chunkSize);
    585         if (ZSTD_isError(newLeftoverSize))
    586             return newLeftoverSize;
    587         /* 4. We add the leftover literals from previous iterations to the first
    588          *    newly generated sequence, or add the `newLeftoverSize` if none are
    589          *    generated.
    590          */
    591         /* Prepend the leftover literals from the last call */
    592         if (prevSize < sequences->size) {
    593             sequences->seq[prevSize].litLength += (U32)leftoverSize;
    594             leftoverSize = newLeftoverSize;
    595         } else {
    596             assert(newLeftoverSize == chunkSize);
    597             leftoverSize += chunkSize;
    598         }
    599     }
    600     return 0;
    601 }
    602 
    603 void
    604 ZSTD_ldm_skipSequences(RawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch)
    605 {
    606     while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
    607         rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
    608         if (srcSize <= seq->litLength) {
    609             /* Skip past srcSize literals */
    610             seq->litLength -= (U32)srcSize;
    611             return;
    612         }
    613         srcSize -= seq->litLength;
    614         seq->litLength = 0;
    615         if (srcSize < seq->matchLength) {
    616             /* Skip past the first srcSize of the match */
    617             seq->matchLength -= (U32)srcSize;
    618             if (seq->matchLength < minMatch) {
    619                 /* The match is too short, omit it */
    620                 if (rawSeqStore->pos + 1 < rawSeqStore->size) {
    621                     seq[1].litLength += seq[0].matchLength;
    622                 }
    623                 rawSeqStore->pos++;
    624             }
    625             return;
    626         }
    627         srcSize -= seq->matchLength;
    628         seq->matchLength = 0;
    629         rawSeqStore->pos++;
    630     }
    631 }
    632 
    633 /**
    634  * If the sequence length is longer than remaining then the sequence is split
    635  * between this block and the next.
    636  *
    637  * Returns the current sequence to handle, or if the rest of the block should
    638  * be literals, it returns a sequence with offset == 0.
    639  */
    640 static rawSeq maybeSplitSequence(RawSeqStore_t* rawSeqStore,
    641                                  U32 const remaining, U32 const minMatch)
    642 {
    643     rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
    644     assert(sequence.offset > 0);
    645     /* Likely: No partial sequence */
    646     if (remaining >= sequence.litLength + sequence.matchLength) {
    647         rawSeqStore->pos++;
    648         return sequence;
    649     }
    650     /* Cut the sequence short (offset == 0 ==> rest is literals). */
    651     if (remaining <= sequence.litLength) {
    652         sequence.offset = 0;
    653     } else if (remaining < sequence.litLength + sequence.matchLength) {
    654         sequence.matchLength = remaining - sequence.litLength;
    655         if (sequence.matchLength < minMatch) {
    656             sequence.offset = 0;
    657         }
    658     }
    659     /* Skip past `remaining` bytes for the future sequences. */
    660     ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
    661     return sequence;
    662 }
    663 
    664 void ZSTD_ldm_skipRawSeqStoreBytes(RawSeqStore_t* rawSeqStore, size_t nbBytes) {
    665     U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
    666     while (currPos && rawSeqStore->pos < rawSeqStore->size) {
    667         rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
    668         if (currPos >= currSeq.litLength + currSeq.matchLength) {
    669             currPos -= currSeq.litLength + currSeq.matchLength;
    670             rawSeqStore->pos++;
    671         } else {
    672             rawSeqStore->posInSequence = currPos;
    673             break;
    674         }
    675     }
    676     if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
    677         rawSeqStore->posInSequence = 0;
    678     }
    679 }
    680 
    681 size_t ZSTD_ldm_blockCompress(RawSeqStore_t* rawSeqStore,
    682     ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
    683     ZSTD_ParamSwitch_e useRowMatchFinder,
    684     void const* src, size_t srcSize)
    685 {
    686     const ZSTD_compressionParameters* const cParams = &ms->cParams;
    687     unsigned const minMatch = cParams->minMatch;
    688     ZSTD_BlockCompressor_f const blockCompressor =
    689         ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms));
    690     /* Input bounds */
    691     BYTE const* const istart = (BYTE const*)src;
    692     BYTE const* const iend = istart + srcSize;
    693     /* Input positions */
    694     BYTE const* ip = istart;
    695 
    696     DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
    697     /* If using opt parser, use LDMs only as candidates rather than always accepting them */
    698     if (cParams->strategy >= ZSTD_btopt) {
    699         size_t lastLLSize;
    700         ms->ldmSeqStore = rawSeqStore;
    701         lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize);
    702         ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize);
    703         return lastLLSize;
    704     }
    705 
    706     assert(rawSeqStore->pos <= rawSeqStore->size);
    707     assert(rawSeqStore->size <= rawSeqStore->capacity);
    708     /* Loop through each sequence and apply the block compressor to the literals */
    709     while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
    710         /* maybeSplitSequence updates rawSeqStore->pos */
    711         rawSeq const sequence = maybeSplitSequence(rawSeqStore,
    712                                                    (U32)(iend - ip), minMatch);
    713         /* End signal */
    714         if (sequence.offset == 0)
    715             break;
    716 
    717         assert(ip + sequence.litLength + sequence.matchLength <= iend);
    718 
    719         /* Fill tables for block compressor */
    720         ZSTD_ldm_limitTableUpdate(ms, ip);
    721         ZSTD_ldm_fillFastTables(ms, ip);
    722         /* Run the block compressor */
    723         DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
    724         {
    725             int i;
    726             size_t const newLitLength =
    727                 blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
    728             ip += sequence.litLength;
    729             /* Update the repcodes */
    730             for (i = ZSTD_REP_NUM - 1; i > 0; i--)
    731                 rep[i] = rep[i-1];
    732             rep[0] = sequence.offset;
    733             /* Store the sequence */
    734             ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
    735                           OFFSET_TO_OFFBASE(sequence.offset),
    736                           sequence.matchLength);
    737             ip += sequence.matchLength;
    738         }
    739     }
    740     /* Fill the tables for the block compressor */
    741     ZSTD_ldm_limitTableUpdate(ms, ip);
    742     ZSTD_ldm_fillFastTables(ms, ip);
    743     /* Compress the last literals */
    744     return blockCompressor(ms, seqStore, rep, ip, iend - ip);
    745 }
    746