Home | History | Annotate | Line # | Download | only in compress
      1 /*
      2  * Copyright (c) Meta Platforms, Inc. and affiliates.
      3  * All rights reserved.
      4  *
      5  * This source code is licensed under both the BSD-style license (found in the
      6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
      7  * in the COPYING file in the root directory of this source tree).
      8  * You may select, at your option, one of the above-listed licenses.
      9  */
     10 
     11 #include "zstd_compress_internal.h"
     12 #include "hist.h"
     13 #include "zstd_opt.h"
     14 
     15 #if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
     16  || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
     17  || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
     18 
     19 #define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
     20 #define ZSTD_MAX_PRICE     (1<<30)
     21 
     22 #define ZSTD_PREDEF_THRESHOLD 8   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
     23 
     24 
     25 /*-*************************************
     26 *  Price functions for optimal parser
     27 ***************************************/
     28 
     29 #if 0    /* approximation at bit level (for tests) */
     30 #  define BITCOST_ACCURACY 0
     31 #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
     32 #  define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
     33 #elif 0  /* fractional bit accuracy (for tests) */
     34 #  define BITCOST_ACCURACY 8
     35 #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
     36 #  define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
     37 #else    /* opt==approx, ultra==accurate */
     38 #  define BITCOST_ACCURACY 8
     39 #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
     40 #  define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
     41 #endif
     42 
     43 /* ZSTD_bitWeight() :
     44  * provide estimated "cost" of a stat in full bits only */
     45 MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
     46 {
     47     return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
     48 }
     49 
     50 /* ZSTD_fracWeight() :
     51  * provide fractional-bit "cost" of a stat,
     52  * using linear interpolation approximation */
     53 MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
     54 {
     55     U32 const stat = rawStat + 1;
     56     U32 const hb = ZSTD_highbit32(stat);
     57     U32 const BWeight = hb * BITCOST_MULTIPLIER;
     58     /* Fweight was meant for "Fractional weight"
     59      * but it's effectively a value between 1 and 2
     60      * using fixed point arithmetic */
     61     U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
     62     U32 const weight = BWeight + FWeight;
     63     assert(hb + BITCOST_ACCURACY < 31);
     64     return weight;
     65 }
     66 
     67 #if (DEBUGLEVEL>=2)
     68 /* debugging function,
     69  * @return price in bytes as fractional value
     70  * for debug messages only */
     71 MEM_STATIC double ZSTD_fCost(int price)
     72 {
     73     return (double)price / (BITCOST_MULTIPLIER*8);
     74 }
     75 #endif
     76 
     77 static int ZSTD_compressedLiterals(optState_t const* const optPtr)
     78 {
     79     return optPtr->literalCompressionMode != ZSTD_ps_disable;
     80 }
     81 
     82 static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
     83 {
     84     if (ZSTD_compressedLiterals(optPtr))
     85         optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
     86     optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
     87     optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
     88     optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
     89 }
     90 
     91 
     92 static U32 sum_u32(const unsigned table[], size_t nbElts)
     93 {
     94     size_t n;
     95     U32 total = 0;
     96     for (n=0; n<nbElts; n++) {
     97         total += table[n];
     98     }
     99     return total;
    100 }
    101 
    102 typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
    103 
    104 static U32
    105 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
    106 {
    107     U32 s, sum=0;
    108     DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
    109             (unsigned)lastEltIndex+1, (unsigned)shift );
    110     assert(shift < 30);
    111     for (s=0; s<lastEltIndex+1; s++) {
    112         unsigned const base = base1 ? 1 : (table[s]>0);
    113         unsigned const newStat = base + (table[s] >> shift);
    114         sum += newStat;
    115         table[s] = newStat;
    116     }
    117     return sum;
    118 }
    119 
    120 /* ZSTD_scaleStats() :
    121  * reduce all elt frequencies in table if sum too large
    122  * return the resulting sum of elements */
    123 static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
    124 {
    125     U32 const prevsum = sum_u32(table, lastEltIndex+1);
    126     U32 const factor = prevsum >> logTarget;
    127     DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
    128     assert(logTarget < 30);
    129     if (factor <= 1) return prevsum;
    130     return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
    131 }
    132 
    133 /* ZSTD_rescaleFreqs() :
    134  * if first block (detected by optPtr->litLengthSum == 0) : init statistics
    135  *    take hints from dictionary if there is one
    136  *    and init from zero if there is none,
    137  *    using src for literals stats, and baseline stats for sequence symbols
    138  * otherwise downscale existing stats, to be used as seed for next block.
    139  */
    140 static void
    141 ZSTD_rescaleFreqs(optState_t* const optPtr,
    142             const BYTE* const src, size_t const srcSize,
    143                   int const optLevel)
    144 {
    145     int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
    146     DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
    147     optPtr->priceType = zop_dynamic;
    148 
    149     if (optPtr->litLengthSum == 0) {  /* no literals stats collected -> first block assumed -> init */
    150 
    151         /* heuristic: use pre-defined stats for too small inputs */
    152         if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
    153             DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
    154             optPtr->priceType = zop_predef;
    155         }
    156 
    157         assert(optPtr->symbolCosts != NULL);
    158         if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
    159 
    160             /* huffman stats covering the full value set : table presumed generated by dictionary */
    161             optPtr->priceType = zop_dynamic;
    162 
    163             if (compressedLiterals) {
    164                 /* generate literals statistics from huffman table */
    165                 unsigned lit;
    166                 assert(optPtr->litFreq != NULL);
    167                 optPtr->litSum = 0;
    168                 for (lit=0; lit<=MaxLit; lit++) {
    169                     U32 const scaleLog = 11;   /* scale to 2K */
    170                     U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
    171                     assert(bitCost <= scaleLog);
    172                     optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
    173                     optPtr->litSum += optPtr->litFreq[lit];
    174             }   }
    175 
    176             {   unsigned ll;
    177                 FSE_CState_t llstate;
    178                 FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
    179                 optPtr->litLengthSum = 0;
    180                 for (ll=0; ll<=MaxLL; ll++) {
    181                     U32 const scaleLog = 10;   /* scale to 1K */
    182                     U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
    183                     assert(bitCost < scaleLog);
    184                     optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
    185                     optPtr->litLengthSum += optPtr->litLengthFreq[ll];
    186             }   }
    187 
    188             {   unsigned ml;
    189                 FSE_CState_t mlstate;
    190                 FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
    191                 optPtr->matchLengthSum = 0;
    192                 for (ml=0; ml<=MaxML; ml++) {
    193                     U32 const scaleLog = 10;
    194                     U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
    195                     assert(bitCost < scaleLog);
    196                     optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
    197                     optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
    198             }   }
    199 
    200             {   unsigned of;
    201                 FSE_CState_t ofstate;
    202                 FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
    203                 optPtr->offCodeSum = 0;
    204                 for (of=0; of<=MaxOff; of++) {
    205                     U32 const scaleLog = 10;
    206                     U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
    207                     assert(bitCost < scaleLog);
    208                     optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
    209                     optPtr->offCodeSum += optPtr->offCodeFreq[of];
    210             }   }
    211 
    212         } else {  /* first block, no dictionary */
    213 
    214             assert(optPtr->litFreq != NULL);
    215             if (compressedLiterals) {
    216                 /* base initial cost of literals on direct frequency within src */
    217                 unsigned lit = MaxLit;
    218                 HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
    219                 optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
    220             }
    221 
    222             {   unsigned const baseLLfreqs[MaxLL+1] = {
    223                     4, 2, 1, 1, 1, 1, 1, 1,
    224                     1, 1, 1, 1, 1, 1, 1, 1,
    225                     1, 1, 1, 1, 1, 1, 1, 1,
    226                     1, 1, 1, 1, 1, 1, 1, 1,
    227                     1, 1, 1, 1
    228                 };
    229                 ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
    230                 optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
    231             }
    232 
    233             {   unsigned ml;
    234                 for (ml=0; ml<=MaxML; ml++)
    235                     optPtr->matchLengthFreq[ml] = 1;
    236             }
    237             optPtr->matchLengthSum = MaxML+1;
    238 
    239             {   unsigned const baseOFCfreqs[MaxOff+1] = {
    240                     6, 2, 1, 1, 2, 3, 4, 4,
    241                     4, 3, 2, 1, 1, 1, 1, 1,
    242                     1, 1, 1, 1, 1, 1, 1, 1,
    243                     1, 1, 1, 1, 1, 1, 1, 1
    244                 };
    245                 ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
    246                 optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
    247             }
    248 
    249         }
    250 
    251     } else {   /* new block : scale down accumulated statistics */
    252 
    253         if (compressedLiterals)
    254             optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
    255         optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
    256         optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
    257         optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
    258     }
    259 
    260     ZSTD_setBasePrices(optPtr, optLevel);
    261 }
    262 
    263 /* ZSTD_rawLiteralsCost() :
    264  * price of literals (only) in specified segment (which length can be 0).
    265  * does not include price of literalLength symbol */
    266 static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
    267                                 const optState_t* const optPtr,
    268                                 int optLevel)
    269 {
    270     DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
    271     if (litLength == 0) return 0;
    272 
    273     if (!ZSTD_compressedLiterals(optPtr))
    274         return (litLength << 3) * BITCOST_MULTIPLIER;  /* Uncompressed - 8 bytes per literal. */
    275 
    276     if (optPtr->priceType == zop_predef)
    277         return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bit per literal - no statistic used */
    278 
    279     /* dynamic statistics */
    280     {   U32 price = optPtr->litSumBasePrice * litLength;
    281         U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
    282         U32 u;
    283         assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
    284         for (u=0; u < litLength; u++) {
    285             U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
    286             if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
    287             price -= litPrice;
    288         }
    289         return price;
    290     }
    291 }
    292 
    293 /* ZSTD_litLengthPrice() :
    294  * cost of literalLength symbol */
    295 static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
    296 {
    297     assert(litLength <= ZSTD_BLOCKSIZE_MAX);
    298     if (optPtr->priceType == zop_predef)
    299         return WEIGHT(litLength, optLevel);
    300 
    301     /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
    302      * because it isn't representable in the zstd format.
    303      * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
    304      * In such a case, the block would be all literals.
    305      */
    306     if (litLength == ZSTD_BLOCKSIZE_MAX)
    307         return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
    308 
    309     /* dynamic statistics */
    310     {   U32 const llCode = ZSTD_LLcode(litLength);
    311         return (LL_bits[llCode] * BITCOST_MULTIPLIER)
    312              + optPtr->litLengthSumBasePrice
    313              - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
    314     }
    315 }
    316 
    317 /* ZSTD_getMatchPrice() :
    318  * Provides the cost of the match part (offset + matchLength) of a sequence.
    319  * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
    320  * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
    321  * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
    322  */
    323 FORCE_INLINE_TEMPLATE U32
    324 ZSTD_getMatchPrice(U32 const offBase,
    325                    U32 const matchLength,
    326              const optState_t* const optPtr,
    327                    int const optLevel)
    328 {
    329     U32 price;
    330     U32 const offCode = ZSTD_highbit32(offBase);
    331     U32 const mlBase = matchLength - MINMATCH;
    332     assert(matchLength >= MINMATCH);
    333 
    334     if (optPtr->priceType == zop_predef)  /* fixed scheme, does not use statistics */
    335         return WEIGHT(mlBase, optLevel)
    336              + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */
    337 
    338     /* dynamic statistics */
    339     price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
    340     if ((optLevel<2) /*static*/ && offCode >= 20)
    341         price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
    342 
    343     /* match Length */
    344     {   U32 const mlCode = ZSTD_MLcode(mlBase);
    345         price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
    346     }
    347 
    348     price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */
    349 
    350     DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
    351     return price;
    352 }
    353 
    354 /* ZSTD_updateStats() :
    355  * assumption : literals + litLength <= iend */
    356 static void ZSTD_updateStats(optState_t* const optPtr,
    357                              U32 litLength, const BYTE* literals,
    358                              U32 offBase, U32 matchLength)
    359 {
    360     /* literals */
    361     if (ZSTD_compressedLiterals(optPtr)) {
    362         U32 u;
    363         for (u=0; u < litLength; u++)
    364             optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
    365         optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
    366     }
    367 
    368     /* literal Length */
    369     {   U32 const llCode = ZSTD_LLcode(litLength);
    370         optPtr->litLengthFreq[llCode]++;
    371         optPtr->litLengthSum++;
    372     }
    373 
    374     /* offset code : follows storeSeq() numeric representation */
    375     {   U32 const offCode = ZSTD_highbit32(offBase);
    376         assert(offCode <= MaxOff);
    377         optPtr->offCodeFreq[offCode]++;
    378         optPtr->offCodeSum++;
    379     }
    380 
    381     /* match Length */
    382     {   U32 const mlBase = matchLength - MINMATCH;
    383         U32 const mlCode = ZSTD_MLcode(mlBase);
    384         optPtr->matchLengthFreq[mlCode]++;
    385         optPtr->matchLengthSum++;
    386     }
    387 }
    388 
    389 
    390 /* ZSTD_readMINMATCH() :
    391  * function safe only for comparisons
    392  * assumption : memPtr must be at least 4 bytes before end of buffer */
    393 MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
    394 {
    395     switch (length)
    396     {
    397     default :
    398     case 4 : return MEM_read32(memPtr);
    399     case 3 : if (MEM_isLittleEndian())
    400                 return MEM_read32(memPtr)<<8;
    401              else
    402                 return MEM_read32(memPtr)>>8;
    403     }
    404 }
    405 
    406 
    407 /* Update hashTable3 up to ip (excluded)
    408    Assumption : always within prefix (i.e. not within extDict) */
    409 static
    410 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
    411 U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_MatchState_t* ms,
    412                                        U32* nextToUpdate3,
    413                                        const BYTE* const ip)
    414 {
    415     U32* const hashTable3 = ms->hashTable3;
    416     U32 const hashLog3 = ms->hashLog3;
    417     const BYTE* const base = ms->window.base;
    418     U32 idx = *nextToUpdate3;
    419     U32 const target = (U32)(ip - base);
    420     size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
    421     assert(hashLog3 > 0);
    422 
    423     while(idx < target) {
    424         hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
    425         idx++;
    426     }
    427 
    428     *nextToUpdate3 = target;
    429     return hashTable3[hash3];
    430 }
    431 
    432 
    433 /*-*************************************
    434 *  Binary Tree search
    435 ***************************************/
    436 /** ZSTD_insertBt1() : add one or multiple positions to tree.
    437  * @param ip assumed <= iend-8 .
    438  * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
    439  * @return : nb of positions added */
    440 static
    441 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
    442 U32 ZSTD_insertBt1(
    443                 const ZSTD_MatchState_t* ms,
    444                 const BYTE* const ip, const BYTE* const iend,
    445                 U32 const target,
    446                 U32 const mls, const int extDict)
    447 {
    448     const ZSTD_compressionParameters* const cParams = &ms->cParams;
    449     U32*   const hashTable = ms->hashTable;
    450     U32    const hashLog = cParams->hashLog;
    451     size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    452     U32*   const bt = ms->chainTable;
    453     U32    const btLog  = cParams->chainLog - 1;
    454     U32    const btMask = (1 << btLog) - 1;
    455     U32 matchIndex = hashTable[h];
    456     size_t commonLengthSmaller=0, commonLengthLarger=0;
    457     const BYTE* const base = ms->window.base;
    458     const BYTE* const dictBase = ms->window.dictBase;
    459     const U32 dictLimit = ms->window.dictLimit;
    460     const BYTE* const dictEnd = dictBase + dictLimit;
    461     const BYTE* const prefixStart = base + dictLimit;
    462     const BYTE* match;
    463     const U32 curr = (U32)(ip-base);
    464     const U32 btLow = btMask >= curr ? 0 : curr - btMask;
    465     U32* smallerPtr = bt + 2*(curr&btMask);
    466     U32* largerPtr  = smallerPtr + 1;
    467     U32 dummy32;   /* to be nullified at the end */
    468     /* windowLow is based on target because
    469      * we only need positions that will be in the window at the end of the tree update.
    470      */
    471     U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
    472     U32 matchEndIdx = curr+8+1;
    473     size_t bestLength = 8;
    474     U32 nbCompares = 1U << cParams->searchLog;
    475 #ifdef ZSTD_C_PREDICT
    476     U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
    477     U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
    478     predictedSmall += (predictedSmall>0);
    479     predictedLarge += (predictedLarge>0);
    480 #endif /* ZSTD_C_PREDICT */
    481 
    482     DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
    483 
    484     assert(curr <= target);
    485     assert(ip <= iend-8);   /* required for h calculation */
    486     hashTable[h] = curr;   /* Update Hash Table */
    487 
    488     assert(windowLow > 0);
    489     for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
    490         U32* const nextPtr = bt + 2*(matchIndex & btMask);
    491         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
    492         assert(matchIndex < curr);
    493 
    494 #ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
    495         const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
    496         if (matchIndex == predictedSmall) {
    497             /* no need to check length, result known */
    498             *smallerPtr = matchIndex;
    499             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
    500             smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
    501             matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
    502             predictedSmall = predictPtr[1] + (predictPtr[1]>0);
    503             continue;
    504         }
    505         if (matchIndex == predictedLarge) {
    506             *largerPtr = matchIndex;
    507             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
    508             largerPtr = nextPtr;
    509             matchIndex = nextPtr[0];
    510             predictedLarge = predictPtr[0] + (predictPtr[0]>0);
    511             continue;
    512         }
    513 #endif
    514 
    515         if (!extDict || (matchIndex+matchLength >= dictLimit)) {
    516             assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
    517             match = base + matchIndex;
    518             matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
    519         } else {
    520             match = dictBase + matchIndex;
    521             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
    522             if (matchIndex+matchLength >= dictLimit)
    523                 match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
    524         }
    525 
    526         if (matchLength > bestLength) {
    527             bestLength = matchLength;
    528             if (matchLength > matchEndIdx - matchIndex)
    529                 matchEndIdx = matchIndex + (U32)matchLength;
    530         }
    531 
    532         if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
    533             break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
    534         }
    535 
    536         if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
    537             /* match is smaller than current */
    538             *smallerPtr = matchIndex;             /* update smaller idx */
    539             commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
    540             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
    541             smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
    542             matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
    543         } else {
    544             /* match is larger than current */
    545             *largerPtr = matchIndex;
    546             commonLengthLarger = matchLength;
    547             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
    548             largerPtr = nextPtr;
    549             matchIndex = nextPtr[0];
    550     }   }
    551 
    552     *smallerPtr = *largerPtr = 0;
    553     {   U32 positions = 0;
    554         if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
    555         assert(matchEndIdx > curr + 8);
    556         return MAX(positions, matchEndIdx - (curr + 8));
    557     }
    558 }
    559 
    560 FORCE_INLINE_TEMPLATE
    561 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
    562 void ZSTD_updateTree_internal(
    563                 ZSTD_MatchState_t* ms,
    564                 const BYTE* const ip, const BYTE* const iend,
    565                 const U32 mls, const ZSTD_dictMode_e dictMode)
    566 {
    567     const BYTE* const base = ms->window.base;
    568     U32 const target = (U32)(ip - base);
    569     U32 idx = ms->nextToUpdate;
    570     DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
    571                 idx, target, dictMode);
    572 
    573     while(idx < target) {
    574         U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
    575         assert(idx < (U32)(idx + forward));
    576         idx += forward;
    577     }
    578     assert((size_t)(ip - base) <= (size_t)(U32)(-1));
    579     assert((size_t)(iend - base) <= (size_t)(U32)(-1));
    580     ms->nextToUpdate = target;
    581 }
    582 
    583 void ZSTD_updateTree(ZSTD_MatchState_t* ms, const BYTE* ip, const BYTE* iend) {
    584     ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
    585 }
    586 
    587 FORCE_INLINE_TEMPLATE
    588 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
    589 U32
    590 ZSTD_insertBtAndGetAllMatches (
    591                 ZSTD_match_t* matches,  /* store result (found matches) in this table (presumed large enough) */
    592                 ZSTD_MatchState_t* ms,
    593                 U32* nextToUpdate3,
    594                 const BYTE* const ip, const BYTE* const iLimit,
    595                 const ZSTD_dictMode_e dictMode,
    596                 const U32 rep[ZSTD_REP_NUM],
    597                 const U32 ll0,  /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
    598                 const U32 lengthToBeat,
    599                 const U32 mls /* template */)
    600 {
    601     const ZSTD_compressionParameters* const cParams = &ms->cParams;
    602     U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
    603     const BYTE* const base = ms->window.base;
    604     U32 const curr = (U32)(ip-base);
    605     U32 const hashLog = cParams->hashLog;
    606     U32 const minMatch = (mls==3) ? 3 : 4;
    607     U32* const hashTable = ms->hashTable;
    608     size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    609     U32 matchIndex  = hashTable[h];
    610     U32* const bt   = ms->chainTable;
    611     U32 const btLog = cParams->chainLog - 1;
    612     U32 const btMask= (1U << btLog) - 1;
    613     size_t commonLengthSmaller=0, commonLengthLarger=0;
    614     const BYTE* const dictBase = ms->window.dictBase;
    615     U32 const dictLimit = ms->window.dictLimit;
    616     const BYTE* const dictEnd = dictBase + dictLimit;
    617     const BYTE* const prefixStart = base + dictLimit;
    618     U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
    619     U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
    620     U32 const matchLow = windowLow ? windowLow : 1;
    621     U32* smallerPtr = bt + 2*(curr&btMask);
    622     U32* largerPtr  = bt + 2*(curr&btMask) + 1;
    623     U32 matchEndIdx = curr+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
    624     U32 dummy32;   /* to be nullified at the end */
    625     U32 mnum = 0;
    626     U32 nbCompares = 1U << cParams->searchLog;
    627 
    628     const ZSTD_MatchState_t* dms    = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
    629     const ZSTD_compressionParameters* const dmsCParams =
    630                                       dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
    631     const BYTE* const dmsBase       = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
    632     const BYTE* const dmsEnd        = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
    633     U32         const dmsHighLimit  = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
    634     U32         const dmsLowLimit   = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
    635     U32         const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
    636     U32         const dmsHashLog    = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
    637     U32         const dmsBtLog      = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
    638     U32         const dmsBtMask     = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
    639     U32         const dmsBtLow      = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
    640 
    641     size_t bestLength = lengthToBeat-1;
    642     DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
    643 
    644     /* check repCode */
    645     assert(ll0 <= 1);   /* necessarily 1 or 0 */
    646     {   U32 const lastR = ZSTD_REP_NUM + ll0;
    647         U32 repCode;
    648         for (repCode = ll0; repCode < lastR; repCode++) {
    649             U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
    650             U32 const repIndex = curr - repOffset;
    651             U32 repLen = 0;
    652             assert(curr >= dictLimit);
    653             if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) {  /* equivalent to `curr > repIndex >= dictLimit` */
    654                 /* We must validate the repcode offset because when we're using a dictionary the
    655                  * valid offset range shrinks when the dictionary goes out of bounds.
    656                  */
    657                 if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
    658                     repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
    659                 }
    660             } else {  /* repIndex < dictLimit || repIndex >= curr */
    661                 const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
    662                                              dmsBase + repIndex - dmsIndexDelta :
    663                                              dictBase + repIndex;
    664                 assert(curr >= windowLow);
    665                 if ( dictMode == ZSTD_extDict
    666                   && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow)  /* equivalent to `curr > repIndex >= windowLow` */
    667                      & (ZSTD_index_overlap_check(dictLimit, repIndex)) )
    668                   && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
    669                     repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
    670                 }
    671                 if (dictMode == ZSTD_dictMatchState
    672                   && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `curr > repIndex >= dmsLowLimit` */
    673                      & (ZSTD_index_overlap_check(dictLimit, repIndex)) )
    674                   && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
    675                     repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
    676             }   }
    677             /* save longer solution */
    678             if (repLen > bestLength) {
    679                 DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
    680                             repCode, ll0, repOffset, repLen);
    681                 bestLength = repLen;
    682                 matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1);  /* expect value between 1 and 3 */
    683                 matches[mnum].len = (U32)repLen;
    684                 mnum++;
    685                 if ( (repLen > sufficient_len)
    686                    | (ip+repLen == iLimit) ) {  /* best possible */
    687                     return mnum;
    688     }   }   }   }
    689 
    690     /* HC3 match finder */
    691     if ((mls == 3) /*static*/ && (bestLength < mls)) {
    692         U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
    693         if ((matchIndex3 >= matchLow)
    694           & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
    695             size_t mlen;
    696             if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
    697                 const BYTE* const match = base + matchIndex3;
    698                 mlen = ZSTD_count(ip, match, iLimit);
    699             } else {
    700                 const BYTE* const match = dictBase + matchIndex3;
    701                 mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
    702             }
    703 
    704             /* save best solution */
    705             if (mlen >= mls /* == 3 > bestLength */) {
    706                 DEBUGLOG(8, "found small match with hlog3, of length %u",
    707                             (U32)mlen);
    708                 bestLength = mlen;
    709                 assert(curr > matchIndex3);
    710                 assert(mnum==0);  /* no prior solution */
    711                 matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
    712                 matches[0].len = (U32)mlen;
    713                 mnum = 1;
    714                 if ( (mlen > sufficient_len) |
    715                      (ip+mlen == iLimit) ) {  /* best possible length */
    716                     ms->nextToUpdate = curr+1;  /* skip insertion */
    717                     return 1;
    718         }   }   }
    719         /* no dictMatchState lookup: dicts don't have a populated HC3 table */
    720     }  /* if (mls == 3) */
    721 
    722     hashTable[h] = curr;   /* Update Hash Table */
    723 
    724     for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
    725         U32* const nextPtr = bt + 2*(matchIndex & btMask);
    726         const BYTE* match;
    727         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
    728         assert(curr > matchIndex);
    729 
    730         if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
    731             assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
    732             match = base + matchIndex;
    733             if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
    734             matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
    735         } else {
    736             match = dictBase + matchIndex;
    737             assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
    738             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
    739             if (matchIndex+matchLength >= dictLimit)
    740                 match = base + matchIndex;   /* prepare for match[matchLength] read */
    741         }
    742 
    743         if (matchLength > bestLength) {
    744             DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
    745                     (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
    746             assert(matchEndIdx > matchIndex);
    747             if (matchLength > matchEndIdx - matchIndex)
    748                 matchEndIdx = matchIndex + (U32)matchLength;
    749             bestLength = matchLength;
    750             matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
    751             matches[mnum].len = (U32)matchLength;
    752             mnum++;
    753             if ( (matchLength > ZSTD_OPT_NUM)
    754                | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
    755                 if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
    756                 break; /* drop, to preserve bt consistency (miss a little bit of compression) */
    757         }   }
    758 
    759         if (match[matchLength] < ip[matchLength]) {
    760             /* match smaller than current */
    761             *smallerPtr = matchIndex;             /* update smaller idx */
    762             commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
    763             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
    764             smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
    765             matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
    766         } else {
    767             *largerPtr = matchIndex;
    768             commonLengthLarger = matchLength;
    769             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
    770             largerPtr = nextPtr;
    771             matchIndex = nextPtr[0];
    772     }   }
    773 
    774     *smallerPtr = *largerPtr = 0;
    775 
    776     assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
    777     if (dictMode == ZSTD_dictMatchState && nbCompares) {
    778         size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
    779         U32 dictMatchIndex = dms->hashTable[dmsH];
    780         const U32* const dmsBt = dms->chainTable;
    781         commonLengthSmaller = commonLengthLarger = 0;
    782         for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
    783             const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
    784             size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
    785             const BYTE* match = dmsBase + dictMatchIndex;
    786             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
    787             if (dictMatchIndex+matchLength >= dmsHighLimit)
    788                 match = base + dictMatchIndex + dmsIndexDelta;   /* to prepare for next usage of match[matchLength] */
    789 
    790             if (matchLength > bestLength) {
    791                 matchIndex = dictMatchIndex + dmsIndexDelta;
    792                 DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
    793                         (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
    794                 if (matchLength > matchEndIdx - matchIndex)
    795                     matchEndIdx = matchIndex + (U32)matchLength;
    796                 bestLength = matchLength;
    797                 matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
    798                 matches[mnum].len = (U32)matchLength;
    799                 mnum++;
    800                 if ( (matchLength > ZSTD_OPT_NUM)
    801                    | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
    802                     break;   /* drop, to guarantee consistency (miss a little bit of compression) */
    803             }   }
    804 
    805             if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
    806             if (match[matchLength] < ip[matchLength]) {
    807                 commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
    808                 dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
    809             } else {
    810                 /* match is larger than current */
    811                 commonLengthLarger = matchLength;
    812                 dictMatchIndex = nextPtr[0];
    813     }   }   }  /* if (dictMode == ZSTD_dictMatchState) */
    814 
    815     assert(matchEndIdx > curr+8);
    816     ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
    817     return mnum;
    818 }
    819 
    820 typedef U32 (*ZSTD_getAllMatchesFn)(
    821     ZSTD_match_t*,
    822     ZSTD_MatchState_t*,
    823     U32*,
    824     const BYTE*,
    825     const BYTE*,
    826     const U32 rep[ZSTD_REP_NUM],
    827     U32 const ll0,
    828     U32 const lengthToBeat);
    829 
    830 FORCE_INLINE_TEMPLATE
    831 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
    832 U32 ZSTD_btGetAllMatches_internal(
    833         ZSTD_match_t* matches,
    834         ZSTD_MatchState_t* ms,
    835         U32* nextToUpdate3,
    836         const BYTE* ip,
    837         const BYTE* const iHighLimit,
    838         const U32 rep[ZSTD_REP_NUM],
    839         U32 const ll0,
    840         U32 const lengthToBeat,
    841         const ZSTD_dictMode_e dictMode,
    842         const U32 mls)
    843 {
    844     assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
    845     DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
    846     if (ip < ms->window.base + ms->nextToUpdate)
    847         return 0;   /* skipped area */
    848     ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
    849     return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
    850 }
    851 
    852 #define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
    853 
    854 #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls)            \
    855     static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)(      \
    856             ZSTD_match_t* matches,                             \
    857             ZSTD_MatchState_t* ms,                             \
    858             U32* nextToUpdate3,                                \
    859             const BYTE* ip,                                    \
    860             const BYTE* const iHighLimit,                      \
    861             const U32 rep[ZSTD_REP_NUM],                       \
    862             U32 const ll0,                                     \
    863             U32 const lengthToBeat)                            \
    864     {                                                          \
    865         return ZSTD_btGetAllMatches_internal(                  \
    866                 matches, ms, nextToUpdate3, ip, iHighLimit,    \
    867                 rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
    868     }
    869 
    870 #define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode)  \
    871     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3)  \
    872     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4)  \
    873     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5)  \
    874     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
    875 
    876 GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
    877 GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
    878 GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
    879 
    880 #define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode)  \
    881     {                                            \
    882         ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
    883         ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
    884         ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
    885         ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6)  \
    886     }
    887 
    888 static ZSTD_getAllMatchesFn
    889 ZSTD_selectBtGetAllMatches(ZSTD_MatchState_t const* ms, ZSTD_dictMode_e const dictMode)
    890 {
    891     ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
    892         ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
    893         ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
    894         ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
    895     };
    896     U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
    897     assert((U32)dictMode < 3);
    898     assert(mls - 3 < 4);
    899     return getAllMatchesFns[(int)dictMode][mls - 3];
    900 }
    901 
    902 /*************************
    903 *  LDM helper functions  *
    904 *************************/
    905 
    906 /* Struct containing info needed to make decision about ldm inclusion */
    907 typedef struct {
    908     RawSeqStore_t seqStore;   /* External match candidates store for this block */
    909     U32 startPosInBlock;      /* Start position of the current match candidate */
    910     U32 endPosInBlock;        /* End position of the current match candidate */
    911     U32 offset;               /* Offset of the match candidate */
    912 } ZSTD_optLdm_t;
    913 
    914 /* ZSTD_optLdm_skipRawSeqStoreBytes():
    915  * Moves forward in @rawSeqStore by @nbBytes,
    916  * which will update the fields 'pos' and 'posInSequence'.
    917  */
    918 static void ZSTD_optLdm_skipRawSeqStoreBytes(RawSeqStore_t* rawSeqStore, size_t nbBytes)
    919 {
    920     U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
    921     while (currPos && rawSeqStore->pos < rawSeqStore->size) {
    922         rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
    923         if (currPos >= currSeq.litLength + currSeq.matchLength) {
    924             currPos -= currSeq.litLength + currSeq.matchLength;
    925             rawSeqStore->pos++;
    926         } else {
    927             rawSeqStore->posInSequence = currPos;
    928             break;
    929         }
    930     }
    931     if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
    932         rawSeqStore->posInSequence = 0;
    933     }
    934 }
    935 
    936 /* ZSTD_opt_getNextMatchAndUpdateSeqStore():
    937  * Calculates the beginning and end of the next match in the current block.
    938  * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
    939  */
    940 static void
    941 ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
    942                                        U32 blockBytesRemaining)
    943 {
    944     rawSeq currSeq;
    945     U32 currBlockEndPos;
    946     U32 literalsBytesRemaining;
    947     U32 matchBytesRemaining;
    948 
    949     /* Setting match end position to MAX to ensure we never use an LDM during this block */
    950     if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
    951         optLdm->startPosInBlock = UINT_MAX;
    952         optLdm->endPosInBlock = UINT_MAX;
    953         return;
    954     }
    955     /* Calculate appropriate bytes left in matchLength and litLength
    956      * after adjusting based on ldmSeqStore->posInSequence */
    957     currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
    958     assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
    959     currBlockEndPos = currPosInBlock + blockBytesRemaining;
    960     literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
    961             currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
    962             0;
    963     matchBytesRemaining = (literalsBytesRemaining == 0) ?
    964             currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
    965             currSeq.matchLength;
    966 
    967     /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
    968     if (literalsBytesRemaining >= blockBytesRemaining) {
    969         optLdm->startPosInBlock = UINT_MAX;
    970         optLdm->endPosInBlock = UINT_MAX;
    971         ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
    972         return;
    973     }
    974 
    975     /* Matches may be < minMatch by this process. In that case, we will reject them
    976        when we are deciding whether or not to add the ldm */
    977     optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
    978     optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
    979     optLdm->offset = currSeq.offset;
    980 
    981     if (optLdm->endPosInBlock > currBlockEndPos) {
    982         /* Match ends after the block ends, we can't use the whole match */
    983         optLdm->endPosInBlock = currBlockEndPos;
    984         ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
    985     } else {
    986         /* Consume nb of bytes equal to size of sequence left */
    987         ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
    988     }
    989 }
    990 
    991 /* ZSTD_optLdm_maybeAddMatch():
    992  * Adds a match if it's long enough,
    993  * based on it's 'matchStartPosInBlock' and 'matchEndPosInBlock',
    994  * into 'matches'. Maintains the correct ordering of 'matches'.
    995  */
    996 static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
    997                                       const ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
    998                                       U32 minMatch)
    999 {
   1000     U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
   1001     /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
   1002     U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
   1003 
   1004     /* Ensure that current block position is not outside of the match */
   1005     if (currPosInBlock < optLdm->startPosInBlock
   1006       || currPosInBlock >= optLdm->endPosInBlock
   1007       || candidateMatchLength < minMatch) {
   1008         return;
   1009     }
   1010 
   1011     if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
   1012         U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
   1013         DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
   1014                  candidateOffBase, candidateMatchLength, currPosInBlock);
   1015         matches[*nbMatches].len = candidateMatchLength;
   1016         matches[*nbMatches].off = candidateOffBase;
   1017         (*nbMatches)++;
   1018     }
   1019 }
   1020 
   1021 /* ZSTD_optLdm_processMatchCandidate():
   1022  * Wrapper function to update ldm seq store and call ldm functions as necessary.
   1023  */
   1024 static void
   1025 ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
   1026                                   ZSTD_match_t* matches, U32* nbMatches,
   1027                                   U32 currPosInBlock, U32 remainingBytes,
   1028                                   U32 minMatch)
   1029 {
   1030     if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
   1031         return;
   1032     }
   1033 
   1034     if (currPosInBlock >= optLdm->endPosInBlock) {
   1035         if (currPosInBlock > optLdm->endPosInBlock) {
   1036             /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
   1037              * at the end of a match from the ldm seq store, and will often be some bytes
   1038              * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
   1039              */
   1040             U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
   1041             ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
   1042         }
   1043         ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
   1044     }
   1045     ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock, minMatch);
   1046 }
   1047 
   1048 
   1049 /*-*******************************
   1050 *  Optimal parser
   1051 *********************************/
   1052 
   1053 #if 0 /* debug */
   1054 
   1055 static void
   1056 listStats(const U32* table, int lastEltID)
   1057 {
   1058     int const nbElts = lastEltID + 1;
   1059     int enb;
   1060     for (enb=0; enb < nbElts; enb++) {
   1061         (void)table;
   1062         /* RAWLOG(2, "%3i:%3i,  ", enb, table[enb]); */
   1063         RAWLOG(2, "%4i,", table[enb]);
   1064     }
   1065     RAWLOG(2, " \n");
   1066 }
   1067 
   1068 #endif
   1069 
   1070 #define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
   1071 #define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
   1072 #define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1))
   1073 
   1074 FORCE_INLINE_TEMPLATE
   1075 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
   1076 size_t
   1077 ZSTD_compressBlock_opt_generic(ZSTD_MatchState_t* ms,
   1078                                SeqStore_t* seqStore,
   1079                                U32 rep[ZSTD_REP_NUM],
   1080                          const void* src, size_t srcSize,
   1081                          const int optLevel,
   1082                          const ZSTD_dictMode_e dictMode)
   1083 {
   1084     optState_t* const optStatePtr = &ms->opt;
   1085     const BYTE* const istart = (const BYTE*)src;
   1086     const BYTE* ip = istart;
   1087     const BYTE* anchor = istart;
   1088     const BYTE* const iend = istart + srcSize;
   1089     const BYTE* const ilimit = iend - 8;
   1090     const BYTE* const base = ms->window.base;
   1091     const BYTE* const prefixStart = base + ms->window.dictLimit;
   1092     const ZSTD_compressionParameters* const cParams = &ms->cParams;
   1093 
   1094     ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
   1095 
   1096     U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
   1097     U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
   1098     U32 nextToUpdate3 = ms->nextToUpdate;
   1099 
   1100     ZSTD_optimal_t* const opt = optStatePtr->priceTable;
   1101     ZSTD_match_t* const matches = optStatePtr->matchTable;
   1102     ZSTD_optimal_t lastStretch;
   1103     ZSTD_optLdm_t optLdm;
   1104 
   1105     ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));
   1106 
   1107     optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
   1108     optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
   1109     ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
   1110 
   1111     /* init */
   1112     DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
   1113                 (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
   1114     assert(optLevel <= 2);
   1115     ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
   1116     ip += (ip==prefixStart);
   1117 
   1118     /* Match Loop */
   1119     while (ip < ilimit) {
   1120         U32 cur, last_pos = 0;
   1121 
   1122         /* find first match */
   1123         {   U32 const litlen = (U32)(ip - anchor);
   1124             U32 const ll0 = !litlen;
   1125             U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
   1126             ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
   1127                                               (U32)(ip-istart), (U32)(iend-ip),
   1128                                               minMatch);
   1129             if (!nbMatches) {
   1130                 DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
   1131                 ip++;
   1132                 continue;
   1133             }
   1134 
   1135             /* Match found: let's store this solution, and eventually find more candidates.
   1136              * During this forward pass, @opt is used to store stretches,
   1137              * defined as "a match followed by N literals".
   1138              * Note how this is different from a Sequence, which is "N literals followed by a match".
   1139              * Storing stretches allows us to store different match predecessors
   1140              * for each literal position part of a literals run. */
   1141 
   1142             /* initialize opt[0] */
   1143             opt[0].mlen = 0;  /* there are only literals so far */
   1144             opt[0].litlen = litlen;
   1145             /* No need to include the actual price of the literals before the first match
   1146              * because it is static for the duration of the forward pass, and is included
   1147              * in every subsequent price. But, we include the literal length because
   1148              * the cost variation of litlen depends on the value of litlen.
   1149              */
   1150             opt[0].price = LL_PRICE(litlen);
   1151             ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
   1152             ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));
   1153 
   1154             /* large match -> immediate encoding */
   1155             {   U32 const maxML = matches[nbMatches-1].len;
   1156                 U32 const maxOffBase = matches[nbMatches-1].off;
   1157                 DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
   1158                             nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
   1159 
   1160                 if (maxML > sufficient_len) {
   1161                     lastStretch.litlen = 0;
   1162                     lastStretch.mlen = maxML;
   1163                     lastStretch.off = maxOffBase;
   1164                     DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
   1165                                 maxML, sufficient_len);
   1166                     cur = 0;
   1167                     last_pos = maxML;
   1168                     goto _shortestPath;
   1169             }   }
   1170 
   1171             /* set prices for first matches starting position == 0 */
   1172             assert(opt[0].price >= 0);
   1173             {   U32 pos;
   1174                 U32 matchNb;
   1175                 for (pos = 1; pos < minMatch; pos++) {
   1176                     opt[pos].price = ZSTD_MAX_PRICE;
   1177                     opt[pos].mlen = 0;
   1178                     opt[pos].litlen = litlen + pos;
   1179                 }
   1180                 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
   1181                     U32 const offBase = matches[matchNb].off;
   1182                     U32 const end = matches[matchNb].len;
   1183                     for ( ; pos <= end ; pos++ ) {
   1184                         int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
   1185                         int const sequencePrice = opt[0].price + matchPrice;
   1186                         DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
   1187                                     pos, ZSTD_fCost(sequencePrice));
   1188                         opt[pos].mlen = pos;
   1189                         opt[pos].off = offBase;
   1190                         opt[pos].litlen = 0; /* end of match */
   1191                         opt[pos].price = sequencePrice + LL_PRICE(0);
   1192                     }
   1193                 }
   1194                 last_pos = pos-1;
   1195                 opt[pos].price = ZSTD_MAX_PRICE;
   1196             }
   1197         }
   1198 
   1199         /* check further positions */
   1200         for (cur = 1; cur <= last_pos; cur++) {
   1201             const BYTE* const inr = ip + cur;
   1202             assert(cur <= ZSTD_OPT_NUM);
   1203             DEBUGLOG(7, "cPos:%i==rPos:%u", (int)(inr-istart), cur);
   1204 
   1205             /* Fix current position with one literal if cheaper */
   1206             {   U32 const litlen = opt[cur-1].litlen + 1;
   1207                 int const price = opt[cur-1].price
   1208                                 + LIT_PRICE(ip+cur-1)
   1209                                 + LL_INCPRICE(litlen);
   1210                 assert(price < 1000000000); /* overflow check */
   1211                 if (price <= opt[cur].price) {
   1212                     ZSTD_optimal_t const prevMatch = opt[cur];
   1213                     DEBUGLOG(7, "cPos:%i==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
   1214                                 (int)(inr-istart), cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
   1215                                 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
   1216                     opt[cur] = opt[cur-1];
   1217                     opt[cur].litlen = litlen;
   1218                     opt[cur].price = price;
   1219                     if ( (optLevel >= 1) /* additional check only for higher modes */
   1220                       && (prevMatch.litlen == 0) /* replace a match */
   1221                       && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
   1222                       && LIKELY(ip + cur < iend)
   1223                     ) {
   1224                         /* check next position, in case it would be cheaper */
   1225                         int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
   1226                         int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
   1227                         DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
   1228                                 cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
   1229                         if ( (with1literal < withMoreLiterals)
   1230                           && (with1literal < opt[cur+1].price) ) {
   1231                             /* update offset history - before it disappears */
   1232                             U32 const prev = cur - prevMatch.mlen;
   1233                             Repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
   1234                             assert(cur >= prevMatch.mlen);
   1235                             DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
   1236                                         ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
   1237                                         newReps.rep[0], newReps.rep[1], newReps.rep[2] );
   1238                             opt[cur+1] = prevMatch;  /* mlen & offbase */
   1239                             ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(Repcodes_t));
   1240                             opt[cur+1].litlen = 1;
   1241                             opt[cur+1].price = with1literal;
   1242                             if (last_pos < cur+1) last_pos = cur+1;
   1243                         }
   1244                     }
   1245                 } else {
   1246                     DEBUGLOG(7, "cPos:%i==rPos:%u : literal would cost more (%.2f>%.2f)",
   1247                                 (int)(inr-istart), cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
   1248                 }
   1249             }
   1250 
   1251             /* Offset history is not updated during match comparison.
   1252              * Do it here, now that the match is selected and confirmed.
   1253              */
   1254             ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(Repcodes_t));
   1255             assert(cur >= opt[cur].mlen);
   1256             if (opt[cur].litlen == 0) {
   1257                 /* just finished a match => alter offset history */
   1258                 U32 const prev = cur - opt[cur].mlen;
   1259                 Repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
   1260                 ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(Repcodes_t));
   1261             }
   1262 
   1263             /* last match must start at a minimum distance of 8 from oend */
   1264             if (inr > ilimit) continue;
   1265 
   1266             if (cur == last_pos) break;
   1267 
   1268             if ( (optLevel==0) /*static_test*/
   1269               && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
   1270                 DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
   1271                 continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
   1272             }
   1273 
   1274             assert(opt[cur].price >= 0);
   1275             {   U32 const ll0 = (opt[cur].litlen == 0);
   1276                 int const previousPrice = opt[cur].price;
   1277                 int const basePrice = previousPrice + LL_PRICE(0);
   1278                 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
   1279                 U32 matchNb;
   1280 
   1281                 ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
   1282                                                   (U32)(inr-istart), (U32)(iend-inr),
   1283                                                   minMatch);
   1284 
   1285                 if (!nbMatches) {
   1286                     DEBUGLOG(7, "rPos:%u : no match found", cur);
   1287                     continue;
   1288                 }
   1289 
   1290                 {   U32 const longestML = matches[nbMatches-1].len;
   1291                     DEBUGLOG(7, "cPos:%i==rPos:%u, found %u matches, of longest ML=%u",
   1292                                 (int)(inr-istart), cur, nbMatches, longestML);
   1293 
   1294                     if ( (longestML > sufficient_len)
   1295                       || (cur + longestML >= ZSTD_OPT_NUM)
   1296                       || (ip + cur + longestML >= iend) ) {
   1297                         lastStretch.mlen = longestML;
   1298                         lastStretch.off = matches[nbMatches-1].off;
   1299                         lastStretch.litlen = 0;
   1300                         last_pos = cur + longestML;
   1301                         goto _shortestPath;
   1302                 }   }
   1303 
   1304                 /* set prices using matches found at position == cur */
   1305                 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
   1306                     U32 const offset = matches[matchNb].off;
   1307                     U32 const lastML = matches[matchNb].len;
   1308                     U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
   1309                     U32 mlen;
   1310 
   1311                     DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
   1312                                 matchNb, matches[matchNb].off, lastML, opt[cur].litlen);
   1313 
   1314                     for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
   1315                         U32 const pos = cur + mlen;
   1316                         int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
   1317 
   1318                         if ((pos > last_pos) || (price < opt[pos].price)) {
   1319                             DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
   1320                                         pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
   1321                             while (last_pos < pos) {
   1322                                 /* fill empty positions, for future comparisons */
   1323                                 last_pos++;
   1324                                 opt[last_pos].price = ZSTD_MAX_PRICE;
   1325                                 opt[last_pos].litlen = !0;  /* just needs to be != 0, to mean "not an end of match" */
   1326                             }
   1327                             opt[pos].mlen = mlen;
   1328                             opt[pos].off = offset;
   1329                             opt[pos].litlen = 0;
   1330                             opt[pos].price = price;
   1331                         } else {
   1332                             DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
   1333                                         pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
   1334                             if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
   1335                         }
   1336             }   }   }
   1337             opt[last_pos+1].price = ZSTD_MAX_PRICE;
   1338         }  /* for (cur = 1; cur <= last_pos; cur++) */
   1339 
   1340         lastStretch = opt[last_pos];
   1341         assert(cur >= lastStretch.mlen);
   1342         cur = last_pos - lastStretch.mlen;
   1343 
   1344 _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
   1345         assert(opt[0].mlen == 0);
   1346         assert(last_pos >= lastStretch.mlen);
   1347         assert(cur == last_pos - lastStretch.mlen);
   1348 
   1349         if (lastStretch.mlen==0) {
   1350             /* no solution : all matches have been converted into literals */
   1351             assert(lastStretch.litlen == (ip - anchor) + last_pos);
   1352             ip += last_pos;
   1353             continue;
   1354         }
   1355         assert(lastStretch.off > 0);
   1356 
   1357         /* Update offset history */
   1358         if (lastStretch.litlen == 0) {
   1359             /* finishing on a match : update offset history */
   1360             Repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
   1361             ZSTD_memcpy(rep, &reps, sizeof(Repcodes_t));
   1362         } else {
   1363             ZSTD_memcpy(rep, lastStretch.rep, sizeof(Repcodes_t));
   1364             assert(cur >= lastStretch.litlen);
   1365             cur -= lastStretch.litlen;
   1366         }
   1367 
   1368         /* Let's write the shortest path solution.
   1369          * It is stored in @opt in reverse order,
   1370          * starting from @storeEnd (==cur+2),
   1371          * effectively partially @opt overwriting.
   1372          * Content is changed too:
   1373          * - So far, @opt stored stretches, aka a match followed by literals
   1374          * - Now, it will store sequences, aka literals followed by a match
   1375          */
   1376         {   U32 const storeEnd = cur + 2;
   1377             U32 storeStart = storeEnd;
   1378             U32 stretchPos = cur;
   1379 
   1380             DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
   1381                         last_pos, cur); (void)last_pos;
   1382             assert(storeEnd < ZSTD_OPT_SIZE);
   1383             DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
   1384                         storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
   1385             if (lastStretch.litlen > 0) {
   1386                 /* last "sequence" is unfinished: just a bunch of literals */
   1387                 opt[storeEnd].litlen = lastStretch.litlen;
   1388                 opt[storeEnd].mlen = 0;
   1389                 storeStart = storeEnd-1;
   1390                 opt[storeStart] = lastStretch;
   1391             } {
   1392                 opt[storeEnd] = lastStretch;  /* note: litlen will be fixed */
   1393                 storeStart = storeEnd;
   1394             }
   1395             while (1) {
   1396                 ZSTD_optimal_t nextStretch = opt[stretchPos];
   1397                 opt[storeStart].litlen = nextStretch.litlen;
   1398                 DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
   1399                             opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
   1400                 if (nextStretch.mlen == 0) {
   1401                     /* reaching beginning of segment */
   1402                     break;
   1403                 }
   1404                 storeStart--;
   1405                 opt[storeStart] = nextStretch; /* note: litlen will be fixed */
   1406                 assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
   1407                 stretchPos -= nextStretch.litlen + nextStretch.mlen;
   1408             }
   1409 
   1410             /* save sequences */
   1411             DEBUGLOG(6, "sending selected sequences into seqStore");
   1412             {   U32 storePos;
   1413                 for (storePos=storeStart; storePos <= storeEnd; storePos++) {
   1414                     U32 const llen = opt[storePos].litlen;
   1415                     U32 const mlen = opt[storePos].mlen;
   1416                     U32 const offBase = opt[storePos].off;
   1417                     U32 const advance = llen + mlen;
   1418                     DEBUGLOG(6, "considering seq starting at %i, llen=%u, mlen=%u",
   1419                                 (int)(anchor - istart), (unsigned)llen, (unsigned)mlen);
   1420 
   1421                     if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
   1422                         assert(storePos == storeEnd);   /* must be last sequence */
   1423                         ip = anchor + llen;     /* last "sequence" is a bunch of literals => don't progress anchor */
   1424                         continue;   /* will finish */
   1425                     }
   1426 
   1427                     assert(anchor + llen <= iend);
   1428                     ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
   1429                     ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
   1430                     anchor += advance;
   1431                     ip = anchor;
   1432             }   }
   1433             DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);
   1434 
   1435             /* update all costs */
   1436             ZSTD_setBasePrices(optStatePtr, optLevel);
   1437         }
   1438     }   /* while (ip < ilimit) */
   1439 
   1440     /* Return the last literals size */
   1441     return (size_t)(iend - anchor);
   1442 }
   1443 #endif /* build exclusions */
   1444 
   1445 #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
   1446 static size_t ZSTD_compressBlock_opt0(
   1447         ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   1448         const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
   1449 {
   1450     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
   1451 }
   1452 #endif
   1453 
   1454 #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
   1455 static size_t ZSTD_compressBlock_opt2(
   1456         ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   1457         const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
   1458 {
   1459     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
   1460 }
   1461 #endif
   1462 
   1463 #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
   1464 size_t ZSTD_compressBlock_btopt(
   1465         ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   1466         const void* src, size_t srcSize)
   1467 {
   1468     DEBUGLOG(5, "ZSTD_compressBlock_btopt");
   1469     return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
   1470 }
   1471 #endif
   1472 
   1473 
   1474 
   1475 
   1476 #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
   1477 /* ZSTD_initStats_ultra():
   1478  * make a first compression pass, just to seed stats with more accurate starting values.
   1479  * only works on first block, with no dictionary and no ldm.
   1480  * this function cannot error out, its narrow contract must be respected.
   1481  */
   1482 static
   1483 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
   1484 void ZSTD_initStats_ultra(ZSTD_MatchState_t* ms,
   1485                           SeqStore_t* seqStore,
   1486                           U32 rep[ZSTD_REP_NUM],
   1487                     const void* src, size_t srcSize)
   1488 {
   1489     U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
   1490     ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
   1491 
   1492     DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
   1493     assert(ms->opt.litLengthSum == 0);    /* first block */
   1494     assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
   1495     assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
   1496     assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as 2-complement) */
   1497 
   1498     ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict);   /* generate stats into ms->opt*/
   1499 
   1500     /* invalidate first scan from history, only keep entropy stats */
   1501     ZSTD_resetSeqStore(seqStore);
   1502     ms->window.base -= srcSize;
   1503     ms->window.dictLimit += (U32)srcSize;
   1504     ms->window.lowLimit = ms->window.dictLimit;
   1505     ms->nextToUpdate = ms->window.dictLimit;
   1506 
   1507 }
   1508 
   1509 size_t ZSTD_compressBlock_btultra(
   1510         ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   1511         const void* src, size_t srcSize)
   1512 {
   1513     DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
   1514     return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
   1515 }
   1516 
   1517 size_t ZSTD_compressBlock_btultra2(
   1518         ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   1519         const void* src, size_t srcSize)
   1520 {
   1521     U32 const curr = (U32)((const BYTE*)src - ms->window.base);
   1522     DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
   1523 
   1524     /* 2-passes strategy:
   1525      * this strategy makes a first pass over first block to collect statistics
   1526      * in order to seed next round's statistics with it.
   1527      * After 1st pass, function forgets history, and starts a new block.
   1528      * Consequently, this can only work if no data has been previously loaded in tables,
   1529      * aka, no dictionary, no prefix, no ldm preprocessing.
   1530      * The compression ratio gain is generally small (~0.5% on first block),
   1531      * the cost is 2x cpu time on first block. */
   1532     assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
   1533     if ( (ms->opt.litLengthSum==0)   /* first block */
   1534       && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
   1535       && (ms->window.dictLimit == ms->window.lowLimit)   /* no dictionary */
   1536       && (curr == ms->window.dictLimit)    /* start of frame, nothing already loaded nor skipped */
   1537       && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
   1538       ) {
   1539         ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
   1540     }
   1541 
   1542     return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
   1543 }
   1544 #endif
   1545 
   1546 #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
   1547 size_t ZSTD_compressBlock_btopt_dictMatchState(
   1548         ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   1549         const void* src, size_t srcSize)
   1550 {
   1551     return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
   1552 }
   1553 
   1554 size_t ZSTD_compressBlock_btopt_extDict(
   1555         ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   1556         const void* src, size_t srcSize)
   1557 {
   1558     return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
   1559 }
   1560 #endif
   1561 
   1562 #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
   1563 size_t ZSTD_compressBlock_btultra_dictMatchState(
   1564         ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   1565         const void* src, size_t srcSize)
   1566 {
   1567     return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
   1568 }
   1569 
   1570 size_t ZSTD_compressBlock_btultra_extDict(
   1571         ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   1572         const void* src, size_t srcSize)
   1573 {
   1574     return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
   1575 }
   1576 #endif
   1577 
   1578 /* note : no btultra2 variant for extDict nor dictMatchState,
   1579  * because btultra2 is not meant to work with dictionaries
   1580  * and is only specific for the first block (no prefix) */
   1581