Home | History | Annotate | Line # | Download | only in compress
zstd_double_fast.c revision 1.1
      1 /*
      2  * Copyright (c) Meta Platforms, Inc. and affiliates.
      3  * All rights reserved.
      4  *
      5  * This source code is licensed under both the BSD-style license (found in the
      6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
      7  * in the COPYING file in the root directory of this source tree).
      8  * You may select, at your option, one of the above-listed licenses.
      9  */
     10 
     11 #include "zstd_compress_internal.h"
     12 #include "zstd_double_fast.h"
     13 
     14 #ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
     15 
     16 static
     17 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
     18 void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
     19                               void const* end, ZSTD_dictTableLoadMethod_e dtlm)
     20 {
     21     const ZSTD_compressionParameters* const cParams = &ms->cParams;
     22     U32* const hashLarge = ms->hashTable;
     23     U32  const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
     24     U32  const mls = cParams->minMatch;
     25     U32* const hashSmall = ms->chainTable;
     26     U32  const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
     27     const BYTE* const base = ms->window.base;
     28     const BYTE* ip = base + ms->nextToUpdate;
     29     const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
     30     const U32 fastHashFillStep = 3;
     31 
     32     /* Always insert every fastHashFillStep position into the hash tables.
     33      * Insert the other positions into the large hash table if their entry
     34      * is empty.
     35      */
     36     for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
     37         U32 const curr = (U32)(ip - base);
     38         U32 i;
     39         for (i = 0; i < fastHashFillStep; ++i) {
     40             size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
     41             size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
     42             if (i == 0) {
     43                 ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
     44             }
     45             if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
     46                 ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
     47             }
     48             /* Only load extra positions for ZSTD_dtlm_full */
     49             if (dtlm == ZSTD_dtlm_fast)
     50                 break;
     51     }   }
     52 }
     53 
     54 static
     55 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
     56 void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
     57                               void const* end, ZSTD_dictTableLoadMethod_e dtlm)
     58 {
     59     const ZSTD_compressionParameters* const cParams = &ms->cParams;
     60     U32* const hashLarge = ms->hashTable;
     61     U32  const hBitsL = cParams->hashLog;
     62     U32  const mls = cParams->minMatch;
     63     U32* const hashSmall = ms->chainTable;
     64     U32  const hBitsS = cParams->chainLog;
     65     const BYTE* const base = ms->window.base;
     66     const BYTE* ip = base + ms->nextToUpdate;
     67     const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
     68     const U32 fastHashFillStep = 3;
     69 
     70     /* Always insert every fastHashFillStep position into the hash tables.
     71      * Insert the other positions into the large hash table if their entry
     72      * is empty.
     73      */
     74     for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
     75         U32 const curr = (U32)(ip - base);
     76         U32 i;
     77         for (i = 0; i < fastHashFillStep; ++i) {
     78             size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
     79             size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
     80             if (i == 0)
     81                 hashSmall[smHash] = curr + i;
     82             if (i == 0 || hashLarge[lgHash] == 0)
     83                 hashLarge[lgHash] = curr + i;
     84             /* Only load extra positions for ZSTD_dtlm_full */
     85             if (dtlm == ZSTD_dtlm_fast)
     86                 break;
     87         }   }
     88 }
     89 
     90 void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
     91                         const void* const end,
     92                         ZSTD_dictTableLoadMethod_e dtlm,
     93                         ZSTD_tableFillPurpose_e tfp)
     94 {
     95     if (tfp == ZSTD_tfp_forCDict) {
     96         ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
     97     } else {
     98         ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
     99     }
    100 }
    101 
    102 
/* ZSTD_compressBlock_doubleFast_noDict_generic() :
 * Double-fast block compressor over a single contiguous prefix (no dictionary).
 * At each position it probes a short (mls-byte) and a long (8-byte) hash table,
 * preferring repcodes, then long matches, then short matches extended to
 * long ones at ip+1. Found sequences are appended to @seqStore; @rep is
 * updated with the block's final repcodes.
 * @mls is a template parameter : each caller instantiates a specialization
 * with a compile-time-constant minMatch.
 * @return : size of the last literals segment (bytes after the final match). */
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_noDict_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize, U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    const U32 hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    const U32 hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* anchor = istart;
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
    /* presumes that, if there is a dictionary, it must be using Attach mode */
    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
    const BYTE* const prefixLowest = base + prefixLowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=rep[0], offset_2=rep[1];
    /* repcodes that start the block out of window range are parked here
     * and restored into rep[] on exit */
    U32 offsetSaved1 = 0, offsetSaved2 = 0;

    size_t mLength;
    U32 offset;
    U32 curr;

    /* how many positions to search before increasing step size */
    const size_t kStepIncr = 1 << kSearchStrength;
    /* the position at which to increment the step size if no match is found */
    const BYTE* nextStep;
    size_t step; /* the current step size */

    size_t hl0; /* the long hash at ip */
    size_t hl1; /* the long hash at ip1 */

    U32 idxl0; /* the long match index for ip */
    U32 idxl1; /* the long match index for ip1 */

    const BYTE* matchl0; /* the long match for ip */
    const BYTE* matchs0; /* the short match for ip */
    const BYTE* matchl1; /* the long match for ip1 */

    const BYTE* ip = istart; /* the current position */
    const BYTE* ip1; /* the next position */

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");

    /* init */
    /* skip the very first position: index prefixLowestIndex is never a valid match target */
    ip += ((ip - prefixLowest) == 0);
    {
        U32 const current = (U32)(ip - base);
        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
        U32 const maxRep = current - windowLow;
        /* disable (zero) any repcode that would reach below the window */
        if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
        if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
    }

    /* Outer Loop: one iteration per match found and stored */
    while (1) {
        step = 1;
        nextStep = ip + kStepIncr;
        ip1 = ip + step;

        if (ip1 > ilimit) {
            goto _cleanup;
        }

        hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
        idxl0 = hashLong[hl0];
        matchl0 = base + idxl0;

        /* Inner Loop: one iteration per search / position */
        do {
            const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
            const U32 idxs0 = hashSmall[hs0];
            curr = (U32)(ip-base);
            matchs0 = base + idxs0;

            hashLong[hl0] = hashSmall[hs0] = curr;   /* update hash tables */

            /* check noDict repcode */
            if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
                mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
                ip++;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
                goto _match_stored;
            }

            hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);

            if (idxl0 > prefixLowestIndex) {
                /* check prefix long match */
                if (MEM_read64(matchl0) == MEM_read64(ip)) {
                    mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
                    offset = (U32)(ip-matchl0);
                    while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
                    goto _match_found;
                }
            }

            idxl1 = hashLong[hl1];
            matchl1 = base + idxl1;

            if (idxs0 > prefixLowestIndex) {
                /* check prefix short match */
                if (MEM_read32(matchs0) == MEM_read32(ip)) {
                    goto _search_next_long;
                }
            }

            /* no match at ip: widen the stride every kStepIncr positions
             * so incompressible data is skipped faster */
            if (ip1 >= nextStep) {
                PREFETCH_L1(ip1 + 64);
                PREFETCH_L1(ip1 + 128);
                step++;
                nextStep += kStepIncr;
            }
            ip = ip1;
            ip1 += step;

            hl0 = hl1;
            idxl0 = idxl1;
            matchl0 = matchl1;
    #if defined(__aarch64__)
            PREFETCH_L1(ip+256);
    #endif
        } while (ip1 <= ilimit);

_cleanup:
        /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
         * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
        offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;

        /* save reps for next block */
        rep[0] = offset_1 ? offset_1 : offsetSaved1;
        rep[1] = offset_2 ? offset_2 : offsetSaved2;

        /* Return the last literals size */
        return (size_t)(iend - anchor);

_search_next_long:

        /* check prefix long +1 match */
        if (idxl1 > prefixLowestIndex) {
            if (MEM_read64(matchl1) == MEM_read64(ip1)) {
                ip = ip1;
                mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8;
                offset = (U32)(ip-matchl1);
                while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */
                goto _match_found;
            }
        }

        /* if no long +1 match, explore the short match we found */
        mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
        offset = (U32)(ip - matchs0);
        while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */

        /* fall-through */

_match_found: /* requires ip, offset, mLength */
        offset_2 = offset_1;
        offset_1 = offset;

        if (step < 4) {
            /* It is unsafe to write this value back to the hashtable when ip1 is
             * greater than or equal to the new ip we will have after we're done
             * processing this match. Rather than perform that test directly
             * (ip1 >= ip + mLength), which costs speed in practice, we do a simpler
             * more predictable test. The minmatch even if we take a short match is
             * 4 bytes, so as long as step, the distance between ip and ip1
             * (initially) is less than 4, we know ip1 < new ip. */
            hashLong[hl1] = (U32)(ip1 - base);
        }

        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

_match_stored:
        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while ( (ip <= ilimit)
                 && ( (offset_2>0)
                    & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
                /* store sequence */
                size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
                U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
                hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
                hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
                ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
                ip += rLength;
                anchor = ip;
                continue;   /* faster when present ... (?) */
            }
        }
    }
}
    312 
    313 
/* ZSTD_compressBlock_doubleFast_dictMatchState_generic() :
 * Double-fast block compressor with an attached dictionary (dictMatchState).
 * Match candidates come either from the current prefix tables or from the
 * dictionary's own (short-cache-tagged) tables; dictionary indices are
 * remapped into the current index space via dictIndexDelta.
 * @mls is a template parameter : each caller instantiates a specialization
 * with a compile-time-constant minMatch.
 * @return : size of the last literals segment (bytes after the final match). */
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    const U32 hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    const U32 hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
    /* presumes that, if there is a dictionary, it must be using Attach mode */
    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
    const BYTE* const prefixLowest = base + prefixLowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=rep[0], offset_2=rep[1];

    const ZSTD_matchState_t* const dms = ms->dictMatchState;
    const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
    const U32* const dictHashLong  = dms->hashTable;
    const U32* const dictHashSmall = dms->chainTable;
    const U32 dictStartIndex       = dms->window.dictLimit;
    const BYTE* const dictBase     = dms->window.base;
    const BYTE* const dictStart    = dictBase + dictStartIndex;
    const BYTE* const dictEnd      = dms->window.nextSrc;
    /* maps a dictionary index into the current (prefix) index space */
    const U32 dictIndexDelta       = prefixLowestIndex - (U32)(dictEnd - dictBase);
    /* dict tables are short-cache-tagged, hence the widened hash-bit counts */
    const U32 dictHBitsL           = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const U32 dictHBitsS           = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const U32 dictAndPrefixLength  = (U32)((ip - prefixLowest) + (dictEnd - dictStart));

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");

    /* if a dictionary is attached, it must be within window range */
    assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);

    if (ms->prefetchCDictTables) {
        size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
        size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
        PREFETCH_AREA(dictHashLong, hashTableBytes);
        PREFETCH_AREA(dictHashSmall, chainTableBytes);
    }

    /* init */
    /* skip the very first position when there is no preceding content at all */
    ip += (dictAndPrefixLength == 0);

    /* dictMatchState repCode checks don't currently handle repCode == 0
     * disabling. */
    assert(offset_1 <= dictAndPrefixLength);
    assert(offset_2 <= dictAndPrefixLength);

    /* Main Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
        size_t mLength;
        U32 offset;
        size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
        size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
        size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
        size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
        U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
        U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
        /* short-cache tags let us reject most dict probes without touching dictBase */
        int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
        int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
        U32 const curr = (U32)(ip-base);
        U32 const matchIndexL = hashLong[h2];
        U32 matchIndexS = hashSmall[h];
        const BYTE* matchLong = base + matchIndexL;
        const BYTE* match = base + matchIndexS;
        const U32 repIndex = curr + 1 - offset_1;
        const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
                               dictBase + (repIndex - dictIndexDelta) :
                               base + repIndex;
        hashLong[h2] = hashSmall[h] = curr;   /* update hash tables */

        /* check repcode */
        if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
            goto _match_stored;
        }

        if (matchIndexL > prefixLowestIndex) {
            /* check prefix long match */
            if (MEM_read64(matchLong) == MEM_read64(ip)) {
                mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
                offset = (U32)(ip-matchLong);
                while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
                goto _match_found;
            }
        } else if (dictTagsMatchL) {
            /* check dictMatchState long match */
            U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
            const BYTE* dictMatchL = dictBase + dictMatchIndexL;
            assert(dictMatchL < dictEnd);

            if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
                mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
                offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
                while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
                goto _match_found;
        }   }

        if (matchIndexS > prefixLowestIndex) {
            /* check prefix short match */
            if (MEM_read32(match) == MEM_read32(ip)) {
                goto _search_next_long;
            }
        } else if (dictTagsMatchS) {
            /* check dictMatchState short match */
            U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
            match = dictBase + dictMatchIndexS;
            matchIndexS = dictMatchIndexS + dictIndexDelta;

            if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
                goto _search_next_long;
        }   }

        /* no match: advance with a stride that grows with the literal run length */
        ip += ((ip-anchor) >> kSearchStrength) + 1;
#if defined(__aarch64__)
        PREFETCH_L1(ip+256);
#endif
        continue;

_search_next_long:
        {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
            size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
            U32 const matchIndexL3 = hashLong[hl3];
            U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
            int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
            const BYTE* matchL3 = base + matchIndexL3;
            hashLong[hl3] = curr + 1;

            /* check prefix long +1 match */
            if (matchIndexL3 > prefixLowestIndex) {
                if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
                    mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
                    ip++;
                    offset = (U32)(ip-matchL3);
                    while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
                    goto _match_found;
                }
            } else if (dictTagsMatchL3) {
                /* check dict long +1 match */
                U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
                const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
                assert(dictMatchL3 < dictEnd);
                if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
                    mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
                    ip++;
                    offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
                    while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
                    goto _match_found;
        }   }   }

        /* if no long +1 match, explore the short match we found */
        if (matchIndexS < prefixLowestIndex) {
            mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
            offset = (U32)(curr - matchIndexS);
            while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
        } else {
            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
            offset = (U32)(ip - match);
            while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
        }

_match_found:
        offset_2 = offset_1;
        offset_1 = offset;

        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

_match_stored:
        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
                        dictBase + repIndex2 - dictIndexDelta :
                        base + repIndex2;
                if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
            }
        }
    }   /* while (ip < ilimit) */

    /* save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}
    540 
/* ZSTD_GEN_DFAST_FN() :
 * Instantiates a non-inline wrapper named
 * ZSTD_compressBlock_doubleFast_<dictMode>_<mls> that forwards to the
 * corresponding _generic function with `mls` as a compile-time constant,
 * so each minMatch value gets its own specialized code path. */
#define ZSTD_GEN_DFAST_FN(dictMode, mls)                                                                 \
    static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls(                                      \
            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                          \
            void const* src, size_t srcSize)                                                             \
    {                                                                                                    \
        return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
    }

ZSTD_GEN_DFAST_FN(noDict, 4)
ZSTD_GEN_DFAST_FN(noDict, 5)
ZSTD_GEN_DFAST_FN(noDict, 6)
ZSTD_GEN_DFAST_FN(noDict, 7)

ZSTD_GEN_DFAST_FN(dictMatchState, 4)
ZSTD_GEN_DFAST_FN(dictMatchState, 5)
ZSTD_GEN_DFAST_FN(dictMatchState, 6)
ZSTD_GEN_DFAST_FN(dictMatchState, 7)
    558 
    559 
    560 size_t ZSTD_compressBlock_doubleFast(
    561         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
    562         void const* src, size_t srcSize)
    563 {
    564     const U32 mls = ms->cParams.minMatch;
    565     switch(mls)
    566     {
    567     default: /* includes case 3 */
    568     case 4 :
    569         return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
    570     case 5 :
    571         return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
    572     case 6 :
    573         return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
    574     case 7 :
    575         return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
    576     }
    577 }
    578 
    579 
    580 size_t ZSTD_compressBlock_doubleFast_dictMatchState(
    581         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
    582         void const* src, size_t srcSize)
    583 {
    584     const U32 mls = ms->cParams.minMatch;
    585     switch(mls)
    586     {
    587     default: /* includes case 3 */
    588     case 4 :
    589         return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
    590     case 5 :
    591         return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
    592     case 6 :
    593         return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
    594     case 7 :
    595         return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
    596     }
    597 }
    598 
    599 
/* ZSTD_compressBlock_doubleFast_extDict_generic() :
 * Double-fast block compressor for "extDict" mode : the valid match window is
 * split across two non-contiguous segments, an external-dictionary segment
 * (indices in [dictStartIndex, prefixStartIndex), addressed through dictBase)
 * and the current prefix (indices >= prefixStartIndex, addressed through base).
 * Every candidate index must therefore be rebased onto the correct segment
 * before dereferencing, and match lengths are counted with
 * ZSTD_count_2segments() so a match may start in the dictionary and continue
 * into the prefix.
 * @mls is a template parameter : instantiations are generated by
 * ZSTD_GEN_DFAST_FN with a compile-time-constant mls so the hash reads
 * specialize per minMatch.
 * @return : size of the last literals segment (bytes after the final stored
 *           sequence). Also saves the final repcodes into rep[]. */
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_extDict_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    U32  const hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;   /* "small" table keyed on mls bytes */
    U32  const hBitsS = cParams->chainLog;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;             /* start of not-yet-emitted literals */
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;     /* leave room for 8-byte reads at ip */
    const BYTE* const base = ms->window.base;
    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
    const U32   dictStartIndex = lowLimit;
    const U32   dictLimit = ms->window.dictLimit;
    const U32   prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* const dictBase = ms->window.dictBase;
    const BYTE* const dictStart = dictBase + dictStartIndex;
    const BYTE* const dictEnd = dictBase + prefixStartIndex;
    U32 offset_1=rep[0], offset_2=rep[1];

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);

    /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
    if (prefixStartIndex == dictStartIndex)
        return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);

    /* Search Loop */
    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
        /* look up both candidates up-front : short (mls-byte) and long (8-byte) hash */
        const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
        const U32 matchIndex = hashSmall[hSmall];
        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
        const BYTE* match = matchBase + matchIndex;

        const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
        const U32 matchLongIndex = hashLong[hLong];
        const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
        const BYTE* matchLong = matchLongBase + matchLongIndex;

        const U32 curr = (U32)(ip-base);
        const U32 repIndex = curr + 1 - offset_1;   /* offset_1 expected <= curr +1 */
        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
        const BYTE* const repMatch = repBase + repIndex;
        size_t mLength;
        hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */

        /* Priority 1 : repcode at ip+1 (cheapest to encode) */
        if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
            & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
        } else {
            /* Priority 2 : 8-byte match from the long table */
            if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
                const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
                U32 offset;
                mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
                offset = curr - matchLongIndex;
                while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

            /* Priority 3 : mls-byte match from the small table ; before accepting it,
             * probe the long table at ip+1 in hope of upgrading to a longer match */
            } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
                U32 const matchIndex3 = hashLong[h3];
                const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
                const BYTE* match3 = match3Base + matchIndex3;
                U32 offset;
                hashLong[h3] = curr + 1;
                if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
                    /* long match at ip+1 wins over the short match at ip */
                    const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
                    const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
                    mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
                    ip++;
                    offset = curr+1 - matchIndex3;
                    while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
                } else {
                    const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
                    const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
                    mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
                    offset = curr - matchIndex;
                    while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                }
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

            } else {
                /* no match : skip forward, accelerating as the literal run grows */
                ip += ((ip-anchor) >> kSearchStrength) + 1;
                continue;
        }   }

        /* move to next sequence start */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode : greedily emit zero-literal repcode
             * sequences while offset_2 keeps matching */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3)   /* intentional underflow : ensure repIndex2 doesn't overlap dict + prefix */
                    & (offset_2 <= current2 - dictStartIndex))
                  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                    U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
    }   }   }

    /* save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}
    745 
/* Instantiate the mls-specialized extDict variants
 * (ZSTD_compressBlock_doubleFast_extDict_4 .. _7), each forwarding to
 * ZSTD_compressBlock_doubleFast_extDict_generic with a compile-time mls. */
ZSTD_GEN_DFAST_FN(extDict, 4)
ZSTD_GEN_DFAST_FN(extDict, 5)
ZSTD_GEN_DFAST_FN(extDict, 6)
ZSTD_GEN_DFAST_FN(extDict, 7)
    750 
    751 size_t ZSTD_compressBlock_doubleFast_extDict(
    752         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
    753         void const* src, size_t srcSize)
    754 {
    755     U32 const mls = ms->cParams.minMatch;
    756     switch(mls)
    757     {
    758     default: /* includes case 3 */
    759     case 4 :
    760         return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
    761     case 5 :
    762         return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
    763     case 6 :
    764         return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
    765     case 7 :
    766         return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
    767     }
    768 }
    769 
    770 #endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */
    771