1 /* 2 * Copyright (c) Meta Platforms, Inc. and affiliates. 3 * All rights reserved. 4 * 5 * This source code is licensed under both the BSD-style license (found in the 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 * in the COPYING file in the root directory of this source tree). 8 * You may select, at your option, one of the above-listed licenses. 9 */ 10 11 #include "zstd_compress_internal.h" 12 #include "hist.h" 13 #include "zstd_opt.h" 14 15 #if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \ 16 || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \ 17 || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) 18 19 #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ 20 #define ZSTD_MAX_PRICE (1<<30) 21 22 #define ZSTD_PREDEF_THRESHOLD 8 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ 23 24 25 /*-************************************* 26 * Price functions for optimal parser 27 ***************************************/ 28 29 #if 0 /* approximation at bit level (for tests) */ 30 # define BITCOST_ACCURACY 0 31 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) 32 # define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat)) 33 #elif 0 /* fractional bit accuracy (for tests) */ 34 # define BITCOST_ACCURACY 8 35 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) 36 # define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat)) 37 #else /* opt==approx, ultra==accurate */ 38 # define BITCOST_ACCURACY 8 39 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) 40 # define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat)) 41 #endif 42 43 /* ZSTD_bitWeight() : 44 * provide estimated "cost" of a stat in full bits only */ 45 MEM_STATIC U32 ZSTD_bitWeight(U32 stat) 46 { 47 return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER); 48 } 49 50 /* ZSTD_fracWeight() : 51 * provide fractional-bit "cost" of a stat, 52 * using linear interpolation approximation */ 53 MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat) 54 { 55 U32 const stat = rawStat + 1; 56 U32 const hb = ZSTD_highbit32(stat); 57 U32 const BWeight = hb * BITCOST_MULTIPLIER; 58 /* Fweight was meant for "Fractional weight" 59 * but it's effectively a value between 1 and 2 60 * using fixed point arithmetic */ 61 U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb; 62 U32 const weight = BWeight + FWeight; 63 assert(hb + BITCOST_ACCURACY < 31); 64 return weight; 65 } 66 67 #if (DEBUGLEVEL>=2) 68 /* debugging function, 69 * @return price in bytes as fractional value 70 * for debug messages only */ 71 MEM_STATIC double ZSTD_fCost(int price) 72 { 73 return (double)price / (BITCOST_MULTIPLIER*8); 74 } 75 #endif 76 77 static int ZSTD_compressedLiterals(optState_t const* const optPtr) 78 { 79 return optPtr->literalCompressionMode != ZSTD_ps_disable; 80 } 81 82 static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) 83 { 84 if (ZSTD_compressedLiterals(optPtr)) 85 optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel); 86 optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel); 87 optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel); 88 optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel); 89 } 90 91 92 static U32 sum_u32(const unsigned table[], size_t nbElts) 93 { 94 size_t n; 95 U32 total = 0; 96 for (n=0; n<nbElts; n++) { 97 total += table[n]; 98 } 99 return total; 100 } 101 102 typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e; 103 104 static U32 105 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1) 106 { 107 U32 s, sum=0; 108 DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", 109 (unsigned)lastEltIndex+1, (unsigned)shift ); 110 assert(shift < 30); 111 for (s=0; s<lastEltIndex+1; s++) { 112 unsigned const base = base1 ? 1 : (table[s]>0); 113 unsigned const newStat = base + (table[s] >> shift); 114 sum += newStat; 115 table[s] = newStat; 116 } 117 return sum; 118 } 119 120 /* ZSTD_scaleStats() : 121 * reduce all elt frequencies in table if sum too large 122 * return the resulting sum of elements */ 123 static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget) 124 { 125 U32 const prevsum = sum_u32(table, lastEltIndex+1); 126 U32 const factor = prevsum >> logTarget; 127 DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget); 128 assert(logTarget < 30); 129 if (factor <= 1) return prevsum; 130 return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed); 131 } 132 133 /* ZSTD_rescaleFreqs() : 134 * if first block (detected by optPtr->litLengthSum == 0) : init statistics 135 * take hints from dictionary if there is one 136 * and init from zero if there is none, 137 * using src for literals stats, and baseline stats for sequence symbols 138 * otherwise downscale existing stats, to be used as seed for next block. 139 */ 140 static void 141 ZSTD_rescaleFreqs(optState_t* const optPtr, 142 const BYTE* const src, size_t const srcSize, 143 int const optLevel) 144 { 145 int const compressedLiterals = ZSTD_compressedLiterals(optPtr); 146 DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); 147 optPtr->priceType = zop_dynamic; 148 149 if (optPtr->litLengthSum == 0) { /* no literals stats collected -> first block assumed -> init */ 150 151 /* heuristic: use pre-defined stats for too small inputs */ 152 if (srcSize <= ZSTD_PREDEF_THRESHOLD) { 153 DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD); 154 optPtr->priceType = zop_predef; 155 } 156 157 assert(optPtr->symbolCosts != NULL); 158 if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { 159 160 /* huffman stats covering the full value set : table presumed generated by dictionary */ 161 optPtr->priceType = zop_dynamic; 162 163 if (compressedLiterals) { 164 /* generate literals statistics from huffman table */ 165 unsigned lit; 166 assert(optPtr->litFreq != NULL); 167 optPtr->litSum = 0; 168 for (lit=0; lit<=MaxLit; lit++) { 169 U32 const scaleLog = 11; /* scale to 2K */ 170 U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit); 171 assert(bitCost <= scaleLog); 172 optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; 173 optPtr->litSum += optPtr->litFreq[lit]; 174 } } 175 176 { unsigned ll; 177 FSE_CState_t llstate; 178 FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable); 179 optPtr->litLengthSum = 0; 180 for (ll=0; ll<=MaxLL; ll++) { 181 U32 const scaleLog = 10; /* scale to 1K */ 182 U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll); 183 assert(bitCost < scaleLog); 184 optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; 185 optPtr->litLengthSum += optPtr->litLengthFreq[ll]; 186 } } 187 188 { unsigned ml; 189 FSE_CState_t mlstate; 190 FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable); 191 optPtr->matchLengthSum = 0; 192 for (ml=0; ml<=MaxML; ml++) { 193 U32 const scaleLog = 10; 194 U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml); 195 assert(bitCost < scaleLog); 196 optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; 197 optPtr->matchLengthSum += optPtr->matchLengthFreq[ml]; 198 } } 199 200 { unsigned of; 201 FSE_CState_t ofstate; 202 FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable); 203 optPtr->offCodeSum = 0; 204 for (of=0; of<=MaxOff; of++) { 205 U32 const scaleLog = 10; 206 U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of); 207 assert(bitCost < scaleLog); 208 optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; 209 optPtr->offCodeSum += optPtr->offCodeFreq[of]; 210 } } 211 212 } else { /* first block, no dictionary */ 213 214 assert(optPtr->litFreq != NULL); 215 if (compressedLiterals) { 216 /* base initial cost of literals on direct frequency within src */ 217 unsigned lit = MaxLit; 218 HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ 219 optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible); 220 } 221 222 { unsigned const baseLLfreqs[MaxLL+1] = { 223 4, 2, 1, 1, 1, 1, 1, 1, 224 1, 1, 1, 1, 1, 1, 1, 1, 225 1, 1, 1, 1, 1, 1, 1, 1, 226 1, 1, 1, 1, 1, 1, 1, 1, 227 1, 1, 1, 1 228 }; 229 ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs)); 230 optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1); 231 } 232 233 { unsigned ml; 234 for (ml=0; ml<=MaxML; ml++) 235 optPtr->matchLengthFreq[ml] = 1; 236 } 237 optPtr->matchLengthSum = MaxML+1; 238 239 { unsigned const baseOFCfreqs[MaxOff+1] = { 240 6, 2, 1, 1, 2, 3, 4, 4, 241 4, 3, 2, 1, 1, 1, 1, 1, 242 1, 1, 1, 1, 1, 1, 1, 1, 243 1, 1, 1, 1, 1, 1, 1, 1 244 }; 245 ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs)); 246 optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1); 247 } 248 249 } 250 251 } else { /* new block : scale down accumulated statistics */ 252 253 if (compressedLiterals) 254 optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12); 255 optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11); 256 optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11); 257 optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11); 258 } 259 260 ZSTD_setBasePrices(optPtr, optLevel); 261 } 262 263 /* ZSTD_rawLiteralsCost() : 264 * price of literals (only) in specified segment (which length can be 0). 265 * does not include price of literalLength symbol */ 266 static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, 267 const optState_t* const optPtr, 268 int optLevel) 269 { 270 DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength); 271 if (litLength == 0) return 0; 272 273 if (!ZSTD_compressedLiterals(optPtr)) 274 return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bytes per literal. */ 275 276 if (optPtr->priceType == zop_predef) 277 return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */ 278 279 /* dynamic statistics */ 280 { U32 price = optPtr->litSumBasePrice * litLength; 281 U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER; 282 U32 u; 283 assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER); 284 for (u=0; u < litLength; u++) { 285 U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel); 286 if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax; 287 price -= litPrice; 288 } 289 return price; 290 } 291 } 292 293 /* ZSTD_litLengthPrice() : 294 * cost of literalLength symbol */ 295 static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel) 296 { 297 assert(litLength <= ZSTD_BLOCKSIZE_MAX); 298 if (optPtr->priceType == zop_predef) 299 return WEIGHT(litLength, optLevel); 300 301 /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX 302 * because it isn't representable in the zstd format. 303 * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. 304 * In such a case, the block would be all literals. 305 */ 306 if (litLength == ZSTD_BLOCKSIZE_MAX) 307 return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel); 308 309 /* dynamic statistics */ 310 { U32 const llCode = ZSTD_LLcode(litLength); 311 return (LL_bits[llCode] * BITCOST_MULTIPLIER) 312 + optPtr->litLengthSumBasePrice 313 - WEIGHT(optPtr->litLengthFreq[llCode], optLevel); 314 } 315 } 316 317 /* ZSTD_getMatchPrice() : 318 * Provides the cost of the match part (offset + matchLength) of a sequence. 319 * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. 320 * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq() 321 * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) 322 */ 323 FORCE_INLINE_TEMPLATE U32 324 ZSTD_getMatchPrice(U32 const offBase, 325 U32 const matchLength, 326 const optState_t* const optPtr, 327 int const optLevel) 328 { 329 U32 price; 330 U32 const offCode = ZSTD_highbit32(offBase); 331 U32 const mlBase = matchLength - MINMATCH; 332 assert(matchLength >= MINMATCH); 333 334 if (optPtr->priceType == zop_predef) /* fixed scheme, does not use statistics */ 335 return WEIGHT(mlBase, optLevel) 336 + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */ 337 338 /* dynamic statistics */ 339 price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel)); 340 if ((optLevel<2) /*static*/ && offCode >= 20) 341 price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */ 342 343 /* match Length */ 344 { U32 const mlCode = ZSTD_MLcode(mlBase); 345 price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel)); 346 } 347 348 price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */ 349 350 DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price); 351 return price; 352 } 353 354 /* ZSTD_updateStats() : 355 * assumption : literals + litLength <= iend */ 356 static void ZSTD_updateStats(optState_t* const optPtr, 357 U32 litLength, const BYTE* literals, 358 U32 offBase, U32 matchLength) 359 { 360 /* literals */ 361 if (ZSTD_compressedLiterals(optPtr)) { 362 U32 u; 363 for (u=0; u < litLength; u++) 364 optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; 365 optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; 366 } 367 368 /* literal Length */ 369 { U32 const llCode = ZSTD_LLcode(litLength); 370 optPtr->litLengthFreq[llCode]++; 371 optPtr->litLengthSum++; 372 } 373 374 /* offset code : follows storeSeq() numeric representation */ 375 { U32 const offCode = ZSTD_highbit32(offBase); 376 assert(offCode <= MaxOff); 377 optPtr->offCodeFreq[offCode]++; 378 optPtr->offCodeSum++; 379 } 380 381 /* match Length */ 382 { U32 const mlBase = matchLength - MINMATCH; 383 U32 const mlCode = ZSTD_MLcode(mlBase); 384 optPtr->matchLengthFreq[mlCode]++; 385 optPtr->matchLengthSum++; 386 } 387 } 388 389 390 /* ZSTD_readMINMATCH() : 391 * function safe only for comparisons 392 * assumption : memPtr must be at least 4 bytes before end of buffer */ 393 MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) 394 { 395 switch (length) 396 { 397 default : 398 case 4 : return MEM_read32(memPtr); 399 case 3 : if (MEM_isLittleEndian()) 400 return MEM_read32(memPtr)<<8; 401 else 402 return MEM_read32(memPtr)>>8; 403 } 404 } 405 406 407 /* Update hashTable3 up to ip (excluded) 408 Assumption : always within prefix (i.e. not within extDict) */ 409 static 410 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 411 U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_MatchState_t* ms, 412 U32* nextToUpdate3, 413 const BYTE* const ip) 414 { 415 U32* const hashTable3 = ms->hashTable3; 416 U32 const hashLog3 = ms->hashLog3; 417 const BYTE* const base = ms->window.base; 418 U32 idx = *nextToUpdate3; 419 U32 const target = (U32)(ip - base); 420 size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3); 421 assert(hashLog3 > 0); 422 423 while(idx < target) { 424 hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; 425 idx++; 426 } 427 428 *nextToUpdate3 = target; 429 return hashTable3[hash3]; 430 } 431 432 433 /*-************************************* 434 * Binary Tree search 435 ***************************************/ 436 /** ZSTD_insertBt1() : add one or multiple positions to tree. 437 * @param ip assumed <= iend-8 . 438 * @param target The target of ZSTD_updateTree_internal() - we are filling to this position 439 * @return : nb of positions added */ 440 static 441 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 442 U32 ZSTD_insertBt1( 443 const ZSTD_MatchState_t* ms, 444 const BYTE* const ip, const BYTE* const iend, 445 U32 const target, 446 U32 const mls, const int extDict) 447 { 448 const ZSTD_compressionParameters* const cParams = &ms->cParams; 449 U32* const hashTable = ms->hashTable; 450 U32 const hashLog = cParams->hashLog; 451 size_t const h = ZSTD_hashPtr(ip, hashLog, mls); 452 U32* const bt = ms->chainTable; 453 U32 const btLog = cParams->chainLog - 1; 454 U32 const btMask = (1 << btLog) - 1; 455 U32 matchIndex = hashTable[h]; 456 size_t commonLengthSmaller=0, commonLengthLarger=0; 457 const BYTE* const base = ms->window.base; 458 const BYTE* const dictBase = ms->window.dictBase; 459 const U32 dictLimit = ms->window.dictLimit; 460 const BYTE* const dictEnd = dictBase + dictLimit; 461 const BYTE* const prefixStart = base + dictLimit; 462 const BYTE* match; 463 const U32 curr = (U32)(ip-base); 464 const U32 btLow = btMask >= curr ? 0 : curr - btMask; 465 U32* smallerPtr = bt + 2*(curr&btMask); 466 U32* largerPtr = smallerPtr + 1; 467 U32 dummy32; /* to be nullified at the end */ 468 /* windowLow is based on target because 469 * we only need positions that will be in the window at the end of the tree update. 470 */ 471 U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog); 472 U32 matchEndIdx = curr+8+1; 473 size_t bestLength = 8; 474 U32 nbCompares = 1U << cParams->searchLog; 475 #ifdef ZSTD_C_PREDICT 476 U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0); 477 U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1); 478 predictedSmall += (predictedSmall>0); 479 predictedLarge += (predictedLarge>0); 480 #endif /* ZSTD_C_PREDICT */ 481 482 DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr); 483 484 assert(curr <= target); 485 assert(ip <= iend-8); /* required for h calculation */ 486 hashTable[h] = curr; /* Update Hash Table */ 487 488 assert(windowLow > 0); 489 for (; nbCompares && (matchIndex >= windowLow); --nbCompares) { 490 U32* const nextPtr = bt + 2*(matchIndex & btMask); 491 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ 492 assert(matchIndex < curr); 493 494 #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ 495 const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ 496 if (matchIndex == predictedSmall) { 497 /* no need to check length, result known */ 498 *smallerPtr = matchIndex; 499 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ 500 smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ 501 matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ 502 predictedSmall = predictPtr[1] + (predictPtr[1]>0); 503 continue; 504 } 505 if (matchIndex == predictedLarge) { 506 *largerPtr = matchIndex; 507 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ 508 largerPtr = nextPtr; 509 matchIndex = nextPtr[0]; 510 predictedLarge = predictPtr[0] + (predictPtr[0]>0); 511 continue; 512 } 513 #endif 514 515 if (!extDict || (matchIndex+matchLength >= dictLimit)) { 516 assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */ 517 match = base + matchIndex; 518 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); 519 } else { 520 match = dictBase + matchIndex; 521 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); 522 if (matchIndex+matchLength >= dictLimit) 523 match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ 524 } 525 526 if (matchLength > bestLength) { 527 bestLength = matchLength; 528 if (matchLength > matchEndIdx - matchIndex) 529 matchEndIdx = matchIndex + (U32)matchLength; 530 } 531 532 if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ 533 break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ 534 } 535 536 if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ 537 /* match is smaller than current */ 538 *smallerPtr = matchIndex; /* update smaller idx */ 539 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ 540 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ 541 smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ 542 matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ 543 } else { 544 /* match is larger than current */ 545 *largerPtr = matchIndex; 546 commonLengthLarger = matchLength; 547 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ 548 largerPtr = nextPtr; 549 matchIndex = nextPtr[0]; 550 } } 551 552 *smallerPtr = *largerPtr = 0; 553 { U32 positions = 0; 554 if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */ 555 assert(matchEndIdx > curr + 8); 556 return MAX(positions, matchEndIdx - (curr + 8)); 557 } 558 } 559 560 FORCE_INLINE_TEMPLATE 561 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 562 void ZSTD_updateTree_internal( 563 ZSTD_MatchState_t* ms, 564 const BYTE* const ip, const BYTE* const iend, 565 const U32 mls, const ZSTD_dictMode_e dictMode) 566 { 567 const BYTE* const base = ms->window.base; 568 U32 const target = (U32)(ip - base); 569 U32 idx = ms->nextToUpdate; 570 DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", 571 idx, target, dictMode); 572 573 while(idx < target) { 574 U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict); 575 assert(idx < (U32)(idx + forward)); 576 idx += forward; 577 } 578 assert((size_t)(ip - base) <= (size_t)(U32)(-1)); 579 assert((size_t)(iend - base) <= (size_t)(U32)(-1)); 580 ms->nextToUpdate = target; 581 } 582 583 void ZSTD_updateTree(ZSTD_MatchState_t* ms, const BYTE* ip, const BYTE* iend) { 584 ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict); 585 } 586 587 FORCE_INLINE_TEMPLATE 588 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 589 U32 590 ZSTD_insertBtAndGetAllMatches ( 591 ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */ 592 ZSTD_MatchState_t* ms, 593 U32* nextToUpdate3, 594 const BYTE* const ip, const BYTE* const iLimit, 595 const ZSTD_dictMode_e dictMode, 596 const U32 rep[ZSTD_REP_NUM], 597 const U32 ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ 598 const U32 lengthToBeat, 599 const U32 mls /* template */) 600 { 601 const ZSTD_compressionParameters* const cParams = &ms->cParams; 602 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); 603 const BYTE* const base = ms->window.base; 604 U32 const curr = (U32)(ip-base); 605 U32 const hashLog = cParams->hashLog; 606 U32 const minMatch = (mls==3) ? 3 : 4; 607 U32* const hashTable = ms->hashTable; 608 size_t const h = ZSTD_hashPtr(ip, hashLog, mls); 609 U32 matchIndex = hashTable[h]; 610 U32* const bt = ms->chainTable; 611 U32 const btLog = cParams->chainLog - 1; 612 U32 const btMask= (1U << btLog) - 1; 613 size_t commonLengthSmaller=0, commonLengthLarger=0; 614 const BYTE* const dictBase = ms->window.dictBase; 615 U32 const dictLimit = ms->window.dictLimit; 616 const BYTE* const dictEnd = dictBase + dictLimit; 617 const BYTE* const prefixStart = base + dictLimit; 618 U32 const btLow = (btMask >= curr) ? 0 : curr - btMask; 619 U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog); 620 U32 const matchLow = windowLow ? windowLow : 1; 621 U32* smallerPtr = bt + 2*(curr&btMask); 622 U32* largerPtr = bt + 2*(curr&btMask) + 1; 623 U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */ 624 U32 dummy32; /* to be nullified at the end */ 625 U32 mnum = 0; 626 U32 nbCompares = 1U << cParams->searchLog; 627 628 const ZSTD_MatchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL; 629 const ZSTD_compressionParameters* const dmsCParams = 630 dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL; 631 const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL; 632 const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL; 633 U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0; 634 U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0; 635 U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0; 636 U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog; 637 U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog; 638 U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0; 639 U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit; 640 641 size_t bestLength = lengthToBeat-1; 642 DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr); 643 644 /* check repCode */ 645 assert(ll0 <= 1); /* necessarily 1 or 0 */ 646 { U32 const lastR = ZSTD_REP_NUM + ll0; 647 U32 repCode; 648 for (repCode = ll0; repCode < lastR; repCode++) { 649 U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; 650 U32 const repIndex = curr - repOffset; 651 U32 repLen = 0; 652 assert(curr >= dictLimit); 653 if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */ 654 /* We must validate the repcode offset because when we're using a dictionary the 655 * valid offset range shrinks when the dictionary goes out of bounds. 656 */ 657 if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) { 658 repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch; 659 } 660 } else { /* repIndex < dictLimit || repIndex >= curr */ 661 const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ? 662 dmsBase + repIndex - dmsIndexDelta : 663 dictBase + repIndex; 664 assert(curr >= windowLow); 665 if ( dictMode == ZSTD_extDict 666 && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */ 667 & (ZSTD_index_overlap_check(dictLimit, repIndex)) ) 668 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { 669 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch; 670 } 671 if (dictMode == ZSTD_dictMatchState 672 && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */ 673 & (ZSTD_index_overlap_check(dictLimit, repIndex)) ) 674 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { 675 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch; 676 } } 677 /* save longer solution */ 678 if (repLen > bestLength) { 679 DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u", 680 repCode, ll0, repOffset, repLen); 681 bestLength = repLen; 682 matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1); /* expect value between 1 and 3 */ 683 matches[mnum].len = (U32)repLen; 684 mnum++; 685 if ( (repLen > sufficient_len) 686 | (ip+repLen == iLimit) ) { /* best possible */ 687 return mnum; 688 } } } } 689 690 /* HC3 match finder */ 691 if ((mls == 3) /*static*/ && (bestLength < mls)) { 692 U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip); 693 if ((matchIndex3 >= matchLow) 694 & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { 695 size_t mlen; 696 if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) { 697 const BYTE* const match = base + matchIndex3; 698 mlen = ZSTD_count(ip, match, iLimit); 699 } else { 700 const BYTE* const match = dictBase + matchIndex3; 701 mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart); 702 } 703 704 /* save best solution */ 705 if (mlen >= mls /* == 3 > bestLength */) { 706 DEBUGLOG(8, "found small match with hlog3, of length %u", 707 (U32)mlen); 708 bestLength = mlen; 709 assert(curr > matchIndex3); 710 assert(mnum==0); /* no prior solution */ 711 matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3); 712 matches[0].len = (U32)mlen; 713 mnum = 1; 714 if ( (mlen > sufficient_len) | 715 (ip+mlen == iLimit) ) { /* best possible length */ 716 ms->nextToUpdate = curr+1; /* skip insertion */ 717 return 1; 718 } } } 719 /* no dictMatchState lookup: dicts don't have a populated HC3 table */ 720 } /* if (mls == 3) */ 721 722 hashTable[h] = curr; /* Update Hash Table */ 723 724 for (; nbCompares && (matchIndex >= matchLow); --nbCompares) { 725 U32* const nextPtr = bt + 2*(matchIndex & btMask); 726 const BYTE* match; 727 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ 728 assert(curr > matchIndex); 729 730 if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) { 731 assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */ 732 match = base + matchIndex; 733 if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ 734 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit); 735 } else { 736 match = dictBase + matchIndex; 737 assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ 738 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); 739 if (matchIndex+matchLength >= dictLimit) 740 match = base + matchIndex; /* prepare for match[matchLength] read */ 741 } 742 743 if (matchLength > bestLength) { 744 DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)", 745 (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex)); 746 assert(matchEndIdx > matchIndex); 747 if (matchLength > matchEndIdx - matchIndex) 748 matchEndIdx = matchIndex + (U32)matchLength; 749 bestLength = matchLength; 750 matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex); 751 matches[mnum].len = (U32)matchLength; 752 mnum++; 753 if ( (matchLength > ZSTD_OPT_NUM) 754 | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { 755 if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */ 756 break; /* drop, to preserve bt consistency (miss a little bit of compression) */ 757 } } 758 759 if (match[matchLength] < ip[matchLength]) { 760 /* match smaller than current */ 761 *smallerPtr = matchIndex; /* update smaller idx */ 762 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ 763 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ 764 smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */ 765 matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */ 766 } else { 767 *largerPtr = matchIndex; 768 commonLengthLarger = matchLength; 769 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ 770 largerPtr = nextPtr; 771 matchIndex = nextPtr[0]; 772 } } 773 774 *smallerPtr = *largerPtr = 0; 775 776 assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */ 777 if (dictMode == ZSTD_dictMatchState && nbCompares) { 778 size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls); 779 U32 dictMatchIndex = dms->hashTable[dmsH]; 780 const U32* const dmsBt = dms->chainTable; 781 commonLengthSmaller = commonLengthLarger = 0; 782 for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) { 783 const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask); 784 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ 785 const BYTE* match = dmsBase + dictMatchIndex; 786 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart); 787 if (dictMatchIndex+matchLength >= dmsHighLimit) 788 match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */ 789 790 if (matchLength > bestLength) { 791 matchIndex = dictMatchIndex + dmsIndexDelta; 792 DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)", 793 (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex)); 794 if (matchLength > matchEndIdx - matchIndex) 795 matchEndIdx = matchIndex + (U32)matchLength; 796 bestLength = matchLength; 797 matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex); 798 matches[mnum].len = (U32)matchLength; 799 mnum++; 800 if ( (matchLength > ZSTD_OPT_NUM) 801 | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { 802 break; /* drop, to guarantee consistency (miss a little bit of compression) */ 803 } } 804 805 if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */ 806 if (match[matchLength] < ip[matchLength]) { 807 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ 808 dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ 809 } else { 810 /* match is larger than current */ 811 commonLengthLarger = matchLength; 812 dictMatchIndex = nextPtr[0]; 813 } } } /* if (dictMode == ZSTD_dictMatchState) */ 814 815 assert(matchEndIdx > curr+8); 816 ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ 817 return mnum; 818 } 819 820 typedef U32 (*ZSTD_getAllMatchesFn)( 821 ZSTD_match_t*, 822 ZSTD_MatchState_t*, 823 U32*, 824 const BYTE*, 825 const BYTE*, 826 const U32 rep[ZSTD_REP_NUM], 827 U32 const ll0, 828 U32 const lengthToBeat); 829 830 FORCE_INLINE_TEMPLATE 831 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 832 U32 ZSTD_btGetAllMatches_internal( 833 ZSTD_match_t* matches, 834 ZSTD_MatchState_t* ms, 835 U32* nextToUpdate3, 836 const BYTE* ip, 837 const BYTE* const iHighLimit, 838 const U32 rep[ZSTD_REP_NUM], 839 U32 const ll0, 840 U32 const lengthToBeat, 841 const ZSTD_dictMode_e dictMode, 842 const U32 mls) 843 { 844 assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls); 845 DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls); 846 if (ip < ms->window.base + ms->nextToUpdate) 847 return 0; /* skipped area */ 848 ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode); 849 return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls); 850 } 851 852 #define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls 853 854 #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \ 855 static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \ 856 ZSTD_match_t* matches, \ 857 ZSTD_MatchState_t* ms, \ 858 U32* nextToUpdate3, \ 859 const BYTE* ip, \ 860 const BYTE* const iHighLimit, \ 861 const U32 rep[ZSTD_REP_NUM], \ 862 U32 const ll0, \ 863 U32 const lengthToBeat) \ 864 { \ 865 return ZSTD_btGetAllMatches_internal( \ 866 matches, ms, nextToUpdate3, ip, iHighLimit, \ 867 rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \ 868 } 869 870 #define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \ 871 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \ 872 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \ 873 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \ 874 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6) 875 876 GEN_ZSTD_BT_GET_ALL_MATCHES(noDict) 877 GEN_ZSTD_BT_GET_ALL_MATCHES(extDict) 878 GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState) 879 880 #define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \ 881 { \ 882 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \ 883 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \ 884 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \ 885 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6) \ 886 } 887 888 static ZSTD_getAllMatchesFn 889 ZSTD_selectBtGetAllMatches(ZSTD_MatchState_t const* ms, ZSTD_dictMode_e const dictMode) 890 { 891 ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = { 892 ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict), 893 ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict), 894 ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState) 895 }; 896 U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6); 897 assert((U32)dictMode < 3); 898 assert(mls - 3 < 4); 899 return getAllMatchesFns[(int)dictMode][mls - 3]; 900 } 901 902 /************************* 903 * LDM helper functions * 904 *************************/ 905 906 /* Struct containing info needed to make decision about ldm inclusion */ 907 typedef struct { 908 RawSeqStore_t seqStore; /* External match candidates store for this block */ 909 U32 startPosInBlock; /* Start position of the current match candidate */ 910 U32 endPosInBlock; /* End position of the current match candidate */ 911 U32 offset; /* Offset of the match candidate */ 912 } ZSTD_optLdm_t; 913 914 /* ZSTD_optLdm_skipRawSeqStoreBytes(): 915 * Moves forward in @rawSeqStore by @nbBytes, 916 * which will update the fields 'pos' and 'posInSequence'. 917 */ 918 static void ZSTD_optLdm_skipRawSeqStoreBytes(RawSeqStore_t* rawSeqStore, size_t nbBytes) 919 { 920 U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes); 921 while (currPos && rawSeqStore->pos < rawSeqStore->size) { 922 rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos]; 923 if (currPos >= currSeq.litLength + currSeq.matchLength) { 924 currPos -= currSeq.litLength + currSeq.matchLength; 925 rawSeqStore->pos++; 926 } else { 927 rawSeqStore->posInSequence = currPos; 928 break; 929 } 930 } 931 if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) { 932 rawSeqStore->posInSequence = 0; 933 } 934 } 935 936 /* ZSTD_opt_getNextMatchAndUpdateSeqStore(): 937 * Calculates the beginning and end of the next match in the current block. 938 * Updates 'pos' and 'posInSequence' of the ldmSeqStore. 939 */ 940 static void 941 ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock, 942 U32 blockBytesRemaining) 943 { 944 rawSeq currSeq; 945 U32 currBlockEndPos; 946 U32 literalsBytesRemaining; 947 U32 matchBytesRemaining; 948 949 /* Setting match end position to MAX to ensure we never use an LDM during this block */ 950 if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) { 951 optLdm->startPosInBlock = UINT_MAX; 952 optLdm->endPosInBlock = UINT_MAX; 953 return; 954 } 955 /* Calculate appropriate bytes left in matchLength and litLength 956 * after adjusting based on ldmSeqStore->posInSequence */ 957 currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos]; 958 assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength); 959 currBlockEndPos = currPosInBlock + blockBytesRemaining; 960 literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ? 961 currSeq.litLength - (U32)optLdm->seqStore.posInSequence : 962 0; 963 matchBytesRemaining = (literalsBytesRemaining == 0) ? 964 currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) : 965 currSeq.matchLength; 966 967 /* If there are more literal bytes than bytes remaining in block, no ldm is possible */ 968 if (literalsBytesRemaining >= blockBytesRemaining) { 969 optLdm->startPosInBlock = UINT_MAX; 970 optLdm->endPosInBlock = UINT_MAX; 971 ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining); 972 return; 973 } 974 975 /* Matches may be < minMatch by this process. In that case, we will reject them 976 when we are deciding whether or not to add the ldm */ 977 optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining; 978 optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining; 979 optLdm->offset = currSeq.offset; 980 981 if (optLdm->endPosInBlock > currBlockEndPos) { 982 /* Match ends after the block ends, we can't use the whole match */ 983 optLdm->endPosInBlock = currBlockEndPos; 984 ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock); 985 } else { 986 /* Consume nb of bytes equal to size of sequence left */ 987 ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining); 988 } 989 } 990 991 /* ZSTD_optLdm_maybeAddMatch(): 992 * Adds a match if it's long enough, 993 * based on it's 'matchStartPosInBlock' and 'matchEndPosInBlock', 994 * into 'matches'. Maintains the correct ordering of 'matches'. 995 */ 996 static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches, 997 const ZSTD_optLdm_t* optLdm, U32 currPosInBlock, 998 U32 minMatch) 999 { 1000 U32 const posDiff = currPosInBlock - optLdm->startPosInBlock; 1001 /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */ 1002 U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff; 1003 1004 /* Ensure that current block position is not outside of the match */ 1005 if (currPosInBlock < optLdm->startPosInBlock 1006 || currPosInBlock >= optLdm->endPosInBlock 1007 || candidateMatchLength < minMatch) { 1008 return; 1009 } 1010 1011 if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) { 1012 U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset); 1013 DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u", 1014 candidateOffBase, candidateMatchLength, currPosInBlock); 1015 matches[*nbMatches].len = candidateMatchLength; 1016 matches[*nbMatches].off = candidateOffBase; 1017 (*nbMatches)++; 1018 } 1019 } 1020 1021 /* ZSTD_optLdm_processMatchCandidate(): 1022 * Wrapper function to update ldm seq store and call ldm functions as necessary. 1023 */ 1024 static void 1025 ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, 1026 ZSTD_match_t* matches, U32* nbMatches, 1027 U32 currPosInBlock, U32 remainingBytes, 1028 U32 minMatch) 1029 { 1030 if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) { 1031 return; 1032 } 1033 1034 if (currPosInBlock >= optLdm->endPosInBlock) { 1035 if (currPosInBlock > optLdm->endPosInBlock) { 1036 /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily 1037 * at the end of a match from the ldm seq store, and will often be some bytes 1038 * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots" 1039 */ 1040 U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock; 1041 ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot); 1042 } 1043 ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes); 1044 } 1045 ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock, minMatch); 1046 } 1047 1048 1049 /*-******************************* 1050 * Optimal parser 1051 *********************************/ 1052 1053 #if 0 /* debug */ 1054 1055 static void 1056 listStats(const U32* table, int lastEltID) 1057 { 1058 int const nbElts = lastEltID + 1; 1059 int enb; 1060 for (enb=0; enb < nbElts; enb++) { 1061 (void)table; 1062 /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */ 1063 RAWLOG(2, "%4i,", table[enb]); 1064 } 1065 RAWLOG(2, " \n"); 1066 } 1067 1068 #endif 1069 1070 #define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel) 1071 #define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel) 1072 #define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1)) 1073 1074 FORCE_INLINE_TEMPLATE 1075 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 1076 size_t 1077 ZSTD_compressBlock_opt_generic(ZSTD_MatchState_t* ms, 1078 SeqStore_t* seqStore, 1079 U32 rep[ZSTD_REP_NUM], 1080 const void* src, size_t srcSize, 1081 const int optLevel, 1082 const ZSTD_dictMode_e dictMode) 1083 { 1084 optState_t* const optStatePtr = &ms->opt; 1085 const BYTE* const istart = (const BYTE*)src; 1086 const BYTE* ip = istart; 1087 const BYTE* anchor = istart; 1088 const BYTE* const iend = istart + srcSize; 1089 const BYTE* const ilimit = iend - 8; 1090 const BYTE* const base = ms->window.base; 1091 const BYTE* const prefixStart = base + ms->window.dictLimit; 1092 const ZSTD_compressionParameters* const cParams = &ms->cParams; 1093 1094 ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode); 1095 1096 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); 1097 U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4; 1098 U32 nextToUpdate3 = ms->nextToUpdate; 1099 1100 ZSTD_optimal_t* const opt = optStatePtr->priceTable; 1101 ZSTD_match_t* const matches = optStatePtr->matchTable; 1102 ZSTD_optimal_t lastStretch; 1103 ZSTD_optLdm_t optLdm; 1104 1105 ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t)); 1106 1107 optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore; 1108 optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0; 1109 ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip)); 1110 1111 /* init */ 1112 DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u", 1113 (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate); 1114 assert(optLevel <= 2); 1115 ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel); 1116 ip += (ip==prefixStart); 1117 1118 /* Match Loop */ 1119 while (ip < ilimit) { 1120 U32 cur, last_pos = 0; 1121 1122 /* find first match */ 1123 { U32 const litlen = (U32)(ip - anchor); 1124 U32 const ll0 = !litlen; 1125 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch); 1126 ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, 1127 (U32)(ip-istart), (U32)(iend-ip), 1128 minMatch); 1129 if (!nbMatches) { 1130 DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart)); 1131 ip++; 1132 continue; 1133 } 1134 1135 /* Match found: let's store this solution, and eventually find more candidates. 1136 * During this forward pass, @opt is used to store stretches, 1137 * defined as "a match followed by N literals". 1138 * Note how this is different from a Sequence, which is "N literals followed by a match". 1139 * Storing stretches allows us to store different match predecessors 1140 * for each literal position part of a literals run. */ 1141 1142 /* initialize opt[0] */ 1143 opt[0].mlen = 0; /* there are only literals so far */ 1144 opt[0].litlen = litlen; 1145 /* No need to include the actual price of the literals before the first match 1146 * because it is static for the duration of the forward pass, and is included 1147 * in every subsequent price. But, we include the literal length because 1148 * the cost variation of litlen depends on the value of litlen. 1149 */ 1150 opt[0].price = LL_PRICE(litlen); 1151 ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0])); 1152 ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep)); 1153 1154 /* large match -> immediate encoding */ 1155 { U32 const maxML = matches[nbMatches-1].len; 1156 U32 const maxOffBase = matches[nbMatches-1].off; 1157 DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series", 1158 nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart)); 1159 1160 if (maxML > sufficient_len) { 1161 lastStretch.litlen = 0; 1162 lastStretch.mlen = maxML; 1163 lastStretch.off = maxOffBase; 1164 DEBUGLOG(6, "large match (%u>%u) => immediate encoding", 1165 maxML, sufficient_len); 1166 cur = 0; 1167 last_pos = maxML; 1168 goto _shortestPath; 1169 } } 1170 1171 /* set prices for first matches starting position == 0 */ 1172 assert(opt[0].price >= 0); 1173 { U32 pos; 1174 U32 matchNb; 1175 for (pos = 1; pos < minMatch; pos++) { 1176 opt[pos].price = ZSTD_MAX_PRICE; 1177 opt[pos].mlen = 0; 1178 opt[pos].litlen = litlen + pos; 1179 } 1180 for (matchNb = 0; matchNb < nbMatches; matchNb++) { 1181 U32 const offBase = matches[matchNb].off; 1182 U32 const end = matches[matchNb].len; 1183 for ( ; pos <= end ; pos++ ) { 1184 int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel); 1185 int const sequencePrice = opt[0].price + matchPrice; 1186 DEBUGLOG(7, "rPos:%u => set initial price : %.2f", 1187 pos, ZSTD_fCost(sequencePrice)); 1188 opt[pos].mlen = pos; 1189 opt[pos].off = offBase; 1190 opt[pos].litlen = 0; /* end of match */ 1191 opt[pos].price = sequencePrice + LL_PRICE(0); 1192 } 1193 } 1194 last_pos = pos-1; 1195 opt[pos].price = ZSTD_MAX_PRICE; 1196 } 1197 } 1198 1199 /* check further positions */ 1200 for (cur = 1; cur <= last_pos; cur++) { 1201 const BYTE* const inr = ip + cur; 1202 assert(cur <= ZSTD_OPT_NUM); 1203 DEBUGLOG(7, "cPos:%i==rPos:%u", (int)(inr-istart), cur); 1204 1205 /* Fix current position with one literal if cheaper */ 1206 { U32 const litlen = opt[cur-1].litlen + 1; 1207 int const price = opt[cur-1].price 1208 + LIT_PRICE(ip+cur-1) 1209 + LL_INCPRICE(litlen); 1210 assert(price < 1000000000); /* overflow check */ 1211 if (price <= opt[cur].price) { 1212 ZSTD_optimal_t const prevMatch = opt[cur]; 1213 DEBUGLOG(7, "cPos:%i==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", 1214 (int)(inr-istart), cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, 1215 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]); 1216 opt[cur] = opt[cur-1]; 1217 opt[cur].litlen = litlen; 1218 opt[cur].price = price; 1219 if ( (optLevel >= 1) /* additional check only for higher modes */ 1220 && (prevMatch.litlen == 0) /* replace a match */ 1221 && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */ 1222 && LIKELY(ip + cur < iend) 1223 ) { 1224 /* check next position, in case it would be cheaper */ 1225 int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1); 1226 int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1); 1227 DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f", 1228 cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals)); 1229 if ( (with1literal < withMoreLiterals) 1230 && (with1literal < opt[cur+1].price) ) { 1231 /* update offset history - before it disappears */ 1232 U32 const prev = cur - prevMatch.mlen; 1233 Repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0); 1234 assert(cur >= prevMatch.mlen); 1235 DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !", 1236 ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals), 1237 newReps.rep[0], newReps.rep[1], newReps.rep[2] ); 1238 opt[cur+1] = prevMatch; /* mlen & offbase */ 1239 ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(Repcodes_t)); 1240 opt[cur+1].litlen = 1; 1241 opt[cur+1].price = with1literal; 1242 if (last_pos < cur+1) last_pos = cur+1; 1243 } 1244 } 1245 } else { 1246 DEBUGLOG(7, "cPos:%i==rPos:%u : literal would cost more (%.2f>%.2f)", 1247 (int)(inr-istart), cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price)); 1248 } 1249 } 1250 1251 /* Offset history is not updated during match comparison. 1252 * Do it here, now that the match is selected and confirmed. 1253 */ 1254 ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(Repcodes_t)); 1255 assert(cur >= opt[cur].mlen); 1256 if (opt[cur].litlen == 0) { 1257 /* just finished a match => alter offset history */ 1258 U32 const prev = cur - opt[cur].mlen; 1259 Repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0); 1260 ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(Repcodes_t)); 1261 } 1262 1263 /* last match must start at a minimum distance of 8 from oend */ 1264 if (inr > ilimit) continue; 1265 1266 if (cur == last_pos) break; 1267 1268 if ( (optLevel==0) /*static_test*/ 1269 && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) { 1270 DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1); 1271 continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ 1272 } 1273 1274 assert(opt[cur].price >= 0); 1275 { U32 const ll0 = (opt[cur].litlen == 0); 1276 int const previousPrice = opt[cur].price; 1277 int const basePrice = previousPrice + LL_PRICE(0); 1278 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch); 1279 U32 matchNb; 1280 1281 ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, 1282 (U32)(inr-istart), (U32)(iend-inr), 1283 minMatch); 1284 1285 if (!nbMatches) { 1286 DEBUGLOG(7, "rPos:%u : no match found", cur); 1287 continue; 1288 } 1289 1290 { U32 const longestML = matches[nbMatches-1].len; 1291 DEBUGLOG(7, "cPos:%i==rPos:%u, found %u matches, of longest ML=%u", 1292 (int)(inr-istart), cur, nbMatches, longestML); 1293 1294 if ( (longestML > sufficient_len) 1295 || (cur + longestML >= ZSTD_OPT_NUM) 1296 || (ip + cur + longestML >= iend) ) { 1297 lastStretch.mlen = longestML; 1298 lastStretch.off = matches[nbMatches-1].off; 1299 lastStretch.litlen = 0; 1300 last_pos = cur + longestML; 1301 goto _shortestPath; 1302 } } 1303 1304 /* set prices using matches found at position == cur */ 1305 for (matchNb = 0; matchNb < nbMatches; matchNb++) { 1306 U32 const offset = matches[matchNb].off; 1307 U32 const lastML = matches[matchNb].len; 1308 U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch; 1309 U32 mlen; 1310 1311 DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u", 1312 matchNb, matches[matchNb].off, lastML, opt[cur].litlen); 1313 1314 for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ 1315 U32 const pos = cur + mlen; 1316 int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); 1317 1318 if ((pos > last_pos) || (price < opt[pos].price)) { 1319 DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)", 1320 pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); 1321 while (last_pos < pos) { 1322 /* fill empty positions, for future comparisons */ 1323 last_pos++; 1324 opt[last_pos].price = ZSTD_MAX_PRICE; 1325 opt[last_pos].litlen = !0; /* just needs to be != 0, to mean "not an end of match" */ 1326 } 1327 opt[pos].mlen = mlen; 1328 opt[pos].off = offset; 1329 opt[pos].litlen = 0; 1330 opt[pos].price = price; 1331 } else { 1332 DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)", 1333 pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); 1334 if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */ 1335 } 1336 } } } 1337 opt[last_pos+1].price = ZSTD_MAX_PRICE; 1338 } /* for (cur = 1; cur <= last_pos; cur++) */ 1339 1340 lastStretch = opt[last_pos]; 1341 assert(cur >= lastStretch.mlen); 1342 cur = last_pos - lastStretch.mlen; 1343 1344 _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ 1345 assert(opt[0].mlen == 0); 1346 assert(last_pos >= lastStretch.mlen); 1347 assert(cur == last_pos - lastStretch.mlen); 1348 1349 if (lastStretch.mlen==0) { 1350 /* no solution : all matches have been converted into literals */ 1351 assert(lastStretch.litlen == (ip - anchor) + last_pos); 1352 ip += last_pos; 1353 continue; 1354 } 1355 assert(lastStretch.off > 0); 1356 1357 /* Update offset history */ 1358 if (lastStretch.litlen == 0) { 1359 /* finishing on a match : update offset history */ 1360 Repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0); 1361 ZSTD_memcpy(rep, &reps, sizeof(Repcodes_t)); 1362 } else { 1363 ZSTD_memcpy(rep, lastStretch.rep, sizeof(Repcodes_t)); 1364 assert(cur >= lastStretch.litlen); 1365 cur -= lastStretch.litlen; 1366 } 1367 1368 /* Let's write the shortest path solution. 1369 * It is stored in @opt in reverse order, 1370 * starting from @storeEnd (==cur+2), 1371 * effectively partially @opt overwriting. 1372 * Content is changed too: 1373 * - So far, @opt stored stretches, aka a match followed by literals 1374 * - Now, it will store sequences, aka literals followed by a match 1375 */ 1376 { U32 const storeEnd = cur + 2; 1377 U32 storeStart = storeEnd; 1378 U32 stretchPos = cur; 1379 1380 DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)", 1381 last_pos, cur); (void)last_pos; 1382 assert(storeEnd < ZSTD_OPT_SIZE); 1383 DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", 1384 storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off); 1385 if (lastStretch.litlen > 0) { 1386 /* last "sequence" is unfinished: just a bunch of literals */ 1387 opt[storeEnd].litlen = lastStretch.litlen; 1388 opt[storeEnd].mlen = 0; 1389 storeStart = storeEnd-1; 1390 opt[storeStart] = lastStretch; 1391 } { 1392 opt[storeEnd] = lastStretch; /* note: litlen will be fixed */ 1393 storeStart = storeEnd; 1394 } 1395 while (1) { 1396 ZSTD_optimal_t nextStretch = opt[stretchPos]; 1397 opt[storeStart].litlen = nextStretch.litlen; 1398 DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)", 1399 opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off); 1400 if (nextStretch.mlen == 0) { 1401 /* reaching beginning of segment */ 1402 break; 1403 } 1404 storeStart--; 1405 opt[storeStart] = nextStretch; /* note: litlen will be fixed */ 1406 assert(nextStretch.litlen + nextStretch.mlen <= stretchPos); 1407 stretchPos -= nextStretch.litlen + nextStretch.mlen; 1408 } 1409 1410 /* save sequences */ 1411 DEBUGLOG(6, "sending selected sequences into seqStore"); 1412 { U32 storePos; 1413 for (storePos=storeStart; storePos <= storeEnd; storePos++) { 1414 U32 const llen = opt[storePos].litlen; 1415 U32 const mlen = opt[storePos].mlen; 1416 U32 const offBase = opt[storePos].off; 1417 U32 const advance = llen + mlen; 1418 DEBUGLOG(6, "considering seq starting at %i, llen=%u, mlen=%u", 1419 (int)(anchor - istart), (unsigned)llen, (unsigned)mlen); 1420 1421 if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */ 1422 assert(storePos == storeEnd); /* must be last sequence */ 1423 ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */ 1424 continue; /* will finish */ 1425 } 1426 1427 assert(anchor + llen <= iend); 1428 ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen); 1429 ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen); 1430 anchor += advance; 1431 ip = anchor; 1432 } } 1433 DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]); 1434 1435 /* update all costs */ 1436 ZSTD_setBasePrices(optStatePtr, optLevel); 1437 } 1438 } /* while (ip < ilimit) */ 1439 1440 /* Return the last literals size */ 1441 return (size_t)(iend - anchor); 1442 } 1443 #endif /* build exclusions */ 1444 1445 #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR 1446 static size_t ZSTD_compressBlock_opt0( 1447 ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1448 const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode) 1449 { 1450 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode); 1451 } 1452 #endif 1453 1454 #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR 1455 static size_t ZSTD_compressBlock_opt2( 1456 ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1457 const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode) 1458 { 1459 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode); 1460 } 1461 #endif 1462 1463 #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR 1464 size_t ZSTD_compressBlock_btopt( 1465 ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1466 const void* src, size_t srcSize) 1467 { 1468 DEBUGLOG(5, "ZSTD_compressBlock_btopt"); 1469 return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict); 1470 } 1471 #endif 1472 1473 1474 1475 1476 #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR 1477 /* ZSTD_initStats_ultra(): 1478 * make a first compression pass, just to seed stats with more accurate starting values. 1479 * only works on first block, with no dictionary and no ldm. 1480 * this function cannot error out, its narrow contract must be respected. 1481 */ 1482 static 1483 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 1484 void ZSTD_initStats_ultra(ZSTD_MatchState_t* ms, 1485 SeqStore_t* seqStore, 1486 U32 rep[ZSTD_REP_NUM], 1487 const void* src, size_t srcSize) 1488 { 1489 U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */ 1490 ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep)); 1491 1492 DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize); 1493 assert(ms->opt.litLengthSum == 0); /* first block */ 1494 assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */ 1495 assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */ 1496 assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */ 1497 1498 ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/ 1499 1500 /* invalidate first scan from history, only keep entropy stats */ 1501 ZSTD_resetSeqStore(seqStore); 1502 ms->window.base -= srcSize; 1503 ms->window.dictLimit += (U32)srcSize; 1504 ms->window.lowLimit = ms->window.dictLimit; 1505 ms->nextToUpdate = ms->window.dictLimit; 1506 1507 } 1508 1509 size_t ZSTD_compressBlock_btultra( 1510 ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1511 const void* src, size_t srcSize) 1512 { 1513 DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); 1514 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict); 1515 } 1516 1517 size_t ZSTD_compressBlock_btultra2( 1518 ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1519 const void* src, size_t srcSize) 1520 { 1521 U32 const curr = (U32)((const BYTE*)src - ms->window.base); 1522 DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize); 1523 1524 /* 2-passes strategy: 1525 * this strategy makes a first pass over first block to collect statistics 1526 * in order to seed next round's statistics with it. 1527 * After 1st pass, function forgets history, and starts a new block. 1528 * Consequently, this can only work if no data has been previously loaded in tables, 1529 * aka, no dictionary, no prefix, no ldm preprocessing. 1530 * The compression ratio gain is generally small (~0.5% on first block), 1531 * the cost is 2x cpu time on first block. */ 1532 assert(srcSize <= ZSTD_BLOCKSIZE_MAX); 1533 if ( (ms->opt.litLengthSum==0) /* first block */ 1534 && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ 1535 && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ 1536 && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ 1537 && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */ 1538 ) { 1539 ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); 1540 } 1541 1542 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict); 1543 } 1544 #endif 1545 1546 #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR 1547 size_t ZSTD_compressBlock_btopt_dictMatchState( 1548 ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1549 const void* src, size_t srcSize) 1550 { 1551 return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState); 1552 } 1553 1554 size_t ZSTD_compressBlock_btopt_extDict( 1555 ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1556 const void* src, size_t srcSize) 1557 { 1558 return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict); 1559 } 1560 #endif 1561 1562 #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR 1563 size_t ZSTD_compressBlock_btultra_dictMatchState( 1564 ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1565 const void* src, size_t srcSize) 1566 { 1567 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState); 1568 } 1569 1570 size_t ZSTD_compressBlock_btultra_extDict( 1571 ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1572 const void* src, size_t srcSize) 1573 { 1574 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict); 1575 } 1576 #endif 1577 1578 /* note : no btultra2 variant for extDict nor dictMatchState, 1579 * because btultra2 is not meant to work with dictionaries 1580 * and is only specific for the first block (no prefix) */ 1581