Home | History | Annotate | Line # | Download | only in zopfli
lz77.c revision 1.1.1.1.8.2
      1 /*
      2 Copyright 2011 Google Inc. All Rights Reserved.
      3 
      4 Licensed under the Apache License, Version 2.0 (the "License");
      5 you may not use this file except in compliance with the License.
      6 You may obtain a copy of the License at
      7 
      8     http://www.apache.org/licenses/LICENSE-2.0
      9 
     10 Unless required by applicable law or agreed to in writing, software
     11 distributed under the License is distributed on an "AS IS" BASIS,
     12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 See the License for the specific language governing permissions and
     14 limitations under the License.
     15 
     16 Author: lode.vandevenne (at) gmail.com (Lode Vandevenne)
     17 Author: jyrki.alakuijala (at) gmail.com (Jyrki Alakuijala)
     18 */
     19 
     20 #include "lz77.h"
     21 #include "util.h"
     22 
     23 #include <assert.h>
     24 #include <stdio.h>
     25 #include <stdlib.h>
     26 
     27 void ZopfliInitLZ77Store(ZopfliLZ77Store* store) {
     28   store->size = 0;
     29   store->litlens = 0;
     30   store->dists = 0;
     31 }
     32 
     33 void ZopfliCleanLZ77Store(ZopfliLZ77Store* store) {
     34   free(store->litlens);
     35   free(store->dists);
     36 }
     37 
     38 void ZopfliCopyLZ77Store(
     39     const ZopfliLZ77Store* source, ZopfliLZ77Store* dest) {
     40   size_t i;
     41   ZopfliCleanLZ77Store(dest);
     42   dest->litlens =
     43       (unsigned short*)malloc(sizeof(*dest->litlens) * source->size);
     44   dest->dists = (unsigned short*)malloc(sizeof(*dest->dists) * source->size);
     45 
     46   if (!dest->litlens || !dest->dists) exit(-1); /* Allocation failed. */
     47 
     48   dest->size = source->size;
     49   for (i = 0; i < source->size; i++) {
     50     dest->litlens[i] = source->litlens[i];
     51     dest->dists[i] = source->dists[i];
     52   }
     53 }
     54 
     55 /*
     56 Appends the length and distance to the LZ77 arrays of the ZopfliLZ77Store.
     57 context must be a ZopfliLZ77Store*.
     58 */
     59 void ZopfliStoreLitLenDist(unsigned short length, unsigned short dist,
     60                            ZopfliLZ77Store* store) {
     61   size_t size2 = store->size;  /* Needed for using ZOPFLI_APPEND_DATA twice. */
     62   ZOPFLI_APPEND_DATA(length, &store->litlens, &store->size);
     63   ZOPFLI_APPEND_DATA(dist, &store->dists, &size2);
     64 }
     65 
     66 /*
     67 Gets the value of the length given the distance. Typically, the value of the
     68 length is the length, but if the distance is very long, decrease the value of
     69 the length a bit to make up for the fact that long distances use large amounts
     70 of extra bits.
     71 */
     72 static int GetLengthValue(int length, int distance) {
     73   /*
     74   At distance > 1024, using length 3 is no longer good, due to the large amount
     75   of extra bits for the distance code. distance > 1024 uses 9+ extra bits, and
     76   this seems to be the sweet spot.
     77   */
     78   return distance > 1024 ? length - 1 : length;
     79 }
     80 
     81 void ZopfliVerifyLenDist(const unsigned char* data, size_t datasize, size_t pos,
     82                          unsigned short dist, unsigned short length) {
     83 
     84   /* TODO(lode): make this only run in a debug compile, it's for assert only. */
     85   size_t i;
     86 
     87   assert(pos + length <= datasize);
     88   for (i = 0; i < length; i++) {
     89     if (data[pos - dist + i] != data[pos + i]) {
     90       assert(data[pos - dist + i] == data[pos + i]);
     91       break;
     92     }
     93   }
     94 }
     95 
     96 /*
     97 Finds how long the match of scan and match is. Can be used to find how many
     98 bytes starting from scan, and from match, are equal. Returns the last byte
     99 after scan, which is still equal to the correspondinb byte after match.
    100 scan is the position to compare
    101 match is the earlier position to compare.
    102 end is the last possible byte, beyond which to stop looking.
    103 safe_end is a few (8) bytes before end, for comparing multiple bytes at once.
    104 */
    105 static const unsigned char* GetMatch(const unsigned char* scan,
    106                                      const unsigned char* match,
    107                                      const unsigned char* end,
    108                                      const unsigned char* safe_end) {
    109 
    110   if (sizeof(size_t) == 8) {
    111     /* 8 checks at once per array bounds check (size_t is 64-bit). */
    112     while (scan < safe_end && *((size_t*)scan) == *((size_t*)match)) {
    113       scan += 8;
    114       match += 8;
    115     }
    116   } else if (sizeof(unsigned int) == 4) {
    117     /* 4 checks at once per array bounds check (unsigned int is 32-bit). */
    118     while (scan < safe_end
    119         && *((unsigned int*)scan) == *((unsigned int*)match)) {
    120       scan += 4;
    121       match += 4;
    122     }
    123   } else {
    124     /* do 8 checks at once per array bounds check. */
    125     while (scan < safe_end && *scan == *match && *++scan == *++match
    126           && *++scan == *++match && *++scan == *++match
    127           && *++scan == *++match && *++scan == *++match
    128           && *++scan == *++match && *++scan == *++match) {
    129       scan++; match++;
    130     }
    131   }
    132 
    133   /* The remaining few bytes. */
    134   while (scan != end && *scan == *match) {
    135     scan++; match++;
    136   }
    137 
    138   return scan;
    139 }
    140 
    141 #ifdef ZOPFLI_LONGEST_MATCH_CACHE
    142 /*
    143 Gets distance, length and sublen values from the cache if possible.
    144 Returns 1 if it got the values from the cache, 0 if not.
    145 Updates the limit value to a smaller one if possible with more limited
    146 information from the cache.
    147 */
    148 static int TryGetFromLongestMatchCache(ZopfliBlockState* s,
    149     size_t pos, size_t* limit,
    150     unsigned short* sublen, unsigned short* distance, unsigned short* length) {
    151   /* The LMC cache starts at the beginning of the block rather than the
    152      beginning of the whole array. */
    153   size_t lmcpos = pos - s->blockstart;
    154 
    155   /* Length > 0 and dist 0 is invalid combination, which indicates on purpose
    156      that this cache value is not filled in yet. */
    157   unsigned char cache_available = s->lmc && (s->lmc->length[lmcpos] == 0 ||
    158       s->lmc->dist[lmcpos] != 0);
    159   unsigned char limit_ok_for_cache = cache_available &&
    160       (*limit == ZOPFLI_MAX_MATCH || s->lmc->length[lmcpos] <= *limit ||
    161       (sublen && ZopfliMaxCachedSublen(s->lmc,
    162           lmcpos, s->lmc->length[lmcpos]) >= *limit));
    163 
    164   if (s->lmc && limit_ok_for_cache && cache_available) {
    165     if (!sublen || s->lmc->length[lmcpos]
    166         <= ZopfliMaxCachedSublen(s->lmc, lmcpos, s->lmc->length[lmcpos])) {
    167       *length = s->lmc->length[lmcpos];
    168       if (*length > *limit) *length = *limit;
    169       if (sublen) {
    170         ZopfliCacheToSublen(s->lmc, lmcpos, *length, sublen);
    171         *distance = sublen[*length];
    172         if (*limit == ZOPFLI_MAX_MATCH && *length >= ZOPFLI_MIN_MATCH) {
    173           assert(sublen[*length] == s->lmc->dist[lmcpos]);
    174         }
    175       } else {
    176         *distance = s->lmc->dist[lmcpos];
    177       }
    178       return 1;
    179     }
    180     /* Can't use much of the cache, since the "sublens" need to be calculated,
    181        but at  least we already know when to stop. */
    182     *limit = s->lmc->length[lmcpos];
    183   }
    184 
    185   return 0;
    186 }
    187 
    188 /*
    189 Stores the found sublen, distance and length in the longest match cache, if
    190 possible.
    191 */
    192 static void StoreInLongestMatchCache(ZopfliBlockState* s,
    193     size_t pos, size_t limit,
    194     const unsigned short* sublen,
    195     unsigned short distance, unsigned short length) {
    196   /* The LMC cache starts at the beginning of the block rather than the
    197      beginning of the whole array. */
    198   size_t lmcpos = pos - s->blockstart;
    199 
    200   /* Length > 0 and dist 0 is invalid combination, which indicates on purpose
    201      that this cache value is not filled in yet. */
    202   unsigned char cache_available = s->lmc && (s->lmc->length[lmcpos] == 0 ||
    203       s->lmc->dist[lmcpos] != 0);
    204 
    205   if (s->lmc && limit == ZOPFLI_MAX_MATCH && sublen && !cache_available) {
    206     assert(s->lmc->length[lmcpos] == 1 && s->lmc->dist[lmcpos] == 0);
    207     s->lmc->dist[lmcpos] = length < ZOPFLI_MIN_MATCH ? 0 : distance;
    208     s->lmc->length[lmcpos] = length < ZOPFLI_MIN_MATCH ? 0 : length;
    209     assert(!(s->lmc->length[lmcpos] == 1 && s->lmc->dist[lmcpos] == 0));
    210     ZopfliSublenToCache(sublen, lmcpos, length, s->lmc);
    211   }
    212 }
    213 #endif
    214 
    215 void ZopfliFindLongestMatch(ZopfliBlockState* s, const ZopfliHash* h,
    216     const unsigned char* array,
    217     size_t pos, size_t size, size_t limit,
    218     unsigned short* sublen, unsigned short* distance, unsigned short* length) {
    219   unsigned short hpos = pos & ZOPFLI_WINDOW_MASK, p, pp;
    220   unsigned short bestdist = 0;
    221   unsigned short bestlength = 1;
    222   const unsigned char* scan;
    223   const unsigned char* match;
    224   const unsigned char* arrayend;
    225   const unsigned char* arrayend_safe;
    226 #if ZOPFLI_MAX_CHAIN_HITS < ZOPFLI_WINDOW_SIZE
    227   int chain_counter = ZOPFLI_MAX_CHAIN_HITS;  /* For quitting early. */
    228 #endif
    229 
    230   unsigned dist = 0;  /* Not unsigned short on purpose. */
    231 
    232   int* hhead = h->head;
    233   unsigned short* hprev = h->prev;
    234   int* hhashval = h->hashval;
    235   int hval = h->val;
    236 
    237 #ifdef ZOPFLI_LONGEST_MATCH_CACHE
    238   if (TryGetFromLongestMatchCache(s, pos, &limit, sublen, distance, length)) {
    239     assert(pos + *length <= size);
    240     return;
    241   }
    242 #endif
    243 
    244   assert(limit <= ZOPFLI_MAX_MATCH);
    245   assert(limit >= ZOPFLI_MIN_MATCH);
    246   assert(pos < size);
    247 
    248   if (size - pos < ZOPFLI_MIN_MATCH) {
    249     /* The rest of the code assumes there are at least ZOPFLI_MIN_MATCH bytes to
    250        try. */
    251     *length = 0;
    252     *distance = 0;
    253     return;
    254   }
    255 
    256   if (pos + limit > size) {
    257     limit = size - pos;
    258   }
    259   arrayend = &array[pos] + limit;
    260   arrayend_safe = arrayend - 8;
    261 
    262   assert(hval < 65536);
    263 
    264   pp = hhead[hval];  /* During the whole loop, p == hprev[pp]. */
    265   p = hprev[pp];
    266 
    267   assert(pp == hpos);
    268 
    269   dist = p < pp ? pp - p : ((ZOPFLI_WINDOW_SIZE - p) + pp);
    270 
    271   /* Go through all distances. */
    272   while (dist < ZOPFLI_WINDOW_SIZE) {
    273     unsigned short currentlength = 0;
    274 
    275     assert(p < ZOPFLI_WINDOW_SIZE);
    276     assert(p == hprev[pp]);
    277     assert(hhashval[p] == hval);
    278 
    279     if (dist > 0) {
    280       assert(pos < size);
    281       assert(dist <= pos);
    282       scan = &array[pos];
    283       match = &array[pos - dist];
    284 
    285       /* Testing the byte at position bestlength first, goes slightly faster. */
    286       if (pos + bestlength >= size
    287           || *(scan + bestlength) == *(match + bestlength)) {
    288 
    289 #ifdef ZOPFLI_HASH_SAME
    290         unsigned short same0 = h->same[pos & ZOPFLI_WINDOW_MASK];
    291         if (same0 > 2 && *scan == *match) {
    292           unsigned short same1 = h->same[(pos - dist) & ZOPFLI_WINDOW_MASK];
    293           unsigned short same = same0 < same1 ? same0 : same1;
    294           if (same > limit) same = limit;
    295           scan += same;
    296           match += same;
    297         }
    298 #endif
    299         scan = GetMatch(scan, match, arrayend, arrayend_safe);
    300         currentlength = scan - &array[pos];  /* The found length. */
    301       }
    302 
    303       if (currentlength > bestlength) {
    304         if (sublen) {
    305           unsigned short j;
    306           for (j = bestlength + 1; j <= currentlength; j++) {
    307             sublen[j] = dist;
    308           }
    309         }
    310         bestdist = dist;
    311         bestlength = currentlength;
    312         if (currentlength >= limit) break;
    313       }
    314     }
    315 
    316 
    317 #ifdef ZOPFLI_HASH_SAME_HASH
    318     /* Switch to the other hash once this will be more efficient. */
    319     if (hhead != h->head2 && bestlength >= h->same[hpos] &&
    320         h->val2 == h->hashval2[p]) {
    321       /* Now use the hash that encodes the length and first byte. */
    322       hhead = h->head2;
    323       hprev = h->prev2;
    324       hhashval = h->hashval2;
    325       hval = h->val2;
    326     }
    327 #endif
    328 
    329     pp = p;
    330     p = hprev[p];
    331     if (p == pp) break;  /* Uninited prev value. */
    332 
    333     dist += p < pp ? pp - p : ((ZOPFLI_WINDOW_SIZE - p) + pp);
    334 
    335 #if ZOPFLI_MAX_CHAIN_HITS < ZOPFLI_WINDOW_SIZE
    336     chain_counter--;
    337     if (chain_counter <= 0) break;
    338 #endif
    339   }
    340 
    341 #ifdef ZOPFLI_LONGEST_MATCH_CACHE
    342   StoreInLongestMatchCache(s, pos, limit, sublen, bestdist, bestlength);
    343 #endif
    344 
    345   assert(bestlength <= limit);
    346 
    347   *distance = bestdist;
    348   *length = bestlength;
    349   assert(pos + *length <= size);
    350 }
    351 
    352 void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
    353                       size_t instart, size_t inend,
    354                       ZopfliLZ77Store* store) {
    355   size_t i = 0, j;
    356   unsigned short leng;
    357   unsigned short dist;
    358   int lengvalue;
    359   size_t windowstart = instart > ZOPFLI_WINDOW_SIZE
    360       ? instart - ZOPFLI_WINDOW_SIZE : 0;
    361   unsigned short dummysublen[259];
    362 
    363   ZopfliHash hash;
    364   ZopfliHash* h = &hash;
    365 
    366 #ifdef ZOPFLI_LAZY_MATCHING
    367   /* Lazy matching. */
    368   unsigned prev_length = 0;
    369   unsigned prev_match = 0;
    370   int prevlengvalue;
    371   int match_available = 0;
    372 #endif
    373 
    374   if (instart == inend) return;
    375 
    376   ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h);
    377   ZopfliWarmupHash(in, windowstart, inend, h);
    378   for (i = windowstart; i < instart; i++) {
    379     ZopfliUpdateHash(in, i, inend, h);
    380   }
    381 
    382   for (i = instart; i < inend; i++) {
    383     ZopfliUpdateHash(in, i, inend, h);
    384 
    385     ZopfliFindLongestMatch(s, h, in, i, inend, ZOPFLI_MAX_MATCH, dummysublen,
    386                            &dist, &leng);
    387     lengvalue = GetLengthValue(leng, dist);
    388 
    389 #ifdef ZOPFLI_LAZY_MATCHING
    390     /* Lazy matching. */
    391     prevlengvalue = GetLengthValue(prev_length, prev_match);
    392     if (match_available) {
    393       match_available = 0;
    394       if (lengvalue > prevlengvalue + 1) {
    395         ZopfliStoreLitLenDist(in[i - 1], 0, store);
    396         if (lengvalue >= ZOPFLI_MIN_MATCH && lengvalue < ZOPFLI_MAX_MATCH) {
    397           match_available = 1;
    398           prev_length = leng;
    399           prev_match = dist;
    400           continue;
    401         }
    402       } else {
    403         /* Add previous to output. */
    404         leng = prev_length;
    405         dist = prev_match;
    406         lengvalue = prevlengvalue;
    407         /* Add to output. */
    408         ZopfliVerifyLenDist(in, inend, i - 1, dist, leng);
    409         ZopfliStoreLitLenDist(leng, dist, store);
    410         for (j = 2; j < leng; j++) {
    411           assert(i < inend);
    412           i++;
    413           ZopfliUpdateHash(in, i, inend, h);
    414         }
    415         continue;
    416       }
    417     }
    418     else if (lengvalue >= ZOPFLI_MIN_MATCH && leng < ZOPFLI_MAX_MATCH) {
    419       match_available = 1;
    420       prev_length = leng;
    421       prev_match = dist;
    422       continue;
    423     }
    424     /* End of lazy matching. */
    425 #endif
    426 
    427     /* Add to output. */
    428     if (lengvalue >= ZOPFLI_MIN_MATCH) {
    429       ZopfliVerifyLenDist(in, inend, i, dist, leng);
    430       ZopfliStoreLitLenDist(leng, dist, store);
    431     } else {
    432       leng = 1;
    433       ZopfliStoreLitLenDist(in[i], 0, store);
    434     }
    435     for (j = 1; j < leng; j++) {
    436       assert(i < inend);
    437       i++;
    438       ZopfliUpdateHash(in, i, inend, h);
    439     }
    440   }
    441 
    442   ZopfliCleanHash(h);
    443 }
    444 
    445 void ZopfliLZ77Counts(const unsigned short* litlens,
    446                       const unsigned short* dists,
    447                       size_t start, size_t end,
    448                       size_t* ll_count, size_t* d_count) {
    449   size_t i;
    450 
    451   for (i = 0; i < 288; i++) {
    452     ll_count[i] = 0;
    453   }
    454   for (i = 0; i < 32; i++) {
    455     d_count[i] = 0;
    456   }
    457 
    458   for (i = start; i < end; i++) {
    459     if (dists[i] == 0) {
    460       ll_count[litlens[i]]++;
    461     } else {
    462       ll_count[ZopfliGetLengthSymbol(litlens[i])]++;
    463       d_count[ZopfliGetDistSymbol(dists[i])]++;
    464     }
    465   }
    466 
    467   ll_count[256] = 1;  /* End symbol. */
    468 }
    469