lz77.c revision 1.1 1 1.1 tls /*
2 1.1 tls Copyright 2011 Google Inc. All Rights Reserved.
3 1.1 tls
4 1.1 tls Licensed under the Apache License, Version 2.0 (the "License");
5 1.1 tls you may not use this file except in compliance with the License.
6 1.1 tls You may obtain a copy of the License at
7 1.1 tls
8 1.1 tls http://www.apache.org/licenses/LICENSE-2.0
9 1.1 tls
10 1.1 tls Unless required by applicable law or agreed to in writing, software
11 1.1 tls distributed under the License is distributed on an "AS IS" BASIS,
12 1.1 tls WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 1.1 tls See the License for the specific language governing permissions and
14 1.1 tls limitations under the License.
15 1.1 tls
16 1.1 tls Author: lode.vandevenne (at) gmail.com (Lode Vandevenne)
17 1.1 tls Author: jyrki.alakuijala (at) gmail.com (Jyrki Alakuijala)
18 1.1 tls */
19 1.1 tls
20 1.1 tls #include "lz77.h"
21 1.1 tls #include "util.h"
22 1.1 tls
23 1.1 tls #include <assert.h>
24 1.1 tls #include <stdio.h>
25 1.1 tls #include <stdlib.h>
26 1.1 tls
27 1.1 tls void ZopfliInitLZ77Store(ZopfliLZ77Store* store) {
28 1.1 tls store->size = 0;
29 1.1 tls store->litlens = 0;
30 1.1 tls store->dists = 0;
31 1.1 tls }
32 1.1 tls
33 1.1 tls void ZopfliCleanLZ77Store(ZopfliLZ77Store* store) {
34 1.1 tls free(store->litlens);
35 1.1 tls free(store->dists);
36 1.1 tls }
37 1.1 tls
38 1.1 tls void ZopfliCopyLZ77Store(
39 1.1 tls const ZopfliLZ77Store* source, ZopfliLZ77Store* dest) {
40 1.1 tls size_t i;
41 1.1 tls ZopfliCleanLZ77Store(dest);
42 1.1 tls dest->litlens =
43 1.1 tls (unsigned short*)malloc(sizeof(*dest->litlens) * source->size);
44 1.1 tls dest->dists = (unsigned short*)malloc(sizeof(*dest->dists) * source->size);
45 1.1 tls
46 1.1 tls if (!dest->litlens || !dest->dists) exit(-1); /* Allocation failed. */
47 1.1 tls
48 1.1 tls dest->size = source->size;
49 1.1 tls for (i = 0; i < source->size; i++) {
50 1.1 tls dest->litlens[i] = source->litlens[i];
51 1.1 tls dest->dists[i] = source->dists[i];
52 1.1 tls }
53 1.1 tls }
54 1.1 tls
55 1.1 tls /*
56 1.1 tls Appends the length and distance to the LZ77 arrays of the ZopfliLZ77Store.
57 1.1 tls context must be a ZopfliLZ77Store*.
58 1.1 tls */
59 1.1 tls void ZopfliStoreLitLenDist(unsigned short length, unsigned short dist,
60 1.1 tls ZopfliLZ77Store* store) {
61 1.1 tls size_t size2 = store->size; /* Needed for using ZOPFLI_APPEND_DATA twice. */
62 1.1 tls ZOPFLI_APPEND_DATA(length, &store->litlens, &store->size);
63 1.1 tls ZOPFLI_APPEND_DATA(dist, &store->dists, &size2);
64 1.1 tls }
65 1.1 tls
66 1.1 tls /*
67 1.1 tls Gets the value of the length given the distance. Typically, the value of the
68 1.1 tls length is the length, but if the distance is very long, decrease the value of
69 1.1 tls the length a bit to make up for the fact that long distances use large amounts
70 1.1 tls of extra bits.
71 1.1 tls */
72 1.1 tls static int GetLengthValue(int length, int distance) {
73 1.1 tls /*
74 1.1 tls At distance > 1024, using length 3 is no longer good, due to the large amount
75 1.1 tls of extra bits for the distance code. distance > 1024 uses 9+ extra bits, and
76 1.1 tls this seems to be the sweet spot.
77 1.1 tls */
78 1.1 tls return distance > 1024 ? length - 1 : length;
79 1.1 tls }
80 1.1 tls
81 1.1 tls void ZopfliVerifyLenDist(const unsigned char* data, size_t datasize, size_t pos,
82 1.1 tls unsigned short dist, unsigned short length) {
83 1.1 tls
84 1.1 tls /* TODO(lode): make this only run in a debug compile, it's for assert only. */
85 1.1 tls size_t i;
86 1.1 tls
87 1.1 tls assert(pos + length <= datasize);
88 1.1 tls for (i = 0; i < length; i++) {
89 1.1 tls if (data[pos - dist + i] != data[pos + i]) {
90 1.1 tls assert(data[pos - dist + i] == data[pos + i]);
91 1.1 tls break;
92 1.1 tls }
93 1.1 tls }
94 1.1 tls }
95 1.1 tls
96 1.1 tls /*
97 1.1 tls Finds how long the match of scan and match is. Can be used to find how many
98 1.1 tls bytes starting from scan, and from match, are equal. Returns the last byte
99 1.1 tls after scan, which is still equal to the correspondinb byte after match.
100 1.1 tls scan is the position to compare
101 1.1 tls match is the earlier position to compare.
102 1.1 tls end is the last possible byte, beyond which to stop looking.
103 1.1 tls safe_end is a few (8) bytes before end, for comparing multiple bytes at once.
104 1.1 tls */
105 1.1 tls static const unsigned char* GetMatch(const unsigned char* scan,
106 1.1 tls const unsigned char* match,
107 1.1 tls const unsigned char* end,
108 1.1 tls const unsigned char* safe_end) {
109 1.1 tls
110 1.1 tls if (sizeof(size_t) == 8) {
111 1.1 tls /* 8 checks at once per array bounds check (size_t is 64-bit). */
112 1.1 tls while (scan < safe_end && *((size_t*)scan) == *((size_t*)match)) {
113 1.1 tls scan += 8;
114 1.1 tls match += 8;
115 1.1 tls }
116 1.1 tls } else if (sizeof(unsigned int) == 4) {
117 1.1 tls /* 4 checks at once per array bounds check (unsigned int is 32-bit). */
118 1.1 tls while (scan < safe_end
119 1.1 tls && *((unsigned int*)scan) == *((unsigned int*)match)) {
120 1.1 tls scan += 4;
121 1.1 tls match += 4;
122 1.1 tls }
123 1.1 tls } else {
124 1.1 tls /* do 8 checks at once per array bounds check. */
125 1.1 tls while (scan < safe_end && *scan == *match && *++scan == *++match
126 1.1 tls && *++scan == *++match && *++scan == *++match
127 1.1 tls && *++scan == *++match && *++scan == *++match
128 1.1 tls && *++scan == *++match && *++scan == *++match) {
129 1.1 tls scan++; match++;
130 1.1 tls }
131 1.1 tls }
132 1.1 tls
133 1.1 tls /* The remaining few bytes. */
134 1.1 tls while (scan != end && *scan == *match) {
135 1.1 tls scan++; match++;
136 1.1 tls }
137 1.1 tls
138 1.1 tls return scan;
139 1.1 tls }
140 1.1 tls
141 1.1 tls #ifdef ZOPFLI_LONGEST_MATCH_CACHE
142 1.1 tls /*
143 1.1 tls Gets distance, length and sublen values from the cache if possible.
144 1.1 tls Returns 1 if it got the values from the cache, 0 if not.
145 1.1 tls Updates the limit value to a smaller one if possible with more limited
146 1.1 tls information from the cache.
147 1.1 tls */
148 1.1 tls static int TryGetFromLongestMatchCache(ZopfliBlockState* s,
149 1.1 tls size_t pos, size_t* limit,
150 1.1 tls unsigned short* sublen, unsigned short* distance, unsigned short* length) {
151 1.1 tls /* The LMC cache starts at the beginning of the block rather than the
152 1.1 tls beginning of the whole array. */
153 1.1 tls size_t lmcpos = pos - s->blockstart;
154 1.1 tls
155 1.1 tls /* Length > 0 and dist 0 is invalid combination, which indicates on purpose
156 1.1 tls that this cache value is not filled in yet. */
157 1.1 tls unsigned char cache_available = s->lmc && (s->lmc->length[lmcpos] == 0 ||
158 1.1 tls s->lmc->dist[lmcpos] != 0);
159 1.1 tls unsigned char limit_ok_for_cache = cache_available &&
160 1.1 tls (*limit == ZOPFLI_MAX_MATCH || s->lmc->length[lmcpos] <= *limit ||
161 1.1 tls (sublen && ZopfliMaxCachedSublen(s->lmc,
162 1.1 tls lmcpos, s->lmc->length[lmcpos]) >= *limit));
163 1.1 tls
164 1.1 tls if (s->lmc && limit_ok_for_cache && cache_available) {
165 1.1 tls if (!sublen || s->lmc->length[lmcpos]
166 1.1 tls <= ZopfliMaxCachedSublen(s->lmc, lmcpos, s->lmc->length[lmcpos])) {
167 1.1 tls *length = s->lmc->length[lmcpos];
168 1.1 tls if (*length > *limit) *length = *limit;
169 1.1 tls if (sublen) {
170 1.1 tls ZopfliCacheToSublen(s->lmc, lmcpos, *length, sublen);
171 1.1 tls *distance = sublen[*length];
172 1.1 tls if (*limit == ZOPFLI_MAX_MATCH && *length >= ZOPFLI_MIN_MATCH) {
173 1.1 tls assert(sublen[*length] == s->lmc->dist[lmcpos]);
174 1.1 tls }
175 1.1 tls } else {
176 1.1 tls *distance = s->lmc->dist[lmcpos];
177 1.1 tls }
178 1.1 tls return 1;
179 1.1 tls }
180 1.1 tls /* Can't use much of the cache, since the "sublens" need to be calculated,
181 1.1 tls but at least we already know when to stop. */
182 1.1 tls *limit = s->lmc->length[lmcpos];
183 1.1 tls }
184 1.1 tls
185 1.1 tls return 0;
186 1.1 tls }
187 1.1 tls
188 1.1 tls /*
189 1.1 tls Stores the found sublen, distance and length in the longest match cache, if
190 1.1 tls possible.
191 1.1 tls */
192 1.1 tls static void StoreInLongestMatchCache(ZopfliBlockState* s,
193 1.1 tls size_t pos, size_t limit,
194 1.1 tls const unsigned short* sublen,
195 1.1 tls unsigned short distance, unsigned short length) {
196 1.1 tls /* The LMC cache starts at the beginning of the block rather than the
197 1.1 tls beginning of the whole array. */
198 1.1 tls size_t lmcpos = pos - s->blockstart;
199 1.1 tls
200 1.1 tls /* Length > 0 and dist 0 is invalid combination, which indicates on purpose
201 1.1 tls that this cache value is not filled in yet. */
202 1.1 tls unsigned char cache_available = s->lmc && (s->lmc->length[lmcpos] == 0 ||
203 1.1 tls s->lmc->dist[lmcpos] != 0);
204 1.1 tls
205 1.1 tls if (s->lmc && limit == ZOPFLI_MAX_MATCH && sublen && !cache_available) {
206 1.1 tls assert(s->lmc->length[lmcpos] == 1 && s->lmc->dist[lmcpos] == 0);
207 1.1 tls s->lmc->dist[lmcpos] = length < ZOPFLI_MIN_MATCH ? 0 : distance;
208 1.1 tls s->lmc->length[lmcpos] = length < ZOPFLI_MIN_MATCH ? 0 : length;
209 1.1 tls assert(!(s->lmc->length[lmcpos] == 1 && s->lmc->dist[lmcpos] == 0));
210 1.1 tls ZopfliSublenToCache(sublen, lmcpos, length, s->lmc);
211 1.1 tls }
212 1.1 tls }
213 1.1 tls #endif
214 1.1 tls
215 1.1 tls void ZopfliFindLongestMatch(ZopfliBlockState* s, const ZopfliHash* h,
216 1.1 tls const unsigned char* array,
217 1.1 tls size_t pos, size_t size, size_t limit,
218 1.1 tls unsigned short* sublen, unsigned short* distance, unsigned short* length) {
219 1.1 tls unsigned short hpos = pos & ZOPFLI_WINDOW_MASK, p, pp;
220 1.1 tls unsigned short bestdist = 0;
221 1.1 tls unsigned short bestlength = 1;
222 1.1 tls const unsigned char* scan;
223 1.1 tls const unsigned char* match;
224 1.1 tls const unsigned char* arrayend;
225 1.1 tls const unsigned char* arrayend_safe;
226 1.1 tls #if ZOPFLI_MAX_CHAIN_HITS < ZOPFLI_WINDOW_SIZE
227 1.1 tls int chain_counter = ZOPFLI_MAX_CHAIN_HITS; /* For quitting early. */
228 1.1 tls #endif
229 1.1 tls
230 1.1 tls unsigned dist = 0; /* Not unsigned short on purpose. */
231 1.1 tls
232 1.1 tls int* hhead = h->head;
233 1.1 tls unsigned short* hprev = h->prev;
234 1.1 tls int* hhashval = h->hashval;
235 1.1 tls int hval = h->val;
236 1.1 tls
237 1.1 tls #ifdef ZOPFLI_LONGEST_MATCH_CACHE
238 1.1 tls if (TryGetFromLongestMatchCache(s, pos, &limit, sublen, distance, length)) {
239 1.1 tls assert(pos + *length <= size);
240 1.1 tls return;
241 1.1 tls }
242 1.1 tls #endif
243 1.1 tls
244 1.1 tls assert(limit <= ZOPFLI_MAX_MATCH);
245 1.1 tls assert(limit >= ZOPFLI_MIN_MATCH);
246 1.1 tls assert(pos < size);
247 1.1 tls
248 1.1 tls if (size - pos < ZOPFLI_MIN_MATCH) {
249 1.1 tls /* The rest of the code assumes there are at least ZOPFLI_MIN_MATCH bytes to
250 1.1 tls try. */
251 1.1 tls *length = 0;
252 1.1 tls *distance = 0;
253 1.1 tls return;
254 1.1 tls }
255 1.1 tls
256 1.1 tls if (pos + limit > size) {
257 1.1 tls limit = size - pos;
258 1.1 tls }
259 1.1 tls arrayend = &array[pos] + limit;
260 1.1 tls arrayend_safe = arrayend - 8;
261 1.1 tls
262 1.1 tls assert(hval < 65536);
263 1.1 tls
264 1.1 tls pp = hhead[hval]; /* During the whole loop, p == hprev[pp]. */
265 1.1 tls p = hprev[pp];
266 1.1 tls
267 1.1 tls assert(pp == hpos);
268 1.1 tls
269 1.1 tls dist = p < pp ? pp - p : ((ZOPFLI_WINDOW_SIZE - p) + pp);
270 1.1 tls
271 1.1 tls /* Go through all distances. */
272 1.1 tls while (dist < ZOPFLI_WINDOW_SIZE) {
273 1.1 tls unsigned short currentlength = 0;
274 1.1 tls
275 1.1 tls assert(p < ZOPFLI_WINDOW_SIZE);
276 1.1 tls assert(p == hprev[pp]);
277 1.1 tls assert(hhashval[p] == hval);
278 1.1 tls
279 1.1 tls if (dist > 0) {
280 1.1 tls assert(pos < size);
281 1.1 tls assert(dist <= pos);
282 1.1 tls scan = &array[pos];
283 1.1 tls match = &array[pos - dist];
284 1.1 tls
285 1.1 tls /* Testing the byte at position bestlength first, goes slightly faster. */
286 1.1 tls if (pos + bestlength >= size
287 1.1 tls || *(scan + bestlength) == *(match + bestlength)) {
288 1.1 tls
289 1.1 tls #ifdef ZOPFLI_HASH_SAME
290 1.1 tls unsigned short same0 = h->same[pos & ZOPFLI_WINDOW_MASK];
291 1.1 tls if (same0 > 2 && *scan == *match) {
292 1.1 tls unsigned short same1 = h->same[(pos - dist) & ZOPFLI_WINDOW_MASK];
293 1.1 tls unsigned short same = same0 < same1 ? same0 : same1;
294 1.1 tls if (same > limit) same = limit;
295 1.1 tls scan += same;
296 1.1 tls match += same;
297 1.1 tls }
298 1.1 tls #endif
299 1.1 tls scan = GetMatch(scan, match, arrayend, arrayend_safe);
300 1.1 tls currentlength = scan - &array[pos]; /* The found length. */
301 1.1 tls }
302 1.1 tls
303 1.1 tls if (currentlength > bestlength) {
304 1.1 tls if (sublen) {
305 1.1 tls unsigned short j;
306 1.1 tls for (j = bestlength + 1; j <= currentlength; j++) {
307 1.1 tls sublen[j] = dist;
308 1.1 tls }
309 1.1 tls }
310 1.1 tls bestdist = dist;
311 1.1 tls bestlength = currentlength;
312 1.1 tls if (currentlength >= limit) break;
313 1.1 tls }
314 1.1 tls }
315 1.1 tls
316 1.1 tls
317 1.1 tls #ifdef ZOPFLI_HASH_SAME_HASH
318 1.1 tls /* Switch to the other hash once this will be more efficient. */
319 1.1 tls if (hhead != h->head2 && bestlength >= h->same[hpos] &&
320 1.1 tls h->val2 == h->hashval2[p]) {
321 1.1 tls /* Now use the hash that encodes the length and first byte. */
322 1.1 tls hhead = h->head2;
323 1.1 tls hprev = h->prev2;
324 1.1 tls hhashval = h->hashval2;
325 1.1 tls hval = h->val2;
326 1.1 tls }
327 1.1 tls #endif
328 1.1 tls
329 1.1 tls pp = p;
330 1.1 tls p = hprev[p];
331 1.1 tls if (p == pp) break; /* Uninited prev value. */
332 1.1 tls
333 1.1 tls dist += p < pp ? pp - p : ((ZOPFLI_WINDOW_SIZE - p) + pp);
334 1.1 tls
335 1.1 tls #if ZOPFLI_MAX_CHAIN_HITS < ZOPFLI_WINDOW_SIZE
336 1.1 tls chain_counter--;
337 1.1 tls if (chain_counter <= 0) break;
338 1.1 tls #endif
339 1.1 tls }
340 1.1 tls
341 1.1 tls #ifdef ZOPFLI_LONGEST_MATCH_CACHE
342 1.1 tls StoreInLongestMatchCache(s, pos, limit, sublen, bestdist, bestlength);
343 1.1 tls #endif
344 1.1 tls
345 1.1 tls assert(bestlength <= limit);
346 1.1 tls
347 1.1 tls *distance = bestdist;
348 1.1 tls *length = bestlength;
349 1.1 tls assert(pos + *length <= size);
350 1.1 tls }
351 1.1 tls
352 1.1 tls void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
353 1.1 tls size_t instart, size_t inend,
354 1.1 tls ZopfliLZ77Store* store) {
355 1.1 tls size_t i = 0, j;
356 1.1 tls unsigned short leng;
357 1.1 tls unsigned short dist;
358 1.1 tls int lengvalue;
359 1.1 tls size_t windowstart = instart > ZOPFLI_WINDOW_SIZE
360 1.1 tls ? instart - ZOPFLI_WINDOW_SIZE : 0;
361 1.1 tls unsigned short dummysublen[259];
362 1.1 tls
363 1.1 tls ZopfliHash hash;
364 1.1 tls ZopfliHash* h = &hash;
365 1.1 tls
366 1.1 tls #ifdef ZOPFLI_LAZY_MATCHING
367 1.1 tls /* Lazy matching. */
368 1.1 tls unsigned prev_length = 0;
369 1.1 tls unsigned prev_match = 0;
370 1.1 tls int prevlengvalue;
371 1.1 tls int match_available = 0;
372 1.1 tls #endif
373 1.1 tls
374 1.1 tls if (instart == inend) return;
375 1.1 tls
376 1.1 tls ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h);
377 1.1 tls ZopfliWarmupHash(in, windowstart, inend, h);
378 1.1 tls for (i = windowstart; i < instart; i++) {
379 1.1 tls ZopfliUpdateHash(in, i, inend, h);
380 1.1 tls }
381 1.1 tls
382 1.1 tls for (i = instart; i < inend; i++) {
383 1.1 tls ZopfliUpdateHash(in, i, inend, h);
384 1.1 tls
385 1.1 tls ZopfliFindLongestMatch(s, h, in, i, inend, ZOPFLI_MAX_MATCH, dummysublen,
386 1.1 tls &dist, &leng);
387 1.1 tls lengvalue = GetLengthValue(leng, dist);
388 1.1 tls
389 1.1 tls #ifdef ZOPFLI_LAZY_MATCHING
390 1.1 tls /* Lazy matching. */
391 1.1 tls prevlengvalue = GetLengthValue(prev_length, prev_match);
392 1.1 tls if (match_available) {
393 1.1 tls match_available = 0;
394 1.1 tls if (lengvalue > prevlengvalue + 1) {
395 1.1 tls ZopfliStoreLitLenDist(in[i - 1], 0, store);
396 1.1 tls if (lengvalue >= ZOPFLI_MIN_MATCH && lengvalue < ZOPFLI_MAX_MATCH) {
397 1.1 tls match_available = 1;
398 1.1 tls prev_length = leng;
399 1.1 tls prev_match = dist;
400 1.1 tls continue;
401 1.1 tls }
402 1.1 tls } else {
403 1.1 tls /* Add previous to output. */
404 1.1 tls leng = prev_length;
405 1.1 tls dist = prev_match;
406 1.1 tls lengvalue = prevlengvalue;
407 1.1 tls /* Add to output. */
408 1.1 tls ZopfliVerifyLenDist(in, inend, i - 1, dist, leng);
409 1.1 tls ZopfliStoreLitLenDist(leng, dist, store);
410 1.1 tls for (j = 2; j < leng; j++) {
411 1.1 tls assert(i < inend);
412 1.1 tls i++;
413 1.1 tls ZopfliUpdateHash(in, i, inend, h);
414 1.1 tls }
415 1.1 tls continue;
416 1.1 tls }
417 1.1 tls }
418 1.1 tls else if (lengvalue >= ZOPFLI_MIN_MATCH && leng < ZOPFLI_MAX_MATCH) {
419 1.1 tls match_available = 1;
420 1.1 tls prev_length = leng;
421 1.1 tls prev_match = dist;
422 1.1 tls continue;
423 1.1 tls }
424 1.1 tls /* End of lazy matching. */
425 1.1 tls #endif
426 1.1 tls
427 1.1 tls /* Add to output. */
428 1.1 tls if (lengvalue >= ZOPFLI_MIN_MATCH) {
429 1.1 tls ZopfliVerifyLenDist(in, inend, i, dist, leng);
430 1.1 tls ZopfliStoreLitLenDist(leng, dist, store);
431 1.1 tls } else {
432 1.1 tls leng = 1;
433 1.1 tls ZopfliStoreLitLenDist(in[i], 0, store);
434 1.1 tls }
435 1.1 tls for (j = 1; j < leng; j++) {
436 1.1 tls assert(i < inend);
437 1.1 tls i++;
438 1.1 tls ZopfliUpdateHash(in, i, inend, h);
439 1.1 tls }
440 1.1 tls }
441 1.1 tls
442 1.1 tls ZopfliCleanHash(h);
443 1.1 tls }
444 1.1 tls
445 1.1 tls void ZopfliLZ77Counts(const unsigned short* litlens,
446 1.1 tls const unsigned short* dists,
447 1.1 tls size_t start, size_t end,
448 1.1 tls size_t* ll_count, size_t* d_count) {
449 1.1 tls size_t i;
450 1.1 tls
451 1.1 tls for (i = 0; i < 288; i++) {
452 1.1 tls ll_count[i] = 0;
453 1.1 tls }
454 1.1 tls for (i = 0; i < 32; i++) {
455 1.1 tls d_count[i] = 0;
456 1.1 tls }
457 1.1 tls
458 1.1 tls for (i = start; i < end; i++) {
459 1.1 tls if (dists[i] == 0) {
460 1.1 tls ll_count[litlens[i]]++;
461 1.1 tls } else {
462 1.1 tls ll_count[ZopfliGetLengthSymbol(litlens[i])]++;
463 1.1 tls d_count[ZopfliGetDistSymbol(dists[i])]++;
464 1.1 tls }
465 1.1 tls }
466 1.1 tls
467 1.1 tls ll_count[256] = 1; /* End symbol. */
468 1.1 tls }
469