blocksplitter.c revision 1.1.1.1.8.2 1 1.1.1.1.8.2 snj /*
2 1.1.1.1.8.2 snj Copyright 2011 Google Inc. All Rights Reserved.
3 1.1.1.1.8.2 snj
4 1.1.1.1.8.2 snj Licensed under the Apache License, Version 2.0 (the "License");
5 1.1.1.1.8.2 snj you may not use this file except in compliance with the License.
6 1.1.1.1.8.2 snj You may obtain a copy of the License at
7 1.1.1.1.8.2 snj
8 1.1.1.1.8.2 snj http://www.apache.org/licenses/LICENSE-2.0
9 1.1.1.1.8.2 snj
10 1.1.1.1.8.2 snj Unless required by applicable law or agreed to in writing, software
11 1.1.1.1.8.2 snj distributed under the License is distributed on an "AS IS" BASIS,
12 1.1.1.1.8.2 snj WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 1.1.1.1.8.2 snj See the License for the specific language governing permissions and
14 1.1.1.1.8.2 snj limitations under the License.
15 1.1.1.1.8.2 snj
16 1.1.1.1.8.2 snj Author: lode.vandevenne (at) gmail.com (Lode Vandevenne)
17 1.1.1.1.8.2 snj Author: jyrki.alakuijala (at) gmail.com (Jyrki Alakuijala)
18 1.1.1.1.8.2 snj */
19 1.1.1.1.8.2 snj
20 1.1.1.1.8.2 snj #include "blocksplitter.h"
21 1.1.1.1.8.2 snj
22 1.1.1.1.8.2 snj #include <assert.h>
23 1.1.1.1.8.2 snj #include <stdio.h>
24 1.1.1.1.8.2 snj #include <stdlib.h>
25 1.1.1.1.8.2 snj
26 1.1.1.1.8.2 snj #include "deflate.h"
27 1.1.1.1.8.2 snj #include "lz77.h"
28 1.1.1.1.8.2 snj #include "squeeze.h"
29 1.1.1.1.8.2 snj #include "tree.h"
30 1.1.1.1.8.2 snj #include "util.h"
31 1.1.1.1.8.2 snj
32 1.1.1.1.8.2 snj /*
33 1.1.1.1.8.2 snj The "f" for the FindMinimum function below.
34 1.1.1.1.8.2 snj i: the current parameter of f(i)
35 1.1.1.1.8.2 snj context: for your implementation
36 1.1.1.1.8.2 snj */
37 1.1.1.1.8.2 snj typedef double FindMinimumFun(size_t i, void* context);
38 1.1.1.1.8.2 snj
39 1.1.1.1.8.2 snj /*
40 1.1.1.1.8.2 snj Finds minimum of function f(i) where is is of type size_t, f(i) is of type
41 1.1.1.1.8.2 snj double, i is in range start-end (excluding end).
42 1.1.1.1.8.2 snj */
43 1.1.1.1.8.2 snj static size_t FindMinimum(FindMinimumFun f, void* context,
44 1.1.1.1.8.2 snj size_t start, size_t end) {
45 1.1.1.1.8.2 snj if (end - start < 1024) {
46 1.1.1.1.8.2 snj double best = ZOPFLI_LARGE_FLOAT;
47 1.1.1.1.8.2 snj size_t result = start;
48 1.1.1.1.8.2 snj size_t i;
49 1.1.1.1.8.2 snj for (i = start; i < end; i++) {
50 1.1.1.1.8.2 snj double v = f(i, context);
51 1.1.1.1.8.2 snj if (v < best) {
52 1.1.1.1.8.2 snj best = v;
53 1.1.1.1.8.2 snj result = i;
54 1.1.1.1.8.2 snj }
55 1.1.1.1.8.2 snj }
56 1.1.1.1.8.2 snj return result;
57 1.1.1.1.8.2 snj } else {
58 1.1.1.1.8.2 snj /* Try to find minimum faster by recursively checking multiple points. */
59 1.1.1.1.8.2 snj #define NUM 9 /* Good value: 9. */
60 1.1.1.1.8.2 snj size_t i;
61 1.1.1.1.8.2 snj size_t p[NUM];
62 1.1.1.1.8.2 snj double vp[NUM];
63 1.1.1.1.8.2 snj size_t besti;
64 1.1.1.1.8.2 snj double best;
65 1.1.1.1.8.2 snj double lastbest = ZOPFLI_LARGE_FLOAT;
66 1.1.1.1.8.2 snj size_t pos = start;
67 1.1.1.1.8.2 snj
68 1.1.1.1.8.2 snj for (;;) {
69 1.1.1.1.8.2 snj if (end - start <= NUM) break;
70 1.1.1.1.8.2 snj
71 1.1.1.1.8.2 snj for (i = 0; i < NUM; i++) {
72 1.1.1.1.8.2 snj p[i] = start + (i + 1) * ((end - start) / (NUM + 1));
73 1.1.1.1.8.2 snj vp[i] = f(p[i], context);
74 1.1.1.1.8.2 snj }
75 1.1.1.1.8.2 snj besti = 0;
76 1.1.1.1.8.2 snj best = vp[0];
77 1.1.1.1.8.2 snj for (i = 1; i < NUM; i++) {
78 1.1.1.1.8.2 snj if (vp[i] < best) {
79 1.1.1.1.8.2 snj best = vp[i];
80 1.1.1.1.8.2 snj besti = i;
81 1.1.1.1.8.2 snj }
82 1.1.1.1.8.2 snj }
83 1.1.1.1.8.2 snj if (best > lastbest) break;
84 1.1.1.1.8.2 snj
85 1.1.1.1.8.2 snj start = besti == 0 ? start : p[besti - 1];
86 1.1.1.1.8.2 snj end = besti == NUM - 1 ? end : p[besti + 1];
87 1.1.1.1.8.2 snj
88 1.1.1.1.8.2 snj pos = p[besti];
89 1.1.1.1.8.2 snj lastbest = best;
90 1.1.1.1.8.2 snj }
91 1.1.1.1.8.2 snj return pos;
92 1.1.1.1.8.2 snj #undef NUM
93 1.1.1.1.8.2 snj }
94 1.1.1.1.8.2 snj }
95 1.1.1.1.8.2 snj
96 1.1.1.1.8.2 snj /*
97 1.1.1.1.8.2 snj Returns estimated cost of a block in bits. It includes the size to encode the
98 1.1.1.1.8.2 snj tree and the size to encode all literal, length and distance symbols and their
99 1.1.1.1.8.2 snj extra bits.
100 1.1.1.1.8.2 snj
101 1.1.1.1.8.2 snj litlens: lz77 lit/lengths
102 1.1.1.1.8.2 snj dists: ll77 distances
103 1.1.1.1.8.2 snj lstart: start of block
104 1.1.1.1.8.2 snj lend: end of block (not inclusive)
105 1.1.1.1.8.2 snj */
106 1.1.1.1.8.2 snj static double EstimateCost(const unsigned short* litlens,
107 1.1.1.1.8.2 snj const unsigned short* dists,
108 1.1.1.1.8.2 snj size_t lstart, size_t lend) {
109 1.1.1.1.8.2 snj return ZopfliCalculateBlockSize(litlens, dists, lstart, lend, 2);
110 1.1.1.1.8.2 snj }
111 1.1.1.1.8.2 snj
112 1.1.1.1.8.2 snj typedef struct SplitCostContext {
113 1.1.1.1.8.2 snj const unsigned short* litlens;
114 1.1.1.1.8.2 snj const unsigned short* dists;
115 1.1.1.1.8.2 snj size_t llsize;
116 1.1.1.1.8.2 snj size_t start;
117 1.1.1.1.8.2 snj size_t end;
118 1.1.1.1.8.2 snj } SplitCostContext;
119 1.1.1.1.8.2 snj
120 1.1.1.1.8.2 snj
121 1.1.1.1.8.2 snj /*
122 1.1.1.1.8.2 snj Gets the cost which is the sum of the cost of the left and the right section
123 1.1.1.1.8.2 snj of the data.
124 1.1.1.1.8.2 snj type: FindMinimumFun
125 1.1.1.1.8.2 snj */
126 1.1.1.1.8.2 snj static double SplitCost(size_t i, void* context) {
127 1.1.1.1.8.2 snj SplitCostContext* c = (SplitCostContext*)context;
128 1.1.1.1.8.2 snj return EstimateCost(c->litlens, c->dists, c->start, i) +
129 1.1.1.1.8.2 snj EstimateCost(c->litlens, c->dists, i, c->end);
130 1.1.1.1.8.2 snj }
131 1.1.1.1.8.2 snj
132 1.1.1.1.8.2 snj static void AddSorted(size_t value, size_t** out, size_t* outsize) {
133 1.1.1.1.8.2 snj size_t i;
134 1.1.1.1.8.2 snj ZOPFLI_APPEND_DATA(value, out, outsize);
135 1.1.1.1.8.2 snj if (*outsize > 0) {
136 1.1.1.1.8.2 snj for (i = 0; i < *outsize - 1; i++) {
137 1.1.1.1.8.2 snj if ((*out)[i] > value) {
138 1.1.1.1.8.2 snj size_t j;
139 1.1.1.1.8.2 snj for (j = *outsize - 1; j > i; j--) {
140 1.1.1.1.8.2 snj (*out)[j] = (*out)[j - 1];
141 1.1.1.1.8.2 snj }
142 1.1.1.1.8.2 snj (*out)[i] = value;
143 1.1.1.1.8.2 snj break;
144 1.1.1.1.8.2 snj }
145 1.1.1.1.8.2 snj }
146 1.1.1.1.8.2 snj }
147 1.1.1.1.8.2 snj }
148 1.1.1.1.8.2 snj
149 1.1.1.1.8.2 snj /*
150 1.1.1.1.8.2 snj Prints the block split points as decimal and hex values in the terminal.
151 1.1.1.1.8.2 snj */
152 1.1.1.1.8.2 snj static void PrintBlockSplitPoints(const unsigned short* litlens,
153 1.1.1.1.8.2 snj const unsigned short* dists,
154 1.1.1.1.8.2 snj size_t llsize, const size_t* lz77splitpoints,
155 1.1.1.1.8.2 snj size_t nlz77points) {
156 1.1.1.1.8.2 snj size_t* splitpoints = 0;
157 1.1.1.1.8.2 snj size_t npoints = 0;
158 1.1.1.1.8.2 snj size_t i;
159 1.1.1.1.8.2 snj /* The input is given as lz77 indices, but we want to see the uncompressed
160 1.1.1.1.8.2 snj index values. */
161 1.1.1.1.8.2 snj size_t pos = 0;
162 1.1.1.1.8.2 snj if (nlz77points > 0) {
163 1.1.1.1.8.2 snj for (i = 0; i < llsize; i++) {
164 1.1.1.1.8.2 snj size_t length = dists[i] == 0 ? 1 : litlens[i];
165 1.1.1.1.8.2 snj if (lz77splitpoints[npoints] == i) {
166 1.1.1.1.8.2 snj ZOPFLI_APPEND_DATA(pos, &splitpoints, &npoints);
167 1.1.1.1.8.2 snj if (npoints == nlz77points) break;
168 1.1.1.1.8.2 snj }
169 1.1.1.1.8.2 snj pos += length;
170 1.1.1.1.8.2 snj }
171 1.1.1.1.8.2 snj }
172 1.1.1.1.8.2 snj assert(npoints == nlz77points);
173 1.1.1.1.8.2 snj
174 1.1.1.1.8.2 snj fprintf(stderr, "block split points: ");
175 1.1.1.1.8.2 snj for (i = 0; i < npoints; i++) {
176 1.1.1.1.8.2 snj fprintf(stderr, "%d ", (int)splitpoints[i]);
177 1.1.1.1.8.2 snj }
178 1.1.1.1.8.2 snj fprintf(stderr, "(hex:");
179 1.1.1.1.8.2 snj for (i = 0; i < npoints; i++) {
180 1.1.1.1.8.2 snj fprintf(stderr, " %x", (int)splitpoints[i]);
181 1.1.1.1.8.2 snj }
182 1.1.1.1.8.2 snj fprintf(stderr, ")\n");
183 1.1.1.1.8.2 snj
184 1.1.1.1.8.2 snj free(splitpoints);
185 1.1.1.1.8.2 snj }
186 1.1.1.1.8.2 snj
187 1.1.1.1.8.2 snj /*
188 1.1.1.1.8.2 snj Finds next block to try to split, the largest of the available ones.
189 1.1.1.1.8.2 snj The largest is chosen to make sure that if only a limited amount of blocks is
190 1.1.1.1.8.2 snj requested, their sizes are spread evenly.
191 1.1.1.1.8.2 snj llsize: the size of the LL77 data, which is the size of the done array here.
192 1.1.1.1.8.2 snj done: array indicating which blocks starting at that position are no longer
193 1.1.1.1.8.2 snj splittable (splitting them increases rather than decreases cost).
194 1.1.1.1.8.2 snj splitpoints: the splitpoints found so far.
195 1.1.1.1.8.2 snj npoints: the amount of splitpoints found so far.
196 1.1.1.1.8.2 snj lstart: output variable, giving start of block.
197 1.1.1.1.8.2 snj lend: output variable, giving end of block.
198 1.1.1.1.8.2 snj returns 1 if a block was found, 0 if no block found (all are done).
199 1.1.1.1.8.2 snj */
200 1.1.1.1.8.2 snj static int FindLargestSplittableBlock(
201 1.1.1.1.8.2 snj size_t llsize, const unsigned char* done,
202 1.1.1.1.8.2 snj const size_t* splitpoints, size_t npoints,
203 1.1.1.1.8.2 snj size_t* lstart, size_t* lend) {
204 1.1.1.1.8.2 snj size_t longest = 0;
205 1.1.1.1.8.2 snj int found = 0;
206 1.1.1.1.8.2 snj size_t i;
207 1.1.1.1.8.2 snj for (i = 0; i <= npoints; i++) {
208 1.1.1.1.8.2 snj size_t start = i == 0 ? 0 : splitpoints[i - 1];
209 1.1.1.1.8.2 snj size_t end = i == npoints ? llsize - 1 : splitpoints[i];
210 1.1.1.1.8.2 snj if (!done[start] && end - start > longest) {
211 1.1.1.1.8.2 snj *lstart = start;
212 1.1.1.1.8.2 snj *lend = end;
213 1.1.1.1.8.2 snj found = 1;
214 1.1.1.1.8.2 snj longest = end - start;
215 1.1.1.1.8.2 snj }
216 1.1.1.1.8.2 snj }
217 1.1.1.1.8.2 snj return found;
218 1.1.1.1.8.2 snj }
219 1.1.1.1.8.2 snj
220 1.1.1.1.8.2 snj void ZopfliBlockSplitLZ77(const ZopfliOptions* options,
221 1.1.1.1.8.2 snj const unsigned short* litlens,
222 1.1.1.1.8.2 snj const unsigned short* dists,
223 1.1.1.1.8.2 snj size_t llsize, size_t maxblocks,
224 1.1.1.1.8.2 snj size_t** splitpoints, size_t* npoints) {
225 1.1.1.1.8.2 snj size_t lstart, lend;
226 1.1.1.1.8.2 snj size_t i;
227 1.1.1.1.8.2 snj size_t llpos = 0;
228 1.1.1.1.8.2 snj size_t numblocks = 1;
229 1.1.1.1.8.2 snj unsigned char* done;
230 1.1.1.1.8.2 snj double splitcost, origcost;
231 1.1.1.1.8.2 snj
232 1.1.1.1.8.2 snj if (llsize < 10) return; /* This code fails on tiny files. */
233 1.1.1.1.8.2 snj
234 1.1.1.1.8.2 snj done = (unsigned char*)malloc(llsize);
235 1.1.1.1.8.2 snj if (!done) exit(-1); /* Allocation failed. */
236 1.1.1.1.8.2 snj for (i = 0; i < llsize; i++) done[i] = 0;
237 1.1.1.1.8.2 snj
238 1.1.1.1.8.2 snj lstart = 0;
239 1.1.1.1.8.2 snj lend = llsize;
240 1.1.1.1.8.2 snj for (;;) {
241 1.1.1.1.8.2 snj SplitCostContext c;
242 1.1.1.1.8.2 snj
243 1.1.1.1.8.2 snj if (maxblocks > 0 && numblocks >= maxblocks) {
244 1.1.1.1.8.2 snj break;
245 1.1.1.1.8.2 snj }
246 1.1.1.1.8.2 snj
247 1.1.1.1.8.2 snj c.litlens = litlens;
248 1.1.1.1.8.2 snj c.dists = dists;
249 1.1.1.1.8.2 snj c.llsize = llsize;
250 1.1.1.1.8.2 snj c.start = lstart;
251 1.1.1.1.8.2 snj c.end = lend;
252 1.1.1.1.8.2 snj assert(lstart < lend);
253 1.1.1.1.8.2 snj llpos = FindMinimum(SplitCost, &c, lstart + 1, lend);
254 1.1.1.1.8.2 snj
255 1.1.1.1.8.2 snj assert(llpos > lstart);
256 1.1.1.1.8.2 snj assert(llpos < lend);
257 1.1.1.1.8.2 snj
258 1.1.1.1.8.2 snj splitcost = EstimateCost(litlens, dists, lstart, llpos) +
259 1.1.1.1.8.2 snj EstimateCost(litlens, dists, llpos, lend);
260 1.1.1.1.8.2 snj origcost = EstimateCost(litlens, dists, lstart, lend);
261 1.1.1.1.8.2 snj
262 1.1.1.1.8.2 snj if (splitcost > origcost || llpos == lstart + 1 || llpos == lend) {
263 1.1.1.1.8.2 snj done[lstart] = 1;
264 1.1.1.1.8.2 snj } else {
265 1.1.1.1.8.2 snj AddSorted(llpos, splitpoints, npoints);
266 1.1.1.1.8.2 snj numblocks++;
267 1.1.1.1.8.2 snj }
268 1.1.1.1.8.2 snj
269 1.1.1.1.8.2 snj if (!FindLargestSplittableBlock(
270 1.1.1.1.8.2 snj llsize, done, *splitpoints, *npoints, &lstart, &lend)) {
271 1.1.1.1.8.2 snj break; /* No further split will probably reduce compression. */
272 1.1.1.1.8.2 snj }
273 1.1.1.1.8.2 snj
274 1.1.1.1.8.2 snj if (lend - lstart < 10) {
275 1.1.1.1.8.2 snj break;
276 1.1.1.1.8.2 snj }
277 1.1.1.1.8.2 snj }
278 1.1.1.1.8.2 snj
279 1.1.1.1.8.2 snj if (options->verbose) {
280 1.1.1.1.8.2 snj PrintBlockSplitPoints(litlens, dists, llsize, *splitpoints, *npoints);
281 1.1.1.1.8.2 snj }
282 1.1.1.1.8.2 snj
283 1.1.1.1.8.2 snj free(done);
284 1.1.1.1.8.2 snj }
285 1.1.1.1.8.2 snj
286 1.1.1.1.8.2 snj void ZopfliBlockSplit(const ZopfliOptions* options,
287 1.1.1.1.8.2 snj const unsigned char* in, size_t instart, size_t inend,
288 1.1.1.1.8.2 snj size_t maxblocks, size_t** splitpoints, size_t* npoints) {
289 1.1.1.1.8.2 snj size_t pos = 0;
290 1.1.1.1.8.2 snj size_t i;
291 1.1.1.1.8.2 snj ZopfliBlockState s;
292 1.1.1.1.8.2 snj size_t* lz77splitpoints = 0;
293 1.1.1.1.8.2 snj size_t nlz77points = 0;
294 1.1.1.1.8.2 snj ZopfliLZ77Store store;
295 1.1.1.1.8.2 snj
296 1.1.1.1.8.2 snj ZopfliInitLZ77Store(&store);
297 1.1.1.1.8.2 snj
298 1.1.1.1.8.2 snj s.options = options;
299 1.1.1.1.8.2 snj s.blockstart = instart;
300 1.1.1.1.8.2 snj s.blockend = inend;
301 1.1.1.1.8.2 snj #ifdef ZOPFLI_LONGEST_MATCH_CACHE
302 1.1.1.1.8.2 snj s.lmc = 0;
303 1.1.1.1.8.2 snj #endif
304 1.1.1.1.8.2 snj
305 1.1.1.1.8.2 snj *npoints = 0;
306 1.1.1.1.8.2 snj *splitpoints = 0;
307 1.1.1.1.8.2 snj
308 1.1.1.1.8.2 snj /* Unintuitively, Using a simple LZ77 method here instead of ZopfliLZ77Optimal
309 1.1.1.1.8.2 snj results in better blocks. */
310 1.1.1.1.8.2 snj ZopfliLZ77Greedy(&s, in, instart, inend, &store);
311 1.1.1.1.8.2 snj
312 1.1.1.1.8.2 snj ZopfliBlockSplitLZ77(options,
313 1.1.1.1.8.2 snj store.litlens, store.dists, store.size, maxblocks,
314 1.1.1.1.8.2 snj &lz77splitpoints, &nlz77points);
315 1.1.1.1.8.2 snj
316 1.1.1.1.8.2 snj /* Convert LZ77 positions to positions in the uncompressed input. */
317 1.1.1.1.8.2 snj pos = instart;
318 1.1.1.1.8.2 snj if (nlz77points > 0) {
319 1.1.1.1.8.2 snj for (i = 0; i < store.size; i++) {
320 1.1.1.1.8.2 snj size_t length = store.dists[i] == 0 ? 1 : store.litlens[i];
321 1.1.1.1.8.2 snj if (lz77splitpoints[*npoints] == i) {
322 1.1.1.1.8.2 snj ZOPFLI_APPEND_DATA(pos, splitpoints, npoints);
323 1.1.1.1.8.2 snj if (*npoints == nlz77points) break;
324 1.1.1.1.8.2 snj }
325 1.1.1.1.8.2 snj pos += length;
326 1.1.1.1.8.2 snj }
327 1.1.1.1.8.2 snj }
328 1.1.1.1.8.2 snj assert(*npoints == nlz77points);
329 1.1.1.1.8.2 snj
330 1.1.1.1.8.2 snj free(lz77splitpoints);
331 1.1.1.1.8.2 snj ZopfliCleanLZ77Store(&store);
332 1.1.1.1.8.2 snj }
333 1.1.1.1.8.2 snj
334 1.1.1.1.8.2 snj void ZopfliBlockSplitSimple(const unsigned char* in,
335 1.1.1.1.8.2 snj size_t instart, size_t inend,
336 1.1.1.1.8.2 snj size_t blocksize,
337 1.1.1.1.8.2 snj size_t** splitpoints, size_t* npoints) {
338 1.1.1.1.8.2 snj size_t i = instart;
339 1.1.1.1.8.2 snj while (i < inend) {
340 1.1.1.1.8.2 snj ZOPFLI_APPEND_DATA(i, splitpoints, npoints);
341 1.1.1.1.8.2 snj i += blocksize;
342 1.1.1.1.8.2 snj }
343 1.1.1.1.8.2 snj (void)in;
344 1.1.1.1.8.2 snj }
345