unlz.c revision 1.7 1 1.7 rillig /* $NetBSD: unlz.c,v 1.7 2021/09/10 21:52:17 rillig Exp $ */
2 1.1 christos
3 1.1 christos /*-
4 1.1 christos * Copyright (c) 2018 The NetBSD Foundation, Inc.
5 1.1 christos * All rights reserved.
6 1.1 christos *
7 1.1 christos * This code is derived from software contributed to The NetBSD Foundation
8 1.1 christos * by Christos Zoulas.
9 1.1 christos *
10 1.1 christos * Redistribution and use in source and binary forms, with or without
11 1.1 christos * modification, are permitted provided that the following conditions
12 1.1 christos * are met:
13 1.1 christos * 1. Redistributions of source code must retain the above copyright
14 1.1 christos * notice, this list of conditions and the following disclaimer.
15 1.1 christos * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 christos * notice, this list of conditions and the following disclaimer in the
17 1.1 christos * documentation and/or other materials provided with the distribution.
18 1.1 christos *
19 1.1 christos * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 1.1 christos * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 1.1 christos * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 1.1 christos * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 1.1 christos * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 1.1 christos * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 1.1 christos * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 1.1 christos * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 1.1 christos * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 1.1 christos * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 1.1 christos * POSSIBILITY OF SUCH DAMAGE.
30 1.1 christos */
31 1.1 christos
32 1.1 christos /* Lzd - Educational decompressor for the lzip format
33 1.1 christos Copyright (C) 2013-2018 Antonio Diaz Diaz.
34 1.1 christos
35 1.1 christos This program is free software. Redistribution and use in source and
36 1.1 christos binary forms, with or without modification, are permitted provided
37 1.1 christos that the following conditions are met:
38 1.1 christos
39 1.1 christos 1. Redistributions of source code must retain the above copyright
40 1.1 christos notice, this list of conditions and the following disclaimer.
41 1.1 christos
42 1.1 christos 2. Redistributions in binary form must reproduce the above copyright
43 1.1 christos notice, this list of conditions and the following disclaimer in the
44 1.1 christos documentation and/or other materials provided with the distribution.
45 1.1 christos
46 1.1 christos This program is distributed in the hope that it will be useful,
47 1.1 christos but WITHOUT ANY WARRANTY; without even the implied warranty of
48 1.1 christos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
49 1.1 christos */
50 1.1 christos
51 1.1 christos #include <sys/param.h>
52 1.1 christos #include <stdio.h>
53 1.1 christos #include <string.h>
54 1.1 christos #include <stdlib.h>
55 1.1 christos #include <stdint.h>
56 1.1 christos #include <stdbool.h>
57 1.1 christos #include <errno.h>
58 1.1 christos #include <unistd.h>
59 1.1 christos
60 1.1 christos #define LZ_STATES 12
61 1.1 christos
62 1.1 christos #define LITERAL_CONTEXT_BITS 3
63 1.1 christos #define POS_STATE_BITS 2
64 1.1 christos #define POS_STATES (1 << POS_STATE_BITS)
65 1.1 christos #define POS_STATE_MASK (POS_STATES - 1)
66 1.1 christos
67 1.1 christos #define STATES 4
68 1.1 christos #define DIS_SLOT_BITS 6
69 1.1 christos
70 1.1 christos #define DIS_MODEL_START 4
71 1.1 christos #define DIS_MODEL_END 14
72 1.1 christos
73 1.1 christos #define MODELED_DISTANCES (1 << (DIS_MODEL_END / 2))
74 1.1 christos #define DIS_ALIGN_BITS 4
75 1.1 christos #define DIS_ALIGN_SIZE (1 << DIS_ALIGN_BITS)
76 1.1 christos
77 1.1 christos #define LOW_BITS 3
78 1.1 christos #define MID_BITS 3
79 1.1 christos #define HIGH_BITS 8
80 1.1 christos
81 1.1 christos #define LOW_SYMBOLS (1 << LOW_BITS)
82 1.1 christos #define MID_SYMBOLS (1 << MID_BITS)
83 1.1 christos #define HIGH_SYMBOLS (1 << HIGH_BITS)
84 1.1 christos
85 1.1 christos #define MAX_SYMBOLS (LOW_SYMBOLS + MID_SYMBOLS + HIGH_SYMBOLS)
86 1.1 christos
87 1.1 christos #define MIN_MATCH_LEN 2
88 1.1 christos
89 1.1 christos #define BIT_MODEL_MOVE_BITS 5
90 1.1 christos #define BIT_MODEL_TOTAL_BITS 11
91 1.1 christos #define BIT_MODEL_TOTAL (1 << BIT_MODEL_TOTAL_BITS)
92 1.1 christos #define BIT_MODEL_INIT (BIT_MODEL_TOTAL / 2)
93 1.1 christos
94 1.1 christos static const int lz_st_next[] = {
95 1.1 christos 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5,
96 1.1 christos };
97 1.1 christos
98 1.1 christos static bool
99 1.1 christos lz_st_is_char(int st) {
100 1.1 christos return st < 7;
101 1.1 christos }
102 1.1 christos
103 1.1 christos static int
104 1.1 christos lz_st_get_char(int st) {
105 1.1 christos return lz_st_next[st];
106 1.1 christos }
107 1.1 christos
108 1.1 christos static int
109 1.1 christos lz_st_get_match(int st) {
110 1.1 christos return st < 7 ? 7 : 10;
111 1.1 christos }
112 1.1 christos
113 1.1 christos static int
114 1.1 christos lz_st_get_rep(int st) {
115 1.1 christos return st < 7 ? 8 : 11;
116 1.1 christos }
117 1.1 christos
118 1.1 christos static int
119 1.1 christos lz_st_get_short_rep(int st) {
120 1.1 christos return st < 7 ? 9 : 11;
121 1.1 christos }
122 1.1 christos
123 1.1 christos struct lz_len_model {
124 1.1 christos int choice1;
125 1.1 christos int choice2;
126 1.1 christos int bm_low[POS_STATES][LOW_SYMBOLS];
127 1.1 christos int bm_mid[POS_STATES][MID_SYMBOLS];
128 1.1 christos int bm_high[HIGH_SYMBOLS];
129 1.1 christos };
130 1.1 christos
131 1.1 christos static uint32_t lz_crc[256];
132 1.1 christos
133 1.1 christos static void
134 1.1 christos lz_crc_init(void)
135 1.1 christos {
136 1.1 christos for (unsigned i = 0; i < __arraycount(lz_crc); i++) {
137 1.1 christos unsigned c = i;
138 1.1 christos for (unsigned j = 0; j < 8; j++) {
139 1.1 christos if (c & 1)
140 1.1 christos c = 0xEDB88320U ^ (c >> 1);
141 1.1 christos else
142 1.1 christos c >>= 1;
143 1.1 christos }
144 1.1 christos lz_crc[i] = c;
145 1.1 christos }
146 1.1 christos }
147 1.1 christos
148 1.1 christos static void
149 1.1 christos lz_crc_update(uint32_t *crc, const uint8_t *buf, size_t len)
150 1.1 christos {
151 1.1 christos for (size_t i = 0; i < len; i++)
152 1.1 christos *crc = lz_crc[(*crc ^ buf[i]) & 0xFF] ^ (*crc >> 8);
153 1.1 christos }
154 1.1 christos
155 1.1 christos struct lz_range_decoder {
156 1.1 christos FILE *fp;
157 1.1 christos uint32_t code;
158 1.1 christos uint32_t range;
159 1.1 christos };
160 1.1 christos
161 1.1 christos static int
162 1.1 christos lz_rd_create(struct lz_range_decoder *rd, FILE *fp)
163 1.1 christos {
164 1.1 christos rd->fp = fp;
165 1.1 christos rd->code = 0;
166 1.1 christos rd->range = ~0;
167 1.1 christos for (int i = 0; i < 5; i++)
168 1.1 christos rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
169 1.1 christos return ferror(rd->fp) ? -1 : 0;
170 1.1 christos }
171 1.1 christos
172 1.1 christos static unsigned
173 1.1 christos lz_rd_decode(struct lz_range_decoder *rd, int num_bits)
174 1.1 christos {
175 1.1 christos unsigned symbol = 0;
176 1.1 christos
177 1.1 christos for (int i = num_bits; i > 0; i--) {
178 1.1 christos rd->range >>= 1;
179 1.1 christos symbol <<= 1;
180 1.1 christos if (rd->code >= rd->range) {
181 1.1 christos rd->code -= rd->range;
182 1.1 christos symbol |= 1;
183 1.1 christos }
184 1.1 christos if (rd->range <= 0x00FFFFFFU) {
185 1.1 christos rd->range <<= 8;
186 1.1 christos rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
187 1.1 christos }
188 1.1 christos }
189 1.1 christos
190 1.1 christos return symbol;
191 1.1 christos }
192 1.1 christos
193 1.1 christos static unsigned
194 1.1 christos lz_rd_decode_bit(struct lz_range_decoder *rd, int *bm)
195 1.1 christos {
196 1.1 christos unsigned symbol;
197 1.1 christos const uint32_t bound = (rd->range >> BIT_MODEL_TOTAL_BITS) * *bm;
198 1.1 christos
199 1.1 christos if(rd->code < bound) {
200 1.1 christos rd->range = bound;
201 1.1 christos *bm += (BIT_MODEL_TOTAL - *bm) >> BIT_MODEL_MOVE_BITS;
202 1.1 christos symbol = 0;
203 1.1 christos }
204 1.1 christos else {
205 1.1 christos rd->range -= bound;
206 1.1 christos rd->code -= bound;
207 1.1 christos *bm -= *bm >> BIT_MODEL_MOVE_BITS;
208 1.1 christos symbol = 1;
209 1.1 christos }
210 1.1 christos
211 1.1 christos if (rd->range <= 0x00FFFFFFU) {
212 1.1 christos rd->range <<= 8;
213 1.1 christos rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
214 1.1 christos }
215 1.1 christos return symbol;
216 1.1 christos }
217 1.1 christos
218 1.1 christos static unsigned
219 1.1 christos lz_rd_decode_tree(struct lz_range_decoder *rd, int *bm, int num_bits)
220 1.1 christos {
221 1.1 christos unsigned symbol = 1;
222 1.1 christos
223 1.1 christos for (int i = 0; i < num_bits; i++)
224 1.1 christos symbol = (symbol << 1) | lz_rd_decode_bit(rd, &bm[symbol]);
225 1.1 christos
226 1.1 christos return symbol - (1 << num_bits);
227 1.1 christos }
228 1.1 christos
229 1.1 christos static unsigned
230 1.1 christos lz_rd_decode_tree_reversed(struct lz_range_decoder *rd, int *bm, int num_bits)
231 1.1 christos {
232 1.1 christos unsigned symbol = lz_rd_decode_tree(rd, bm, num_bits);
233 1.1 christos unsigned reversed_symbol = 0;
234 1.1 christos
235 1.1 christos for (int i = 0; i < num_bits; i++) {
236 1.1 christos reversed_symbol = (reversed_symbol << 1) | (symbol & 1);
237 1.1 christos symbol >>= 1;
238 1.1 christos }
239 1.1 christos
240 1.1 christos return reversed_symbol;
241 1.1 christos }
242 1.1 christos
243 1.1 christos static unsigned
244 1.1 christos lz_rd_decode_matched(struct lz_range_decoder *rd, int *bm, int match_byte)
245 1.1 christos {
246 1.1 christos unsigned symbol = 1;
247 1.1 christos
248 1.1 christos for (int i = 7; i >= 0; i--) {
249 1.1 christos const unsigned match_bit = (match_byte >> i) & 1;
250 1.1 christos const unsigned bit = lz_rd_decode_bit(rd,
251 1.1 christos &bm[symbol + (match_bit << 8) + 0x100]);
252 1.1 christos symbol = (symbol << 1) | bit;
253 1.1 christos if (match_bit != bit) {
254 1.1 christos while (symbol < 0x100) {
255 1.1 christos symbol = (symbol << 1) |
256 1.1 christos lz_rd_decode_bit(rd, &bm[symbol]);
257 1.1 christos }
258 1.1 christos break;
259 1.1 christos }
260 1.1 christos }
261 1.1 christos return symbol & 0xFF;
262 1.1 christos }
263 1.1 christos
264 1.1 christos static unsigned
265 1.1 christos lz_rd_decode_len(struct lz_range_decoder *rd, struct lz_len_model *lm,
266 1.1 christos int pos_state)
267 1.1 christos {
268 1.1 christos if (lz_rd_decode_bit(rd, &lm->choice1) == 0)
269 1.1 christos return lz_rd_decode_tree(rd, lm->bm_low[pos_state], LOW_BITS);
270 1.1 christos
271 1.1 christos if (lz_rd_decode_bit(rd, &lm->choice2) == 0) {
272 1.1 christos return LOW_SYMBOLS +
273 1.1 christos lz_rd_decode_tree(rd, lm->bm_mid[pos_state], MID_BITS);
274 1.1 christos }
275 1.1 christos
276 1.1 christos return LOW_SYMBOLS + MID_SYMBOLS +
277 1.1 christos lz_rd_decode_tree(rd, lm->bm_high, HIGH_BITS);
278 1.1 christos }
279 1.1 christos
280 1.1 christos struct lz_decoder {
281 1.1 christos FILE *fin, *fout;
282 1.1 christos off_t pos, ppos, spos, dict_size;
283 1.1 christos bool wrapped;
284 1.1 christos uint32_t crc;
285 1.1 christos uint8_t *obuf;
286 1.1 christos struct lz_range_decoder rdec;
287 1.1 christos };
288 1.1 christos
289 1.1 christos static int
290 1.1 christos lz_flush(struct lz_decoder *lz)
291 1.1 christos {
292 1.1 christos off_t offs = lz->pos - lz->spos;
293 1.1 christos if (offs <= 0)
294 1.1 christos return -1;
295 1.1 christos
296 1.1 christos size_t size = (size_t)offs;
297 1.1 christos lz_crc_update(&lz->crc, lz->obuf + lz->spos, size);
298 1.1 christos if (fwrite(lz->obuf + lz->spos, 1, size, lz->fout) != size)
299 1.1 christos return -1;
300 1.1 christos
301 1.1 christos lz->wrapped = lz->pos >= lz->dict_size;
302 1.1 christos if (lz->wrapped) {
303 1.1 christos lz->ppos += lz->pos;
304 1.1 christos lz->pos = 0;
305 1.1 christos }
306 1.1 christos lz->spos = lz->pos;
307 1.1 christos return 0;
308 1.1 christos }
309 1.1 christos
310 1.1 christos static void
311 1.1 christos lz_destroy(struct lz_decoder *lz)
312 1.1 christos {
313 1.1 christos if (lz->fin)
314 1.1 christos fclose(lz->fin);
315 1.1 christos if (lz->fout)
316 1.1 christos fclose(lz->fout);
317 1.1 christos free(lz->obuf);
318 1.1 christos }
319 1.1 christos
320 1.1 christos static int
321 1.1 christos lz_create(struct lz_decoder *lz, int fin, int fdout, int dict_size)
322 1.1 christos {
323 1.1 christos memset(lz, 0, sizeof(*lz));
324 1.1 christos
325 1.1 christos lz->fin = fdopen(dup(fin), "r");
326 1.1 christos if (lz->fin == NULL)
327 1.1 christos goto out;
328 1.1 christos
329 1.1 christos lz->fout = fdopen(dup(fdout), "w");
330 1.1 christos if (lz->fout == NULL)
331 1.1 christos goto out;
332 1.1 christos
333 1.1 christos lz->pos = lz->ppos = lz->spos = 0;
334 1.1 christos lz->crc = ~0;
335 1.1 christos lz->dict_size = dict_size;
336 1.1 christos lz->wrapped = false;
337 1.1 christos
338 1.1 christos lz->obuf = malloc(dict_size);
339 1.1 christos if (lz->obuf == NULL)
340 1.1 christos goto out;
341 1.1 christos
342 1.1 christos if (lz_rd_create(&lz->rdec, lz->fin) == -1)
343 1.1 christos goto out;
344 1.1 christos return 0;
345 1.1 christos out:
346 1.1 christos lz_destroy(lz);
347 1.1 christos return -1;
348 1.1 christos }
349 1.1 christos
350 1.1 christos static uint8_t
351 1.1 christos lz_peek(const struct lz_decoder *lz, unsigned ahead)
352 1.1 christos {
353 1.1 christos off_t diff = lz->pos - ahead - 1;
354 1.1 christos
355 1.1 christos if (diff >= 0)
356 1.1 christos return lz->obuf[diff];
357 1.1 christos
358 1.1 christos if (lz->wrapped)
359 1.1 christos return lz->obuf[lz->dict_size + diff];
360 1.1 christos
361 1.1 christos return 0;
362 1.1 christos }
363 1.1 christos
364 1.1 christos static void
365 1.1 christos lz_put(struct lz_decoder *lz, uint8_t b)
366 1.1 christos {
367 1.1 christos lz->obuf[lz->pos++] = b;
368 1.1 christos if (lz->dict_size == lz->pos)
369 1.1 christos lz_flush(lz);
370 1.1 christos }
371 1.1 christos
372 1.1 christos static off_t
373 1.1 christos lz_get_data_position(const struct lz_decoder *lz)
374 1.1 christos {
375 1.1 christos return lz->ppos + lz->pos;
376 1.1 christos }
377 1.1 christos
378 1.1 christos static unsigned
379 1.1 christos lz_get_crc(const struct lz_decoder *lz)
380 1.1 christos {
381 1.1 christos return lz->crc ^ 0xffffffffU;
382 1.1 christos }
383 1.1 christos
384 1.1 christos static void
385 1.1 christos lz_bm_init(int *a, size_t l)
386 1.1 christos {
387 1.1 christos for (size_t i = 0; i < l; i++)
388 1.1 christos a[i] = BIT_MODEL_INIT;
389 1.1 christos }
390 1.1 christos
391 1.1 christos #define LZ_BM_INIT(a) lz_bm_init(a, __arraycount(a))
392 1.1 christos #define LZ_BM_INIT2(a) do { \
393 1.1 christos size_t l = __arraycount(a[0]); \
394 1.1 christos for (size_t i = 0; i < __arraycount(a); i++) \
395 1.1 christos lz_bm_init(a[i], l); \
396 1.7 rillig } while (0)
397 1.1 christos
398 1.1 christos #define LZ_MODEL_INIT(a) do { \
399 1.1 christos a.choice1 = BIT_MODEL_INIT; \
400 1.1 christos a.choice2 = BIT_MODEL_INIT; \
401 1.1 christos LZ_BM_INIT2(a.bm_low); \
402 1.1 christos LZ_BM_INIT2(a.bm_mid); \
403 1.1 christos LZ_BM_INIT(a.bm_high); \
404 1.7 rillig } while (0)
405 1.1 christos
406 1.1 christos static bool
407 1.1 christos lz_decode_member(struct lz_decoder *lz)
408 1.1 christos {
409 1.1 christos int bm_literal[1 << LITERAL_CONTEXT_BITS][0x300];
410 1.1 christos int bm_match[LZ_STATES][POS_STATES];
411 1.1 christos int bm_rep[4][LZ_STATES];
412 1.1 christos int bm_len[LZ_STATES][POS_STATES];
413 1.1 christos int bm_dis_slot[LZ_STATES][1 << DIS_SLOT_BITS];
414 1.1 christos int bm_dis[MODELED_DISTANCES - DIS_MODEL_END + 1];
415 1.1 christos int bm_align[DIS_ALIGN_SIZE];
416 1.1 christos
417 1.1 christos LZ_BM_INIT2(bm_literal);
418 1.1 christos LZ_BM_INIT2(bm_match);
419 1.1 christos LZ_BM_INIT2(bm_rep);
420 1.1 christos LZ_BM_INIT2(bm_len);
421 1.1 christos LZ_BM_INIT2(bm_dis_slot);
422 1.1 christos LZ_BM_INIT(bm_dis);
423 1.1 christos LZ_BM_INIT(bm_align);
424 1.1 christos
425 1.1 christos struct lz_len_model match_len_model;
426 1.1 christos struct lz_len_model rep_len_model;
427 1.1 christos
428 1.1 christos LZ_MODEL_INIT(match_len_model);
429 1.1 christos LZ_MODEL_INIT(rep_len_model);
430 1.1 christos
431 1.1 christos struct lz_range_decoder *rd = &lz->rdec;
432 1.1 christos unsigned rep[4] = { 0 };
433 1.1 christos
434 1.1 christos
435 1.1 christos int state = 0;
436 1.1 christos
437 1.1 christos while (!feof(lz->fin) && !ferror(lz->fin)) {
438 1.1 christos const int pos_state = lz_get_data_position(lz) & POS_STATE_MASK;
439 1.1 christos // bit 1
440 1.1 christos if (lz_rd_decode_bit(rd, &bm_match[state][pos_state]) == 0) {
441 1.1 christos const uint8_t prev_byte = lz_peek(lz, 0);
442 1.1 christos const int literal_state =
443 1.1 christos prev_byte >> (8 - LITERAL_CONTEXT_BITS);
444 1.1 christos int *bm = bm_literal[literal_state];
445 1.1 christos if (lz_st_is_char(state))
446 1.1 christos lz_put(lz, lz_rd_decode_tree(rd, bm, 8));
447 1.1 christos else {
448 1.1 christos int peek = lz_peek(lz, rep[0]);
449 1.1 christos lz_put(lz, lz_rd_decode_matched(rd, bm, peek));
450 1.1 christos }
451 1.1 christos state = lz_st_get_char(state);
452 1.1 christos continue;
453 1.1 christos }
454 1.1 christos int len;
455 1.1 christos // bit 2
456 1.1 christos if (lz_rd_decode_bit(rd, &bm_rep[0][state]) != 0) {
457 1.1 christos // bit 3
458 1.1 christos if (lz_rd_decode_bit(rd, &bm_rep[1][state]) == 0) {
459 1.1 christos // bit 4
460 1.1 christos if (lz_rd_decode_bit(rd,
461 1.1 christos &bm_len[state][pos_state]) == 0)
462 1.1 christos {
463 1.1 christos state = lz_st_get_short_rep(state);
464 1.1 christos lz_put(lz, lz_peek(lz, rep[0]));
465 1.1 christos continue;
466 1.1 christos }
467 1.1 christos } else {
468 1.1 christos unsigned distance;
469 1.1 christos // bit 4
470 1.1 christos if (lz_rd_decode_bit(rd, &bm_rep[2][state])
471 1.1 christos == 0)
472 1.1 christos distance = rep[1];
473 1.1 christos else {
474 1.1 christos // bit 5
475 1.1 christos if (lz_rd_decode_bit(rd,
476 1.1 christos &bm_rep[3][state]) == 0)
477 1.1 christos distance = rep[2];
478 1.1 christos else {
479 1.1 christos distance = rep[3];
480 1.1 christos rep[3] = rep[2];
481 1.1 christos }
482 1.1 christos rep[2] = rep[1];
483 1.1 christos }
484 1.1 christos rep[1] = rep[0];
485 1.1 christos rep[0] = distance;
486 1.1 christos }
487 1.1 christos state = lz_st_get_rep(state);
488 1.1 christos len = MIN_MATCH_LEN +
489 1.1 christos lz_rd_decode_len(rd, &rep_len_model, pos_state);
490 1.1 christos } else {
491 1.1 christos rep[3] = rep[2]; rep[2] = rep[1]; rep[1] = rep[0];
492 1.1 christos len = MIN_MATCH_LEN +
493 1.1 christos lz_rd_decode_len(rd, &match_len_model, pos_state);
494 1.1 christos const int len_state =
495 1.1 christos MIN(len - MIN_MATCH_LEN, STATES - 1);
496 1.1 christos rep[0] = lz_rd_decode_tree(rd, bm_dis_slot[len_state],
497 1.1 christos DIS_SLOT_BITS);
498 1.1 christos if (rep[0] >= DIS_MODEL_START) {
499 1.1 christos const unsigned dis_slot = rep[0];
500 1.1 christos const int direct_bits = (dis_slot >> 1) - 1;
501 1.1 christos rep[0] = (2 | (dis_slot & 1)) << direct_bits;
502 1.1 christos if (dis_slot < DIS_MODEL_END)
503 1.1 christos rep[0] += lz_rd_decode_tree_reversed(rd,
504 1.1 christos &bm_dis[rep[0] - dis_slot],
505 1.1 christos direct_bits);
506 1.1 christos else {
507 1.1 christos rep[0] += lz_rd_decode(rd, direct_bits
508 1.1 christos - DIS_ALIGN_BITS) << DIS_ALIGN_BITS;
509 1.1 christos rep[0] += lz_rd_decode_tree_reversed(rd,
510 1.1 christos bm_align, DIS_ALIGN_BITS);
511 1.1 christos if (rep[0] == 0xFFFFFFFFU) {
512 1.1 christos lz_flush(lz);
513 1.1 christos return len == MIN_MATCH_LEN;
514 1.1 christos }
515 1.1 christos }
516 1.1 christos }
517 1.1 christos state = lz_st_get_match(state);
518 1.1 christos if (rep[0] >= lz->dict_size ||
519 1.1 christos (rep[0] >= lz->pos && !lz->wrapped)) {
520 1.1 christos lz_flush(lz);
521 1.1 christos return false;
522 1.1 christos }
523 1.1 christos }
524 1.1 christos for (int i = 0; i < len; i++)
525 1.1 christos lz_put(lz, lz_peek(lz, rep[0]));
526 1.1 christos }
527 1.1 christos lz_flush(lz);
528 1.1 christos return false;
529 1.1 christos }
530 1.1 christos
531 1.1 christos /*
532 1.1 christos * 0-3 CRC32 of the uncompressed data
533 1.1 christos * 4-11 size of the uncompressed data
534 1.1 christos * 12-19 member size including header and trailer
535 1.1 christos */
536 1.1 christos #define TRAILER_SIZE 20
537 1.1 christos
538 1.1 christos
539 1.1 christos static off_t
540 1.4 christos lz_decode(int fin, int fdout, unsigned dict_size, off_t *insize)
541 1.1 christos {
542 1.1 christos struct lz_decoder lz;
543 1.1 christos off_t rv = -1;
544 1.1 christos
545 1.1 christos if (lz_create(&lz, fin, fdout, dict_size) == -1)
546 1.1 christos return -1;
547 1.1 christos
548 1.1 christos if (!lz_decode_member(&lz))
549 1.1 christos goto out;
550 1.1 christos
551 1.1 christos uint8_t trailer[TRAILER_SIZE];
552 1.1 christos
553 1.1 christos for(size_t i = 0; i < __arraycount(trailer); i++)
554 1.1 christos trailer[i] = (uint8_t)getc(lz.fin);
555 1.1 christos
556 1.1 christos unsigned crc = 0;
557 1.1 christos for (int i = 3; i >= 0; --i) {
558 1.1 christos crc <<= 8;
559 1.1 christos crc += trailer[i];
560 1.1 christos }
561 1.1 christos
562 1.1 christos int64_t data_size = 0;
563 1.1 christos for (int i = 11; i >= 4; --i) {
564 1.1 christos data_size <<= 8;
565 1.1 christos data_size += trailer[i];
566 1.1 christos }
567 1.1 christos
568 1.1 christos if (crc != lz_get_crc(&lz) || data_size != lz_get_data_position(&lz))
569 1.1 christos goto out;
570 1.1 christos
571 1.1 christos rv = 0;
572 1.1 christos for (int i = 19; i >= 12; --i) {
573 1.1 christos rv <<= 8;
574 1.1 christos rv += trailer[i];
575 1.1 christos }
576 1.4 christos if (insize)
577 1.4 christos *insize = rv;
578 1.6 christos #if 0
579 1.6 christos /* Does not work with pipes */
580 1.1 christos rv = ftello(lz.fout);
581 1.6 christos #else
582 1.6 christos rv = data_size;
583 1.6 christos #endif
584 1.1 christos out:
585 1.1 christos lz_destroy(&lz);
586 1.1 christos return rv;
587 1.1 christos }
588 1.1 christos
589 1.1 christos
590 1.1 christos /*
591 1.1 christos * 0-3 magic
592 1.1 christos * 4 version
593 1.1 christos * 5 coded dict_size
594 1.1 christos */
595 1.1 christos #define HDR_SIZE 6
596 1.1 christos #define MIN_DICTIONARY_SIZE (1 << 12)
597 1.1 christos #define MAX_DICTIONARY_SIZE (1 << 29)
598 1.1 christos
599 1.1 christos static const char hdrmagic[] = { 'L', 'Z', 'I', 'P', 1 };
600 1.1 christos
601 1.1 christos static unsigned
602 1.1 christos lz_get_dict_size(unsigned char c)
603 1.1 christos {
604 1.1 christos unsigned dict_size = 1 << (c & 0x1f);
605 1.5 christos dict_size -= (dict_size >> 2) * ( (c >> 5) & 0x7);
606 1.1 christos if (dict_size < MIN_DICTIONARY_SIZE || dict_size > MAX_DICTIONARY_SIZE)
607 1.1 christos return 0;
608 1.1 christos return dict_size;
609 1.1 christos }
610 1.1 christos
611 1.1 christos static off_t
612 1.1 christos unlz(int fin, int fout, char *pre, size_t prelen, off_t *bytes_in)
613 1.1 christos {
614 1.1 christos if (lz_crc[0] == 0)
615 1.1 christos lz_crc_init();
616 1.1 christos
617 1.1 christos char header[HDR_SIZE];
618 1.1 christos
619 1.4 christos if (prelen > sizeof(header))
620 1.4 christos return -1;
621 1.4 christos if (pre && prelen)
622 1.4 christos memcpy(header, pre, prelen);
623 1.4 christos
624 1.4 christos ssize_t nr = read(fin, header + prelen, sizeof(header) - prelen);
625 1.4 christos switch (nr) {
626 1.4 christos case -1:
627 1.4 christos return -1;
628 1.1 christos case 0:
629 1.4 christos return prelen ? -1 : 0;
630 1.4 christos default:
631 1.4 christos if ((size_t)nr != sizeof(header) - prelen)
632 1.4 christos return -1;
633 1.1 christos break;
634 1.1 christos }
635 1.1 christos
636 1.1 christos if (memcmp(header, hdrmagic, sizeof(hdrmagic)) != 0)
637 1.1 christos return -1;
638 1.1 christos
639 1.1 christos unsigned dict_size = lz_get_dict_size(header[5]);
640 1.1 christos if (dict_size == 0)
641 1.1 christos return -1;
642 1.1 christos
643 1.4 christos return lz_decode(fin, fout, dict_size, bytes_in);
644 1.1 christos }
645