unlz.c revision 1.6.2.2 1 1.6.2.2 pgoyette /* $NetBSD: unlz.c,v 1.6.2.2 2018/11/26 01:52:54 pgoyette Exp $ */
2 1.6.2.2 pgoyette
3 1.6.2.2 pgoyette /*-
4 1.6.2.2 pgoyette * Copyright (c) 2018 The NetBSD Foundation, Inc.
5 1.6.2.2 pgoyette * All rights reserved.
6 1.6.2.2 pgoyette *
7 1.6.2.2 pgoyette * This code is derived from software contributed to The NetBSD Foundation
8 1.6.2.2 pgoyette * by Christos Zoulas.
9 1.6.2.2 pgoyette *
10 1.6.2.2 pgoyette * Redistribution and use in source and binary forms, with or without
11 1.6.2.2 pgoyette * modification, are permitted provided that the following conditions
12 1.6.2.2 pgoyette * are met:
13 1.6.2.2 pgoyette * 1. Redistributions of source code must retain the above copyright
14 1.6.2.2 pgoyette * notice, this list of conditions and the following disclaimer.
15 1.6.2.2 pgoyette * 2. Redistributions in binary form must reproduce the above copyright
16 1.6.2.2 pgoyette * notice, this list of conditions and the following disclaimer in the
17 1.6.2.2 pgoyette * documentation and/or other materials provided with the distribution.
18 1.6.2.2 pgoyette *
19 1.6.2.2 pgoyette * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 1.6.2.2 pgoyette * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 1.6.2.2 pgoyette * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 1.6.2.2 pgoyette * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 1.6.2.2 pgoyette * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 1.6.2.2 pgoyette * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 1.6.2.2 pgoyette * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 1.6.2.2 pgoyette * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 1.6.2.2 pgoyette * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 1.6.2.2 pgoyette * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 1.6.2.2 pgoyette * POSSIBILITY OF SUCH DAMAGE.
30 1.6.2.2 pgoyette */
31 1.6.2.2 pgoyette
32 1.6.2.2 pgoyette /* Lzd - Educational decompressor for the lzip format
33 1.6.2.2 pgoyette Copyright (C) 2013-2018 Antonio Diaz Diaz.
34 1.6.2.2 pgoyette
35 1.6.2.2 pgoyette This program is free software. Redistribution and use in source and
36 1.6.2.2 pgoyette binary forms, with or without modification, are permitted provided
37 1.6.2.2 pgoyette that the following conditions are met:
38 1.6.2.2 pgoyette
39 1.6.2.2 pgoyette 1. Redistributions of source code must retain the above copyright
40 1.6.2.2 pgoyette notice, this list of conditions and the following disclaimer.
41 1.6.2.2 pgoyette
42 1.6.2.2 pgoyette 2. Redistributions in binary form must reproduce the above copyright
43 1.6.2.2 pgoyette notice, this list of conditions and the following disclaimer in the
44 1.6.2.2 pgoyette documentation and/or other materials provided with the distribution.
45 1.6.2.2 pgoyette
46 1.6.2.2 pgoyette This program is distributed in the hope that it will be useful,
47 1.6.2.2 pgoyette but WITHOUT ANY WARRANTY; without even the implied warranty of
48 1.6.2.2 pgoyette MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
49 1.6.2.2 pgoyette */
50 1.6.2.2 pgoyette
51 1.6.2.2 pgoyette #include <sys/param.h>
52 1.6.2.2 pgoyette #include <stdio.h>
53 1.6.2.2 pgoyette #include <string.h>
54 1.6.2.2 pgoyette #include <stdlib.h>
55 1.6.2.2 pgoyette #include <stdint.h>
56 1.6.2.2 pgoyette #include <stdbool.h>
57 1.6.2.2 pgoyette #include <errno.h>
58 1.6.2.2 pgoyette #include <unistd.h>
59 1.6.2.2 pgoyette
60 1.6.2.2 pgoyette #define LZ_STATES 12
61 1.6.2.2 pgoyette
62 1.6.2.2 pgoyette #define LITERAL_CONTEXT_BITS 3
63 1.6.2.2 pgoyette #define POS_STATE_BITS 2
64 1.6.2.2 pgoyette #define POS_STATES (1 << POS_STATE_BITS)
65 1.6.2.2 pgoyette #define POS_STATE_MASK (POS_STATES - 1)
66 1.6.2.2 pgoyette
67 1.6.2.2 pgoyette #define STATES 4
68 1.6.2.2 pgoyette #define DIS_SLOT_BITS 6
69 1.6.2.2 pgoyette
70 1.6.2.2 pgoyette #define DIS_MODEL_START 4
71 1.6.2.2 pgoyette #define DIS_MODEL_END 14
72 1.6.2.2 pgoyette
73 1.6.2.2 pgoyette #define MODELED_DISTANCES (1 << (DIS_MODEL_END / 2))
74 1.6.2.2 pgoyette #define DIS_ALIGN_BITS 4
75 1.6.2.2 pgoyette #define DIS_ALIGN_SIZE (1 << DIS_ALIGN_BITS)
76 1.6.2.2 pgoyette
77 1.6.2.2 pgoyette #define LOW_BITS 3
78 1.6.2.2 pgoyette #define MID_BITS 3
79 1.6.2.2 pgoyette #define HIGH_BITS 8
80 1.6.2.2 pgoyette
81 1.6.2.2 pgoyette #define LOW_SYMBOLS (1 << LOW_BITS)
82 1.6.2.2 pgoyette #define MID_SYMBOLS (1 << MID_BITS)
83 1.6.2.2 pgoyette #define HIGH_SYMBOLS (1 << HIGH_BITS)
84 1.6.2.2 pgoyette
85 1.6.2.2 pgoyette #define MAX_SYMBOLS (LOW_SYMBOLS + MID_SYMBOLS + HIGH_SYMBOLS)
86 1.6.2.2 pgoyette
87 1.6.2.2 pgoyette #define MIN_MATCH_LEN 2
88 1.6.2.2 pgoyette
89 1.6.2.2 pgoyette #define BIT_MODEL_MOVE_BITS 5
90 1.6.2.2 pgoyette #define BIT_MODEL_TOTAL_BITS 11
91 1.6.2.2 pgoyette #define BIT_MODEL_TOTAL (1 << BIT_MODEL_TOTAL_BITS)
92 1.6.2.2 pgoyette #define BIT_MODEL_INIT (BIT_MODEL_TOTAL / 2)
93 1.6.2.2 pgoyette
94 1.6.2.2 pgoyette static const int lz_st_next[] = {
95 1.6.2.2 pgoyette 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5,
96 1.6.2.2 pgoyette };
97 1.6.2.2 pgoyette
98 1.6.2.2 pgoyette static bool
99 1.6.2.2 pgoyette lz_st_is_char(int st) {
100 1.6.2.2 pgoyette return st < 7;
101 1.6.2.2 pgoyette }
102 1.6.2.2 pgoyette
103 1.6.2.2 pgoyette static int
104 1.6.2.2 pgoyette lz_st_get_char(int st) {
105 1.6.2.2 pgoyette return lz_st_next[st];
106 1.6.2.2 pgoyette }
107 1.6.2.2 pgoyette
108 1.6.2.2 pgoyette static int
109 1.6.2.2 pgoyette lz_st_get_match(int st) {
110 1.6.2.2 pgoyette return st < 7 ? 7 : 10;
111 1.6.2.2 pgoyette }
112 1.6.2.2 pgoyette
113 1.6.2.2 pgoyette static int
114 1.6.2.2 pgoyette lz_st_get_rep(int st) {
115 1.6.2.2 pgoyette return st < 7 ? 8 : 11;
116 1.6.2.2 pgoyette }
117 1.6.2.2 pgoyette
118 1.6.2.2 pgoyette static int
119 1.6.2.2 pgoyette lz_st_get_short_rep(int st) {
120 1.6.2.2 pgoyette return st < 7 ? 9 : 11;
121 1.6.2.2 pgoyette }
122 1.6.2.2 pgoyette
123 1.6.2.2 pgoyette struct lz_len_model {
124 1.6.2.2 pgoyette int choice1;
125 1.6.2.2 pgoyette int choice2;
126 1.6.2.2 pgoyette int bm_low[POS_STATES][LOW_SYMBOLS];
127 1.6.2.2 pgoyette int bm_mid[POS_STATES][MID_SYMBOLS];
128 1.6.2.2 pgoyette int bm_high[HIGH_SYMBOLS];
129 1.6.2.2 pgoyette };
130 1.6.2.2 pgoyette
131 1.6.2.2 pgoyette static uint32_t lz_crc[256];
132 1.6.2.2 pgoyette
133 1.6.2.2 pgoyette static void
134 1.6.2.2 pgoyette lz_crc_init(void)
135 1.6.2.2 pgoyette {
136 1.6.2.2 pgoyette for (unsigned i = 0; i < __arraycount(lz_crc); i++) {
137 1.6.2.2 pgoyette unsigned c = i;
138 1.6.2.2 pgoyette for (unsigned j = 0; j < 8; j++) {
139 1.6.2.2 pgoyette if (c & 1)
140 1.6.2.2 pgoyette c = 0xEDB88320U ^ (c >> 1);
141 1.6.2.2 pgoyette else
142 1.6.2.2 pgoyette c >>= 1;
143 1.6.2.2 pgoyette }
144 1.6.2.2 pgoyette lz_crc[i] = c;
145 1.6.2.2 pgoyette }
146 1.6.2.2 pgoyette }
147 1.6.2.2 pgoyette
148 1.6.2.2 pgoyette static void
149 1.6.2.2 pgoyette lz_crc_update(uint32_t *crc, const uint8_t *buf, size_t len)
150 1.6.2.2 pgoyette {
151 1.6.2.2 pgoyette for (size_t i = 0; i < len; i++)
152 1.6.2.2 pgoyette *crc = lz_crc[(*crc ^ buf[i]) & 0xFF] ^ (*crc >> 8);
153 1.6.2.2 pgoyette }
154 1.6.2.2 pgoyette
155 1.6.2.2 pgoyette struct lz_range_decoder {
156 1.6.2.2 pgoyette FILE *fp;
157 1.6.2.2 pgoyette uint32_t code;
158 1.6.2.2 pgoyette uint32_t range;
159 1.6.2.2 pgoyette };
160 1.6.2.2 pgoyette
161 1.6.2.2 pgoyette static int
162 1.6.2.2 pgoyette lz_rd_create(struct lz_range_decoder *rd, FILE *fp)
163 1.6.2.2 pgoyette {
164 1.6.2.2 pgoyette rd->fp = fp;
165 1.6.2.2 pgoyette rd->code = 0;
166 1.6.2.2 pgoyette rd->range = ~0;
167 1.6.2.2 pgoyette for (int i = 0; i < 5; i++)
168 1.6.2.2 pgoyette rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
169 1.6.2.2 pgoyette return ferror(rd->fp) ? -1 : 0;
170 1.6.2.2 pgoyette }
171 1.6.2.2 pgoyette
172 1.6.2.2 pgoyette static unsigned
173 1.6.2.2 pgoyette lz_rd_decode(struct lz_range_decoder *rd, int num_bits)
174 1.6.2.2 pgoyette {
175 1.6.2.2 pgoyette unsigned symbol = 0;
176 1.6.2.2 pgoyette
177 1.6.2.2 pgoyette for (int i = num_bits; i > 0; i--) {
178 1.6.2.2 pgoyette rd->range >>= 1;
179 1.6.2.2 pgoyette symbol <<= 1;
180 1.6.2.2 pgoyette if (rd->code >= rd->range) {
181 1.6.2.2 pgoyette rd->code -= rd->range;
182 1.6.2.2 pgoyette symbol |= 1;
183 1.6.2.2 pgoyette }
184 1.6.2.2 pgoyette if (rd->range <= 0x00FFFFFFU) {
185 1.6.2.2 pgoyette rd->range <<= 8;
186 1.6.2.2 pgoyette rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
187 1.6.2.2 pgoyette }
188 1.6.2.2 pgoyette }
189 1.6.2.2 pgoyette
190 1.6.2.2 pgoyette return symbol;
191 1.6.2.2 pgoyette }
192 1.6.2.2 pgoyette
193 1.6.2.2 pgoyette static unsigned
194 1.6.2.2 pgoyette lz_rd_decode_bit(struct lz_range_decoder *rd, int *bm)
195 1.6.2.2 pgoyette {
196 1.6.2.2 pgoyette unsigned symbol;
197 1.6.2.2 pgoyette const uint32_t bound = (rd->range >> BIT_MODEL_TOTAL_BITS) * *bm;
198 1.6.2.2 pgoyette
199 1.6.2.2 pgoyette if(rd->code < bound) {
200 1.6.2.2 pgoyette rd->range = bound;
201 1.6.2.2 pgoyette *bm += (BIT_MODEL_TOTAL - *bm) >> BIT_MODEL_MOVE_BITS;
202 1.6.2.2 pgoyette symbol = 0;
203 1.6.2.2 pgoyette }
204 1.6.2.2 pgoyette else {
205 1.6.2.2 pgoyette rd->range -= bound;
206 1.6.2.2 pgoyette rd->code -= bound;
207 1.6.2.2 pgoyette *bm -= *bm >> BIT_MODEL_MOVE_BITS;
208 1.6.2.2 pgoyette symbol = 1;
209 1.6.2.2 pgoyette }
210 1.6.2.2 pgoyette
211 1.6.2.2 pgoyette if (rd->range <= 0x00FFFFFFU) {
212 1.6.2.2 pgoyette rd->range <<= 8;
213 1.6.2.2 pgoyette rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
214 1.6.2.2 pgoyette }
215 1.6.2.2 pgoyette return symbol;
216 1.6.2.2 pgoyette }
217 1.6.2.2 pgoyette
218 1.6.2.2 pgoyette static unsigned
219 1.6.2.2 pgoyette lz_rd_decode_tree(struct lz_range_decoder *rd, int *bm, int num_bits)
220 1.6.2.2 pgoyette {
221 1.6.2.2 pgoyette unsigned symbol = 1;
222 1.6.2.2 pgoyette
223 1.6.2.2 pgoyette for (int i = 0; i < num_bits; i++)
224 1.6.2.2 pgoyette symbol = (symbol << 1) | lz_rd_decode_bit(rd, &bm[symbol]);
225 1.6.2.2 pgoyette
226 1.6.2.2 pgoyette return symbol - (1 << num_bits);
227 1.6.2.2 pgoyette }
228 1.6.2.2 pgoyette
229 1.6.2.2 pgoyette static unsigned
230 1.6.2.2 pgoyette lz_rd_decode_tree_reversed(struct lz_range_decoder *rd, int *bm, int num_bits)
231 1.6.2.2 pgoyette {
232 1.6.2.2 pgoyette unsigned symbol = lz_rd_decode_tree(rd, bm, num_bits);
233 1.6.2.2 pgoyette unsigned reversed_symbol = 0;
234 1.6.2.2 pgoyette
235 1.6.2.2 pgoyette for (int i = 0; i < num_bits; i++) {
236 1.6.2.2 pgoyette reversed_symbol = (reversed_symbol << 1) | (symbol & 1);
237 1.6.2.2 pgoyette symbol >>= 1;
238 1.6.2.2 pgoyette }
239 1.6.2.2 pgoyette
240 1.6.2.2 pgoyette return reversed_symbol;
241 1.6.2.2 pgoyette }
242 1.6.2.2 pgoyette
243 1.6.2.2 pgoyette static unsigned
244 1.6.2.2 pgoyette lz_rd_decode_matched(struct lz_range_decoder *rd, int *bm, int match_byte)
245 1.6.2.2 pgoyette {
246 1.6.2.2 pgoyette unsigned symbol = 1;
247 1.6.2.2 pgoyette
248 1.6.2.2 pgoyette for (int i = 7; i >= 0; i--) {
249 1.6.2.2 pgoyette const unsigned match_bit = (match_byte >> i) & 1;
250 1.6.2.2 pgoyette const unsigned bit = lz_rd_decode_bit(rd,
251 1.6.2.2 pgoyette &bm[symbol + (match_bit << 8) + 0x100]);
252 1.6.2.2 pgoyette symbol = (symbol << 1) | bit;
253 1.6.2.2 pgoyette if (match_bit != bit) {
254 1.6.2.2 pgoyette while (symbol < 0x100) {
255 1.6.2.2 pgoyette symbol = (symbol << 1) |
256 1.6.2.2 pgoyette lz_rd_decode_bit(rd, &bm[symbol]);
257 1.6.2.2 pgoyette }
258 1.6.2.2 pgoyette break;
259 1.6.2.2 pgoyette }
260 1.6.2.2 pgoyette }
261 1.6.2.2 pgoyette return symbol & 0xFF;
262 1.6.2.2 pgoyette }
263 1.6.2.2 pgoyette
264 1.6.2.2 pgoyette static unsigned
265 1.6.2.2 pgoyette lz_rd_decode_len(struct lz_range_decoder *rd, struct lz_len_model *lm,
266 1.6.2.2 pgoyette int pos_state)
267 1.6.2.2 pgoyette {
268 1.6.2.2 pgoyette if (lz_rd_decode_bit(rd, &lm->choice1) == 0)
269 1.6.2.2 pgoyette return lz_rd_decode_tree(rd, lm->bm_low[pos_state], LOW_BITS);
270 1.6.2.2 pgoyette
271 1.6.2.2 pgoyette if (lz_rd_decode_bit(rd, &lm->choice2) == 0) {
272 1.6.2.2 pgoyette return LOW_SYMBOLS +
273 1.6.2.2 pgoyette lz_rd_decode_tree(rd, lm->bm_mid[pos_state], MID_BITS);
274 1.6.2.2 pgoyette }
275 1.6.2.2 pgoyette
276 1.6.2.2 pgoyette return LOW_SYMBOLS + MID_SYMBOLS +
277 1.6.2.2 pgoyette lz_rd_decode_tree(rd, lm->bm_high, HIGH_BITS);
278 1.6.2.2 pgoyette }
279 1.6.2.2 pgoyette
280 1.6.2.2 pgoyette struct lz_decoder {
281 1.6.2.2 pgoyette FILE *fin, *fout;
282 1.6.2.2 pgoyette off_t pos, ppos, spos, dict_size;
283 1.6.2.2 pgoyette bool wrapped;
284 1.6.2.2 pgoyette uint32_t crc;
285 1.6.2.2 pgoyette uint8_t *obuf;
286 1.6.2.2 pgoyette struct lz_range_decoder rdec;
287 1.6.2.2 pgoyette };
288 1.6.2.2 pgoyette
289 1.6.2.2 pgoyette static int
290 1.6.2.2 pgoyette lz_flush(struct lz_decoder *lz)
291 1.6.2.2 pgoyette {
292 1.6.2.2 pgoyette off_t offs = lz->pos - lz->spos;
293 1.6.2.2 pgoyette if (offs <= 0)
294 1.6.2.2 pgoyette return -1;
295 1.6.2.2 pgoyette
296 1.6.2.2 pgoyette size_t size = (size_t)offs;
297 1.6.2.2 pgoyette lz_crc_update(&lz->crc, lz->obuf + lz->spos, size);
298 1.6.2.2 pgoyette if (fwrite(lz->obuf + lz->spos, 1, size, lz->fout) != size)
299 1.6.2.2 pgoyette return -1;
300 1.6.2.2 pgoyette
301 1.6.2.2 pgoyette lz->wrapped = lz->pos >= lz->dict_size;
302 1.6.2.2 pgoyette if (lz->wrapped) {
303 1.6.2.2 pgoyette lz->ppos += lz->pos;
304 1.6.2.2 pgoyette lz->pos = 0;
305 1.6.2.2 pgoyette }
306 1.6.2.2 pgoyette lz->spos = lz->pos;
307 1.6.2.2 pgoyette return 0;
308 1.6.2.2 pgoyette }
309 1.6.2.2 pgoyette
310 1.6.2.2 pgoyette static void
311 1.6.2.2 pgoyette lz_destroy(struct lz_decoder *lz)
312 1.6.2.2 pgoyette {
313 1.6.2.2 pgoyette if (lz->fin)
314 1.6.2.2 pgoyette fclose(lz->fin);
315 1.6.2.2 pgoyette if (lz->fout)
316 1.6.2.2 pgoyette fclose(lz->fout);
317 1.6.2.2 pgoyette free(lz->obuf);
318 1.6.2.2 pgoyette }
319 1.6.2.2 pgoyette
320 1.6.2.2 pgoyette static int
321 1.6.2.2 pgoyette lz_create(struct lz_decoder *lz, int fin, int fdout, int dict_size)
322 1.6.2.2 pgoyette {
323 1.6.2.2 pgoyette memset(lz, 0, sizeof(*lz));
324 1.6.2.2 pgoyette
325 1.6.2.2 pgoyette lz->fin = fdopen(dup(fin), "r");
326 1.6.2.2 pgoyette if (lz->fin == NULL)
327 1.6.2.2 pgoyette goto out;
328 1.6.2.2 pgoyette
329 1.6.2.2 pgoyette lz->fout = fdopen(dup(fdout), "w");
330 1.6.2.2 pgoyette if (lz->fout == NULL)
331 1.6.2.2 pgoyette goto out;
332 1.6.2.2 pgoyette
333 1.6.2.2 pgoyette lz->pos = lz->ppos = lz->spos = 0;
334 1.6.2.2 pgoyette lz->crc = ~0;
335 1.6.2.2 pgoyette lz->dict_size = dict_size;
336 1.6.2.2 pgoyette lz->wrapped = false;
337 1.6.2.2 pgoyette
338 1.6.2.2 pgoyette lz->obuf = malloc(dict_size);
339 1.6.2.2 pgoyette if (lz->obuf == NULL)
340 1.6.2.2 pgoyette goto out;
341 1.6.2.2 pgoyette
342 1.6.2.2 pgoyette if (lz_rd_create(&lz->rdec, lz->fin) == -1)
343 1.6.2.2 pgoyette goto out;
344 1.6.2.2 pgoyette return 0;
345 1.6.2.2 pgoyette out:
346 1.6.2.2 pgoyette lz_destroy(lz);
347 1.6.2.2 pgoyette return -1;
348 1.6.2.2 pgoyette }
349 1.6.2.2 pgoyette
350 1.6.2.2 pgoyette static uint8_t
351 1.6.2.2 pgoyette lz_peek(const struct lz_decoder *lz, unsigned ahead)
352 1.6.2.2 pgoyette {
353 1.6.2.2 pgoyette off_t diff = lz->pos - ahead - 1;
354 1.6.2.2 pgoyette
355 1.6.2.2 pgoyette if (diff >= 0)
356 1.6.2.2 pgoyette return lz->obuf[diff];
357 1.6.2.2 pgoyette
358 1.6.2.2 pgoyette if (lz->wrapped)
359 1.6.2.2 pgoyette return lz->obuf[lz->dict_size + diff];
360 1.6.2.2 pgoyette
361 1.6.2.2 pgoyette return 0;
362 1.6.2.2 pgoyette }
363 1.6.2.2 pgoyette
364 1.6.2.2 pgoyette static void
365 1.6.2.2 pgoyette lz_put(struct lz_decoder *lz, uint8_t b)
366 1.6.2.2 pgoyette {
367 1.6.2.2 pgoyette lz->obuf[lz->pos++] = b;
368 1.6.2.2 pgoyette if (lz->dict_size == lz->pos)
369 1.6.2.2 pgoyette lz_flush(lz);
370 1.6.2.2 pgoyette }
371 1.6.2.2 pgoyette
372 1.6.2.2 pgoyette static off_t
373 1.6.2.2 pgoyette lz_get_data_position(const struct lz_decoder *lz)
374 1.6.2.2 pgoyette {
375 1.6.2.2 pgoyette return lz->ppos + lz->pos;
376 1.6.2.2 pgoyette }
377 1.6.2.2 pgoyette
378 1.6.2.2 pgoyette static unsigned
379 1.6.2.2 pgoyette lz_get_crc(const struct lz_decoder *lz)
380 1.6.2.2 pgoyette {
381 1.6.2.2 pgoyette return lz->crc ^ 0xffffffffU;
382 1.6.2.2 pgoyette }
383 1.6.2.2 pgoyette
384 1.6.2.2 pgoyette static void
385 1.6.2.2 pgoyette lz_bm_init(int *a, size_t l)
386 1.6.2.2 pgoyette {
387 1.6.2.2 pgoyette for (size_t i = 0; i < l; i++)
388 1.6.2.2 pgoyette a[i] = BIT_MODEL_INIT;
389 1.6.2.2 pgoyette }
390 1.6.2.2 pgoyette
391 1.6.2.2 pgoyette #define LZ_BM_INIT(a) lz_bm_init(a, __arraycount(a))
392 1.6.2.2 pgoyette #define LZ_BM_INIT2(a) do { \
393 1.6.2.2 pgoyette size_t l = __arraycount(a[0]); \
394 1.6.2.2 pgoyette for (size_t i = 0; i < __arraycount(a); i++) \
395 1.6.2.2 pgoyette lz_bm_init(a[i], l); \
396 1.6.2.2 pgoyette } while (/*CONSTCOND*/0)
397 1.6.2.2 pgoyette
398 1.6.2.2 pgoyette #define LZ_MODEL_INIT(a) do { \
399 1.6.2.2 pgoyette a.choice1 = BIT_MODEL_INIT; \
400 1.6.2.2 pgoyette a.choice2 = BIT_MODEL_INIT; \
401 1.6.2.2 pgoyette LZ_BM_INIT2(a.bm_low); \
402 1.6.2.2 pgoyette LZ_BM_INIT2(a.bm_mid); \
403 1.6.2.2 pgoyette LZ_BM_INIT(a.bm_high); \
404 1.6.2.2 pgoyette } while (/*CONSTCOND*/0)
405 1.6.2.2 pgoyette
406 1.6.2.2 pgoyette static bool
407 1.6.2.2 pgoyette lz_decode_member(struct lz_decoder *lz)
408 1.6.2.2 pgoyette {
409 1.6.2.2 pgoyette int bm_literal[1 << LITERAL_CONTEXT_BITS][0x300];
410 1.6.2.2 pgoyette int bm_match[LZ_STATES][POS_STATES];
411 1.6.2.2 pgoyette int bm_rep[4][LZ_STATES];
412 1.6.2.2 pgoyette int bm_len[LZ_STATES][POS_STATES];
413 1.6.2.2 pgoyette int bm_dis_slot[LZ_STATES][1 << DIS_SLOT_BITS];
414 1.6.2.2 pgoyette int bm_dis[MODELED_DISTANCES - DIS_MODEL_END + 1];
415 1.6.2.2 pgoyette int bm_align[DIS_ALIGN_SIZE];
416 1.6.2.2 pgoyette
417 1.6.2.2 pgoyette LZ_BM_INIT2(bm_literal);
418 1.6.2.2 pgoyette LZ_BM_INIT2(bm_match);
419 1.6.2.2 pgoyette LZ_BM_INIT2(bm_rep);
420 1.6.2.2 pgoyette LZ_BM_INIT2(bm_len);
421 1.6.2.2 pgoyette LZ_BM_INIT2(bm_dis_slot);
422 1.6.2.2 pgoyette LZ_BM_INIT(bm_dis);
423 1.6.2.2 pgoyette LZ_BM_INIT(bm_align);
424 1.6.2.2 pgoyette
425 1.6.2.2 pgoyette struct lz_len_model match_len_model;
426 1.6.2.2 pgoyette struct lz_len_model rep_len_model;
427 1.6.2.2 pgoyette
428 1.6.2.2 pgoyette LZ_MODEL_INIT(match_len_model);
429 1.6.2.2 pgoyette LZ_MODEL_INIT(rep_len_model);
430 1.6.2.2 pgoyette
431 1.6.2.2 pgoyette struct lz_range_decoder *rd = &lz->rdec;
432 1.6.2.2 pgoyette unsigned rep[4] = { 0 };
433 1.6.2.2 pgoyette
434 1.6.2.2 pgoyette
435 1.6.2.2 pgoyette int state = 0;
436 1.6.2.2 pgoyette
437 1.6.2.2 pgoyette while (!feof(lz->fin) && !ferror(lz->fin)) {
438 1.6.2.2 pgoyette const int pos_state = lz_get_data_position(lz) & POS_STATE_MASK;
439 1.6.2.2 pgoyette // bit 1
440 1.6.2.2 pgoyette if (lz_rd_decode_bit(rd, &bm_match[state][pos_state]) == 0) {
441 1.6.2.2 pgoyette const uint8_t prev_byte = lz_peek(lz, 0);
442 1.6.2.2 pgoyette const int literal_state =
443 1.6.2.2 pgoyette prev_byte >> (8 - LITERAL_CONTEXT_BITS);
444 1.6.2.2 pgoyette int *bm = bm_literal[literal_state];
445 1.6.2.2 pgoyette if (lz_st_is_char(state))
446 1.6.2.2 pgoyette lz_put(lz, lz_rd_decode_tree(rd, bm, 8));
447 1.6.2.2 pgoyette else {
448 1.6.2.2 pgoyette int peek = lz_peek(lz, rep[0]);
449 1.6.2.2 pgoyette lz_put(lz, lz_rd_decode_matched(rd, bm, peek));
450 1.6.2.2 pgoyette }
451 1.6.2.2 pgoyette state = lz_st_get_char(state);
452 1.6.2.2 pgoyette continue;
453 1.6.2.2 pgoyette }
454 1.6.2.2 pgoyette int len;
455 1.6.2.2 pgoyette // bit 2
456 1.6.2.2 pgoyette if (lz_rd_decode_bit(rd, &bm_rep[0][state]) != 0) {
457 1.6.2.2 pgoyette // bit 3
458 1.6.2.2 pgoyette if (lz_rd_decode_bit(rd, &bm_rep[1][state]) == 0) {
459 1.6.2.2 pgoyette // bit 4
460 1.6.2.2 pgoyette if (lz_rd_decode_bit(rd,
461 1.6.2.2 pgoyette &bm_len[state][pos_state]) == 0)
462 1.6.2.2 pgoyette {
463 1.6.2.2 pgoyette state = lz_st_get_short_rep(state);
464 1.6.2.2 pgoyette lz_put(lz, lz_peek(lz, rep[0]));
465 1.6.2.2 pgoyette continue;
466 1.6.2.2 pgoyette }
467 1.6.2.2 pgoyette } else {
468 1.6.2.2 pgoyette unsigned distance;
469 1.6.2.2 pgoyette // bit 4
470 1.6.2.2 pgoyette if (lz_rd_decode_bit(rd, &bm_rep[2][state])
471 1.6.2.2 pgoyette == 0)
472 1.6.2.2 pgoyette distance = rep[1];
473 1.6.2.2 pgoyette else {
474 1.6.2.2 pgoyette // bit 5
475 1.6.2.2 pgoyette if (lz_rd_decode_bit(rd,
476 1.6.2.2 pgoyette &bm_rep[3][state]) == 0)
477 1.6.2.2 pgoyette distance = rep[2];
478 1.6.2.2 pgoyette else {
479 1.6.2.2 pgoyette distance = rep[3];
480 1.6.2.2 pgoyette rep[3] = rep[2];
481 1.6.2.2 pgoyette }
482 1.6.2.2 pgoyette rep[2] = rep[1];
483 1.6.2.2 pgoyette }
484 1.6.2.2 pgoyette rep[1] = rep[0];
485 1.6.2.2 pgoyette rep[0] = distance;
486 1.6.2.2 pgoyette }
487 1.6.2.2 pgoyette state = lz_st_get_rep(state);
488 1.6.2.2 pgoyette len = MIN_MATCH_LEN +
489 1.6.2.2 pgoyette lz_rd_decode_len(rd, &rep_len_model, pos_state);
490 1.6.2.2 pgoyette } else {
491 1.6.2.2 pgoyette rep[3] = rep[2]; rep[2] = rep[1]; rep[1] = rep[0];
492 1.6.2.2 pgoyette len = MIN_MATCH_LEN +
493 1.6.2.2 pgoyette lz_rd_decode_len(rd, &match_len_model, pos_state);
494 1.6.2.2 pgoyette const int len_state =
495 1.6.2.2 pgoyette MIN(len - MIN_MATCH_LEN, STATES - 1);
496 1.6.2.2 pgoyette rep[0] = lz_rd_decode_tree(rd, bm_dis_slot[len_state],
497 1.6.2.2 pgoyette DIS_SLOT_BITS);
498 1.6.2.2 pgoyette if (rep[0] >= DIS_MODEL_START) {
499 1.6.2.2 pgoyette const unsigned dis_slot = rep[0];
500 1.6.2.2 pgoyette const int direct_bits = (dis_slot >> 1) - 1;
501 1.6.2.2 pgoyette rep[0] = (2 | (dis_slot & 1)) << direct_bits;
502 1.6.2.2 pgoyette if (dis_slot < DIS_MODEL_END)
503 1.6.2.2 pgoyette rep[0] += lz_rd_decode_tree_reversed(rd,
504 1.6.2.2 pgoyette &bm_dis[rep[0] - dis_slot],
505 1.6.2.2 pgoyette direct_bits);
506 1.6.2.2 pgoyette else {
507 1.6.2.2 pgoyette rep[0] += lz_rd_decode(rd, direct_bits
508 1.6.2.2 pgoyette - DIS_ALIGN_BITS) << DIS_ALIGN_BITS;
509 1.6.2.2 pgoyette rep[0] += lz_rd_decode_tree_reversed(rd,
510 1.6.2.2 pgoyette bm_align, DIS_ALIGN_BITS);
511 1.6.2.2 pgoyette if (rep[0] == 0xFFFFFFFFU) {
512 1.6.2.2 pgoyette lz_flush(lz);
513 1.6.2.2 pgoyette return len == MIN_MATCH_LEN;
514 1.6.2.2 pgoyette }
515 1.6.2.2 pgoyette }
516 1.6.2.2 pgoyette }
517 1.6.2.2 pgoyette state = lz_st_get_match(state);
518 1.6.2.2 pgoyette if (rep[0] >= lz->dict_size ||
519 1.6.2.2 pgoyette (rep[0] >= lz->pos && !lz->wrapped)) {
520 1.6.2.2 pgoyette lz_flush(lz);
521 1.6.2.2 pgoyette return false;
522 1.6.2.2 pgoyette }
523 1.6.2.2 pgoyette }
524 1.6.2.2 pgoyette for (int i = 0; i < len; i++)
525 1.6.2.2 pgoyette lz_put(lz, lz_peek(lz, rep[0]));
526 1.6.2.2 pgoyette }
527 1.6.2.2 pgoyette lz_flush(lz);
528 1.6.2.2 pgoyette return false;
529 1.6.2.2 pgoyette }
530 1.6.2.2 pgoyette
531 1.6.2.2 pgoyette /*
532 1.6.2.2 pgoyette * 0-3 CRC32 of the uncompressed data
533 1.6.2.2 pgoyette * 4-11 size of the uncompressed data
534 1.6.2.2 pgoyette * 12-19 member size including header and trailer
535 1.6.2.2 pgoyette */
536 1.6.2.2 pgoyette #define TRAILER_SIZE 20
537 1.6.2.2 pgoyette
538 1.6.2.2 pgoyette
539 1.6.2.2 pgoyette static off_t
540 1.6.2.2 pgoyette lz_decode(int fin, int fdout, unsigned dict_size, off_t *insize)
541 1.6.2.2 pgoyette {
542 1.6.2.2 pgoyette struct lz_decoder lz;
543 1.6.2.2 pgoyette off_t rv = -1;
544 1.6.2.2 pgoyette
545 1.6.2.2 pgoyette if (lz_create(&lz, fin, fdout, dict_size) == -1)
546 1.6.2.2 pgoyette return -1;
547 1.6.2.2 pgoyette
548 1.6.2.2 pgoyette if (!lz_decode_member(&lz))
549 1.6.2.2 pgoyette goto out;
550 1.6.2.2 pgoyette
551 1.6.2.2 pgoyette uint8_t trailer[TRAILER_SIZE];
552 1.6.2.2 pgoyette
553 1.6.2.2 pgoyette for(size_t i = 0; i < __arraycount(trailer); i++)
554 1.6.2.2 pgoyette trailer[i] = (uint8_t)getc(lz.fin);
555 1.6.2.2 pgoyette
556 1.6.2.2 pgoyette unsigned crc = 0;
557 1.6.2.2 pgoyette for (int i = 3; i >= 0; --i) {
558 1.6.2.2 pgoyette crc <<= 8;
559 1.6.2.2 pgoyette crc += trailer[i];
560 1.6.2.2 pgoyette }
561 1.6.2.2 pgoyette
562 1.6.2.2 pgoyette int64_t data_size = 0;
563 1.6.2.2 pgoyette for (int i = 11; i >= 4; --i) {
564 1.6.2.2 pgoyette data_size <<= 8;
565 1.6.2.2 pgoyette data_size += trailer[i];
566 1.6.2.2 pgoyette }
567 1.6.2.2 pgoyette
568 1.6.2.2 pgoyette if (crc != lz_get_crc(&lz) || data_size != lz_get_data_position(&lz))
569 1.6.2.2 pgoyette goto out;
570 1.6.2.2 pgoyette
571 1.6.2.2 pgoyette rv = 0;
572 1.6.2.2 pgoyette for (int i = 19; i >= 12; --i) {
573 1.6.2.2 pgoyette rv <<= 8;
574 1.6.2.2 pgoyette rv += trailer[i];
575 1.6.2.2 pgoyette }
576 1.6.2.2 pgoyette if (insize)
577 1.6.2.2 pgoyette *insize = rv;
578 1.6.2.2 pgoyette #if 0
579 1.6.2.2 pgoyette /* Does not work with pipes */
580 1.6.2.2 pgoyette rv = ftello(lz.fout);
581 1.6.2.2 pgoyette #else
582 1.6.2.2 pgoyette rv = data_size;
583 1.6.2.2 pgoyette #endif
584 1.6.2.2 pgoyette out:
585 1.6.2.2 pgoyette lz_destroy(&lz);
586 1.6.2.2 pgoyette return rv;
587 1.6.2.2 pgoyette }
588 1.6.2.2 pgoyette
589 1.6.2.2 pgoyette
590 1.6.2.2 pgoyette /*
591 1.6.2.2 pgoyette * 0-3 magic
592 1.6.2.2 pgoyette * 4 version
593 1.6.2.2 pgoyette * 5 coded dict_size
594 1.6.2.2 pgoyette */
595 1.6.2.2 pgoyette #define HDR_SIZE 6
596 1.6.2.2 pgoyette #define MIN_DICTIONARY_SIZE (1 << 12)
597 1.6.2.2 pgoyette #define MAX_DICTIONARY_SIZE (1 << 29)
598 1.6.2.2 pgoyette
599 1.6.2.2 pgoyette static const char hdrmagic[] = { 'L', 'Z', 'I', 'P', 1 };
600 1.6.2.2 pgoyette
601 1.6.2.2 pgoyette static unsigned
602 1.6.2.2 pgoyette lz_get_dict_size(unsigned char c)
603 1.6.2.2 pgoyette {
604 1.6.2.2 pgoyette unsigned dict_size = 1 << (c & 0x1f);
605 1.6.2.2 pgoyette dict_size -= (dict_size >> 2) * ( (c >> 5) & 0x7);
606 1.6.2.2 pgoyette if (dict_size < MIN_DICTIONARY_SIZE || dict_size > MAX_DICTIONARY_SIZE)
607 1.6.2.2 pgoyette return 0;
608 1.6.2.2 pgoyette return dict_size;
609 1.6.2.2 pgoyette }
610 1.6.2.2 pgoyette
611 1.6.2.2 pgoyette static off_t
612 1.6.2.2 pgoyette unlz(int fin, int fout, char *pre, size_t prelen, off_t *bytes_in)
613 1.6.2.2 pgoyette {
614 1.6.2.2 pgoyette if (lz_crc[0] == 0)
615 1.6.2.2 pgoyette lz_crc_init();
616 1.6.2.2 pgoyette
617 1.6.2.2 pgoyette char header[HDR_SIZE];
618 1.6.2.2 pgoyette
619 1.6.2.2 pgoyette if (prelen > sizeof(header))
620 1.6.2.2 pgoyette return -1;
621 1.6.2.2 pgoyette if (pre && prelen)
622 1.6.2.2 pgoyette memcpy(header, pre, prelen);
623 1.6.2.2 pgoyette
624 1.6.2.2 pgoyette ssize_t nr = read(fin, header + prelen, sizeof(header) - prelen);
625 1.6.2.2 pgoyette switch (nr) {
626 1.6.2.2 pgoyette case -1:
627 1.6.2.2 pgoyette return -1;
628 1.6.2.2 pgoyette case 0:
629 1.6.2.2 pgoyette return prelen ? -1 : 0;
630 1.6.2.2 pgoyette default:
631 1.6.2.2 pgoyette if ((size_t)nr != sizeof(header) - prelen)
632 1.6.2.2 pgoyette return -1;
633 1.6.2.2 pgoyette break;
634 1.6.2.2 pgoyette }
635 1.6.2.2 pgoyette
636 1.6.2.2 pgoyette if (memcmp(header, hdrmagic, sizeof(hdrmagic)) != 0)
637 1.6.2.2 pgoyette return -1;
638 1.6.2.2 pgoyette
639 1.6.2.2 pgoyette unsigned dict_size = lz_get_dict_size(header[5]);
640 1.6.2.2 pgoyette if (dict_size == 0)
641 1.6.2.2 pgoyette return -1;
642 1.6.2.2 pgoyette
643 1.6.2.2 pgoyette return lz_decode(fin, fout, dict_size, bytes_in);
644 1.6.2.2 pgoyette }
645