zuncompress.c revision 1.11 1 1.11 joerg /* $NetBSD: zuncompress.c,v 1.11 2011/08/16 13:55:02 joerg Exp $ */
2 1.1 mrg
3 1.1 mrg /*-
4 1.1 mrg * Copyright (c) 1985, 1986, 1992, 1993
5 1.1 mrg * The Regents of the University of California. All rights reserved.
6 1.1 mrg *
7 1.1 mrg * This code is derived from software contributed to Berkeley by
8 1.1 mrg * Diomidis Spinellis and James A. Woods, derived from original
9 1.1 mrg * work by Spencer Thomas and Joseph Orost.
10 1.1 mrg *
11 1.1 mrg * Redistribution and use in source and binary forms, with or without
12 1.1 mrg * modification, are permitted provided that the following conditions
13 1.1 mrg * are met:
14 1.1 mrg * 1. Redistributions of source code must retain the above copyright
15 1.1 mrg * notice, this list of conditions and the following disclaimer.
16 1.1 mrg * 2. Redistributions in binary form must reproduce the above copyright
17 1.1 mrg * notice, this list of conditions and the following disclaimer in the
18 1.1 mrg * documentation and/or other materials provided with the distribution.
19 1.1 mrg * 3. Neither the name of the University nor the names of its contributors
20 1.1 mrg * may be used to endorse or promote products derived from this software
21 1.1 mrg * without specific prior written permission.
22 1.1 mrg *
23 1.1 mrg * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 1.1 mrg * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 1.1 mrg * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 1.1 mrg * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 1.1 mrg * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 1.1 mrg * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 1.1 mrg * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 1.1 mrg * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 1.1 mrg * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 1.1 mrg * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 1.1 mrg * SUCH DAMAGE.
34 1.1 mrg *
35 1.1 mrg * from: NetBSD: zopen.c,v 1.8 2003/08/07 11:13:29 agc Exp
36 1.1 mrg */
37 1.1 mrg
38 1.1 mrg /* This file is #included by gzip.c */
39 1.1 mrg
40 1.2 he static int zread(void *, char *, int);
41 1.1 mrg
42 1.1 mrg #define tab_prefixof(i) (zs->zs_codetab[i])
43 1.1 mrg #define tab_suffixof(i) ((char_type *)(zs->zs_htab))[i]
44 1.1 mrg #define de_stack ((char_type *)&tab_suffixof(1 << BITS))
45 1.1 mrg
46 1.1 mrg #define BITS 16 /* Default bits. */
47 1.1 mrg #define HSIZE 69001 /* 95% occupancy */ /* XXX may not need HSIZE */
48 1.1 mrg #define BIT_MASK 0x1f /* Defines for third byte of header. */
49 1.1 mrg #define BLOCK_MASK 0x80
50 1.1 mrg #define CHECK_GAP 10000 /* Ratio check interval. */
51 1.1 mrg #define BUFSIZE (64 * 1024)
52 1.1 mrg
53 1.1 mrg /*
54 1.1 mrg * Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is
55 1.1 mrg * a fourth header byte (for expansion).
56 1.1 mrg */
57 1.1 mrg #define INIT_BITS 9 /* Initial number of bits/code. */
58 1.1 mrg
59 1.1 mrg /*
60 1.1 mrg * the next two codes should not be changed lightly, as they must not
61 1.1 mrg * lie within the contiguous general code space.
62 1.1 mrg */
63 1.1 mrg #define FIRST 257 /* First free entry. */
64 1.1 mrg #define CLEAR 256 /* Table clear output code. */
65 1.1 mrg
66 1.1 mrg
67 1.1 mrg #define MAXCODE(n_bits) ((1 << (n_bits)) - 1)
68 1.1 mrg
69 1.1 mrg typedef long code_int;
70 1.1 mrg typedef long count_int;
71 1.1 mrg typedef u_char char_type;
72 1.1 mrg
73 1.1 mrg static char_type magic_header[] =
74 1.1 mrg {'\037', '\235'}; /* 1F 9D */
75 1.1 mrg
76 1.1 mrg static char_type rmask[9] =
77 1.1 mrg {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
78 1.1 mrg
79 1.3 mrg /* XXX zuncompress global */
80 1.3 mrg off_t total_compressed_bytes;
81 1.3 mrg size_t compressed_prelen;
82 1.3 mrg char *compressed_pre;
83 1.1 mrg
84 1.1 mrg struct s_zstate {
85 1.1 mrg FILE *zs_fp; /* File stream for I/O */
86 1.1 mrg char zs_mode; /* r or w */
87 1.1 mrg enum {
88 1.1 mrg S_START, S_MIDDLE, S_EOF
89 1.1 mrg } zs_state; /* State of computation */
90 1.1 mrg int zs_n_bits; /* Number of bits/code. */
91 1.1 mrg int zs_maxbits; /* User settable max # bits/code. */
92 1.1 mrg code_int zs_maxcode; /* Maximum code, given n_bits. */
93 1.1 mrg code_int zs_maxmaxcode; /* Should NEVER generate this code. */
94 1.1 mrg count_int zs_htab [HSIZE];
95 1.1 mrg u_short zs_codetab [HSIZE];
96 1.1 mrg code_int zs_hsize; /* For dynamic table sizing. */
97 1.1 mrg code_int zs_free_ent; /* First unused entry. */
98 1.1 mrg /*
99 1.1 mrg * Block compression parameters -- after all codes are used up,
100 1.1 mrg * and compression rate changes, start over.
101 1.1 mrg */
102 1.1 mrg int zs_block_compress;
103 1.1 mrg int zs_clear_flg;
104 1.1 mrg long zs_ratio;
105 1.1 mrg count_int zs_checkpoint;
106 1.1 mrg int zs_offset;
107 1.1 mrg long zs_in_count; /* Length of input. */
108 1.1 mrg long zs_bytes_out; /* Length of compressed output. */
109 1.1 mrg long zs_out_count; /* # of codes output (for debugging). */
110 1.1 mrg char_type zs_buf[BITS];
111 1.1 mrg union {
112 1.1 mrg struct {
113 1.1 mrg long zs_fcode;
114 1.1 mrg code_int zs_ent;
115 1.1 mrg code_int zs_hsize_reg;
116 1.1 mrg int zs_hshift;
117 1.8 mrg } w; /* Write parameters */
118 1.1 mrg struct {
119 1.1 mrg char_type *zs_stackp;
120 1.1 mrg int zs_finchar;
121 1.1 mrg code_int zs_code, zs_oldcode, zs_incode;
122 1.1 mrg int zs_roffset, zs_size;
123 1.1 mrg char_type zs_gbuf[BITS];
124 1.1 mrg } r; /* Read parameters */
125 1.1 mrg } u;
126 1.1 mrg };
127 1.1 mrg
128 1.1 mrg static code_int getcode(struct s_zstate *zs);
129 1.1 mrg
130 1.1 mrg static off_t
131 1.3 mrg zuncompress(FILE *in, FILE *out, char *pre, size_t prelen,
132 1.3 mrg off_t *compressed_bytes)
133 1.1 mrg {
134 1.1 mrg off_t bin, bout = 0;
135 1.5 dsl char *buf;
136 1.5 dsl
137 1.5 dsl buf = malloc(BUFSIZE);
138 1.5 dsl if (buf == NULL)
139 1.5 dsl return -1;
140 1.1 mrg
141 1.3 mrg /* XXX */
142 1.3 mrg compressed_prelen = prelen;
143 1.3 mrg if (prelen != 0)
144 1.3 mrg compressed_pre = pre;
145 1.3 mrg else
146 1.3 mrg compressed_pre = NULL;
147 1.3 mrg
148 1.1 mrg while ((bin = fread(buf, 1, sizeof(buf), in)) != 0) {
149 1.7 lukem if (tflag == 0 && (off_t)fwrite(buf, 1, bin, out) != bin) {
150 1.5 dsl free(buf);
151 1.4 mrg return -1;
152 1.5 dsl }
153 1.1 mrg bout += bin;
154 1.1 mrg }
155 1.1 mrg
156 1.3 mrg if (compressed_bytes)
157 1.3 mrg *compressed_bytes = total_compressed_bytes;
158 1.3 mrg
159 1.5 dsl free(buf);
160 1.1 mrg return bout;
161 1.1 mrg }
162 1.1 mrg
163 1.5 dsl static int
164 1.5 dsl zclose(void *zs)
165 1.5 dsl {
166 1.5 dsl free(zs);
167 1.5 dsl /* We leave the caller to close the fd passed to zdopen() */
168 1.5 dsl return 0;
169 1.5 dsl }
170 1.5 dsl
171 1.1 mrg FILE *
172 1.5 dsl zdopen(int fd)
173 1.1 mrg {
174 1.1 mrg struct s_zstate *zs;
175 1.1 mrg
176 1.1 mrg if ((zs = calloc(1, sizeof(struct s_zstate))) == NULL)
177 1.1 mrg return (NULL);
178 1.1 mrg
179 1.1 mrg zs->zs_state = S_START;
180 1.1 mrg
181 1.1 mrg /* XXX we can get rid of some of these */
182 1.1 mrg zs->zs_hsize = HSIZE; /* For dynamic table sizing. */
183 1.1 mrg zs->zs_free_ent = 0; /* First unused entry. */
184 1.1 mrg zs->zs_block_compress = BLOCK_MASK;
185 1.1 mrg zs->zs_clear_flg = 0; /* XXX we calloc()'d this structure why = 0? */
186 1.1 mrg zs->zs_ratio = 0;
187 1.1 mrg zs->zs_checkpoint = CHECK_GAP;
188 1.1 mrg zs->zs_in_count = 1; /* Length of input. */
189 1.1 mrg zs->zs_out_count = 0; /* # of codes output (for debugging). */
190 1.1 mrg zs->u.r.zs_roffset = 0;
191 1.1 mrg zs->u.r.zs_size = 0;
192 1.1 mrg
193 1.1 mrg /*
194 1.1 mrg * Layering compress on top of stdio in order to provide buffering,
195 1.1 mrg * and ensure that reads and write work with the data specified.
196 1.1 mrg */
197 1.5 dsl if ((zs->zs_fp = fdopen(fd, "r")) == NULL) {
198 1.1 mrg free(zs);
199 1.1 mrg return NULL;
200 1.1 mrg }
201 1.1 mrg
202 1.5 dsl return funopen(zs, zread, NULL, NULL, zclose);
203 1.1 mrg }
204 1.1 mrg
205 1.1 mrg /*
206 1.1 mrg * Decompress read. This routine adapts to the codes in the file building
207 1.1 mrg * the "string" table on-the-fly; requiring no table to be stored in the
208 1.1 mrg * compressed file. The tables used herein are shared with those of the
209 1.1 mrg * compress() routine. See the definitions above.
210 1.1 mrg */
211 1.2 he static int
212 1.1 mrg zread(void *cookie, char *rbp, int num)
213 1.1 mrg {
214 1.3 mrg u_int count, i;
215 1.1 mrg struct s_zstate *zs;
216 1.1 mrg u_char *bp, header[3];
217 1.1 mrg
218 1.1 mrg if (num == 0)
219 1.1 mrg return (0);
220 1.1 mrg
221 1.1 mrg zs = cookie;
222 1.1 mrg count = num;
223 1.1 mrg bp = (u_char *)rbp;
224 1.1 mrg switch (zs->zs_state) {
225 1.1 mrg case S_START:
226 1.1 mrg zs->zs_state = S_MIDDLE;
227 1.1 mrg break;
228 1.1 mrg case S_MIDDLE:
229 1.1 mrg goto middle;
230 1.1 mrg case S_EOF:
231 1.1 mrg goto eof;
232 1.1 mrg }
233 1.1 mrg
234 1.1 mrg /* Check the magic number */
235 1.3 mrg for (i = 0; i < 3 && compressed_prelen; i++, compressed_prelen--)
236 1.3 mrg header[i] = *compressed_pre++;
237 1.3 mrg
238 1.3 mrg if (fread(header + i, 1, sizeof(header) - i, zs->zs_fp) !=
239 1.3 mrg sizeof(header) - i ||
240 1.1 mrg memcmp(header, magic_header, sizeof(magic_header)) != 0) {
241 1.1 mrg errno = EFTYPE;
242 1.1 mrg return (-1);
243 1.1 mrg }
244 1.3 mrg total_compressed_bytes = 0;
245 1.1 mrg zs->zs_maxbits = header[2]; /* Set -b from file. */
246 1.1 mrg zs->zs_block_compress = zs->zs_maxbits & BLOCK_MASK;
247 1.1 mrg zs->zs_maxbits &= BIT_MASK;
248 1.1 mrg zs->zs_maxmaxcode = 1L << zs->zs_maxbits;
249 1.11 joerg if (zs->zs_maxbits > BITS || zs->zs_maxbits < 12) {
250 1.1 mrg errno = EFTYPE;
251 1.1 mrg return (-1);
252 1.1 mrg }
253 1.1 mrg /* As above, initialize the first 256 entries in the table. */
254 1.1 mrg zs->zs_maxcode = MAXCODE(zs->zs_n_bits = INIT_BITS);
255 1.1 mrg for (zs->u.r.zs_code = 255; zs->u.r.zs_code >= 0; zs->u.r.zs_code--) {
256 1.1 mrg tab_prefixof(zs->u.r.zs_code) = 0;
257 1.1 mrg tab_suffixof(zs->u.r.zs_code) = (char_type) zs->u.r.zs_code;
258 1.1 mrg }
259 1.1 mrg zs->zs_free_ent = zs->zs_block_compress ? FIRST : 256;
260 1.1 mrg
261 1.11 joerg zs->u.r.zs_oldcode = -1;
262 1.1 mrg zs->u.r.zs_stackp = de_stack;
263 1.1 mrg
264 1.1 mrg while ((zs->u.r.zs_code = getcode(zs)) > -1) {
265 1.1 mrg
266 1.1 mrg if ((zs->u.r.zs_code == CLEAR) && zs->zs_block_compress) {
267 1.1 mrg for (zs->u.r.zs_code = 255; zs->u.r.zs_code >= 0;
268 1.1 mrg zs->u.r.zs_code--)
269 1.1 mrg tab_prefixof(zs->u.r.zs_code) = 0;
270 1.1 mrg zs->zs_clear_flg = 1;
271 1.11 joerg zs->zs_free_ent = FIRST;
272 1.11 joerg zs->u.r.zs_oldcode = -1;
273 1.11 joerg continue;
274 1.1 mrg }
275 1.1 mrg zs->u.r.zs_incode = zs->u.r.zs_code;
276 1.1 mrg
277 1.1 mrg /* Special case for KwKwK string. */
278 1.1 mrg if (zs->u.r.zs_code >= zs->zs_free_ent) {
279 1.11 joerg if (zs->u.r.zs_code > zs->zs_free_ent ||
280 1.11 joerg zs->u.r.zs_oldcode == -1) {
281 1.11 joerg /* Bad stream. */
282 1.11 joerg errno = EINVAL;
283 1.11 joerg return (-1);
284 1.11 joerg }
285 1.1 mrg *zs->u.r.zs_stackp++ = zs->u.r.zs_finchar;
286 1.1 mrg zs->u.r.zs_code = zs->u.r.zs_oldcode;
287 1.1 mrg }
288 1.11 joerg /*
289 1.11 joerg * The above condition ensures that code < free_ent.
290 1.11 joerg * The construction of tab_prefixof in turn guarantees that
291 1.11 joerg * each iteration decreases code and therefore stack usage is
292 1.11 joerg * bound by 1 << BITS - 256.
293 1.11 joerg */
294 1.1 mrg
295 1.1 mrg /* Generate output characters in reverse order. */
296 1.1 mrg while (zs->u.r.zs_code >= 256) {
297 1.1 mrg *zs->u.r.zs_stackp++ = tab_suffixof(zs->u.r.zs_code);
298 1.1 mrg zs->u.r.zs_code = tab_prefixof(zs->u.r.zs_code);
299 1.1 mrg }
300 1.1 mrg *zs->u.r.zs_stackp++ = zs->u.r.zs_finchar = tab_suffixof(zs->u.r.zs_code);
301 1.1 mrg
302 1.1 mrg /* And put them out in forward order. */
303 1.1 mrg middle: do {
304 1.1 mrg if (count-- == 0)
305 1.1 mrg return (num);
306 1.1 mrg *bp++ = *--zs->u.r.zs_stackp;
307 1.1 mrg } while (zs->u.r.zs_stackp > de_stack);
308 1.1 mrg
309 1.1 mrg /* Generate the new entry. */
310 1.11 joerg if ((zs->u.r.zs_code = zs->zs_free_ent) < zs->zs_maxmaxcode &&
311 1.11 joerg zs->u.r.zs_oldcode != -1) {
312 1.1 mrg tab_prefixof(zs->u.r.zs_code) = (u_short) zs->u.r.zs_oldcode;
313 1.1 mrg tab_suffixof(zs->u.r.zs_code) = zs->u.r.zs_finchar;
314 1.1 mrg zs->zs_free_ent = zs->u.r.zs_code + 1;
315 1.1 mrg }
316 1.1 mrg
317 1.1 mrg /* Remember previous code. */
318 1.1 mrg zs->u.r.zs_oldcode = zs->u.r.zs_incode;
319 1.1 mrg }
320 1.1 mrg zs->zs_state = S_EOF;
321 1.1 mrg eof: return (num - count);
322 1.1 mrg }
323 1.1 mrg
324 1.1 mrg /*-
325 1.1 mrg * Read one code from the standard input. If EOF, return -1.
326 1.1 mrg * Inputs:
327 1.1 mrg * stdin
328 1.1 mrg * Outputs:
329 1.1 mrg * code or -1 is returned.
330 1.1 mrg */
331 1.1 mrg static code_int
332 1.1 mrg getcode(struct s_zstate *zs)
333 1.1 mrg {
334 1.1 mrg code_int gcode;
335 1.3 mrg int r_off, bits, i;
336 1.1 mrg char_type *bp;
337 1.1 mrg
338 1.1 mrg bp = zs->u.r.zs_gbuf;
339 1.1 mrg if (zs->zs_clear_flg > 0 || zs->u.r.zs_roffset >= zs->u.r.zs_size ||
340 1.1 mrg zs->zs_free_ent > zs->zs_maxcode) {
341 1.1 mrg /*
342 1.1 mrg * If the next entry will be too big for the current gcode
343 1.1 mrg * size, then we must increase the size. This implies reading
344 1.1 mrg * a new buffer full, too.
345 1.1 mrg */
346 1.1 mrg if (zs->zs_free_ent > zs->zs_maxcode) {
347 1.1 mrg zs->zs_n_bits++;
348 1.1 mrg if (zs->zs_n_bits == zs->zs_maxbits) /* Won't get any bigger now. */
349 1.1 mrg zs->zs_maxcode = zs->zs_maxmaxcode;
350 1.1 mrg else
351 1.1 mrg zs->zs_maxcode = MAXCODE(zs->zs_n_bits);
352 1.1 mrg }
353 1.1 mrg if (zs->zs_clear_flg > 0) {
354 1.1 mrg zs->zs_maxcode = MAXCODE(zs->zs_n_bits = INIT_BITS);
355 1.1 mrg zs->zs_clear_flg = 0;
356 1.1 mrg }
357 1.3 mrg /* XXX */
358 1.3 mrg for (i = 0; i < zs->zs_n_bits && compressed_prelen; i++, compressed_prelen--)
359 1.3 mrg zs->u.r.zs_gbuf[i] = *compressed_pre++;
360 1.3 mrg zs->u.r.zs_size = fread(zs->u.r.zs_gbuf + i, 1, zs->zs_n_bits - i, zs->zs_fp);
361 1.3 mrg zs->u.r.zs_size += i;
362 1.1 mrg if (zs->u.r.zs_size <= 0) /* End of file. */
363 1.1 mrg return (-1);
364 1.1 mrg zs->u.r.zs_roffset = 0;
365 1.3 mrg
366 1.3 mrg total_compressed_bytes += zs->u.r.zs_size;
367 1.3 mrg
368 1.1 mrg /* Round size down to integral number of codes. */
369 1.1 mrg zs->u.r.zs_size = (zs->u.r.zs_size << 3) - (zs->zs_n_bits - 1);
370 1.1 mrg }
371 1.1 mrg r_off = zs->u.r.zs_roffset;
372 1.1 mrg bits = zs->zs_n_bits;
373 1.1 mrg
374 1.1 mrg /* Get to the first byte. */
375 1.1 mrg bp += (r_off >> 3);
376 1.1 mrg r_off &= 7;
377 1.1 mrg
378 1.1 mrg /* Get first part (low order bits). */
379 1.1 mrg gcode = (*bp++ >> r_off);
380 1.1 mrg bits -= (8 - r_off);
381 1.1 mrg r_off = 8 - r_off; /* Now, roffset into gcode word. */
382 1.1 mrg
383 1.1 mrg /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
384 1.1 mrg if (bits >= 8) {
385 1.1 mrg gcode |= *bp++ << r_off;
386 1.1 mrg r_off += 8;
387 1.1 mrg bits -= 8;
388 1.1 mrg }
389 1.1 mrg
390 1.1 mrg /* High order bits. */
391 1.1 mrg gcode |= (*bp & rmask[bits]) << r_off;
392 1.1 mrg zs->u.r.zs_roffset += zs->zs_n_bits;
393 1.1 mrg
394 1.1 mrg return (gcode);
395 1.1 mrg }
396