zuncompress.c revision 1.3 1 1.3 mrg /* $NetBSD: zuncompress.c,v 1.3 2004/04/25 16:20:33 mrg Exp $ */
2 1.1 mrg
3 1.1 mrg /*-
4 1.1 mrg * Copyright (c) 1985, 1986, 1992, 1993
5 1.1 mrg * The Regents of the University of California. All rights reserved.
6 1.1 mrg *
7 1.1 mrg * This code is derived from software contributed to Berkeley by
8 1.1 mrg * Diomidis Spinellis and James A. Woods, derived from original
9 1.1 mrg * work by Spencer Thomas and Joseph Orost.
10 1.1 mrg *
11 1.1 mrg * Redistribution and use in source and binary forms, with or without
12 1.1 mrg * modification, are permitted provided that the following conditions
13 1.1 mrg * are met:
14 1.1 mrg * 1. Redistributions of source code must retain the above copyright
15 1.1 mrg * notice, this list of conditions and the following disclaimer.
16 1.1 mrg * 2. Redistributions in binary form must reproduce the above copyright
17 1.1 mrg * notice, this list of conditions and the following disclaimer in the
18 1.1 mrg * documentation and/or other materials provided with the distribution.
19 1.1 mrg * 3. Neither the name of the University nor the names of its contributors
20 1.1 mrg * may be used to endorse or promote products derived from this software
21 1.1 mrg * without specific prior written permission.
22 1.1 mrg *
23 1.1 mrg * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 1.1 mrg * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 1.1 mrg * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 1.1 mrg * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 1.1 mrg * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 1.1 mrg * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 1.1 mrg * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 1.1 mrg * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 1.1 mrg * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 1.1 mrg * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 1.1 mrg * SUCH DAMAGE.
34 1.1 mrg *
35 1.1 mrg * from: NetBSD: zopen.c,v 1.8 2003/08/07 11:13:29 agc Exp
36 1.1 mrg */
37 1.1 mrg
38 1.1 mrg /* This file is #included by gzip.c */
39 1.1 mrg
40 1.2 he static int zread(void *, char *, int);
41 1.1 mrg
42 1.1 mrg #define tab_prefixof(i) (zs->zs_codetab[i])
43 1.1 mrg #define tab_suffixof(i) ((char_type *)(zs->zs_htab))[i]
44 1.1 mrg #define de_stack ((char_type *)&tab_suffixof(1 << BITS))
45 1.1 mrg
46 1.1 mrg #define BITS 16 /* Default bits. */
47 1.1 mrg #define HSIZE 69001 /* 95% occupancy */ /* XXX may not need HSIZE */
48 1.1 mrg #define BIT_MASK 0x1f /* Defines for third byte of header. */
49 1.1 mrg #define BLOCK_MASK 0x80
50 1.1 mrg #define CHECK_GAP 10000 /* Ratio check interval. */
51 1.1 mrg #define BUFSIZE (64 * 1024)
52 1.1 mrg
53 1.1 mrg /*
54 1.1 mrg * Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is
55 1.1 mrg * a fourth header byte (for expansion).
56 1.1 mrg */
57 1.1 mrg #define INIT_BITS 9 /* Initial number of bits/code. */
58 1.1 mrg
59 1.1 mrg /*
60 1.1 mrg * the next two codes should not be changed lightly, as they must not
61 1.1 mrg * lie within the contiguous general code space.
62 1.1 mrg */
63 1.1 mrg #define FIRST 257 /* First free entry. */
64 1.1 mrg #define CLEAR 256 /* Table clear output code. */
65 1.1 mrg
66 1.1 mrg
67 1.1 mrg #define MAXCODE(n_bits) ((1 << (n_bits)) - 1)
68 1.1 mrg
69 1.1 mrg typedef long code_int;
70 1.1 mrg typedef long count_int;
71 1.1 mrg typedef u_char char_type;
72 1.1 mrg
73 1.1 mrg static char_type magic_header[] =
74 1.1 mrg {'\037', '\235'}; /* 1F 9D */
75 1.1 mrg
76 1.1 mrg static char_type rmask[9] =
77 1.1 mrg {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
78 1.1 mrg
79 1.3 mrg /* XXX zuncompress global */
80 1.3 mrg off_t total_compressed_bytes;
81 1.3 mrg size_t compressed_prelen;
82 1.3 mrg char *compressed_pre;
83 1.1 mrg
84 1.1 mrg struct s_zstate {
85 1.1 mrg FILE *zs_fp; /* File stream for I/O */
86 1.1 mrg char zs_mode; /* r or w */
87 1.1 mrg enum {
88 1.1 mrg S_START, S_MIDDLE, S_EOF
89 1.1 mrg } zs_state; /* State of computation */
90 1.1 mrg int zs_n_bits; /* Number of bits/code. */
91 1.1 mrg int zs_maxbits; /* User settable max # bits/code. */
92 1.1 mrg code_int zs_maxcode; /* Maximum code, given n_bits. */
93 1.1 mrg code_int zs_maxmaxcode; /* Should NEVER generate this code. */
94 1.1 mrg count_int zs_htab [HSIZE];
95 1.1 mrg u_short zs_codetab [HSIZE];
96 1.1 mrg code_int zs_hsize; /* For dynamic table sizing. */
97 1.1 mrg code_int zs_free_ent; /* First unused entry. */
98 1.1 mrg /*
99 1.1 mrg * Block compression parameters -- after all codes are used up,
100 1.1 mrg * and compression rate changes, start over.
101 1.1 mrg */
102 1.1 mrg int zs_block_compress;
103 1.1 mrg int zs_clear_flg;
104 1.1 mrg long zs_ratio;
105 1.1 mrg count_int zs_checkpoint;
106 1.1 mrg int zs_offset;
107 1.1 mrg long zs_in_count; /* Length of input. */
108 1.1 mrg long zs_bytes_out; /* Length of compressed output. */
109 1.1 mrg long zs_out_count; /* # of codes output (for debugging). */
110 1.1 mrg char_type zs_buf[BITS];
111 1.1 mrg union {
112 1.1 mrg struct {
113 1.1 mrg long zs_fcode;
114 1.1 mrg code_int zs_ent;
115 1.1 mrg code_int zs_hsize_reg;
116 1.1 mrg int zs_hshift;
117 1.1 mrg } w; /* Write paramenters */
118 1.1 mrg struct {
119 1.1 mrg char_type *zs_stackp;
120 1.1 mrg int zs_finchar;
121 1.1 mrg code_int zs_code, zs_oldcode, zs_incode;
122 1.1 mrg int zs_roffset, zs_size;
123 1.1 mrg char_type zs_gbuf[BITS];
124 1.1 mrg } r; /* Read parameters */
125 1.1 mrg } u;
126 1.1 mrg };
127 1.1 mrg
128 1.1 mrg static code_int getcode(struct s_zstate *zs);
129 1.1 mrg
130 1.1 mrg static off_t
131 1.3 mrg zuncompress(FILE *in, FILE *out, char *pre, size_t prelen,
132 1.3 mrg off_t *compressed_bytes)
133 1.1 mrg {
134 1.1 mrg off_t bin, bout = 0;
135 1.1 mrg char buf[BUFSIZE];
136 1.1 mrg
137 1.3 mrg /* XXX */
138 1.3 mrg compressed_prelen = prelen;
139 1.3 mrg if (prelen != 0)
140 1.3 mrg compressed_pre = pre;
141 1.3 mrg else
142 1.3 mrg compressed_pre = NULL;
143 1.3 mrg
144 1.1 mrg while ((bin = fread(buf, 1, sizeof(buf), in)) != 0) {
145 1.1 mrg if (fwrite(buf, 1, bin, out) != bin)
146 1.1 mrg return 0;
147 1.1 mrg bout += bin;
148 1.1 mrg }
149 1.1 mrg
150 1.3 mrg if (compressed_bytes)
151 1.3 mrg *compressed_bytes = total_compressed_bytes;
152 1.3 mrg
153 1.1 mrg return bout;
154 1.1 mrg }
155 1.1 mrg
156 1.1 mrg FILE *
157 1.3 mrg zopen(const char *fname, FILE *preopen)
158 1.1 mrg {
159 1.1 mrg struct s_zstate *zs;
160 1.1 mrg
161 1.1 mrg if ((zs = calloc(1, sizeof(struct s_zstate))) == NULL)
162 1.1 mrg return (NULL);
163 1.1 mrg
164 1.1 mrg zs->zs_state = S_START;
165 1.1 mrg
166 1.1 mrg /* XXX we can get rid of some of these */
167 1.1 mrg zs->zs_hsize = HSIZE; /* For dynamic table sizing. */
168 1.1 mrg zs->zs_free_ent = 0; /* First unused entry. */
169 1.1 mrg zs->zs_block_compress = BLOCK_MASK;
170 1.1 mrg zs->zs_clear_flg = 0; /* XXX we calloc()'d this structure why = 0? */
171 1.1 mrg zs->zs_ratio = 0;
172 1.1 mrg zs->zs_checkpoint = CHECK_GAP;
173 1.1 mrg zs->zs_in_count = 1; /* Length of input. */
174 1.1 mrg zs->zs_out_count = 0; /* # of codes output (for debugging). */
175 1.1 mrg zs->u.r.zs_roffset = 0;
176 1.1 mrg zs->u.r.zs_size = 0;
177 1.1 mrg
178 1.1 mrg /*
179 1.1 mrg * Layering compress on top of stdio in order to provide buffering,
180 1.1 mrg * and ensure that reads and write work with the data specified.
181 1.1 mrg */
182 1.3 mrg if ((zs->zs_fp = preopen) == NULL &&
183 1.3 mrg (zs->zs_fp = fopen(fname, "r")) == NULL) {
184 1.1 mrg free(zs);
185 1.1 mrg return NULL;
186 1.1 mrg }
187 1.1 mrg
188 1.1 mrg return fropen(zs, zread);
189 1.1 mrg }
190 1.1 mrg
191 1.1 mrg /*
192 1.1 mrg * Decompress read. This routine adapts to the codes in the file building
193 1.1 mrg * the "string" table on-the-fly; requiring no table to be stored in the
194 1.1 mrg * compressed file. The tables used herein are shared with those of the
195 1.1 mrg * compress() routine. See the definitions above.
196 1.1 mrg */
197 1.2 he static int
198 1.1 mrg zread(void *cookie, char *rbp, int num)
199 1.1 mrg {
200 1.3 mrg u_int count, i;
201 1.1 mrg struct s_zstate *zs;
202 1.1 mrg u_char *bp, header[3];
203 1.1 mrg
204 1.1 mrg if (num == 0)
205 1.1 mrg return (0);
206 1.1 mrg
207 1.1 mrg zs = cookie;
208 1.1 mrg count = num;
209 1.1 mrg bp = (u_char *)rbp;
210 1.1 mrg switch (zs->zs_state) {
211 1.1 mrg case S_START:
212 1.1 mrg zs->zs_state = S_MIDDLE;
213 1.1 mrg break;
214 1.1 mrg case S_MIDDLE:
215 1.1 mrg goto middle;
216 1.1 mrg case S_EOF:
217 1.1 mrg goto eof;
218 1.1 mrg }
219 1.1 mrg
220 1.1 mrg /* Check the magic number */
221 1.3 mrg for (i = 0; i < 3 && compressed_prelen; i++, compressed_prelen--)
222 1.3 mrg header[i] = *compressed_pre++;
223 1.3 mrg
224 1.3 mrg if (fread(header + i, 1, sizeof(header) - i, zs->zs_fp) !=
225 1.3 mrg sizeof(header) - i ||
226 1.1 mrg memcmp(header, magic_header, sizeof(magic_header)) != 0) {
227 1.1 mrg errno = EFTYPE;
228 1.1 mrg return (-1);
229 1.1 mrg }
230 1.3 mrg total_compressed_bytes = 0;
231 1.1 mrg zs->zs_maxbits = header[2]; /* Set -b from file. */
232 1.1 mrg zs->zs_block_compress = zs->zs_maxbits & BLOCK_MASK;
233 1.1 mrg zs->zs_maxbits &= BIT_MASK;
234 1.1 mrg zs->zs_maxmaxcode = 1L << zs->zs_maxbits;
235 1.1 mrg if (zs->zs_maxbits > BITS) {
236 1.1 mrg errno = EFTYPE;
237 1.1 mrg return (-1);
238 1.1 mrg }
239 1.1 mrg /* As above, initialize the first 256 entries in the table. */
240 1.1 mrg zs->zs_maxcode = MAXCODE(zs->zs_n_bits = INIT_BITS);
241 1.1 mrg for (zs->u.r.zs_code = 255; zs->u.r.zs_code >= 0; zs->u.r.zs_code--) {
242 1.1 mrg tab_prefixof(zs->u.r.zs_code) = 0;
243 1.1 mrg tab_suffixof(zs->u.r.zs_code) = (char_type) zs->u.r.zs_code;
244 1.1 mrg }
245 1.1 mrg zs->zs_free_ent = zs->zs_block_compress ? FIRST : 256;
246 1.1 mrg
247 1.1 mrg zs->u.r.zs_finchar = zs->u.r.zs_oldcode = getcode(zs);
248 1.1 mrg if (zs->u.r.zs_oldcode == -1) /* EOF already? */
249 1.1 mrg return (0); /* Get out of here */
250 1.1 mrg
251 1.1 mrg /* First code must be 8 bits = char. */
252 1.1 mrg *bp++ = (u_char)zs->u.r.zs_finchar;
253 1.1 mrg count--;
254 1.1 mrg zs->u.r.zs_stackp = de_stack;
255 1.1 mrg
256 1.1 mrg while ((zs->u.r.zs_code = getcode(zs)) > -1) {
257 1.1 mrg
258 1.1 mrg if ((zs->u.r.zs_code == CLEAR) && zs->zs_block_compress) {
259 1.1 mrg for (zs->u.r.zs_code = 255; zs->u.r.zs_code >= 0;
260 1.1 mrg zs->u.r.zs_code--)
261 1.1 mrg tab_prefixof(zs->u.r.zs_code) = 0;
262 1.1 mrg zs->zs_clear_flg = 1;
263 1.1 mrg zs->zs_free_ent = FIRST - 1;
264 1.1 mrg if ((zs->u.r.zs_code = getcode(zs)) == -1) /* O, untimely death! */
265 1.1 mrg break;
266 1.1 mrg }
267 1.1 mrg zs->u.r.zs_incode = zs->u.r.zs_code;
268 1.1 mrg
269 1.1 mrg /* Special case for KwKwK string. */
270 1.1 mrg if (zs->u.r.zs_code >= zs->zs_free_ent) {
271 1.1 mrg *zs->u.r.zs_stackp++ = zs->u.r.zs_finchar;
272 1.1 mrg zs->u.r.zs_code = zs->u.r.zs_oldcode;
273 1.1 mrg }
274 1.1 mrg
275 1.1 mrg /* Generate output characters in reverse order. */
276 1.1 mrg while (zs->u.r.zs_code >= 256) {
277 1.1 mrg *zs->u.r.zs_stackp++ = tab_suffixof(zs->u.r.zs_code);
278 1.1 mrg zs->u.r.zs_code = tab_prefixof(zs->u.r.zs_code);
279 1.1 mrg }
280 1.1 mrg *zs->u.r.zs_stackp++ = zs->u.r.zs_finchar = tab_suffixof(zs->u.r.zs_code);
281 1.1 mrg
282 1.1 mrg /* And put them out in forward order. */
283 1.1 mrg middle: do {
284 1.1 mrg if (count-- == 0)
285 1.1 mrg return (num);
286 1.1 mrg *bp++ = *--zs->u.r.zs_stackp;
287 1.1 mrg } while (zs->u.r.zs_stackp > de_stack);
288 1.1 mrg
289 1.1 mrg /* Generate the new entry. */
290 1.1 mrg if ((zs->u.r.zs_code = zs->zs_free_ent) < zs->zs_maxmaxcode) {
291 1.1 mrg tab_prefixof(zs->u.r.zs_code) = (u_short) zs->u.r.zs_oldcode;
292 1.1 mrg tab_suffixof(zs->u.r.zs_code) = zs->u.r.zs_finchar;
293 1.1 mrg zs->zs_free_ent = zs->u.r.zs_code + 1;
294 1.1 mrg }
295 1.1 mrg
296 1.1 mrg /* Remember previous code. */
297 1.1 mrg zs->u.r.zs_oldcode = zs->u.r.zs_incode;
298 1.1 mrg }
299 1.1 mrg zs->zs_state = S_EOF;
300 1.1 mrg eof: return (num - count);
301 1.1 mrg }
302 1.1 mrg
303 1.1 mrg /*-
304 1.1 mrg * Read one code from the standard input. If EOF, return -1.
305 1.1 mrg * Inputs:
306 1.1 mrg * stdin
307 1.1 mrg * Outputs:
308 1.1 mrg * code or -1 is returned.
309 1.1 mrg */
310 1.1 mrg static code_int
311 1.1 mrg getcode(struct s_zstate *zs)
312 1.1 mrg {
313 1.1 mrg code_int gcode;
314 1.3 mrg int r_off, bits, i;
315 1.1 mrg char_type *bp;
316 1.1 mrg
317 1.1 mrg bp = zs->u.r.zs_gbuf;
318 1.1 mrg if (zs->zs_clear_flg > 0 || zs->u.r.zs_roffset >= zs->u.r.zs_size ||
319 1.1 mrg zs->zs_free_ent > zs->zs_maxcode) {
320 1.1 mrg /*
321 1.1 mrg * If the next entry will be too big for the current gcode
322 1.1 mrg * size, then we must increase the size. This implies reading
323 1.1 mrg * a new buffer full, too.
324 1.1 mrg */
325 1.1 mrg if (zs->zs_free_ent > zs->zs_maxcode) {
326 1.1 mrg zs->zs_n_bits++;
327 1.1 mrg if (zs->zs_n_bits == zs->zs_maxbits) /* Won't get any bigger now. */
328 1.1 mrg zs->zs_maxcode = zs->zs_maxmaxcode;
329 1.1 mrg else
330 1.1 mrg zs->zs_maxcode = MAXCODE(zs->zs_n_bits);
331 1.1 mrg }
332 1.1 mrg if (zs->zs_clear_flg > 0) {
333 1.1 mrg zs->zs_maxcode = MAXCODE(zs->zs_n_bits = INIT_BITS);
334 1.1 mrg zs->zs_clear_flg = 0;
335 1.1 mrg }
336 1.3 mrg /* XXX */
337 1.3 mrg for (i = 0; i < zs->zs_n_bits && compressed_prelen; i++, compressed_prelen--)
338 1.3 mrg zs->u.r.zs_gbuf[i] = *compressed_pre++;
339 1.3 mrg zs->u.r.zs_size = fread(zs->u.r.zs_gbuf + i, 1, zs->zs_n_bits - i, zs->zs_fp);
340 1.3 mrg zs->u.r.zs_size += i;
341 1.1 mrg if (zs->u.r.zs_size <= 0) /* End of file. */
342 1.1 mrg return (-1);
343 1.1 mrg zs->u.r.zs_roffset = 0;
344 1.3 mrg
345 1.3 mrg total_compressed_bytes += zs->u.r.zs_size;
346 1.3 mrg
347 1.1 mrg /* Round size down to integral number of codes. */
348 1.1 mrg zs->u.r.zs_size = (zs->u.r.zs_size << 3) - (zs->zs_n_bits - 1);
349 1.1 mrg }
350 1.1 mrg r_off = zs->u.r.zs_roffset;
351 1.1 mrg bits = zs->zs_n_bits;
352 1.1 mrg
353 1.1 mrg /* Get to the first byte. */
354 1.1 mrg bp += (r_off >> 3);
355 1.1 mrg r_off &= 7;
356 1.1 mrg
357 1.1 mrg /* Get first part (low order bits). */
358 1.1 mrg gcode = (*bp++ >> r_off);
359 1.1 mrg bits -= (8 - r_off);
360 1.1 mrg r_off = 8 - r_off; /* Now, roffset into gcode word. */
361 1.1 mrg
362 1.1 mrg /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
363 1.1 mrg if (bits >= 8) {
364 1.1 mrg gcode |= *bp++ << r_off;
365 1.1 mrg r_off += 8;
366 1.1 mrg bits -= 8;
367 1.1 mrg }
368 1.1 mrg
369 1.1 mrg /* High order bits. */
370 1.1 mrg gcode |= (*bp & rmask[bits]) << r_off;
371 1.1 mrg zs->u.r.zs_roffset += zs->zs_n_bits;
372 1.1 mrg
373 1.1 mrg return (gcode);
374 1.1 mrg }
375