zuncompress.c revision 1.1 1 1.1 mrg /* $NetBSD: zuncompress.c,v 1.1 2004/02/18 08:19:48 mrg Exp $ */
2 1.1 mrg
3 1.1 mrg /*-
4 1.1 mrg * Copyright (c) 1985, 1986, 1992, 1993
5 1.1 mrg * The Regents of the University of California. All rights reserved.
6 1.1 mrg *
7 1.1 mrg * This code is derived from software contributed to Berkeley by
8 1.1 mrg * Diomidis Spinellis and James A. Woods, derived from original
9 1.1 mrg * work by Spencer Thomas and Joseph Orost.
10 1.1 mrg *
11 1.1 mrg * Redistribution and use in source and binary forms, with or without
12 1.1 mrg * modification, are permitted provided that the following conditions
13 1.1 mrg * are met:
14 1.1 mrg * 1. Redistributions of source code must retain the above copyright
15 1.1 mrg * notice, this list of conditions and the following disclaimer.
16 1.1 mrg * 2. Redistributions in binary form must reproduce the above copyright
17 1.1 mrg * notice, this list of conditions and the following disclaimer in the
18 1.1 mrg * documentation and/or other materials provided with the distribution.
19 1.1 mrg * 3. Neither the name of the University nor the names of its contributors
20 1.1 mrg * may be used to endorse or promote products derived from this software
21 1.1 mrg * without specific prior written permission.
22 1.1 mrg *
23 1.1 mrg * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 1.1 mrg * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 1.1 mrg * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 1.1 mrg * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 1.1 mrg * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 1.1 mrg * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 1.1 mrg * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 1.1 mrg * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 1.1 mrg * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 1.1 mrg * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 1.1 mrg * SUCH DAMAGE.
34 1.1 mrg *
35 1.1 mrg * from: NetBSD: zopen.c,v 1.8 2003/08/07 11:13:29 agc Exp
36 1.1 mrg */
37 1.1 mrg
38 1.1 mrg /* This file is #included by gzip.c */
39 1.1 mrg
40 1.1 mrg static ssize_t zread(void *, char *, int);
41 1.1 mrg
42 1.1 mrg #define tab_prefixof(i) (zs->zs_codetab[i])
43 1.1 mrg #define tab_suffixof(i) ((char_type *)(zs->zs_htab))[i]
44 1.1 mrg #define de_stack ((char_type *)&tab_suffixof(1 << BITS))
45 1.1 mrg
46 1.1 mrg #define BITS 16 /* Default bits. */
47 1.1 mrg #define HSIZE 69001 /* 95% occupancy */ /* XXX may not need HSIZE */
48 1.1 mrg #define BIT_MASK 0x1f /* Defines for third byte of header. */
49 1.1 mrg #define BLOCK_MASK 0x80
50 1.1 mrg #define CHECK_GAP 10000 /* Ratio check interval. */
51 1.1 mrg #define BUFSIZE (64 * 1024)
52 1.1 mrg
53 1.1 mrg /*
54 1.1 mrg * Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is
55 1.1 mrg * a fourth header byte (for expansion).
56 1.1 mrg */
57 1.1 mrg #define INIT_BITS 9 /* Initial number of bits/code. */
58 1.1 mrg
59 1.1 mrg /*
60 1.1 mrg * the next two codes should not be changed lightly, as they must not
61 1.1 mrg * lie within the contiguous general code space.
62 1.1 mrg */
63 1.1 mrg #define FIRST 257 /* First free entry. */
64 1.1 mrg #define CLEAR 256 /* Table clear output code. */
65 1.1 mrg
66 1.1 mrg
67 1.1 mrg #define MAXCODE(n_bits) ((1 << (n_bits)) - 1)
68 1.1 mrg
69 1.1 mrg typedef long code_int;
70 1.1 mrg typedef long count_int;
71 1.1 mrg typedef u_char char_type;
72 1.1 mrg
73 1.1 mrg static char_type magic_header[] =
74 1.1 mrg {'\037', '\235'}; /* 1F 9D */
75 1.1 mrg
76 1.1 mrg static char_type rmask[9] =
77 1.1 mrg {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
78 1.1 mrg
79 1.1 mrg
80 1.1 mrg struct s_zstate {
81 1.1 mrg FILE *zs_fp; /* File stream for I/O */
82 1.1 mrg char zs_mode; /* r or w */
83 1.1 mrg enum {
84 1.1 mrg S_START, S_MIDDLE, S_EOF
85 1.1 mrg } zs_state; /* State of computation */
86 1.1 mrg int zs_n_bits; /* Number of bits/code. */
87 1.1 mrg int zs_maxbits; /* User settable max # bits/code. */
88 1.1 mrg code_int zs_maxcode; /* Maximum code, given n_bits. */
89 1.1 mrg code_int zs_maxmaxcode; /* Should NEVER generate this code. */
90 1.1 mrg count_int zs_htab [HSIZE];
91 1.1 mrg u_short zs_codetab [HSIZE];
92 1.1 mrg code_int zs_hsize; /* For dynamic table sizing. */
93 1.1 mrg code_int zs_free_ent; /* First unused entry. */
94 1.1 mrg /*
95 1.1 mrg * Block compression parameters -- after all codes are used up,
96 1.1 mrg * and compression rate changes, start over.
97 1.1 mrg */
98 1.1 mrg int zs_block_compress;
99 1.1 mrg int zs_clear_flg;
100 1.1 mrg long zs_ratio;
101 1.1 mrg count_int zs_checkpoint;
102 1.1 mrg int zs_offset;
103 1.1 mrg long zs_in_count; /* Length of input. */
104 1.1 mrg long zs_bytes_out; /* Length of compressed output. */
105 1.1 mrg long zs_out_count; /* # of codes output (for debugging). */
106 1.1 mrg char_type zs_buf[BITS];
107 1.1 mrg union {
108 1.1 mrg struct {
109 1.1 mrg long zs_fcode;
110 1.1 mrg code_int zs_ent;
111 1.1 mrg code_int zs_hsize_reg;
112 1.1 mrg int zs_hshift;
113 1.1 mrg } w; /* Write paramenters */
114 1.1 mrg struct {
115 1.1 mrg char_type *zs_stackp;
116 1.1 mrg int zs_finchar;
117 1.1 mrg code_int zs_code, zs_oldcode, zs_incode;
118 1.1 mrg int zs_roffset, zs_size;
119 1.1 mrg char_type zs_gbuf[BITS];
120 1.1 mrg } r; /* Read parameters */
121 1.1 mrg } u;
122 1.1 mrg };
123 1.1 mrg
124 1.1 mrg static code_int getcode(struct s_zstate *zs);
125 1.1 mrg
126 1.1 mrg static off_t
127 1.1 mrg zuncompress(FILE *in, FILE *out)
128 1.1 mrg {
129 1.1 mrg off_t bin, bout = 0;
130 1.1 mrg char buf[BUFSIZE];
131 1.1 mrg
132 1.1 mrg while ((bin = fread(buf, 1, sizeof(buf), in)) != 0) {
133 1.1 mrg if (fwrite(buf, 1, bin, out) != bin)
134 1.1 mrg return 0;
135 1.1 mrg bout += bin;
136 1.1 mrg }
137 1.1 mrg
138 1.1 mrg return bout;
139 1.1 mrg }
140 1.1 mrg
141 1.1 mrg FILE *
142 1.1 mrg zopen(const char *fname)
143 1.1 mrg {
144 1.1 mrg struct s_zstate *zs;
145 1.1 mrg
146 1.1 mrg if ((zs = calloc(1, sizeof(struct s_zstate))) == NULL)
147 1.1 mrg return (NULL);
148 1.1 mrg
149 1.1 mrg zs->zs_state = S_START;
150 1.1 mrg
151 1.1 mrg /* XXX we can get rid of some of these */
152 1.1 mrg zs->zs_hsize = HSIZE; /* For dynamic table sizing. */
153 1.1 mrg zs->zs_free_ent = 0; /* First unused entry. */
154 1.1 mrg zs->zs_block_compress = BLOCK_MASK;
155 1.1 mrg zs->zs_clear_flg = 0; /* XXX we calloc()'d this structure why = 0? */
156 1.1 mrg zs->zs_ratio = 0;
157 1.1 mrg zs->zs_checkpoint = CHECK_GAP;
158 1.1 mrg zs->zs_in_count = 1; /* Length of input. */
159 1.1 mrg zs->zs_out_count = 0; /* # of codes output (for debugging). */
160 1.1 mrg zs->u.r.zs_roffset = 0;
161 1.1 mrg zs->u.r.zs_size = 0;
162 1.1 mrg
163 1.1 mrg /*
164 1.1 mrg * Layering compress on top of stdio in order to provide buffering,
165 1.1 mrg * and ensure that reads and write work with the data specified.
166 1.1 mrg */
167 1.1 mrg if ((zs->zs_fp = fopen(fname, "r")) == NULL) {
168 1.1 mrg free(zs);
169 1.1 mrg return NULL;
170 1.1 mrg }
171 1.1 mrg
172 1.1 mrg return fropen(zs, zread);
173 1.1 mrg }
174 1.1 mrg
175 1.1 mrg /*
176 1.1 mrg * Decompress read. This routine adapts to the codes in the file building
177 1.1 mrg * the "string" table on-the-fly; requiring no table to be stored in the
178 1.1 mrg * compressed file. The tables used herein are shared with those of the
179 1.1 mrg * compress() routine. See the definitions above.
180 1.1 mrg */
181 1.1 mrg static ssize_t
182 1.1 mrg zread(void *cookie, char *rbp, int num)
183 1.1 mrg {
184 1.1 mrg u_int count;
185 1.1 mrg struct s_zstate *zs;
186 1.1 mrg u_char *bp, header[3];
187 1.1 mrg
188 1.1 mrg if (num == 0)
189 1.1 mrg return (0);
190 1.1 mrg
191 1.1 mrg zs = cookie;
192 1.1 mrg count = num;
193 1.1 mrg bp = (u_char *)rbp;
194 1.1 mrg switch (zs->zs_state) {
195 1.1 mrg case S_START:
196 1.1 mrg zs->zs_state = S_MIDDLE;
197 1.1 mrg break;
198 1.1 mrg case S_MIDDLE:
199 1.1 mrg goto middle;
200 1.1 mrg case S_EOF:
201 1.1 mrg goto eof;
202 1.1 mrg }
203 1.1 mrg
204 1.1 mrg /* Check the magic number */
205 1.1 mrg if (fread(header,
206 1.1 mrg sizeof(char), sizeof(header), zs->zs_fp) != sizeof(header) ||
207 1.1 mrg memcmp(header, magic_header, sizeof(magic_header)) != 0) {
208 1.1 mrg errno = EFTYPE;
209 1.1 mrg return (-1);
210 1.1 mrg }
211 1.1 mrg zs->zs_maxbits = header[2]; /* Set -b from file. */
212 1.1 mrg zs->zs_block_compress = zs->zs_maxbits & BLOCK_MASK;
213 1.1 mrg zs->zs_maxbits &= BIT_MASK;
214 1.1 mrg zs->zs_maxmaxcode = 1L << zs->zs_maxbits;
215 1.1 mrg if (zs->zs_maxbits > BITS) {
216 1.1 mrg errno = EFTYPE;
217 1.1 mrg return (-1);
218 1.1 mrg }
219 1.1 mrg /* As above, initialize the first 256 entries in the table. */
220 1.1 mrg zs->zs_maxcode = MAXCODE(zs->zs_n_bits = INIT_BITS);
221 1.1 mrg for (zs->u.r.zs_code = 255; zs->u.r.zs_code >= 0; zs->u.r.zs_code--) {
222 1.1 mrg tab_prefixof(zs->u.r.zs_code) = 0;
223 1.1 mrg tab_suffixof(zs->u.r.zs_code) = (char_type) zs->u.r.zs_code;
224 1.1 mrg }
225 1.1 mrg zs->zs_free_ent = zs->zs_block_compress ? FIRST : 256;
226 1.1 mrg
227 1.1 mrg zs->u.r.zs_finchar = zs->u.r.zs_oldcode = getcode(zs);
228 1.1 mrg if (zs->u.r.zs_oldcode == -1) /* EOF already? */
229 1.1 mrg return (0); /* Get out of here */
230 1.1 mrg
231 1.1 mrg /* First code must be 8 bits = char. */
232 1.1 mrg *bp++ = (u_char)zs->u.r.zs_finchar;
233 1.1 mrg count--;
234 1.1 mrg zs->u.r.zs_stackp = de_stack;
235 1.1 mrg
236 1.1 mrg while ((zs->u.r.zs_code = getcode(zs)) > -1) {
237 1.1 mrg
238 1.1 mrg if ((zs->u.r.zs_code == CLEAR) && zs->zs_block_compress) {
239 1.1 mrg for (zs->u.r.zs_code = 255; zs->u.r.zs_code >= 0;
240 1.1 mrg zs->u.r.zs_code--)
241 1.1 mrg tab_prefixof(zs->u.r.zs_code) = 0;
242 1.1 mrg zs->zs_clear_flg = 1;
243 1.1 mrg zs->zs_free_ent = FIRST - 1;
244 1.1 mrg if ((zs->u.r.zs_code = getcode(zs)) == -1) /* O, untimely death! */
245 1.1 mrg break;
246 1.1 mrg }
247 1.1 mrg zs->u.r.zs_incode = zs->u.r.zs_code;
248 1.1 mrg
249 1.1 mrg /* Special case for KwKwK string. */
250 1.1 mrg if (zs->u.r.zs_code >= zs->zs_free_ent) {
251 1.1 mrg *zs->u.r.zs_stackp++ = zs->u.r.zs_finchar;
252 1.1 mrg zs->u.r.zs_code = zs->u.r.zs_oldcode;
253 1.1 mrg }
254 1.1 mrg
255 1.1 mrg /* Generate output characters in reverse order. */
256 1.1 mrg while (zs->u.r.zs_code >= 256) {
257 1.1 mrg *zs->u.r.zs_stackp++ = tab_suffixof(zs->u.r.zs_code);
258 1.1 mrg zs->u.r.zs_code = tab_prefixof(zs->u.r.zs_code);
259 1.1 mrg }
260 1.1 mrg *zs->u.r.zs_stackp++ = zs->u.r.zs_finchar = tab_suffixof(zs->u.r.zs_code);
261 1.1 mrg
262 1.1 mrg /* And put them out in forward order. */
263 1.1 mrg middle: do {
264 1.1 mrg if (count-- == 0)
265 1.1 mrg return (num);
266 1.1 mrg *bp++ = *--zs->u.r.zs_stackp;
267 1.1 mrg } while (zs->u.r.zs_stackp > de_stack);
268 1.1 mrg
269 1.1 mrg /* Generate the new entry. */
270 1.1 mrg if ((zs->u.r.zs_code = zs->zs_free_ent) < zs->zs_maxmaxcode) {
271 1.1 mrg tab_prefixof(zs->u.r.zs_code) = (u_short) zs->u.r.zs_oldcode;
272 1.1 mrg tab_suffixof(zs->u.r.zs_code) = zs->u.r.zs_finchar;
273 1.1 mrg zs->zs_free_ent = zs->u.r.zs_code + 1;
274 1.1 mrg }
275 1.1 mrg
276 1.1 mrg /* Remember previous code. */
277 1.1 mrg zs->u.r.zs_oldcode = zs->u.r.zs_incode;
278 1.1 mrg }
279 1.1 mrg zs->zs_state = S_EOF;
280 1.1 mrg eof: return (num - count);
281 1.1 mrg }
282 1.1 mrg
283 1.1 mrg /*-
284 1.1 mrg * Read one code from the standard input. If EOF, return -1.
285 1.1 mrg * Inputs:
286 1.1 mrg * stdin
287 1.1 mrg * Outputs:
288 1.1 mrg * code or -1 is returned.
289 1.1 mrg */
290 1.1 mrg static code_int
291 1.1 mrg getcode(struct s_zstate *zs)
292 1.1 mrg {
293 1.1 mrg code_int gcode;
294 1.1 mrg int r_off, bits;
295 1.1 mrg char_type *bp;
296 1.1 mrg
297 1.1 mrg bp = zs->u.r.zs_gbuf;
298 1.1 mrg if (zs->zs_clear_flg > 0 || zs->u.r.zs_roffset >= zs->u.r.zs_size ||
299 1.1 mrg zs->zs_free_ent > zs->zs_maxcode) {
300 1.1 mrg /*
301 1.1 mrg * If the next entry will be too big for the current gcode
302 1.1 mrg * size, then we must increase the size. This implies reading
303 1.1 mrg * a new buffer full, too.
304 1.1 mrg */
305 1.1 mrg if (zs->zs_free_ent > zs->zs_maxcode) {
306 1.1 mrg zs->zs_n_bits++;
307 1.1 mrg if (zs->zs_n_bits == zs->zs_maxbits) /* Won't get any bigger now. */
308 1.1 mrg zs->zs_maxcode = zs->zs_maxmaxcode;
309 1.1 mrg else
310 1.1 mrg zs->zs_maxcode = MAXCODE(zs->zs_n_bits);
311 1.1 mrg }
312 1.1 mrg if (zs->zs_clear_flg > 0) {
313 1.1 mrg zs->zs_maxcode = MAXCODE(zs->zs_n_bits = INIT_BITS);
314 1.1 mrg zs->zs_clear_flg = 0;
315 1.1 mrg }
316 1.1 mrg zs->u.r.zs_size = fread(zs->u.r.zs_gbuf, 1, zs->zs_n_bits, zs->zs_fp);
317 1.1 mrg if (zs->u.r.zs_size <= 0) /* End of file. */
318 1.1 mrg return (-1);
319 1.1 mrg zs->u.r.zs_roffset = 0;
320 1.1 mrg /* Round size down to integral number of codes. */
321 1.1 mrg zs->u.r.zs_size = (zs->u.r.zs_size << 3) - (zs->zs_n_bits - 1);
322 1.1 mrg }
323 1.1 mrg r_off = zs->u.r.zs_roffset;
324 1.1 mrg bits = zs->zs_n_bits;
325 1.1 mrg
326 1.1 mrg /* Get to the first byte. */
327 1.1 mrg bp += (r_off >> 3);
328 1.1 mrg r_off &= 7;
329 1.1 mrg
330 1.1 mrg /* Get first part (low order bits). */
331 1.1 mrg gcode = (*bp++ >> r_off);
332 1.1 mrg bits -= (8 - r_off);
333 1.1 mrg r_off = 8 - r_off; /* Now, roffset into gcode word. */
334 1.1 mrg
335 1.1 mrg /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
336 1.1 mrg if (bits >= 8) {
337 1.1 mrg gcode |= *bp++ << r_off;
338 1.1 mrg r_off += 8;
339 1.1 mrg bits -= 8;
340 1.1 mrg }
341 1.1 mrg
342 1.1 mrg /* High order bits. */
343 1.1 mrg gcode |= (*bp & rmask[bits]) << r_off;
344 1.1 mrg zs->u.r.zs_roffset += zs->zs_n_bits;
345 1.1 mrg
346 1.1 mrg return (gcode);
347 1.1 mrg }
348