zuncompress.c revision 1.1 1 /* $NetBSD: zuncompress.c,v 1.1 2004/02/18 08:19:48 mrg Exp $ */
2
3 /*-
4 * Copyright (c) 1985, 1986, 1992, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Diomidis Spinellis and James A. Woods, derived from original
9 * work by Spencer Thomas and Joseph Orost.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * from: NetBSD: zopen.c,v 1.8 2003/08/07 11:13:29 agc Exp
36 */
37
38 /* This file is #included by gzip.c */
39
40 static ssize_t zread(void *, char *, int);
41
42 #define tab_prefixof(i) (zs->zs_codetab[i])
43 #define tab_suffixof(i) ((char_type *)(zs->zs_htab))[i]
44 #define de_stack ((char_type *)&tab_suffixof(1 << BITS))
45
46 #define BITS 16 /* Default bits. */
47 #define HSIZE 69001 /* 95% occupancy */ /* XXX may not need HSIZE */
48 #define BIT_MASK 0x1f /* Defines for third byte of header. */
49 #define BLOCK_MASK 0x80
50 #define CHECK_GAP 10000 /* Ratio check interval. */
51 #define BUFSIZE (64 * 1024)
52
53 /*
54 * Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is
55 * a fourth header byte (for expansion).
56 */
57 #define INIT_BITS 9 /* Initial number of bits/code. */
58
59 /*
60 * the next two codes should not be changed lightly, as they must not
61 * lie within the contiguous general code space.
62 */
63 #define FIRST 257 /* First free entry. */
64 #define CLEAR 256 /* Table clear output code. */
65
66
67 #define MAXCODE(n_bits) ((1 << (n_bits)) - 1)
68
69 typedef long code_int;
70 typedef long count_int;
71 typedef u_char char_type;
72
73 static char_type magic_header[] =
74 {'\037', '\235'}; /* 1F 9D */
75
76 static char_type rmask[9] =
77 {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
78
79
80 struct s_zstate {
81 FILE *zs_fp; /* File stream for I/O */
82 char zs_mode; /* r or w */
83 enum {
84 S_START, S_MIDDLE, S_EOF
85 } zs_state; /* State of computation */
86 int zs_n_bits; /* Number of bits/code. */
87 int zs_maxbits; /* User settable max # bits/code. */
88 code_int zs_maxcode; /* Maximum code, given n_bits. */
89 code_int zs_maxmaxcode; /* Should NEVER generate this code. */
90 count_int zs_htab [HSIZE];
91 u_short zs_codetab [HSIZE];
92 code_int zs_hsize; /* For dynamic table sizing. */
93 code_int zs_free_ent; /* First unused entry. */
94 /*
95 * Block compression parameters -- after all codes are used up,
96 * and compression rate changes, start over.
97 */
98 int zs_block_compress;
99 int zs_clear_flg;
100 long zs_ratio;
101 count_int zs_checkpoint;
102 int zs_offset;
103 long zs_in_count; /* Length of input. */
104 long zs_bytes_out; /* Length of compressed output. */
105 long zs_out_count; /* # of codes output (for debugging). */
106 char_type zs_buf[BITS];
107 union {
108 struct {
109 long zs_fcode;
110 code_int zs_ent;
111 code_int zs_hsize_reg;
112 int zs_hshift;
113 } w; /* Write paramenters */
114 struct {
115 char_type *zs_stackp;
116 int zs_finchar;
117 code_int zs_code, zs_oldcode, zs_incode;
118 int zs_roffset, zs_size;
119 char_type zs_gbuf[BITS];
120 } r; /* Read parameters */
121 } u;
122 };
123
124 static code_int getcode(struct s_zstate *zs);
125
126 static off_t
127 zuncompress(FILE *in, FILE *out)
128 {
129 off_t bin, bout = 0;
130 char buf[BUFSIZE];
131
132 while ((bin = fread(buf, 1, sizeof(buf), in)) != 0) {
133 if (fwrite(buf, 1, bin, out) != bin)
134 return 0;
135 bout += bin;
136 }
137
138 return bout;
139 }
140
141 FILE *
142 zopen(const char *fname)
143 {
144 struct s_zstate *zs;
145
146 if ((zs = calloc(1, sizeof(struct s_zstate))) == NULL)
147 return (NULL);
148
149 zs->zs_state = S_START;
150
151 /* XXX we can get rid of some of these */
152 zs->zs_hsize = HSIZE; /* For dynamic table sizing. */
153 zs->zs_free_ent = 0; /* First unused entry. */
154 zs->zs_block_compress = BLOCK_MASK;
155 zs->zs_clear_flg = 0; /* XXX we calloc()'d this structure why = 0? */
156 zs->zs_ratio = 0;
157 zs->zs_checkpoint = CHECK_GAP;
158 zs->zs_in_count = 1; /* Length of input. */
159 zs->zs_out_count = 0; /* # of codes output (for debugging). */
160 zs->u.r.zs_roffset = 0;
161 zs->u.r.zs_size = 0;
162
163 /*
164 * Layering compress on top of stdio in order to provide buffering,
165 * and ensure that reads and write work with the data specified.
166 */
167 if ((zs->zs_fp = fopen(fname, "r")) == NULL) {
168 free(zs);
169 return NULL;
170 }
171
172 return fropen(zs, zread);
173 }
174
175 /*
176 * Decompress read. This routine adapts to the codes in the file building
177 * the "string" table on-the-fly; requiring no table to be stored in the
178 * compressed file. The tables used herein are shared with those of the
179 * compress() routine. See the definitions above.
180 */
181 static ssize_t
182 zread(void *cookie, char *rbp, int num)
183 {
184 u_int count;
185 struct s_zstate *zs;
186 u_char *bp, header[3];
187
188 if (num == 0)
189 return (0);
190
191 zs = cookie;
192 count = num;
193 bp = (u_char *)rbp;
194 switch (zs->zs_state) {
195 case S_START:
196 zs->zs_state = S_MIDDLE;
197 break;
198 case S_MIDDLE:
199 goto middle;
200 case S_EOF:
201 goto eof;
202 }
203
204 /* Check the magic number */
205 if (fread(header,
206 sizeof(char), sizeof(header), zs->zs_fp) != sizeof(header) ||
207 memcmp(header, magic_header, sizeof(magic_header)) != 0) {
208 errno = EFTYPE;
209 return (-1);
210 }
211 zs->zs_maxbits = header[2]; /* Set -b from file. */
212 zs->zs_block_compress = zs->zs_maxbits & BLOCK_MASK;
213 zs->zs_maxbits &= BIT_MASK;
214 zs->zs_maxmaxcode = 1L << zs->zs_maxbits;
215 if (zs->zs_maxbits > BITS) {
216 errno = EFTYPE;
217 return (-1);
218 }
219 /* As above, initialize the first 256 entries in the table. */
220 zs->zs_maxcode = MAXCODE(zs->zs_n_bits = INIT_BITS);
221 for (zs->u.r.zs_code = 255; zs->u.r.zs_code >= 0; zs->u.r.zs_code--) {
222 tab_prefixof(zs->u.r.zs_code) = 0;
223 tab_suffixof(zs->u.r.zs_code) = (char_type) zs->u.r.zs_code;
224 }
225 zs->zs_free_ent = zs->zs_block_compress ? FIRST : 256;
226
227 zs->u.r.zs_finchar = zs->u.r.zs_oldcode = getcode(zs);
228 if (zs->u.r.zs_oldcode == -1) /* EOF already? */
229 return (0); /* Get out of here */
230
231 /* First code must be 8 bits = char. */
232 *bp++ = (u_char)zs->u.r.zs_finchar;
233 count--;
234 zs->u.r.zs_stackp = de_stack;
235
236 while ((zs->u.r.zs_code = getcode(zs)) > -1) {
237
238 if ((zs->u.r.zs_code == CLEAR) && zs->zs_block_compress) {
239 for (zs->u.r.zs_code = 255; zs->u.r.zs_code >= 0;
240 zs->u.r.zs_code--)
241 tab_prefixof(zs->u.r.zs_code) = 0;
242 zs->zs_clear_flg = 1;
243 zs->zs_free_ent = FIRST - 1;
244 if ((zs->u.r.zs_code = getcode(zs)) == -1) /* O, untimely death! */
245 break;
246 }
247 zs->u.r.zs_incode = zs->u.r.zs_code;
248
249 /* Special case for KwKwK string. */
250 if (zs->u.r.zs_code >= zs->zs_free_ent) {
251 *zs->u.r.zs_stackp++ = zs->u.r.zs_finchar;
252 zs->u.r.zs_code = zs->u.r.zs_oldcode;
253 }
254
255 /* Generate output characters in reverse order. */
256 while (zs->u.r.zs_code >= 256) {
257 *zs->u.r.zs_stackp++ = tab_suffixof(zs->u.r.zs_code);
258 zs->u.r.zs_code = tab_prefixof(zs->u.r.zs_code);
259 }
260 *zs->u.r.zs_stackp++ = zs->u.r.zs_finchar = tab_suffixof(zs->u.r.zs_code);
261
262 /* And put them out in forward order. */
263 middle: do {
264 if (count-- == 0)
265 return (num);
266 *bp++ = *--zs->u.r.zs_stackp;
267 } while (zs->u.r.zs_stackp > de_stack);
268
269 /* Generate the new entry. */
270 if ((zs->u.r.zs_code = zs->zs_free_ent) < zs->zs_maxmaxcode) {
271 tab_prefixof(zs->u.r.zs_code) = (u_short) zs->u.r.zs_oldcode;
272 tab_suffixof(zs->u.r.zs_code) = zs->u.r.zs_finchar;
273 zs->zs_free_ent = zs->u.r.zs_code + 1;
274 }
275
276 /* Remember previous code. */
277 zs->u.r.zs_oldcode = zs->u.r.zs_incode;
278 }
279 zs->zs_state = S_EOF;
280 eof: return (num - count);
281 }
282
283 /*-
284 * Read one code from the standard input. If EOF, return -1.
285 * Inputs:
286 * stdin
287 * Outputs:
288 * code or -1 is returned.
289 */
290 static code_int
291 getcode(struct s_zstate *zs)
292 {
293 code_int gcode;
294 int r_off, bits;
295 char_type *bp;
296
297 bp = zs->u.r.zs_gbuf;
298 if (zs->zs_clear_flg > 0 || zs->u.r.zs_roffset >= zs->u.r.zs_size ||
299 zs->zs_free_ent > zs->zs_maxcode) {
300 /*
301 * If the next entry will be too big for the current gcode
302 * size, then we must increase the size. This implies reading
303 * a new buffer full, too.
304 */
305 if (zs->zs_free_ent > zs->zs_maxcode) {
306 zs->zs_n_bits++;
307 if (zs->zs_n_bits == zs->zs_maxbits) /* Won't get any bigger now. */
308 zs->zs_maxcode = zs->zs_maxmaxcode;
309 else
310 zs->zs_maxcode = MAXCODE(zs->zs_n_bits);
311 }
312 if (zs->zs_clear_flg > 0) {
313 zs->zs_maxcode = MAXCODE(zs->zs_n_bits = INIT_BITS);
314 zs->zs_clear_flg = 0;
315 }
316 zs->u.r.zs_size = fread(zs->u.r.zs_gbuf, 1, zs->zs_n_bits, zs->zs_fp);
317 if (zs->u.r.zs_size <= 0) /* End of file. */
318 return (-1);
319 zs->u.r.zs_roffset = 0;
320 /* Round size down to integral number of codes. */
321 zs->u.r.zs_size = (zs->u.r.zs_size << 3) - (zs->zs_n_bits - 1);
322 }
323 r_off = zs->u.r.zs_roffset;
324 bits = zs->zs_n_bits;
325
326 /* Get to the first byte. */
327 bp += (r_off >> 3);
328 r_off &= 7;
329
330 /* Get first part (low order bits). */
331 gcode = (*bp++ >> r_off);
332 bits -= (8 - r_off);
333 r_off = 8 - r_off; /* Now, roffset into gcode word. */
334
335 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
336 if (bits >= 8) {
337 gcode |= *bp++ << r_off;
338 r_off += 8;
339 bits -= 8;
340 }
341
342 /* High order bits. */
343 gcode |= (*bp & rmask[bits]) << r_off;
344 zs->u.r.zs_roffset += zs->zs_n_bits;
345
346 return (gcode);
347 }
348