1 1.11 joerg /* $NetBSD: zuncompress.c,v 1.11 2011/08/16 13:55:02 joerg Exp $ */ 2 1.1 mrg 3 1.1 mrg /*- 4 1.1 mrg * Copyright (c) 1985, 1986, 1992, 1993 5 1.1 mrg * The Regents of the University of California. All rights reserved. 6 1.1 mrg * 7 1.1 mrg * This code is derived from software contributed to Berkeley by 8 1.1 mrg * Diomidis Spinellis and James A. Woods, derived from original 9 1.1 mrg * work by Spencer Thomas and Joseph Orost. 10 1.1 mrg * 11 1.1 mrg * Redistribution and use in source and binary forms, with or without 12 1.1 mrg * modification, are permitted provided that the following conditions 13 1.1 mrg * are met: 14 1.1 mrg * 1. Redistributions of source code must retain the above copyright 15 1.1 mrg * notice, this list of conditions and the following disclaimer. 16 1.1 mrg * 2. Redistributions in binary form must reproduce the above copyright 17 1.1 mrg * notice, this list of conditions and the following disclaimer in the 18 1.1 mrg * documentation and/or other materials provided with the distribution. 19 1.1 mrg * 3. Neither the name of the University nor the names of its contributors 20 1.1 mrg * may be used to endorse or promote products derived from this software 21 1.1 mrg * without specific prior written permission. 22 1.1 mrg * 23 1.1 mrg * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 1.1 mrg * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 1.1 mrg * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 1.1 mrg * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 1.1 mrg * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 1.1 mrg * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 1.1 mrg * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 1.1 mrg * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 1.1 mrg * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 1.1 mrg * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 1.1 mrg * SUCH DAMAGE. 34 1.1 mrg * 35 1.1 mrg * from: NetBSD: zopen.c,v 1.8 2003/08/07 11:13:29 agc Exp 36 1.1 mrg */ 37 1.1 mrg 38 1.1 mrg /* This file is #included by gzip.c */ 39 1.1 mrg 40 1.2 he static int zread(void *, char *, int); 41 1.1 mrg 42 1.1 mrg #define tab_prefixof(i) (zs->zs_codetab[i]) 43 1.1 mrg #define tab_suffixof(i) ((char_type *)(zs->zs_htab))[i] 44 1.1 mrg #define de_stack ((char_type *)&tab_suffixof(1 << BITS)) 45 1.1 mrg 46 1.1 mrg #define BITS 16 /* Default bits. */ 47 1.1 mrg #define HSIZE 69001 /* 95% occupancy */ /* XXX may not need HSIZE */ 48 1.1 mrg #define BIT_MASK 0x1f /* Defines for third byte of header. */ 49 1.1 mrg #define BLOCK_MASK 0x80 50 1.1 mrg #define CHECK_GAP 10000 /* Ratio check interval. */ 51 1.1 mrg #define BUFSIZE (64 * 1024) 52 1.1 mrg 53 1.1 mrg /* 54 1.1 mrg * Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is 55 1.1 mrg * a fourth header byte (for expansion). 56 1.1 mrg */ 57 1.1 mrg #define INIT_BITS 9 /* Initial number of bits/code. */ 58 1.1 mrg 59 1.1 mrg /* 60 1.1 mrg * the next two codes should not be changed lightly, as they must not 61 1.1 mrg * lie within the contiguous general code space. 62 1.1 mrg */ 63 1.1 mrg #define FIRST 257 /* First free entry. */ 64 1.1 mrg #define CLEAR 256 /* Table clear output code. */ 65 1.1 mrg 66 1.1 mrg 67 1.1 mrg #define MAXCODE(n_bits) ((1 << (n_bits)) - 1) 68 1.1 mrg 69 1.1 mrg typedef long code_int; 70 1.1 mrg typedef long count_int; 71 1.1 mrg typedef u_char char_type; 72 1.1 mrg 73 1.1 mrg static char_type magic_header[] = 74 1.1 mrg {'\037', '\235'}; /* 1F 9D */ 75 1.1 mrg 76 1.1 mrg static char_type rmask[9] = 77 1.1 mrg {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; 78 1.1 mrg 79 1.3 mrg /* XXX zuncompress global */ 80 1.3 mrg off_t total_compressed_bytes; 81 1.3 mrg size_t compressed_prelen; 82 1.3 mrg char *compressed_pre; 83 1.1 mrg 84 1.1 mrg struct s_zstate { 85 1.1 mrg FILE *zs_fp; /* File stream for I/O */ 86 1.1 mrg char zs_mode; /* r or w */ 87 1.1 mrg enum { 88 1.1 mrg S_START, S_MIDDLE, S_EOF 89 1.1 mrg } zs_state; /* State of computation */ 90 1.1 mrg int zs_n_bits; /* Number of bits/code. */ 91 1.1 mrg int zs_maxbits; /* User settable max # bits/code. */ 92 1.1 mrg code_int zs_maxcode; /* Maximum code, given n_bits. */ 93 1.1 mrg code_int zs_maxmaxcode; /* Should NEVER generate this code. */ 94 1.1 mrg count_int zs_htab [HSIZE]; 95 1.1 mrg u_short zs_codetab [HSIZE]; 96 1.1 mrg code_int zs_hsize; /* For dynamic table sizing. */ 97 1.1 mrg code_int zs_free_ent; /* First unused entry. */ 98 1.1 mrg /* 99 1.1 mrg * Block compression parameters -- after all codes are used up, 100 1.1 mrg * and compression rate changes, start over. 101 1.1 mrg */ 102 1.1 mrg int zs_block_compress; 103 1.1 mrg int zs_clear_flg; 104 1.1 mrg long zs_ratio; 105 1.1 mrg count_int zs_checkpoint; 106 1.1 mrg int zs_offset; 107 1.1 mrg long zs_in_count; /* Length of input. */ 108 1.1 mrg long zs_bytes_out; /* Length of compressed output. */ 109 1.1 mrg long zs_out_count; /* # of codes output (for debugging). */ 110 1.1 mrg char_type zs_buf[BITS]; 111 1.1 mrg union { 112 1.1 mrg struct { 113 1.1 mrg long zs_fcode; 114 1.1 mrg code_int zs_ent; 115 1.1 mrg code_int zs_hsize_reg; 116 1.1 mrg int zs_hshift; 117 1.8 mrg } w; /* Write parameters */ 118 1.1 mrg struct { 119 1.1 mrg char_type *zs_stackp; 120 1.1 mrg int zs_finchar; 121 1.1 mrg code_int zs_code, zs_oldcode, zs_incode; 122 1.1 mrg int zs_roffset, zs_size; 123 1.1 mrg char_type zs_gbuf[BITS]; 124 1.1 mrg } r; /* Read parameters */ 125 1.1 mrg } u; 126 1.1 mrg }; 127 1.1 mrg 128 1.1 mrg static code_int getcode(struct s_zstate *zs); 129 1.1 mrg 130 1.1 mrg static off_t 131 1.3 mrg zuncompress(FILE *in, FILE *out, char *pre, size_t prelen, 132 1.3 mrg off_t *compressed_bytes) 133 1.1 mrg { 134 1.1 mrg off_t bin, bout = 0; 135 1.5 dsl char *buf; 136 1.5 dsl 137 1.5 dsl buf = malloc(BUFSIZE); 138 1.5 dsl if (buf == NULL) 139 1.5 dsl return -1; 140 1.1 mrg 141 1.3 mrg /* XXX */ 142 1.3 mrg compressed_prelen = prelen; 143 1.3 mrg if (prelen != 0) 144 1.3 mrg compressed_pre = pre; 145 1.3 mrg else 146 1.3 mrg compressed_pre = NULL; 147 1.3 mrg 148 1.1 mrg while ((bin = fread(buf, 1, sizeof(buf), in)) != 0) { 149 1.7 lukem if (tflag == 0 && (off_t)fwrite(buf, 1, bin, out) != bin) { 150 1.5 dsl free(buf); 151 1.4 mrg return -1; 152 1.5 dsl } 153 1.1 mrg bout += bin; 154 1.1 mrg } 155 1.1 mrg 156 1.3 mrg if (compressed_bytes) 157 1.3 mrg *compressed_bytes = total_compressed_bytes; 158 1.3 mrg 159 1.5 dsl free(buf); 160 1.1 mrg return bout; 161 1.1 mrg } 162 1.1 mrg 163 1.5 dsl static int 164 1.5 dsl zclose(void *zs) 165 1.5 dsl { 166 1.5 dsl free(zs); 167 1.5 dsl /* We leave the caller to close the fd passed to zdopen() */ 168 1.5 dsl return 0; 169 1.5 dsl } 170 1.5 dsl 171 1.1 mrg FILE * 172 1.5 dsl zdopen(int fd) 173 1.1 mrg { 174 1.1 mrg struct s_zstate *zs; 175 1.1 mrg 176 1.1 mrg if ((zs = calloc(1, sizeof(struct s_zstate))) == NULL) 177 1.1 mrg return (NULL); 178 1.1 mrg 179 1.1 mrg zs->zs_state = S_START; 180 1.1 mrg 181 1.1 mrg /* XXX we can get rid of some of these */ 182 1.1 mrg zs->zs_hsize = HSIZE; /* For dynamic table sizing. */ 183 1.1 mrg zs->zs_free_ent = 0; /* First unused entry. */ 184 1.1 mrg zs->zs_block_compress = BLOCK_MASK; 185 1.1 mrg zs->zs_clear_flg = 0; /* XXX we calloc()'d this structure why = 0? */ 186 1.1 mrg zs->zs_ratio = 0; 187 1.1 mrg zs->zs_checkpoint = CHECK_GAP; 188 1.1 mrg zs->zs_in_count = 1; /* Length of input. */ 189 1.1 mrg zs->zs_out_count = 0; /* # of codes output (for debugging). */ 190 1.1 mrg zs->u.r.zs_roffset = 0; 191 1.1 mrg zs->u.r.zs_size = 0; 192 1.1 mrg 193 1.1 mrg /* 194 1.1 mrg * Layering compress on top of stdio in order to provide buffering, 195 1.1 mrg * and ensure that reads and write work with the data specified. 196 1.1 mrg */ 197 1.5 dsl if ((zs->zs_fp = fdopen(fd, "r")) == NULL) { 198 1.1 mrg free(zs); 199 1.1 mrg return NULL; 200 1.1 mrg } 201 1.1 mrg 202 1.5 dsl return funopen(zs, zread, NULL, NULL, zclose); 203 1.1 mrg } 204 1.1 mrg 205 1.1 mrg /* 206 1.1 mrg * Decompress read. This routine adapts to the codes in the file building 207 1.1 mrg * the "string" table on-the-fly; requiring no table to be stored in the 208 1.1 mrg * compressed file. The tables used herein are shared with those of the 209 1.1 mrg * compress() routine. See the definitions above. 210 1.1 mrg */ 211 1.2 he static int 212 1.1 mrg zread(void *cookie, char *rbp, int num) 213 1.1 mrg { 214 1.3 mrg u_int count, i; 215 1.1 mrg struct s_zstate *zs; 216 1.1 mrg u_char *bp, header[3]; 217 1.1 mrg 218 1.1 mrg if (num == 0) 219 1.1 mrg return (0); 220 1.1 mrg 221 1.1 mrg zs = cookie; 222 1.1 mrg count = num; 223 1.1 mrg bp = (u_char *)rbp; 224 1.1 mrg switch (zs->zs_state) { 225 1.1 mrg case S_START: 226 1.1 mrg zs->zs_state = S_MIDDLE; 227 1.1 mrg break; 228 1.1 mrg case S_MIDDLE: 229 1.1 mrg goto middle; 230 1.1 mrg case S_EOF: 231 1.1 mrg goto eof; 232 1.1 mrg } 233 1.1 mrg 234 1.1 mrg /* Check the magic number */ 235 1.3 mrg for (i = 0; i < 3 && compressed_prelen; i++, compressed_prelen--) 236 1.3 mrg header[i] = *compressed_pre++; 237 1.3 mrg 238 1.3 mrg if (fread(header + i, 1, sizeof(header) - i, zs->zs_fp) != 239 1.3 mrg sizeof(header) - i || 240 1.1 mrg memcmp(header, magic_header, sizeof(magic_header)) != 0) { 241 1.1 mrg errno = EFTYPE; 242 1.1 mrg return (-1); 243 1.1 mrg } 244 1.3 mrg total_compressed_bytes = 0; 245 1.1 mrg zs->zs_maxbits = header[2]; /* Set -b from file. */ 246 1.1 mrg zs->zs_block_compress = zs->zs_maxbits & BLOCK_MASK; 247 1.1 mrg zs->zs_maxbits &= BIT_MASK; 248 1.1 mrg zs->zs_maxmaxcode = 1L << zs->zs_maxbits; 249 1.11 joerg if (zs->zs_maxbits > BITS || zs->zs_maxbits < 12) { 250 1.1 mrg errno = EFTYPE; 251 1.1 mrg return (-1); 252 1.1 mrg } 253 1.1 mrg /* As above, initialize the first 256 entries in the table. */ 254 1.1 mrg zs->zs_maxcode = MAXCODE(zs->zs_n_bits = INIT_BITS); 255 1.1 mrg for (zs->u.r.zs_code = 255; zs->u.r.zs_code >= 0; zs->u.r.zs_code--) { 256 1.1 mrg tab_prefixof(zs->u.r.zs_code) = 0; 257 1.1 mrg tab_suffixof(zs->u.r.zs_code) = (char_type) zs->u.r.zs_code; 258 1.1 mrg } 259 1.1 mrg zs->zs_free_ent = zs->zs_block_compress ? FIRST : 256; 260 1.1 mrg 261 1.11 joerg zs->u.r.zs_oldcode = -1; 262 1.1 mrg zs->u.r.zs_stackp = de_stack; 263 1.1 mrg 264 1.1 mrg while ((zs->u.r.zs_code = getcode(zs)) > -1) { 265 1.1 mrg 266 1.1 mrg if ((zs->u.r.zs_code == CLEAR) && zs->zs_block_compress) { 267 1.1 mrg for (zs->u.r.zs_code = 255; zs->u.r.zs_code >= 0; 268 1.1 mrg zs->u.r.zs_code--) 269 1.1 mrg tab_prefixof(zs->u.r.zs_code) = 0; 270 1.1 mrg zs->zs_clear_flg = 1; 271 1.11 joerg zs->zs_free_ent = FIRST; 272 1.11 joerg zs->u.r.zs_oldcode = -1; 273 1.11 joerg continue; 274 1.1 mrg } 275 1.1 mrg zs->u.r.zs_incode = zs->u.r.zs_code; 276 1.1 mrg 277 1.1 mrg /* Special case for KwKwK string. */ 278 1.1 mrg if (zs->u.r.zs_code >= zs->zs_free_ent) { 279 1.11 joerg if (zs->u.r.zs_code > zs->zs_free_ent || 280 1.11 joerg zs->u.r.zs_oldcode == -1) { 281 1.11 joerg /* Bad stream. */ 282 1.11 joerg errno = EINVAL; 283 1.11 joerg return (-1); 284 1.11 joerg } 285 1.1 mrg *zs->u.r.zs_stackp++ = zs->u.r.zs_finchar; 286 1.1 mrg zs->u.r.zs_code = zs->u.r.zs_oldcode; 287 1.1 mrg } 288 1.11 joerg /* 289 1.11 joerg * The above condition ensures that code < free_ent. 290 1.11 joerg * The construction of tab_prefixof in turn guarantees that 291 1.11 joerg * each iteration decreases code and therefore stack usage is 292 1.11 joerg * bound by 1 << BITS - 256. 293 1.11 joerg */ 294 1.1 mrg 295 1.1 mrg /* Generate output characters in reverse order. */ 296 1.1 mrg while (zs->u.r.zs_code >= 256) { 297 1.1 mrg *zs->u.r.zs_stackp++ = tab_suffixof(zs->u.r.zs_code); 298 1.1 mrg zs->u.r.zs_code = tab_prefixof(zs->u.r.zs_code); 299 1.1 mrg } 300 1.1 mrg *zs->u.r.zs_stackp++ = zs->u.r.zs_finchar = tab_suffixof(zs->u.r.zs_code); 301 1.1 mrg 302 1.1 mrg /* And put them out in forward order. */ 303 1.1 mrg middle: do { 304 1.1 mrg if (count-- == 0) 305 1.1 mrg return (num); 306 1.1 mrg *bp++ = *--zs->u.r.zs_stackp; 307 1.1 mrg } while (zs->u.r.zs_stackp > de_stack); 308 1.1 mrg 309 1.1 mrg /* Generate the new entry. */ 310 1.11 joerg if ((zs->u.r.zs_code = zs->zs_free_ent) < zs->zs_maxmaxcode && 311 1.11 joerg zs->u.r.zs_oldcode != -1) { 312 1.1 mrg tab_prefixof(zs->u.r.zs_code) = (u_short) zs->u.r.zs_oldcode; 313 1.1 mrg tab_suffixof(zs->u.r.zs_code) = zs->u.r.zs_finchar; 314 1.1 mrg zs->zs_free_ent = zs->u.r.zs_code + 1; 315 1.1 mrg } 316 1.1 mrg 317 1.1 mrg /* Remember previous code. */ 318 1.1 mrg zs->u.r.zs_oldcode = zs->u.r.zs_incode; 319 1.1 mrg } 320 1.1 mrg zs->zs_state = S_EOF; 321 1.1 mrg eof: return (num - count); 322 1.1 mrg } 323 1.1 mrg 324 1.1 mrg /*- 325 1.1 mrg * Read one code from the standard input. If EOF, return -1. 326 1.1 mrg * Inputs: 327 1.1 mrg * stdin 328 1.1 mrg * Outputs: 329 1.1 mrg * code or -1 is returned. 330 1.1 mrg */ 331 1.1 mrg static code_int 332 1.1 mrg getcode(struct s_zstate *zs) 333 1.1 mrg { 334 1.1 mrg code_int gcode; 335 1.3 mrg int r_off, bits, i; 336 1.1 mrg char_type *bp; 337 1.1 mrg 338 1.1 mrg bp = zs->u.r.zs_gbuf; 339 1.1 mrg if (zs->zs_clear_flg > 0 || zs->u.r.zs_roffset >= zs->u.r.zs_size || 340 1.1 mrg zs->zs_free_ent > zs->zs_maxcode) { 341 1.1 mrg /* 342 1.1 mrg * If the next entry will be too big for the current gcode 343 1.1 mrg * size, then we must increase the size. This implies reading 344 1.1 mrg * a new buffer full, too. 345 1.1 mrg */ 346 1.1 mrg if (zs->zs_free_ent > zs->zs_maxcode) { 347 1.1 mrg zs->zs_n_bits++; 348 1.1 mrg if (zs->zs_n_bits == zs->zs_maxbits) /* Won't get any bigger now. */ 349 1.1 mrg zs->zs_maxcode = zs->zs_maxmaxcode; 350 1.1 mrg else 351 1.1 mrg zs->zs_maxcode = MAXCODE(zs->zs_n_bits); 352 1.1 mrg } 353 1.1 mrg if (zs->zs_clear_flg > 0) { 354 1.1 mrg zs->zs_maxcode = MAXCODE(zs->zs_n_bits = INIT_BITS); 355 1.1 mrg zs->zs_clear_flg = 0; 356 1.1 mrg } 357 1.3 mrg /* XXX */ 358 1.3 mrg for (i = 0; i < zs->zs_n_bits && compressed_prelen; i++, compressed_prelen--) 359 1.3 mrg zs->u.r.zs_gbuf[i] = *compressed_pre++; 360 1.3 mrg zs->u.r.zs_size = fread(zs->u.r.zs_gbuf + i, 1, zs->zs_n_bits - i, zs->zs_fp); 361 1.3 mrg zs->u.r.zs_size += i; 362 1.1 mrg if (zs->u.r.zs_size <= 0) /* End of file. */ 363 1.1 mrg return (-1); 364 1.1 mrg zs->u.r.zs_roffset = 0; 365 1.3 mrg 366 1.3 mrg total_compressed_bytes += zs->u.r.zs_size; 367 1.3 mrg 368 1.1 mrg /* Round size down to integral number of codes. */ 369 1.1 mrg zs->u.r.zs_size = (zs->u.r.zs_size << 3) - (zs->zs_n_bits - 1); 370 1.1 mrg } 371 1.1 mrg r_off = zs->u.r.zs_roffset; 372 1.1 mrg bits = zs->zs_n_bits; 373 1.1 mrg 374 1.1 mrg /* Get to the first byte. */ 375 1.1 mrg bp += (r_off >> 3); 376 1.1 mrg r_off &= 7; 377 1.1 mrg 378 1.1 mrg /* Get first part (low order bits). */ 379 1.1 mrg gcode = (*bp++ >> r_off); 380 1.1 mrg bits -= (8 - r_off); 381 1.1 mrg r_off = 8 - r_off; /* Now, roffset into gcode word. */ 382 1.1 mrg 383 1.1 mrg /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ 384 1.1 mrg if (bits >= 8) { 385 1.1 mrg gcode |= *bp++ << r_off; 386 1.1 mrg r_off += 8; 387 1.1 mrg bits -= 8; 388 1.1 mrg } 389 1.1 mrg 390 1.1 mrg /* High order bits. */ 391 1.1 mrg gcode |= (*bp & rmask[bits]) << r_off; 392 1.1 mrg zs->u.r.zs_roffset += zs->zs_n_bits; 393 1.1 mrg 394 1.1 mrg return (gcode); 395 1.1 mrg } 396