Home | History | Annotate | Line # | Download | only in examples
      1      1.1  christos /* gzjoin -- command to join gzip files into one gzip file
      2      1.1  christos 
      3  1.1.1.2  christos   Copyright (C) 2004, 2005, 2012 Mark Adler, all rights reserved
      4  1.1.1.2  christos   version 1.2, 14 Aug 2012
      5      1.1  christos 
      6      1.1  christos   This software is provided 'as-is', without any express or implied
      7      1.1  christos   warranty.  In no event will the author be held liable for any damages
      8      1.1  christos   arising from the use of this software.
      9      1.1  christos 
     10      1.1  christos   Permission is granted to anyone to use this software for any purpose,
     11      1.1  christos   including commercial applications, and to alter it and redistribute it
     12      1.1  christos   freely, subject to the following restrictions:
     13      1.1  christos 
     14      1.1  christos   1. The origin of this software must not be misrepresented; you must not
     15      1.1  christos      claim that you wrote the original software. If you use this software
     16      1.1  christos      in a product, an acknowledgment in the product documentation would be
     17      1.1  christos      appreciated but is not required.
     18      1.1  christos   2. Altered source versions must be plainly marked as such, and must not be
     19      1.1  christos      misrepresented as being the original software.
     20      1.1  christos   3. This notice may not be removed or altered from any source distribution.
     21      1.1  christos 
     22      1.1  christos   Mark Adler    madler (at) alumni.caltech.edu
     23      1.1  christos  */
     24      1.1  christos 
     25      1.1  christos /*
     26      1.1  christos  * Change history:
     27      1.1  christos  *
     28      1.1  christos  * 1.0  11 Dec 2004     - First version
     29      1.1  christos  * 1.1  12 Jun 2005     - Changed ssize_t to long for portability
     30  1.1.1.2  christos  * 1.2  14 Aug 2012     - Clean up for z_const usage
     31      1.1  christos  */
     32      1.1  christos 
     33      1.1  christos /*
     34      1.1  christos    gzjoin takes one or more gzip files on the command line and writes out a
     35      1.1  christos    single gzip file that will uncompress to the concatenation of the
     36      1.1  christos    uncompressed data from the individual gzip files.  gzjoin does this without
     37      1.1  christos    having to recompress any of the data and without having to calculate a new
     38      1.1  christos    crc32 for the concatenated uncompressed data.  gzjoin does however have to
     39      1.1  christos    decompress all of the input data in order to find the bits in the compressed
     40      1.1  christos    data that need to be modified to concatenate the streams.
     41      1.1  christos 
     42      1.1  christos    gzjoin does not do an integrity check on the input gzip files other than
     43      1.1  christos    checking the gzip header and decompressing the compressed data.  They are
     44      1.1  christos    otherwise assumed to be complete and correct.
     45      1.1  christos 
     46      1.1  christos    Each joint between gzip files removes at least 18 bytes of previous trailer
     47      1.1  christos    and subsequent header, and inserts an average of about three bytes to the
     48      1.1  christos    compressed data in order to connect the streams.  The output gzip file
     49      1.1  christos    has a minimal ten-byte gzip header with no file name or modification time.
     50      1.1  christos 
     51      1.1  christos    This program was written to illustrate the use of the Z_BLOCK option of
     52      1.1  christos    inflate() and the crc32_combine() function.  gzjoin will not compile with
     53      1.1  christos    versions of zlib earlier than 1.2.3.
     54      1.1  christos  */
     55      1.1  christos 
     56      1.1  christos #include <stdio.h>      /* fputs(), fprintf(), fwrite(), putc() */
     57      1.1  christos #include <stdlib.h>     /* exit(), malloc(), free() */
     58      1.1  christos #include <fcntl.h>      /* open() */
     59      1.1  christos #include <unistd.h>     /* close(), read(), lseek() */
     60      1.1  christos #include "zlib.h"
     61      1.1  christos     /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */
     62      1.1  christos 
     63      1.1  christos #define local static
     64      1.1  christos 
/* Report a fatal error on stderr and terminate the program with exit status 1.
   The message printed is why1 immediately followed by why2.  Both strings are
   only read, so string literals may be passed.  The int return type exists
   solely so that a call can be used as an operand in an expression (as the
   bget() macro does); control never actually returns to the caller. */
local int bail(const char *why1, const char *why2)
{
    fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2);
    exit(1);
    return 0;               /* not reached -- satisfies the return type */
}
     72      1.1  christos 
     73      1.1  christos /* -- simple buffered file input with access to the buffer -- */
     74      1.1  christos 
/* size of the input buffer: must be a power of two and fit in unsigned */
#define CHUNK 32768

/* state of one buffered input file: the unread data is the left bytes that
   begin at next, inside the allocation at buf */
typedef struct bin_s {
    char *name;             /* file name, kept for error messages */
    int fd;                 /* descriptor of the open file */
    unsigned left;          /* number of unread bytes at next */
    unsigned char *next;    /* first unread byte */
    unsigned char *buf;     /* allocated buffer of CHUNK bytes */
} bin;
     85      1.1  christos 
     86      1.1  christos /* close a buffered file and free allocated memory */
     87      1.1  christos local void bclose(bin *in)
     88      1.1  christos {
     89      1.1  christos     if (in != NULL) {
     90      1.1  christos         if (in->fd != -1)
     91      1.1  christos             close(in->fd);
     92      1.1  christos         if (in->buf != NULL)
     93      1.1  christos             free(in->buf);
     94      1.1  christos         free(in);
     95      1.1  christos     }
     96      1.1  christos }
     97      1.1  christos 
     98      1.1  christos /* open a buffered file for input, return a pointer to type bin, or NULL on
     99      1.1  christos    failure */
    100      1.1  christos local bin *bopen(char *name)
    101      1.1  christos {
    102      1.1  christos     bin *in;
    103      1.1  christos 
    104      1.1  christos     in = malloc(sizeof(bin));
    105      1.1  christos     if (in == NULL)
    106      1.1  christos         return NULL;
    107      1.1  christos     in->buf = malloc(CHUNK);
    108      1.1  christos     in->fd = open(name, O_RDONLY, 0);
    109      1.1  christos     if (in->buf == NULL || in->fd == -1) {
    110      1.1  christos         bclose(in);
    111      1.1  christos         return NULL;
    112      1.1  christos     }
    113      1.1  christos     in->left = 0;
    114      1.1  christos     in->next = in->buf;
    115      1.1  christos     in->name = name;
    116      1.1  christos     return in;
    117      1.1  christos }
    118      1.1  christos 
    119      1.1  christos /* load buffer from file, return -1 on read error, 0 or 1 on success, with
    120      1.1  christos    1 indicating that end-of-file was reached */
    121      1.1  christos local int bload(bin *in)
    122      1.1  christos {
    123      1.1  christos     long len;
    124      1.1  christos 
    125      1.1  christos     if (in == NULL)
    126      1.1  christos         return -1;
    127      1.1  christos     if (in->left != 0)
    128      1.1  christos         return 0;
    129      1.1  christos     in->next = in->buf;
    130      1.1  christos     do {
    131      1.1  christos         len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left);
    132      1.1  christos         if (len < 0)
    133      1.1  christos             return -1;
    134      1.1  christos         in->left += (unsigned)len;
    135      1.1  christos     } while (len != 0 && in->left < CHUNK);
    136      1.1  christos     return len == 0 ? 1 : 0;
    137      1.1  christos }
    138      1.1  christos 
/* Get the next byte from the file as a value in 0..255, reloading the buffer
   first when it is empty.  If no byte is available after the reload (end of
   file, or a failed read), bail() is called and the program exits.  This is a
   macro that evaluates its argument several times, so the argument must be
   free of side effects; every use of it is parenthesized so that any pointer
   expression can be passed safely. */
#define bget(in) ((in)->left ? 0 : bload(in), \
                  (in)->left ? ((in)->left--, *((in)->next)++) : \
                    bail("unexpected end of file on ", (in)->name))
    143      1.1  christos 
    144      1.1  christos /* get a four-byte little-endian unsigned integer from file */
    145      1.1  christos local unsigned long bget4(bin *in)
    146      1.1  christos {
    147      1.1  christos     unsigned long val;
    148      1.1  christos 
    149      1.1  christos     val = bget(in);
    150      1.1  christos     val += (unsigned long)(bget(in)) << 8;
    151      1.1  christos     val += (unsigned long)(bget(in)) << 16;
    152      1.1  christos     val += (unsigned long)(bget(in)) << 24;
    153      1.1  christos     return val;
    154      1.1  christos }
    155      1.1  christos 
    156      1.1  christos /* skip bytes in file */
    157      1.1  christos local void bskip(bin *in, unsigned skip)
    158      1.1  christos {
    159      1.1  christos     /* check pointer */
    160      1.1  christos     if (in == NULL)
    161      1.1  christos         return;
    162      1.1  christos 
    163      1.1  christos     /* easy case -- skip bytes in buffer */
    164      1.1  christos     if (skip <= in->left) {
    165      1.1  christos         in->left -= skip;
    166      1.1  christos         in->next += skip;
    167      1.1  christos         return;
    168      1.1  christos     }
    169      1.1  christos 
    170      1.1  christos     /* skip what's in buffer, discard buffer contents */
    171      1.1  christos     skip -= in->left;
    172      1.1  christos     in->left = 0;
    173      1.1  christos 
    174      1.1  christos     /* seek past multiples of CHUNK bytes */
    175      1.1  christos     if (skip > CHUNK) {
    176      1.1  christos         unsigned left;
    177      1.1  christos 
    178      1.1  christos         left = skip & (CHUNK - 1);
    179      1.1  christos         if (left == 0) {
    180      1.1  christos             /* exact number of chunks: seek all the way minus one byte to check
    181      1.1  christos                for end-of-file with a read */
    182      1.1  christos             lseek(in->fd, skip - 1, SEEK_CUR);
    183      1.1  christos             if (read(in->fd, in->buf, 1) != 1)
    184      1.1  christos                 bail("unexpected end of file on ", in->name);
    185      1.1  christos             return;
    186      1.1  christos         }
    187      1.1  christos 
    188      1.1  christos         /* skip the integral chunks, update skip with remainder */
    189      1.1  christos         lseek(in->fd, skip - left, SEEK_CUR);
    190      1.1  christos         skip = left;
    191      1.1  christos     }
    192      1.1  christos 
    193      1.1  christos     /* read more input and skip remainder */
    194      1.1  christos     bload(in);
    195      1.1  christos     if (skip > in->left)
    196      1.1  christos         bail("unexpected end of file on ", in->name);
    197      1.1  christos     in->left -= skip;
    198      1.1  christos     in->next += skip;
    199      1.1  christos }
    200      1.1  christos 
    201      1.1  christos /* -- end of buffered input functions -- */
    202      1.1  christos 
    203      1.1  christos /* skip the gzip header from file in */
    204      1.1  christos local void gzhead(bin *in)
    205      1.1  christos {
    206      1.1  christos     int flags;
    207      1.1  christos 
    208      1.1  christos     /* verify gzip magic header and compression method */
    209      1.1  christos     if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8)
    210      1.1  christos         bail(in->name, " is not a valid gzip file");
    211      1.1  christos 
    212      1.1  christos     /* get and verify flags */
    213      1.1  christos     flags = bget(in);
    214      1.1  christos     if ((flags & 0xe0) != 0)
    215      1.1  christos         bail("unknown reserved bits set in ", in->name);
    216      1.1  christos 
    217      1.1  christos     /* skip modification time, extra flags, and os */
    218      1.1  christos     bskip(in, 6);
    219      1.1  christos 
    220      1.1  christos     /* skip extra field if present */
    221      1.1  christos     if (flags & 4) {
    222      1.1  christos         unsigned len;
    223      1.1  christos 
    224      1.1  christos         len = bget(in);
    225      1.1  christos         len += (unsigned)(bget(in)) << 8;
    226      1.1  christos         bskip(in, len);
    227      1.1  christos     }
    228      1.1  christos 
    229      1.1  christos     /* skip file name if present */
    230      1.1  christos     if (flags & 8)
    231      1.1  christos         while (bget(in) != 0)
    232      1.1  christos             ;
    233      1.1  christos 
    234      1.1  christos     /* skip comment if present */
    235      1.1  christos     if (flags & 16)
    236      1.1  christos         while (bget(in) != 0)
    237      1.1  christos             ;
    238      1.1  christos 
    239      1.1  christos     /* skip header crc if present */
    240      1.1  christos     if (flags & 2)
    241      1.1  christos         bskip(in, 2);
    242      1.1  christos }
    243      1.1  christos 
/* Write the low 32 bits of val to out as four bytes, least significant byte
   first (little-endian).  Any higher bits of val are ignored. */
local void put4(unsigned long val, FILE *out)
{
    int n;

    for (n = 0; n < 4; n++) {
        putc((int)(val & 0xff), out);
        val >>= 8;
    }
}
    252      1.1  christos 
    253      1.1  christos /* Load up zlib stream from buffered input, bail if end of file */
    254      1.1  christos local void zpull(z_streamp strm, bin *in)
    255      1.1  christos {
    256      1.1  christos     if (in->left == 0)
    257      1.1  christos         bload(in);
    258      1.1  christos     if (in->left == 0)
    259      1.1  christos         bail("unexpected end of file on ", in->name);
    260      1.1  christos     strm->avail_in = in->left;
    261      1.1  christos     strm->next_in = in->next;
    262      1.1  christos }
    263      1.1  christos 
    264      1.1  christos /* Write header for gzip file to out and initialize trailer. */
    265      1.1  christos local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out)
    266      1.1  christos {
    267      1.1  christos     fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out);
    268      1.1  christos     *crc = crc32(0L, Z_NULL, 0);
    269      1.1  christos     *tot = 0;
    270      1.1  christos }
    271      1.1  christos 
    272      1.1  christos /* Copy the compressed data from name, zeroing the last block bit of the last
    273      1.1  christos    block if clr is true, and adding empty blocks as needed to get to a byte
    274      1.1  christos    boundary.  If clr is false, then the last block becomes the last block of
    275      1.1  christos    the output, and the gzip trailer is written.  crc and tot maintains the
    276      1.1  christos    crc and length (modulo 2^32) of the output for the trailer.  The resulting
    277      1.1  christos    gzip file is written to out.  gzinit() must be called before the first call
    278      1.1  christos    of gzcopy() to write the gzip header and to initialize crc and tot. */
    279      1.1  christos local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot,
    280      1.1  christos                   FILE *out)
    281      1.1  christos {
    282      1.1  christos     int ret;                /* return value from zlib functions */
    283      1.1  christos     int pos;                /* where the "last block" bit is in byte */
    284      1.1  christos     int last;               /* true if processing the last block */
    285      1.1  christos     bin *in;                /* buffered input file */
    286      1.1  christos     unsigned char *start;   /* start of compressed data in buffer */
    287      1.1  christos     unsigned char *junk;    /* buffer for uncompressed data -- discarded */
    288      1.1  christos     z_off_t len;            /* length of uncompressed data (support > 4 GB) */
    289      1.1  christos     z_stream strm;          /* zlib inflate stream */
    290      1.1  christos 
    291      1.1  christos     /* open gzip file and skip header */
    292      1.1  christos     in = bopen(name);
    293      1.1  christos     if (in == NULL)
    294      1.1  christos         bail("could not open ", name);
    295      1.1  christos     gzhead(in);
    296      1.1  christos 
    297      1.1  christos     /* allocate buffer for uncompressed data and initialize raw inflate
    298      1.1  christos        stream */
    299      1.1  christos     junk = malloc(CHUNK);
    300      1.1  christos     strm.zalloc = Z_NULL;
    301      1.1  christos     strm.zfree = Z_NULL;
    302      1.1  christos     strm.opaque = Z_NULL;
    303      1.1  christos     strm.avail_in = 0;
    304      1.1  christos     strm.next_in = Z_NULL;
    305      1.1  christos     ret = inflateInit2(&strm, -15);
    306      1.1  christos     if (junk == NULL || ret != Z_OK)
    307      1.1  christos         bail("out of memory", "");
    308      1.1  christos 
    309      1.1  christos     /* inflate and copy compressed data, clear last-block bit if requested */
    310      1.1  christos     len = 0;
    311      1.1  christos     zpull(&strm, in);
    312  1.1.1.2  christos     start = in->next;
    313      1.1  christos     last = start[0] & 1;
    314      1.1  christos     if (last && clr)
    315      1.1  christos         start[0] &= ~1;
    316      1.1  christos     strm.avail_out = 0;
    317      1.1  christos     for (;;) {
    318      1.1  christos         /* if input used and output done, write used input and get more */
    319      1.1  christos         if (strm.avail_in == 0 && strm.avail_out != 0) {
    320      1.1  christos             fwrite(start, 1, strm.next_in - start, out);
    321      1.1  christos             start = in->buf;
    322      1.1  christos             in->left = 0;
    323      1.1  christos             zpull(&strm, in);
    324      1.1  christos         }
    325      1.1  christos 
    326      1.1  christos         /* decompress -- return early when end-of-block reached */
    327      1.1  christos         strm.avail_out = CHUNK;
    328      1.1  christos         strm.next_out = junk;
    329      1.1  christos         ret = inflate(&strm, Z_BLOCK);
    330      1.1  christos         switch (ret) {
    331      1.1  christos         case Z_MEM_ERROR:
    332      1.1  christos             bail("out of memory", "");
    333      1.1  christos         case Z_DATA_ERROR:
    334      1.1  christos             bail("invalid compressed data in ", in->name);
    335      1.1  christos         }
    336      1.1  christos 
    337      1.1  christos         /* update length of uncompressed data */
    338      1.1  christos         len += CHUNK - strm.avail_out;
    339      1.1  christos 
    340      1.1  christos         /* check for block boundary (only get this when block copied out) */
    341      1.1  christos         if (strm.data_type & 128) {
    342      1.1  christos             /* if that was the last block, then done */
    343      1.1  christos             if (last)
    344      1.1  christos                 break;
    345      1.1  christos 
    346      1.1  christos             /* number of unused bits in last byte */
    347      1.1  christos             pos = strm.data_type & 7;
    348      1.1  christos 
    349      1.1  christos             /* find the next last-block bit */
    350      1.1  christos             if (pos != 0) {
    351      1.1  christos                 /* next last-block bit is in last used byte */
    352      1.1  christos                 pos = 0x100 >> pos;
    353      1.1  christos                 last = strm.next_in[-1] & pos;
    354      1.1  christos                 if (last && clr)
    355  1.1.1.2  christos                     in->buf[strm.next_in - in->buf - 1] &= ~pos;
    356      1.1  christos             }
    357      1.1  christos             else {
    358      1.1  christos                 /* next last-block bit is in next unused byte */
    359      1.1  christos                 if (strm.avail_in == 0) {
    360      1.1  christos                     /* don't have that byte yet -- get it */
    361      1.1  christos                     fwrite(start, 1, strm.next_in - start, out);
    362      1.1  christos                     start = in->buf;
    363      1.1  christos                     in->left = 0;
    364      1.1  christos                     zpull(&strm, in);
    365      1.1  christos                 }
    366      1.1  christos                 last = strm.next_in[0] & 1;
    367      1.1  christos                 if (last && clr)
    368  1.1.1.2  christos                     in->buf[strm.next_in - in->buf] &= ~1;
    369      1.1  christos             }
    370      1.1  christos         }
    371      1.1  christos     }
    372      1.1  christos 
    373      1.1  christos     /* update buffer with unused input */
    374      1.1  christos     in->left = strm.avail_in;
    375  1.1.1.2  christos     in->next = in->buf + (strm.next_in - in->buf);
    376      1.1  christos 
    377      1.1  christos     /* copy used input, write empty blocks to get to byte boundary */
    378      1.1  christos     pos = strm.data_type & 7;
    379      1.1  christos     fwrite(start, 1, in->next - start - 1, out);
    380      1.1  christos     last = in->next[-1];
    381      1.1  christos     if (pos == 0 || !clr)
    382      1.1  christos         /* already at byte boundary, or last file: write last byte */
    383      1.1  christos         putc(last, out);
    384      1.1  christos     else {
    385      1.1  christos         /* append empty blocks to last byte */
    386      1.1  christos         last &= ((0x100 >> pos) - 1);       /* assure unused bits are zero */
    387      1.1  christos         if (pos & 1) {
    388      1.1  christos             /* odd -- append an empty stored block */
    389      1.1  christos             putc(last, out);
    390      1.1  christos             if (pos == 1)
    391      1.1  christos                 putc(0, out);               /* two more bits in block header */
    392      1.1  christos             fwrite("\0\0\xff\xff", 1, 4, out);
    393      1.1  christos         }
    394      1.1  christos         else {
    395      1.1  christos             /* even -- append 1, 2, or 3 empty fixed blocks */
    396      1.1  christos             switch (pos) {
    397      1.1  christos             case 6:
    398      1.1  christos                 putc(last | 8, out);
    399      1.1  christos                 last = 0;
    400      1.1  christos             case 4:
    401      1.1  christos                 putc(last | 0x20, out);
    402      1.1  christos                 last = 0;
    403      1.1  christos             case 2:
    404      1.1  christos                 putc(last | 0x80, out);
    405      1.1  christos                 putc(0, out);
    406      1.1  christos             }
    407      1.1  christos         }
    408      1.1  christos     }
    409      1.1  christos 
    410      1.1  christos     /* update crc and tot */
    411      1.1  christos     *crc = crc32_combine(*crc, bget4(in), len);
    412      1.1  christos     *tot += (unsigned long)len;
    413      1.1  christos 
    414      1.1  christos     /* clean up */
    415      1.1  christos     inflateEnd(&strm);
    416      1.1  christos     free(junk);
    417      1.1  christos     bclose(in);
    418      1.1  christos 
    419      1.1  christos     /* write trailer if this is the last gzip file */
    420      1.1  christos     if (!clr) {
    421      1.1  christos         put4(*crc, out);
    422      1.1  christos         put4(*tot, out);
    423      1.1  christos     }
    424      1.1  christos }
    425      1.1  christos 
/* Join the gzip files named on the command line and write the result to
   stdout.  With no file arguments a usage message is printed on stderr and
   the exit status is 0.  Any error while joining, including a failure to
   write the output, ends the program through bail() with exit status 1. */
int main(int argc, char **argv)
{
    unsigned long crc, tot;     /* running crc and total uncompressed length */

    /* skip command name */
    argc--;
    argv++;

    /* show usage if no arguments (argc is negative here if the program was
       started with an empty argument vector) */
    if (argc <= 0) {
        fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n",
              stderr);
        return 0;
    }

    /* join gzip files on command line and write to stdout -- argc counts the
       files still to come, so it is zero (clr false) only for the last one */
    gzinit(&crc, &tot, stdout);
    while (argc--)
        gzcopy(*argv++, argc, &crc, &tot, stdout);

    /* push out buffered output now so that a write failure is reported */
    if (fflush(stdout) != 0 || ferror(stdout))
        bail("write error on ", "stdout");

    /* done */
    return 0;
}
    450