Home | History | Annotate | Line # | Download | only in zlib
      1 /* gzread.c -- zlib functions for reading gzip files
      2  * Copyright (C) 2004-2017 Mark Adler
      3  * For conditions of distribution and use, see copyright notice in zlib.h
      4  */
      5 
      6 #include "gzguts.h"
      7 
      8 /* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
      9    state->fd, and update state->eof, state->err, and state->msg as appropriate.
     10    This function needs to loop on read(), since read() is not guaranteed to
     11    read the number of bytes requested, depending on the type of descriptor. */
     12 local int gz_load(gz_statep state, unsigned char *buf, unsigned len,
     13                   unsigned *have) {
     14     int ret;
     15     unsigned get, max = ((unsigned)-1 >> 2) + 1;
     16 
     17     *have = 0;
     18     do {
     19         get = len - *have;
     20         if (get > max)
     21             get = max;
     22         ret = read(state->fd, buf + *have, get);
     23         if (ret <= 0)
     24             break;
     25         *have += (unsigned)ret;
     26     } while (*have < len);
     27     if (ret < 0) {
     28         gz_error(state, Z_ERRNO, zstrerror());
     29         return -1;
     30     }
     31     if (ret == 0)
     32         state->eof = 1;
     33     return 0;
     34 }
     35 
     36 /* Load up input buffer and set eof flag if last data loaded -- return -1 on
     37    error, 0 otherwise.  Note that the eof flag is set when the end of the input
     38    file is reached, even though there may be unused data in the buffer.  Once
     39    that data has been used, no more attempts will be made to read the file.
     40    If strm->avail_in != 0, then the current data is moved to the beginning of
     41    the input buffer, and then the remainder of the buffer is loaded with the
     42    available data from the input file. */
     43 local int gz_avail(gz_statep state) {
     44     unsigned got;
     45     z_streamp strm = &(state->strm);
     46 
     47     if (state->err != Z_OK && state->err != Z_BUF_ERROR)
     48         return -1;
     49     if (state->eof == 0) {
     50         if (strm->avail_in) {       /* copy what's there to the start */
     51             unsigned char *p = state->in;
     52             unsigned const char *q = strm->next_in;
     53             unsigned n = strm->avail_in;
     54             do {
     55                 *p++ = *q++;
     56             } while (--n);
     57         }
     58         if (gz_load(state, state->in + strm->avail_in,
     59                     state->size - strm->avail_in, &got) == -1)
     60             return -1;
     61         strm->avail_in += got;
     62         strm->next_in = state->in;
     63     }
     64     return 0;
     65 }
     66 
     67 /* Look for gzip header, set up for inflate or copy.  state->x.have must be 0.
     68    If this is the first time in, allocate required memory.  state->how will be
     69    left unchanged if there is no more input data available, will be set to COPY
     70    if there is no gzip header and direct copying will be performed, or it will
     71    be set to GZIP for decompression.  If direct copying, then leftover input
     72    data from the input buffer will be copied to the output buffer.  In that
     73    case, all further file reads will be directly to either the output buffer or
     74    a user buffer.  If decompressing, the inflate state will be initialized.
     75    gz_look() will return 0 on success or -1 on failure. */
     76 local int gz_look(gz_statep state) {
     77     z_streamp strm = &(state->strm);
     78 
     79     /* allocate read buffers and inflate memory */
     80     if (state->size == 0) {
     81         /* allocate buffers */
     82         state->in = (unsigned char *)malloc(state->want);
     83         state->out = (unsigned char *)malloc(state->want << 1);
     84         if (state->in == NULL || state->out == NULL) {
     85             free(state->out);
     86             free(state->in);
     87             gz_error(state, Z_MEM_ERROR, "out of memory");
     88             return -1;
     89         }
     90         state->size = state->want;
     91 
     92         /* allocate inflate memory */
     93         state->strm.zalloc = Z_NULL;
     94         state->strm.zfree = Z_NULL;
     95         state->strm.opaque = Z_NULL;
     96         state->strm.avail_in = 0;
     97         state->strm.next_in = Z_NULL;
     98         if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) {    /* gunzip */
     99             free(state->out);
    100             free(state->in);
    101             state->size = 0;
    102             gz_error(state, Z_MEM_ERROR, "out of memory");
    103             return -1;
    104         }
    105     }
    106 
    107     /* get at least the magic bytes in the input buffer */
    108     if (strm->avail_in < 2) {
    109         if (gz_avail(state) == -1)
    110             return -1;
    111         if (strm->avail_in == 0)
    112             return 0;
    113     }
    114 
    115     /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
    116        a logical dilemma here when considering the case of a partially written
    117        gzip file, to wit, if a single 31 byte is written, then we cannot tell
    118        whether this is a single-byte file, or just a partially written gzip
    119        file -- for here we assume that if a gzip file is being written, then
    120        the header will be written in a single operation, so that reading a
    121        single byte is sufficient indication that it is not a gzip file) */
    122     if (strm->avail_in > 1 &&
    123             strm->next_in[0] == 31 && strm->next_in[1] == 139) {
    124         inflateReset(strm);
    125         state->how = GZIP;
    126         state->direct = 0;
    127         return 0;
    128     }
    129 
    130     /* no gzip header -- if we were decoding gzip before, then this is trailing
    131        garbage.  Ignore the trailing garbage and finish. */
    132     if (state->direct == 0) {
    133         strm->avail_in = 0;
    134         state->eof = 1;
    135         state->x.have = 0;
    136         return 0;
    137     }
    138 
    139     /* doing raw i/o, copy any leftover input to output -- this assumes that
    140        the output buffer is larger than the input buffer, which also assures
    141        space for gzungetc() */
    142     state->x.next = state->out;
    143     memcpy(state->x.next, strm->next_in, strm->avail_in);
    144     state->x.have = strm->avail_in;
    145     strm->avail_in = 0;
    146     state->how = COPY;
    147     state->direct = 1;
    148     return 0;
    149 }
    150 
    151 /* Decompress from input to the provided next_out and avail_out in the state.
    152    On return, state->x.have and state->x.next point to the just decompressed
    153    data.  If the gzip stream completes, state->how is reset to LOOK to look for
    154    the next gzip stream or raw data, once state->x.have is depleted.  Returns 0
    155    on success, -1 on failure. */
    156 local int gz_decomp(gz_statep state) {
    157     int ret = Z_OK;
    158     unsigned had;
    159     z_streamp strm = &(state->strm);
    160 
    161     /* fill output buffer up to end of deflate stream */
    162     had = strm->avail_out;
    163     do {
    164         /* get more input for inflate() */
    165         if (strm->avail_in == 0 && gz_avail(state) == -1)
    166             return -1;
    167         if (strm->avail_in == 0) {
    168             gz_error(state, Z_BUF_ERROR, "unexpected end of file");
    169             break;
    170         }
    171 
    172         /* decompress and handle errors */
    173         ret = inflate(strm, Z_NO_FLUSH);
    174         if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
    175             gz_error(state, Z_STREAM_ERROR,
    176                      "internal error: inflate stream corrupt");
    177             return -1;
    178         }
    179         if (ret == Z_MEM_ERROR) {
    180             gz_error(state, Z_MEM_ERROR, "out of memory");
    181             return -1;
    182         }
    183         if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
    184             gz_error(state, Z_DATA_ERROR,
    185                      strm->msg == NULL ? "compressed data error" : strm->msg);
    186             return -1;
    187         }
    188     } while (strm->avail_out && ret != Z_STREAM_END);
    189 
    190     /* update available output */
    191     state->x.have = had - strm->avail_out;
    192     state->x.next = strm->next_out - state->x.have;
    193 
    194     /* if the gzip stream completed successfully, look for another */
    195     if (ret == Z_STREAM_END)
    196         state->how = LOOK;
    197 
    198     /* good decompression */
    199     return 0;
    200 }
    201 
    202 /* Fetch data and put it in the output buffer.  Assumes state->x.have is 0.
    203    Data is either copied from the input file or decompressed from the input
    204    file depending on state->how.  If state->how is LOOK, then a gzip header is
    205    looked for to determine whether to copy or decompress.  Returns -1 on error,
    206    otherwise 0.  gz_fetch() will leave state->how as COPY or GZIP unless the
    207    end of the input file has been reached and all data has been processed.  */
    208 local int gz_fetch(gz_statep state) {
    209     z_streamp strm = &(state->strm);
    210 
    211     do {
    212         switch(state->how) {
    213         case LOOK:      /* -> LOOK, COPY (only if never GZIP), or GZIP */
    214             if (gz_look(state) == -1)
    215                 return -1;
    216             if (state->how == LOOK)
    217                 return 0;
    218             break;
    219         case COPY:      /* -> COPY */
    220             if (gz_load(state, state->out, state->size << 1, &(state->x.have))
    221                     == -1)
    222                 return -1;
    223             state->x.next = state->out;
    224             return 0;
    225         case GZIP:      /* -> GZIP or LOOK (if end of gzip stream) */
    226             strm->avail_out = state->size << 1;
    227             strm->next_out = state->out;
    228             if (gz_decomp(state) == -1)
    229                 return -1;
    230         }
    231     } while (state->x.have == 0 && (!state->eof || strm->avail_in));
    232     return 0;
    233 }
    234 
    235 /* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
    236 local int gz_skip(gz_statep state, z_off64_t len) {
    237     unsigned n;
    238 
    239     /* skip over len bytes or reach end-of-file, whichever comes first */
    240     while (len)
    241         /* skip over whatever is in output buffer */
    242         if (state->x.have) {
    243             n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
    244                 (unsigned)len : state->x.have;
    245             state->x.have -= n;
    246             state->x.next += n;
    247             state->x.pos += n;
    248             len -= n;
    249         }
    250 
    251         /* output buffer empty -- return if we're at the end of the input */
    252         else if (state->eof && state->strm.avail_in == 0)
    253             break;
    254 
    255         /* need more data to skip -- load up output buffer */
    256         else {
    257             /* get more output, looking for header if required */
    258             if (gz_fetch(state) == -1)
    259                 return -1;
    260         }
    261     return 0;
    262 }
    263 
    264 /* Read len bytes into buf from file, or less than len up to the end of the
    265    input.  Return the number of bytes read.  If zero is returned, either the
    266    end of file was reached, or there was an error.  state->err must be
    267    consulted in that case to determine which. */
    268 local z_size_t gz_read(gz_statep state, voidp buf, z_size_t len) {
    269     z_size_t got;
    270     unsigned n;
    271 
    272     /* if len is zero, avoid unnecessary operations */
    273     if (len == 0)
    274         return 0;
    275 
    276     /* process a skip request */
    277     if (state->seek) {
    278         state->seek = 0;
    279         if (gz_skip(state, state->skip) == -1)
    280             return 0;
    281     }
    282 
    283     /* get len bytes to buf, or less than len if at the end */
    284     got = 0;
    285     do {
    286         /* set n to the maximum amount of len that fits in an unsigned int */
    287         n = (unsigned)-1;
    288         if (n > len)
    289             n = (unsigned)len;
    290 
    291         /* first just try copying data from the output buffer */
    292         if (state->x.have) {
    293             if (state->x.have < n)
    294                 n = state->x.have;
    295             memcpy(buf, state->x.next, n);
    296             state->x.next += n;
    297             state->x.have -= n;
    298         }
    299 
    300         /* output buffer empty -- return if we're at the end of the input */
    301         else if (state->eof && state->strm.avail_in == 0) {
    302             state->past = 1;        /* tried to read past end */
    303             break;
    304         }
    305 
    306         /* need output data -- for small len or new stream load up our output
    307            buffer */
    308         else if (state->how == LOOK || n < (state->size << 1)) {
    309             /* get more output, looking for header if required */
    310             if (gz_fetch(state) == -1)
    311                 return 0;
    312             continue;       /* no progress yet -- go back to copy above */
    313             /* the copy above assures that we will leave with space in the
    314                output buffer, allowing at least one gzungetc() to succeed */
    315         }
    316 
    317         /* large len -- read directly into user buffer */
    318         else if (state->how == COPY) {      /* read directly */
    319             if (gz_load(state, (unsigned char *)buf, n, &n) == -1)
    320                 return 0;
    321         }
    322 
    323         /* large len -- decompress directly into user buffer */
    324         else {  /* state->how == GZIP */
    325             state->strm.avail_out = n;
    326             state->strm.next_out = (unsigned char *)buf;
    327             if (gz_decomp(state) == -1)
    328                 return 0;
    329             n = state->x.have;
    330             state->x.have = 0;
    331         }
    332 
    333         /* update progress */
    334         len -= n;
    335         buf = (char *)buf + n;
    336         got += n;
    337         state->x.pos += n;
    338     } while (len);
    339 
    340     /* return number of bytes read into user buffer */
    341     return got;
    342 }
    343 
    344 /* -- see zlib.h -- */
    345 int ZEXPORT gzread(gzFile file, voidp buf, unsigned len) {
    346     gz_statep state;
    347 
    348     /* get internal structure */
    349     if (file == NULL)
    350         return -1;
    351     state = (gz_statep)file;
    352 
    353     /* check that we're reading and that there's no (serious) error */
    354     if (state->mode != GZ_READ ||
    355             (state->err != Z_OK && state->err != Z_BUF_ERROR))
    356         return -1;
    357 
    358     /* since an int is returned, make sure len fits in one, otherwise return
    359        with an error (this avoids a flaw in the interface) */
    360     if ((int)len < 0) {
    361         gz_error(state, Z_STREAM_ERROR, "request does not fit in an int");
    362         return -1;
    363     }
    364 
    365     /* read len or fewer bytes to buf */
    366     len = (unsigned)gz_read(state, buf, len);
    367 
    368     /* check for an error */
    369     if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR)
    370         return -1;
    371 
    372     /* return the number of bytes read (this is assured to fit in an int) */
    373     return (int)len;
    374 }
    375 
    376 /* -- see zlib.h -- */
    377 z_size_t ZEXPORT gzfread(voidp buf, z_size_t size, z_size_t nitems, gzFile file) {
    378     z_size_t len;
    379     gz_statep state;
    380 
    381     /* get internal structure */
    382     if (file == NULL)
    383         return 0;
    384     state = (gz_statep)file;
    385 
    386     /* check that we're reading and that there's no (serious) error */
    387     if (state->mode != GZ_READ ||
    388             (state->err != Z_OK && state->err != Z_BUF_ERROR))
    389         return 0;
    390 
    391     /* compute bytes to read -- error on overflow */
    392     len = nitems * size;
    393     if (size && len / size != nitems) {
    394         gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
    395         return 0;
    396     }
    397 
    398     /* read len or fewer bytes to buf, return the number of full items read */
    399     return len ? gz_read(state, buf, len) / size : 0;
    400 }
    401 
    402 /* -- see zlib.h -- */
    403 #ifdef Z_PREFIX_SET
    404 #  undef z_gzgetc
    405 #else
    406 #  undef gzgetc
    407 #endif
    408 int ZEXPORT gzgetc(gzFile file) {
    409     unsigned char buf[1];
    410     gz_statep state;
    411 
    412     /* get internal structure */
    413     if (file == NULL)
    414         return -1;
    415     state = (gz_statep)file;
    416 
    417     /* check that we're reading and that there's no (serious) error */
    418     if (state->mode != GZ_READ ||
    419         (state->err != Z_OK && state->err != Z_BUF_ERROR))
    420         return -1;
    421 
    422     /* try output buffer (no need to check for skip request) */
    423     if (state->x.have) {
    424         state->x.have--;
    425         state->x.pos++;
    426         return *(state->x.next)++;
    427     }
    428 
    429     /* nothing there -- try gz_read() */
    430     return gz_read(state, buf, 1) < 1 ? -1 : buf[0];
    431 }
    432 
/* Call gzgetc() on file and return its result unchanged.
   NOTE(review): presumably kept as a separately named entry point for callers
   built against a macro form of gzgetc -- confirm against zlib.h. */
int ZEXPORT gzgetc_(gzFile file) {
    return gzgetc(file);
}
    436 
    437 /* -- see zlib.h -- */
    438 int ZEXPORT gzungetc(int c, gzFile file) {
    439     gz_statep state;
    440 
    441     /* get internal structure */
    442     if (file == NULL)
    443         return -1;
    444     state = (gz_statep)file;
    445 
    446     /* in case this was just opened, set up the input buffer */
    447     if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
    448         (void)gz_look(state);
    449 
    450     /* check that we're reading and that there's no (serious) error */
    451     if (state->mode != GZ_READ ||
    452         (state->err != Z_OK && state->err != Z_BUF_ERROR))
    453         return -1;
    454 
    455     /* process a skip request */
    456     if (state->seek) {
    457         state->seek = 0;
    458         if (gz_skip(state, state->skip) == -1)
    459             return -1;
    460     }
    461 
    462     /* can't push EOF */
    463     if (c < 0)
    464         return -1;
    465 
    466     /* if output buffer empty, put byte at end (allows more pushing) */
    467     if (state->x.have == 0) {
    468         state->x.have = 1;
    469         state->x.next = state->out + (state->size << 1) - 1;
    470         state->x.next[0] = (unsigned char)c;
    471         state->x.pos--;
    472         state->past = 0;
    473         return c;
    474     }
    475 
    476     /* if no room, give up (must have already done a gzungetc()) */
    477     if (state->x.have == (state->size << 1)) {
    478         gz_error(state, Z_DATA_ERROR, "out of room to push characters");
    479         return -1;
    480     }
    481 
    482     /* slide output data if needed and insert byte before existing data */
    483     if (state->x.next == state->out) {
    484         unsigned char *src = state->out + state->x.have;
    485         unsigned char *dest = state->out + (state->size << 1);
    486         while (src > state->out)
    487             *--dest = *--src;
    488         state->x.next = dest;
    489     }
    490     state->x.have++;
    491     state->x.next--;
    492     state->x.next[0] = (unsigned char)c;
    493     state->x.pos--;
    494     state->past = 0;
    495     return c;
    496 }
    497 
    498 /* -- see zlib.h -- */
    499 char * ZEXPORT gzgets(gzFile file, char *buf, int len) {
    500     unsigned left, n;
    501     char *str;
    502     unsigned char *eol;
    503     gz_statep state;
    504 
    505     /* check parameters and get internal structure */
    506     if (file == NULL || buf == NULL || len < 1)
    507         return NULL;
    508     state = (gz_statep)file;
    509 
    510     /* check that we're reading and that there's no (serious) error */
    511     if (state->mode != GZ_READ ||
    512         (state->err != Z_OK && state->err != Z_BUF_ERROR))
    513         return NULL;
    514 
    515     /* process a skip request */
    516     if (state->seek) {
    517         state->seek = 0;
    518         if (gz_skip(state, state->skip) == -1)
    519             return NULL;
    520     }
    521 
    522     /* copy output bytes up to new line or len - 1, whichever comes first --
    523        append a terminating zero to the string (we don't check for a zero in
    524        the contents, let the user worry about that) */
    525     str = buf;
    526     left = (unsigned)len - 1;
    527     if (left) do {
    528         /* assure that something is in the output buffer */
    529         if (state->x.have == 0 && gz_fetch(state) == -1)
    530             return NULL;                /* error */
    531         if (state->x.have == 0) {       /* end of file */
    532             state->past = 1;            /* read past end */
    533             break;                      /* return what we have */
    534         }
    535 
    536         /* look for end-of-line in current output buffer */
    537         n = state->x.have > left ? left : state->x.have;
    538         eol = (unsigned char *)memchr(state->x.next, '\n', n);
    539         if (eol != NULL)
    540             n = (unsigned)(eol - state->x.next) + 1;
    541 
    542         /* copy through end-of-line, or remainder if not found */
    543         memcpy(buf, state->x.next, n);
    544         state->x.have -= n;
    545         state->x.next += n;
    546         state->x.pos += n;
    547         left -= n;
    548         buf += n;
    549     } while (left && eol == NULL);
    550 
    551     /* return terminated string, or if nothing, end of file */
    552     if (buf == str)
    553         return NULL;
    554     buf[0] = 0;
    555     return str;
    556 }
    557 
    558 /* -- see zlib.h -- */
    559 int ZEXPORT gzdirect(gzFile file) {
    560     gz_statep state;
    561 
    562     /* get internal structure */
    563     if (file == NULL)
    564         return 0;
    565     state = (gz_statep)file;
    566 
    567     /* if the state is not known, but we can find out, then do so (this is
    568        mainly for right after a gzopen() or gzdopen()) */
    569     if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
    570         (void)gz_look(state);
    571 
    572     /* return 1 if transparent, 0 if processing a gzip stream */
    573     return state->direct;
    574 }
    575 
    576 /* -- see zlib.h -- */
    577 int ZEXPORT gzclose_r(gzFile file) {
    578     int ret, err;
    579     gz_statep state;
    580 
    581     /* get internal structure */
    582     if (file == NULL)
    583         return Z_STREAM_ERROR;
    584     state = (gz_statep)file;
    585 
    586     /* check that we're reading */
    587     if (state->mode != GZ_READ)
    588         return Z_STREAM_ERROR;
    589 
    590     /* free memory and close file */
    591     if (state->size) {
    592         inflateEnd(&(state->strm));
    593         free(state->out);
    594         free(state->in);
    595     }
    596     err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
    597     gz_error(state, Z_OK, NULL);
    598     free(state->path);
    599     ret = close(state->fd);
    600     free(state);
    601     return ret ? Z_ERRNO : err;
    602 }
    603