Home | History | Annotate | Line # | Download | only in examples
gzlog.c revision 1.1
      1 /*	$NetBSD: gzlog.c,v 1.1 2006/01/14 20:11:09 christos Exp $	*/
      2 
      3 /*
      4  * gzlog.c
      5  * Copyright (C) 2004 Mark Adler
      6  * For conditions of distribution and use, see copyright notice in gzlog.h
      7  * version 1.0, 26 Nov 2004
      8  *
      9  */
     10 
     11 #include <string.h>             /* memcmp() */
     12 #include <stdlib.h>             /* malloc(), free(), NULL */
     13 #include <sys/types.h>          /* size_t, off_t */
     14 #include <unistd.h>             /* read(), close(), sleep(), ftruncate(), */
     15                                 /* lseek() */
     16 #include <fcntl.h>              /* open() */
     17 #include <sys/file.h>           /* flock() */
     18 #include "zlib.h"               /* deflateInit2(), deflate(), deflateEnd() */
     19 
     20 #include "gzlog.h"              /* interface */
     21 #define local static
     22 
/*
 * State kept for one open, locked log file.  An instance is allocated by
 * gzlog_open() and handed back to the caller as an opaque void pointer.
 */
typedef struct {
    /* tag compared against GZLOGID before a caller's pointer is trusted */
    int id;
    /* descriptor of the open log file */
    int fd;
    /* file offset of the 16 data bytes of the "ap" extra subfield */
    off_t extra;
    /* file offset of the marked data (where the stored blocks begin) */
    off_t mark_off;
    /* file offset of the header of the last block */
    off_t last_off;
    /* running crc of the uncompressed data */
    unsigned long crc;
    /* running uncompressed length, kept modulo 2^32 */
    unsigned long len;
    /* number of data bytes already written into the current stored block */
    unsigned stored;
} gz_log;

/* value placed in gz_log.id to identify a gz_log object */
#define GZLOGID 19334
     36 
/* seconds to sleep between attempts to take the lock */
#define LOCK_RETRY 1
/* total seconds to keep trying (about twenty minutes) before giving up */
#define LOCK_PATIENCE 1200

/*
 * Acquire an exclusive flock() lock on fd without blocking inside flock().
 * The lock is attempted, and on failure the caller sleeps LOCK_RETRY seconds
 * and tries again until LOCK_PATIENCE seconds have been used up.
 *
 * Returns 0 once the lock is held, or -1 if patience ran out.
 */
local int lock(int fd)
{
    int remaining;

    for (remaining = LOCK_PATIENCE; remaining > 0; remaining -= LOCK_RETRY) {
        if (flock(fd, LOCK_EX | LOCK_NB) == 0)
            return 0;
        (void)sleep(LOCK_RETRY);
    }

    /* never got the lock in the allotted time */
    return -1;
}
     57 
/*
 * Release whatever flock() lock is held on fd.  The result of the unlock
 * request is deliberately discarded.
 */
local void unlock(int fd)
{
    int ignored;

    ignored = flock(fd, LOCK_UN);
    (void)ignored;
}
     63 
     64 /* release a log object */
     65 local void log_clean(gz_log *log)
     66 {
     67     unlock(log->fd);
     68     (void)close(log->fd);
     69     free(log);
     70 }
     71 
/*
 * Assemble a 32-bit value from the four bytes at buf, least significant byte
 * first, and return it as an unsigned long.
 */
local unsigned long make_ulg(unsigned char *buf)
{
    unsigned long val = 0;
    int k;

    /* fold in the bytes from most to least significant */
    for (k = 3; k >= 0; k--)
        val = (val << 8) | buf[k];
    return val;
}
     83 
/*
 * Assemble a 64-bit file offset from the eight bytes at buf, least
 * significant byte first, and return it as an off_t.
 *
 * The value is built in an unsigned 64-bit accumulator and converted to
 * off_t only at the end.  Shifting a signed off_t directly would be undefined
 * behaviour when a set bit reaches the sign bit (top byte >= 0x80) or when
 * off_t is narrower than 64 bits.
 */
local off_t make_off(unsigned char *buf)
{
    unsigned long long val = 0;
    int n;

    /* fold in the bytes from most to least significant */
    for (n = 7; n >= 0; n--)
        val = (val << 8) | buf[n];
    return (off_t)val;
}
     95 
/*
 * Store the low 32 bits of val into the four bytes at buf, least significant
 * byte first.
 */
local void dice_ulg(unsigned long val, unsigned char *buf)
{
    int shift;

    for (shift = 0; shift < 32; shift += 8)
        *buf++ = (val >> shift) & 0xff;
}
    106 
/*
 * Store the file offset val into the eight bytes at buf, least significant
 * byte first.
 *
 * The offset is converted to an unsigned 64-bit value before being sliced so
 * that the shifts and masks are fully defined; right-shifting a negative
 * signed value is implementation-defined.
 */
local void dice_off(off_t val, unsigned char *buf)
{
    unsigned long long bits = (unsigned long long)val;
    int n;

    for (n = 0; n < 8; n++) {
        buf[n] = (unsigned char)(bits & 0xff);
        bits >>= 8;
    }
}
    117 
/*
 * Image of an initial, empty gzip file that can be appended to.  It consists
 * of a gzip header carrying an "ap" extra subfield (two 8-byte little-endian
 * offsets), a single empty final stored block, and the gzip trailer.
 *
 * The bytes are unsigned so that values such as 0x8b and 0xff are represented
 * exactly regardless of the signedness of plain char, and const because the
 * table is only ever written out, never modified.
 */
local const unsigned char empty_gz[] = {
    0x1f, 0x8b,                 /* magic gzip id */
    8,                          /* compression method is deflate */
    4,                          /* there is an extra field */
    0, 0, 0, 0,                 /* no modification time provided */
    0, 0xff,                    /* no extra flags, no OS */
    20, 0, 'a', 'p', 16, 0,     /* extra field with "ap" subfield */
    32, 0, 0, 0, 0, 0, 0, 0,    /* offset of uncompressed data */
    32, 0, 0, 0, 0, 0, 0, 0,    /* offset of last block */
    1, 0, 0, 0xff, 0xff,        /* empty stored block (last) */
    0, 0, 0, 0,                 /* crc */
    0, 0, 0, 0                  /* uncompressed length */
};
    132 
    133 /* initialize a log object with locking */
    134 void *gzlog_open(char *path)
    135 {
    136     unsigned xlen;
    137     unsigned char temp[20];
    138     unsigned sub_len;
    139     int good;
    140     gz_log *log;
    141 
    142     /* allocate log structure */
    143     log = malloc(sizeof(gz_log));
    144     if (log == NULL)
    145         return NULL;
    146     log->id = GZLOGID;
    147 
    148     /* open file, creating it if necessary, and locking it */
    149     log->fd = open(path, O_RDWR | O_CREAT, 0600);
    150     if (log->fd < 0) {
    151         free(log);
    152         return NULL;
    153     }
    154     if (lock(log->fd)) {
    155         close(log->fd);
    156         free(log);
    157         return NULL;
    158     }
    159 
    160     /* if file is empty, write new gzip stream */
    161     if (lseek(log->fd, 0, SEEK_END) == 0) {
    162         if (write(log->fd, empty_gz, sizeof(empty_gz)) != sizeof(empty_gz)) {
    163             log_clean(log);
    164             return NULL;
    165         }
    166     }
    167 
    168     /* check gzip header */
    169     (void)lseek(log->fd, 0, SEEK_SET);
    170     if (read(log->fd, temp, 12) != 12 || temp[0] != 0x1f ||
    171         temp[1] != 0x8b || temp[2] != 8 || (temp[3] & 4) == 0) {
    172         log_clean(log);
    173         return NULL;
    174     }
    175 
    176     /* process extra field to find "ap" sub-field */
    177     xlen = temp[10] + (temp[11] << 8);
    178     good = 0;
    179     while (xlen) {
    180         if (xlen < 4 || read(log->fd, temp, 4) != 4)
    181             break;
    182         sub_len = temp[2];
    183         sub_len += temp[3] << 8;
    184         xlen -= 4;
    185         if (memcmp(temp, "ap", 2) == 0 && sub_len == 16) {
    186             good = 1;
    187             break;
    188         }
    189         if (xlen < sub_len)
    190             break;
    191         (void)lseek(log->fd, sub_len, SEEK_CUR);
    192         xlen -= sub_len;
    193     }
    194     if (!good) {
    195         log_clean(log);
    196         return NULL;
    197     }
    198 
    199     /* read in "ap" sub-field */
    200     log->extra = lseek(log->fd, 0, SEEK_CUR);
    201     if (read(log->fd, temp, 16) != 16) {
    202         log_clean(log);
    203         return NULL;
    204     }
    205     log->mark_off = make_off(temp);
    206     log->last_off = make_off(temp + 8);
    207 
    208     /* get crc, length of gzip file */
    209     (void)lseek(log->fd, log->last_off, SEEK_SET);
    210     if (read(log->fd, temp, 13) != 13 ||
    211         memcmp(temp, "\001\000\000\377\377", 5) != 0) {
    212         log_clean(log);
    213         return NULL;
    214     }
    215     log->crc = make_ulg(temp + 5);
    216     log->len = make_ulg(temp + 9);
    217 
    218     /* set up to write over empty last block */
    219     (void)lseek(log->fd, log->last_off + 5, SEEK_SET);
    220     log->stored = 0;
    221     return (void *)log;
    222 }
    223 
    224 /* maximum amount to put in a stored block before starting a new one */
    225 #define MAX_BLOCK 16384
    226 
    227 /* write a block to a log object */
    228 int gzlog_write(void *obj, char *data, size_t len)
    229 {
    230     size_t some;
    231     unsigned char temp[5];
    232     gz_log *log;
    233 
    234     /* check object */
    235     log = (gz_log *)obj;
    236     if (log == NULL || log->id != GZLOGID)
    237         return 1;
    238 
    239     /* write stored blocks until all of the input is written */
    240     do {
    241         some = MAX_BLOCK - log->stored;
    242         if (some > len)
    243             some = len;
    244         if (write(log->fd, data, some) != some)
    245             return 1;
    246         log->crc = crc32(log->crc, data, some);
    247         log->len += some;
    248         len -= some;
    249         data += some;
    250         log->stored += some;
    251 
    252         /* if the stored block is full, end it and start another */
    253         if (log->stored == MAX_BLOCK) {
    254             (void)lseek(log->fd, log->last_off, SEEK_SET);
    255             temp[0] = 0;
    256             dice_ulg(log->stored + ((unsigned long)(~log->stored) << 16),
    257                      temp + 1);
    258             if (write(log->fd, temp, 5) != 5)
    259                 return 1;
    260             log->last_off = lseek(log->fd, log->stored, SEEK_CUR);
    261             (void)lseek(log->fd, 5, SEEK_CUR);
    262             log->stored = 0;
    263         }
    264     } while (len);
    265     return 0;
    266 }
    267 
    268 /* recompress the remaining stored deflate data in place */
    269 local int recomp(gz_log *log)
    270 {
    271     z_stream strm;
    272     size_t len, max;
    273     unsigned char *in;
    274     unsigned char *out;
    275     unsigned char temp[16];
    276 
    277     /* allocate space and read it all in (it's around 1 MB) */
    278     len = log->last_off - log->mark_off;
    279     max = len + (len >> 12) + (len >> 14) + 11;
    280     out = malloc(max);
    281     if (out == NULL)
    282         return 1;
    283     in = malloc(len);
    284     if (in == NULL) {
    285         free(out);
    286         return 1;
    287     }
    288     (void)lseek(log->fd, log->mark_off, SEEK_SET);
    289     if (read(log->fd, in, len) != len) {
    290         free(in);
    291         free(out);
    292         return 1;
    293     }
    294 
    295     /* recompress in memory, decoding stored data as we go */
    296     /* note: this assumes that unsigned is four bytes or more */
    297     /*       consider not making that assumption */
    298     strm.zalloc = Z_NULL;
    299     strm.zfree = Z_NULL;
    300     strm.opaque = Z_NULL;
    301     if (deflateInit2(&strm, Z_BEST_COMPRESSION, Z_DEFLATED, -15, 8,
    302         Z_DEFAULT_STRATEGY) != Z_OK) {
    303         free(in);
    304         free(out);
    305         return 1;
    306     }
    307     strm.next_in = in;
    308     strm.avail_out = max;
    309     strm.next_out = out;
    310     while (len >= 5) {
    311         if (strm.next_in[0] != 0)
    312             break;
    313         strm.avail_in = strm.next_in[1] + (strm.next_in[2] << 8);
    314         strm.next_in += 5;
    315         len -= 5;
    316         if (strm.avail_in != 0) {
    317             if (len < strm.avail_in)
    318                 break;
    319             len -= strm.avail_in;
    320             (void)deflate(&strm, Z_NO_FLUSH);
    321             if (strm.avail_in != 0 || strm.avail_out == 0)
    322                 break;
    323         }
    324     }
    325     (void)deflate(&strm, Z_SYNC_FLUSH);
    326     (void)deflateEnd(&strm);
    327     free(in);
    328     if (len != 0 || strm.avail_out == 0) {
    329         free(out);
    330         return 1;
    331     }
    332 
    333     /* overwrite stored data with compressed data */
    334     (void)lseek(log->fd, log->mark_off, SEEK_SET);
    335     len = max - strm.avail_out;
    336     if (write(log->fd, out, len) != len) {
    337         free(out);
    338         return 1;
    339     }
    340     free(out);
    341 
    342     /* write last empty block, crc, and length */
    343     log->mark_off = log->last_off = lseek(log->fd, 0, SEEK_CUR);
    344     temp[0] = 1;
    345     dice_ulg(0xffffL << 16, temp + 1);
    346     dice_ulg(log->crc, temp + 5);
    347     dice_ulg(log->len, temp + 9);
    348     if (write(log->fd, temp, 13) != 13)
    349         return 1;
    350 
    351     /* truncate file to discard remaining stored data and old trailer */
    352     ftruncate(log->fd, lseek(log->fd, 0, SEEK_CUR));
    353 
    354     /* update extra field to point to new last empty block */
    355     (void)lseek(log->fd, log->extra, SEEK_SET);
    356     dice_off(log->mark_off, temp);
    357     dice_off(log->last_off, temp + 8);
    358     if (write(log->fd, temp, 16) != 16)
    359         return 1;
    360     return 0;
    361 }
    362 
    363 /* maximum accumulation of stored blocks before compressing */
    364 #define MAX_STORED 1048576
    365 
    366 /* close log object */
    367 int gzlog_close(void *obj)
    368 {
    369     unsigned char temp[8];
    370     gz_log *log;
    371 
    372     /* check object */
    373     log = (gz_log *)obj;
    374     if (log == NULL || log->id != GZLOGID)
    375         return 1;
    376 
    377     /* go to start of most recent block being written */
    378     (void)lseek(log->fd, log->last_off, SEEK_SET);
    379 
    380     /* if some stuff was put there, update block */
    381     if (log->stored) {
    382         temp[0] = 0;
    383         dice_ulg(log->stored + ((unsigned long)(~log->stored) << 16),
    384                  temp + 1);
    385         if (write(log->fd, temp, 5) != 5)
    386             return 1;
    387         log->last_off = lseek(log->fd, log->stored, SEEK_CUR);
    388     }
    389 
    390     /* write last block (empty) */
    391     if (write(log->fd, "\001\000\000\377\377", 5) != 5)
    392         return 1;
    393 
    394     /* write updated crc and uncompressed length */
    395     dice_ulg(log->crc, temp);
    396     dice_ulg(log->len, temp + 4);
    397     if (write(log->fd, temp, 8) != 8)
    398         return 1;
    399 
    400     /* put offset of that last block in gzip extra block */
    401     (void)lseek(log->fd, log->extra + 8, SEEK_SET);
    402     dice_off(log->last_off, temp);
    403     if (write(log->fd, temp, 8) != 8)
    404         return 1;
    405 
    406     /* if more than 1 MB stored, then time to compress it */
    407     if (log->last_off - log->mark_off > MAX_STORED) {
    408         if (recomp(log))
    409             return 1;
    410     }
    411 
    412     /* unlock and close file */
    413     log_clean(log);
    414     return 0;
    415 }
    416