Home | History | Annotate | Line # | Download | only in zlib
      1  1.6  christos /*	$NetBSD: deflate.h,v 1.6 2024/09/22 19:12:27 christos Exp $	*/
      2  1.1  christos 
      3  1.1  christos /* deflate.h -- internal compression state
      4  1.6  christos  * Copyright (C) 1995-2024 Jean-loup Gailly
      5  1.1  christos  * For conditions of distribution and use, see copyright notice in zlib.h
      6  1.1  christos  */
      7  1.1  christos 
      8  1.1  christos /* WARNING: this file should *not* be used by applications. It is
      9  1.1  christos    part of the implementation of the compression library and is
     10  1.1  christos    subject to change. Applications should only use zlib.h.
     11  1.1  christos  */
     12  1.1  christos 
     13  1.5  christos /* @(#) Id */
     14  1.1  christos 
     15  1.1  christos #ifndef DEFLATE_H
     16  1.1  christos #define DEFLATE_H
     17  1.1  christos 
     18  1.1  christos #include "zutil.h"
     19  1.1  christos 
     20  1.1  christos /* Define NO_GZIP when compiling if you want to disable gzip header and
     21  1.1  christos    trailer creation by deflate().  NO_GZIP would be used to avoid linking in
     22  1.1  christos    the crc code when it is not needed.  For shared libraries, gzip encoding
     23  1.1  christos    should be left enabled.  GZIP is defined unless NO_GZIP is set. */
     24  1.1  christos #ifndef NO_GZIP
     25  1.1  christos #  define GZIP
     26  1.1  christos #endif /* !NO_GZIP */
     27  1.1  christos 
     28  1.6  christos /* define LIT_MEM to slightly increase the speed of deflate (order 1% to 2%) at
     29  1.6  christos    the cost of a larger memory footprint */
     30  1.6  christos /* #define LIT_MEM */
     31  1.6  christos 
     32  1.1  christos /* ===========================================================================
     33  1.1  christos  * Internal compression state.
     34  1.1  christos  */
     35  1.1  christos 
     36  1.1  christos #define LENGTH_CODES 29
     37  1.1  christos /* number of length codes, not counting the special END_BLOCK code */
     38  1.1  christos 
     39  1.1  christos #define LITERALS  256
     40  1.1  christos /* number of literal bytes 0..255 */
     41  1.1  christos 
     42  1.1  christos #define L_CODES (LITERALS+1+LENGTH_CODES)
     43  1.1  christos /* number of Literal or Length codes, including the END_BLOCK code (= 286) */
     44  1.1  christos 
     45  1.1  christos #define D_CODES   30
     46  1.1  christos /* number of distance codes */
     47  1.1  christos 
     48  1.1  christos #define BL_CODES  19
     49  1.1  christos /* number of codes used to transfer the bit lengths */
     50  1.1  christos 
     51  1.1  christos #define HEAP_SIZE (2*L_CODES+1)
     52  1.1  christos /* maximum heap size (= 573 with the values above) */
     53  1.1  christos 
     54  1.1  christos #define MAX_BITS 15
     55  1.1  christos /* All codes must not exceed MAX_BITS bits */
     56  1.1  christos 
     57  1.3  christos #define Buf_size 16
     58  1.3  christos /* size of bit buffer in bi_buf */
     59  1.3  christos 
     60  1.3  christos #define INIT_STATE    42    /* zlib header -> BUSY_STATE */
     61  1.3  christos #ifdef GZIP
     62  1.3  christos #  define GZIP_STATE  57    /* gzip header -> BUSY_STATE | EXTRA_STATE */
     63  1.3  christos #endif /* GZIP */
     64  1.3  christos #define EXTRA_STATE   69    /* gzip extra block -> NAME_STATE */
     65  1.3  christos #define NAME_STATE    73    /* gzip file name -> COMMENT_STATE */
     66  1.3  christos #define COMMENT_STATE 91    /* gzip comment -> HCRC_STATE */
     67  1.3  christos #define HCRC_STATE   103    /* gzip header CRC -> BUSY_STATE */
     68  1.3  christos #define BUSY_STATE   113    /* deflate -> FINISH_STATE */
     69  1.3  christos #define FINISH_STATE 666    /* stream complete */
     70  1.1  christos /* Stream status values; each comment names the state(s) that follow.  Only GZIP_STATE depends on GZIP; the other gzip states are always defined. */
     71  1.1  christos 
     72  1.1  christos 
     73  1.1  christos /* Data structure describing a single value and its code string.  Both members are unions, so only one field of each pair is meaningful at a time. */
     74  1.1  christos typedef struct ct_data_s {
     75  1.1  christos     union {
     76  1.1  christos         ush  freq;       /* frequency count */
     77  1.1  christos         ush  code;       /* bit string */
     78  1.1  christos     } fc;                /* freq and code overlay the same storage */
     79  1.1  christos     union {
     80  1.1  christos         ush  dad;        /* father node in Huffman tree */
     81  1.1  christos         ush  len;        /* length of bit string */
     82  1.1  christos     } dl;                /* dad and len overlay the same storage */
     83  1.1  christos } FAR ct_data;
     84  1.1  christos 
     85  1.1  christos #define Freq fc.freq /* member shorthands for ct_data: node.Freq is node.fc.freq */
     86  1.1  christos #define Code fc.code /* node.Code is node.fc.code (shares storage with Freq) */
     87  1.1  christos #define Dad  dl.dad  /* node.Dad is node.dl.dad */
     88  1.1  christos #define Len  dl.len  /* node.Len is node.dl.len (shares storage with Dad) */
     89  1.1  christos 
     90  1.1  christos typedef struct static_tree_desc_s  static_tree_desc; /* forward declaration only; the struct body is not defined in this header */
     91  1.1  christos 
     92  1.1  christos typedef struct tree_desc_s { /* describes one of the dynamic trees held in deflate_state */
     93  1.1  christos     ct_data *dyn_tree;           /* the dynamic tree */
     94  1.1  christos     int     max_code;            /* largest code with non zero frequency */
     95  1.3  christos     const static_tree_desc *stat_desc;  /* the corresponding static tree */
     96  1.1  christos } FAR tree_desc;
     97  1.1  christos 
     98  1.1  christos typedef ush Pos;
     99  1.1  christos typedef Pos FAR Posf;
    100  1.1  christos typedef unsigned IPos;
    101  1.1  christos 
    102  1.1  christos /* A Pos is an index in the character window. We use an unsigned short (ush)
    103  1.1  christos  * instead of int to save space in the various tables. Posf is the FAR-qualified
    104  1.1  christos  * form of Pos. IPos is used only for parameter passing. */
    105  1.1  christos 
    106  1.1  christos typedef struct internal_state { /* private state of one deflate stream; typedef'd as deflate_state at the closing brace */
    107  1.1  christos     z_streamp strm;      /* pointer back to this zlib stream */
    108  1.1  christos     int   status;        /* stream status; presumably one of the *_STATE values -- confirm in deflate.c */
    109  1.1  christos     Bytef *pending_buf;  /* output still pending */
    110  1.1  christos     ulg   pending_buf_size; /* size of pending_buf */
    111  1.1  christos     Bytef *pending_out;  /* next pending byte to output to the stream */
    112  1.3  christos     ulg   pending;       /* nb of bytes in the pending buffer */
    113  1.1  christos     int   wrap;          /* bit 0 true for zlib, bit 1 true for gzip */
    114  1.1  christos     gz_headerp  gzhead;  /* gzip header information to write */
    115  1.3  christos     ulg   gzindex;       /* where in extra, name, or comment */
    116  1.3  christos     Byte  method;        /* can only be DEFLATED */
    117  1.1  christos     int   last_flush;    /* value of flush param for previous deflate call */
    118  1.1  christos 
    119  1.1  christos                 /* used by deflate.c: */
    120  1.1  christos 
    121  1.1  christos     uInt  w_size;        /* LZ77 window size (32K by default) */
    122  1.1  christos     uInt  w_bits;        /* log2(w_size)  (8..16) */
    123  1.1  christos     uInt  w_mask;        /* w_size - 1 */
    124  1.1  christos 
    125  1.1  christos     Bytef *window;
    126  1.1  christos     /* Sliding window. Input bytes are read into the second half of the window,
    127  1.1  christos      * and move to the first half later to keep a dictionary of at least w_size
    128  1.1  christos      * bytes. With this organization, matches are limited to a distance of
    129  1.1  christos      * w_size-MAX_MATCH bytes, but this ensures that IO is always
    130  1.1  christos      * performed with a length multiple of the block size. Also, it limits
    131  1.1  christos      * the window size to 64K, which is quite useful on MSDOS.
    132  1.1  christos      * To do: use the user input buffer as sliding window.
    133  1.1  christos      */
    134  1.1  christos 
    135  1.1  christos     ulg window_size;
    136  1.1  christos     /* Actual size of window: 2*w_size, except when the user input buffer
    137  1.1  christos      * is directly used as sliding window.
    138  1.1  christos      */
    139  1.1  christos 
    140  1.1  christos     Posf *prev;
    141  1.1  christos     /* Link to older string with same hash index. To limit the size of this
    142  1.1  christos      * array to 64K, this link is maintained only for the last 32K strings.
    143  1.1  christos      * An index in this array is thus a window index modulo 32K.
    144  1.1  christos      */
    145  1.1  christos 
    146  1.1  christos     Posf *head; /* Heads of the hash chains or NIL. */
    147  1.1  christos 
    148  1.1  christos     uInt  ins_h;          /* hash index of string to be inserted */
    149  1.1  christos     uInt  hash_size;      /* number of elements in hash table */
    150  1.1  christos     uInt  hash_bits;      /* log2(hash_size) */
    151  1.1  christos     uInt  hash_mask;      /* hash_size-1 */
    152  1.1  christos 
    153  1.1  christos     uInt  hash_shift;
    154  1.1  christos     /* Number of bits by which ins_h must be shifted at each input
    155  1.1  christos      * step. It must be such that after MIN_MATCH steps, the oldest
    156  1.1  christos      * byte no longer takes part in the hash key, that is:
    157  1.1  christos      *   hash_shift * MIN_MATCH >= hash_bits
    158  1.1  christos      */
    159  1.1  christos 
    160  1.1  christos     long block_start;
    161  1.1  christos     /* Window position at the beginning of the current output block. Gets
    162  1.1  christos      * negative when the window is moved backwards.
    163  1.1  christos      */
    164  1.1  christos 
    165  1.1  christos     uInt match_length;           /* length of best match */
    166  1.1  christos     IPos prev_match;             /* previous match */
    167  1.1  christos     int match_available;         /* set if previous match exists */
    168  1.1  christos     uInt strstart;               /* start of string to insert */
    169  1.1  christos     uInt match_start;            /* start of matching string */
    170  1.1  christos     uInt lookahead;              /* number of valid bytes ahead in window */
    171  1.1  christos 
    172  1.1  christos     uInt prev_length;
    173  1.1  christos     /* Length of the best match at previous step. Matches not greater than this
    174  1.1  christos      * are discarded. This is used in the lazy match evaluation.
    175  1.1  christos      */
    176  1.1  christos 
    177  1.1  christos     uInt max_chain_length;
    178  1.1  christos     /* To speed up deflation, hash chains are never searched beyond this
    179  1.1  christos      * length.  A higher limit improves compression ratio but degrades the
    180  1.1  christos      * speed.
    181  1.1  christos      */
    182  1.1  christos 
    183  1.1  christos     uInt max_lazy_match;
    184  1.1  christos     /* Attempt to find a better match only when the current match is strictly
    185  1.1  christos      * smaller than this value. This mechanism is used only for compression
    186  1.1  christos      * levels >= 4.
    187  1.1  christos      */
    188  1.1  christos #   define max_insert_length  max_lazy_match
    189  1.1  christos     /* max_insert_length is an alias for the max_lazy_match field above.
    190  1.1  christos      * Insert new strings in the hash table only if the match length is not
    191  1.1  christos      * greater than this length. This saves time but degrades compression.
    192  1.1  christos      * max_insert_length is used only for compression levels <= 3. */
    193  1.1  christos 
    194  1.1  christos     int level;    /* compression level (1..9) */
    195  1.1  christos     int strategy; /* favor or force Huffman coding */
    196  1.1  christos 
    197  1.1  christos     uInt good_match;
    198  1.1  christos     /* Use a faster search when the previous match is longer than this */
    199  1.1  christos 
    200  1.1  christos     int nice_match; /* Stop searching when current match exceeds this */
    201  1.1  christos 
    202  1.1  christos                 /* used by trees.c: */
    203  1.3  christos     /* Didn't use ct_data typedef below to suppress compiler warning */
    204  1.1  christos     struct ct_data_s dyn_ltree[HEAP_SIZE];   /* literal and length tree */
    205  1.1  christos     struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */
    206  1.1  christos     struct ct_data_s bl_tree[2*BL_CODES+1];  /* Huffman tree for bit lengths */
    207  1.1  christos 
    208  1.1  christos     struct tree_desc_s l_desc;               /* desc. for literal tree */
    209  1.1  christos     struct tree_desc_s d_desc;               /* desc. for distance tree */
    210  1.1  christos     struct tree_desc_s bl_desc;              /* desc. for bit length tree */
    211  1.1  christos 
    212  1.1  christos     ush bl_count[MAX_BITS+1];
    213  1.1  christos     /* number of codes at each bit length for an optimal tree */
    214  1.1  christos 
    215  1.1  christos     int heap[2*L_CODES+1];      /* heap used to build the Huffman trees; 2*L_CODES+1 equals HEAP_SIZE */
    216  1.1  christos     int heap_len;               /* number of elements in the heap */
    217  1.1  christos     int heap_max;               /* element of largest frequency */
    218  1.1  christos     /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
    219  1.1  christos      * The same heap array is used to build all trees.
    220  1.1  christos      */
    221  1.1  christos 
    222  1.1  christos     uch depth[2*L_CODES+1];     /* same element count as heap[] */
    223  1.1  christos     /* Depth of each subtree used as tie breaker for trees of equal frequency
    224  1.1  christos      */
    225  1.1  christos 
    226  1.6  christos #ifdef LIT_MEM
    227  1.6  christos #   define LIT_BUFS 5 /* NOTE(review): not used in this header; presumably a buffer sizing factor in deflate.c -- confirm */
    228  1.6  christos     ushf *d_buf;          /* buffer for distances */
    229  1.6  christos     uchf *l_buf;          /* buffer for literals/lengths */
    230  1.6  christos #else
    231  1.6  christos #   define LIT_BUFS 4 /* NOTE(review): see the LIT_MEM variant; confirm use in deflate.c */
    232  1.4       wiz     uchf *sym_buf;        /* buffer for distances and literals/lengths; the tally macros in this header write 3 bytes per symbol */
    233  1.6  christos #endif /* LIT_MEM */
    234  1.1  christos 
    235  1.1  christos     uInt  lit_bufsize;
    236  1.1  christos     /* Size of match buffer for literals/lengths.  There are 4 reasons for
    237  1.1  christos      * limiting lit_bufsize to 64K:
    238  1.1  christos      *   - frequencies can be kept in 16 bit counters
    239  1.1  christos      *   - if compression is not successful for the first block, all input
    240  1.1  christos      *     data is still in the window so we can still emit a stored block even
    241  1.1  christos      *     when input comes from standard input.  (This can also be done for
    242  1.1  christos      *     all blocks if lit_bufsize is not greater than 32K.)
    243  1.1  christos      *   - if compression is not successful for a file smaller than 64K, we can
    244  1.1  christos      *     even emit a stored file instead of a stored block (saving 5 bytes).
    245  1.1  christos      *     This is applicable only for zip (not gzip or zlib).
    246  1.1  christos      *   - creating new Huffman trees less frequently may not provide fast
    247  1.1  christos      *     adaptation to changes in the input data statistics. (Take for
    248  1.1  christos      *     example a binary file with poorly compressible code followed by
    249  1.1  christos      *     a highly compressible string table.) Smaller buffer sizes give
    250  1.1  christos      *     fast adaptation but have of course the overhead of transmitting
    251  1.1  christos      *     trees more frequently.
    252  1.1  christos      *   - I can't count above 4
    253  1.1  christos      */
    254  1.1  christos 
    255  1.6  christos     uInt sym_next;      /* running index in symbol buffer */
    256  1.4       wiz     uInt sym_end;       /* symbol table full when sym_next reaches this */
    257  1.1  christos 
    258  1.1  christos     ulg opt_len;        /* bit length of current block with optimal trees */
    259  1.1  christos     ulg static_len;     /* bit length of current block with static trees */
    260  1.1  christos     uInt matches;       /* number of string matches in current block */
    261  1.3  christos     uInt insert;        /* bytes at end of window left to insert */
    262  1.1  christos 
    263  1.2  christos #ifdef ZLIB_DEBUG
    264  1.1  christos     ulg compressed_len; /* total bit length of compressed file mod 2^32 */
    265  1.1  christos     ulg bits_sent;      /* bit length of compressed data sent mod 2^32 */
    266  1.1  christos #endif /* ZLIB_DEBUG */
    267  1.1  christos 
    268  1.1  christos     ush bi_buf;
    269  1.1  christos     /* Output buffer. bits are inserted starting at the bottom (least
    270  1.1  christos      * significant bits).
    271  1.1  christos      */
    272  1.1  christos     int bi_valid;
    273  1.1  christos     /* Number of valid bits in bi_buf.  All bits above the last valid bit
    274  1.1  christos      * are always zero.
    275  1.1  christos      */
    276  1.1  christos 
    277  1.3  christos     ulg high_water;
    278  1.3  christos     /* High water mark offset in window for initialized bytes -- bytes above
    279  1.3  christos      * this are set to zero in order to avoid memory check warnings when
    280  1.3  christos      * longest match routines access bytes past the input.  This is then
    281  1.3  christos      * updated to the new high water mark.
    282  1.3  christos      */
    283  1.3  christos 
    284  1.1  christos } FAR deflate_state;
    285  1.1  christos 
    286  1.1  christos /* Output a byte on the stream: store (Bytef)(c) at pending_buf[pending] and advance pending.
    287  1.1  christos  * IN assertion: there is enough room in pending_buf.
    288  1.1  christos  * Macro caveats: s is expanded twice and is not parenthesized, so pass a plain pointer name without side effects; the expansion is a bare { } block, not do/while(0). */
    289  1.3  christos #define put_byte(s, c) {s->pending_buf[s->pending++] = (Bytef)(c);}
    290  1.1  christos 
    291  1.1  christos 
    292  1.1  christos #define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
    293  1.1  christos /* Minimum amount of lookahead, except at the end of the input file.
    294  1.1  christos  * See deflate.c for comments about the MIN_MATCH+1.
    295  1.1  christos  * NOTE(review): MAX_MATCH and MIN_MATCH are not defined in this header; presumably they come from zutil.h -- confirm. */
    296  1.1  christos 
    297  1.1  christos #define MAX_DIST(s)  ((s)->w_size-MIN_LOOKAHEAD)
    298  1.1  christos /* In order to simplify the code, particularly on 16 bit machines, match
    299  1.1  christos  * distances are limited to MAX_DIST instead of the window size (s->w_size).
    300  1.1  christos  */
    301  1.1  christos 
    302  1.3  christos #define WIN_INIT MAX_MATCH
    303  1.3  christos /* Number of bytes after end of data in window to initialize in order to avoid
    304  1.3  christos    memory checker errors from longest match routines */
    305  1.3  christos 
    306  1.1  christos         /* in trees.c; every function below takes the deflate_state it operates on */
    307  1.6  christos void ZLIB_INTERNAL _tr_init(deflate_state *s);
    308  1.6  christos int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc); /* called by the _tr_tally_lit/_tr_tally_dist macros when ZLIB_DEBUG is defined; the result is assigned to their flush argument */
    309  1.6  christos void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf,
    310  1.6  christos                                    ulg stored_len, int last);
    311  1.6  christos void ZLIB_INTERNAL _tr_flush_bits(deflate_state *s);
    312  1.6  christos void ZLIB_INTERNAL _tr_align(deflate_state *s);
    313  1.6  christos void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf,
    314  1.6  christos                                     ulg stored_len, int last);
    315  1.1  christos 
    316  1.1  christos #define d_code(dist) \
    317  1.1  christos    ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])
    318  1.1  christos /* Mapping from a distance to a distance code. dist is the distance - 1 and
    319  1.1  christos  * must not have side effects (it is expanded more than once). _dist_code[256]
    320  1.1  christos  * and _dist_code[257] are never used. This header declares _dist_code only
    321  1.1  christos  * when ZLIB_DEBUG is not defined; debug-build users of d_code need their own declaration in scope. */
    322  1.1  christos 
    323  1.2  christos #ifndef ZLIB_DEBUG
    324  1.1  christos /* Inline versions of _tr_tally for speed.  Each records one symbol, bumps the matching Freq counters and sets flush to (sym_next == sym_end).  s is expanded several times, and the bodies declare locals named cc, len and dist, so arguments must be side-effect free and must not use those names. */
    325  1.1  christos 
    326  1.1  christos #if defined(GEN_TREES_H) || !defined(STDC)
    327  1.3  christos   extern uch ZLIB_INTERNAL _length_code[];
    328  1.3  christos   extern uch ZLIB_INTERNAL _dist_code[];
    329  1.1  christos #else
    330  1.3  christos   extern const uch ZLIB_INTERNAL _length_code[];
    331  1.3  christos   extern const uch ZLIB_INTERNAL _dist_code[];
    332  1.1  christos #endif /* GEN_TREES_H || !STDC */
    333  1.1  christos 
    334  1.6  christos #ifdef LIT_MEM
    335  1.6  christos # define _tr_tally_lit(s, c, flush) \
    336  1.6  christos   { uch cc = (c); \
    337  1.6  christos     s->d_buf[s->sym_next] = 0; \
    338  1.6  christos     s->l_buf[s->sym_next++] = cc; \
    339  1.6  christos     s->dyn_ltree[cc].Freq++; \
    340  1.6  christos     flush = (s->sym_next == s->sym_end); \
    341  1.6  christos    }
    342  1.6  christos # define _tr_tally_dist(s, distance, length, flush) \
    343  1.6  christos   { uch len = (uch)(length); \
    344  1.6  christos     ush dist = (ush)(distance); \
    345  1.6  christos     s->d_buf[s->sym_next] = dist; \
    346  1.6  christos     s->l_buf[s->sym_next++] = len; \
    347  1.6  christos     dist--; \
    348  1.6  christos     s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
    349  1.6  christos     s->dyn_dtree[d_code(dist)].Freq++; \
    350  1.6  christos     flush = (s->sym_next == s->sym_end); \
    351  1.6  christos   }
    352  1.6  christos #else /* !LIT_MEM: each symbol occupies 3 bytes of sym_buf -- distance low byte, distance high byte, then the length or literal; a literal is stored with distance 0 */
    353  1.1  christos # define _tr_tally_lit(s, c, flush) \
    354  1.1  christos   { uch cc = (c); \
    355  1.4       wiz     s->sym_buf[s->sym_next++] = 0; \
    356  1.4       wiz     s->sym_buf[s->sym_next++] = 0; \
    357  1.4       wiz     s->sym_buf[s->sym_next++] = cc; \
    358  1.1  christos     s->dyn_ltree[cc].Freq++; \
    359  1.4       wiz     flush = (s->sym_next == s->sym_end); \
    360  1.1  christos    }
    361  1.1  christos # define _tr_tally_dist(s, distance, length, flush) \
    362  1.3  christos   { uch len = (uch)(length); \
    363  1.3  christos     ush dist = (ush)(distance); \
    364  1.5  christos     s->sym_buf[s->sym_next++] = (uch)dist; \
    365  1.5  christos     s->sym_buf[s->sym_next++] = (uch)(dist >> 8); \
    366  1.4       wiz     s->sym_buf[s->sym_next++] = len; \
    367  1.1  christos     dist--; \
    368  1.1  christos     s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
    369  1.1  christos     s->dyn_dtree[d_code(dist)].Freq++; \
    370  1.4       wiz     flush = (s->sym_next == s->sym_end); \
    371  1.1  christos   }
    372  1.6  christos #endif /* LIT_MEM */
    373  1.1  christos #else /* ZLIB_DEBUG: use the out-of-line _tr_tally() from trees.c instead */
    374  1.1  christos # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
    375  1.1  christos # define _tr_tally_dist(s, distance, length, flush) \
    376  1.1  christos               flush = _tr_tally(s, distance, length)
    377  1.1  christos #endif /* !ZLIB_DEBUG */
    378  1.1  christos 
    379  1.1  christos #endif /* DEFLATE_H */
    380