Home | History | Annotate | Line # | Download | only in zlib
inftrees.c revision 1.2
      1  1.2  christos /*	$NetBSD: inftrees.c,v 1.2 2006/01/16 03:23:10 christos Exp $	*/
      2  1.1  christos 
      3  1.1  christos /* inftrees.c -- generate Huffman trees for efficient decoding
      4  1.1  christos  * Copyright (C) 1995-2005 Mark Adler
      5  1.1  christos  * For conditions of distribution and use, see copyright notice in zlib.h
      6  1.1  christos  */
      7  1.1  christos 
      8  1.1  christos #include "zutil.h"
      9  1.1  christos #include "inftrees.h"
     10  1.1  christos 
     11  1.1  christos #define MAXBITS 15
     12  1.1  christos 
     13  1.1  christos const char inflate_copyright[] =
     14  1.1  christos    " inflate 1.2.3 Copyright 1995-2005 Mark Adler ";
     15  1.1  christos /*
     16  1.1  christos   If you use the zlib library in a product, an acknowledgment is welcome
     17  1.1  christos   in the documentation of your product. If for some reason you cannot
     18  1.1  christos   include such an acknowledgment, I would appreciate that you keep this
     19  1.1  christos   copyright string in the executable of your product.
     20  1.1  christos  */
     21  1.1  christos 
     22  1.1  christos /*
     23  1.1  christos    Build a set of tables to decode the provided canonical Huffman code.
     24  1.1  christos    The code lengths are lens[0..codes-1].  The result starts at *table,
     25  1.1  christos    whose indices are 0..2^bits-1.  work is a writable array of at least
     26  1.1  christos    lens shorts, which is used as a work area.  type is the type of code
     27  1.1  christos    to be generated, CODES, LENS, or DISTS.  On return, zero is success,
     28  1.1  christos    -1 is an invalid code, and +1 means that ENOUGH isn't enough.  table
     29  1.1  christos    on return points to the next available entry's address.  bits is the
     30  1.1  christos    requested root table index bits, and on return it is the actual root
     31  1.1  christos    table index bits.  It will differ if the request is greater than the
     32  1.1  christos    longest code or if it is less than the shortest code.
     33  1.1  christos  */
     34  1.1  christos int inflate_table(type, lens, codes, table, bits, work)
     35  1.1  christos codetype type;
     36  1.1  christos unsigned short FAR *lens;
     37  1.1  christos unsigned codes;
     38  1.1  christos code FAR * FAR *table;
     39  1.1  christos unsigned FAR *bits;
     40  1.1  christos unsigned short FAR *work;
     41  1.1  christos {
     42  1.1  christos     unsigned len;               /* a code's length in bits */
     43  1.1  christos     unsigned sym;               /* index of code symbols */
     44  1.2  christos     unsigned mmin, mmax;        /* minimum and maximum code lengths */
     45  1.1  christos     unsigned root;              /* number of index bits for root table */
     46  1.1  christos     unsigned curr;              /* number of index bits for current table */
     47  1.1  christos     unsigned drop;              /* code bits to drop for sub-table */
     48  1.1  christos     int left;                   /* number of prefix codes available */
     49  1.1  christos     unsigned used;              /* code entries in table used */
     50  1.1  christos     unsigned huff;              /* Huffman code */
     51  1.1  christos     unsigned incr;              /* for incrementing code, index */
     52  1.1  christos     unsigned fill;              /* index for replicating entries */
     53  1.1  christos     unsigned low;               /* low bits for current root entry */
     54  1.1  christos     unsigned mask;              /* mask for low root bits */
     55  1.1  christos     code this;                  /* table entry for duplication */
     56  1.1  christos     code FAR *next;             /* next available space in table */
     57  1.1  christos     const unsigned short FAR *base;     /* base value table to use */
     58  1.1  christos     const unsigned short FAR *extra;    /* extra bits table to use */
     59  1.1  christos     int end;                    /* use base and extra for symbol > end */
     60  1.1  christos     unsigned short count[MAXBITS+1];    /* number of codes of each length */
     61  1.1  christos     unsigned short offs[MAXBITS+1];     /* offsets in table for each length */
     62  1.1  christos     static const unsigned short lbase[31] = { /* Length codes 257..285 base */
     63  1.1  christos         3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
     64  1.1  christos         35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
     65  1.1  christos     static const unsigned short lext[31] = { /* Length codes 257..285 extra */
     66  1.1  christos         16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
     67  1.1  christos         19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 201, 196};
     68  1.1  christos     static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
     69  1.1  christos         1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
     70  1.1  christos         257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
     71  1.1  christos         8193, 12289, 16385, 24577, 0, 0};
     72  1.1  christos     static const unsigned short dext[32] = { /* Distance codes 0..29 extra */
     73  1.1  christos         16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
     74  1.1  christos         23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
     75  1.1  christos         28, 28, 29, 29, 64, 64};
     76  1.1  christos 
     77  1.1  christos     /*
     78  1.1  christos        Process a set of code lengths to create a canonical Huffman code.  The
     79  1.1  christos        code lengths are lens[0..codes-1].  Each length corresponds to the
     80  1.1  christos        symbols 0..codes-1.  The Huffman code is generated by first sorting the
     81  1.1  christos        symbols by length from short to long, and retaining the symbol order
     82  1.1  christos        for codes with equal lengths.  Then the code starts with all zero bits
     83  1.1  christos        for the first code of the shortest length, and the codes are integer
     84  1.1  christos        increments for the same length, and zeros are appended as the length
     85  1.1  christos        increases.  For the deflate format, these bits are stored backwards
     86  1.1  christos        from their more natural integer increment ordering, and so when the
     87  1.1  christos        decoding tables are built in the large loop below, the integer codes
     88  1.1  christos        are incremented backwards.
     89  1.1  christos 
     90  1.1  christos        This routine assumes, but does not check, that all of the entries in
     91  1.1  christos        lens[] are in the range 0..MAXBITS.  The caller must assure this.
     92  1.1  christos        1..MAXBITS is interpreted as that code length.  zero means that that
     93  1.1  christos        symbol does not occur in this code.
     94  1.1  christos 
     95  1.1  christos        The codes are sorted by computing a count of codes for each length,
     96  1.1  christos        creating from that a table of starting indices for each length in the
     97  1.1  christos        sorted table, and then entering the symbols in order in the sorted
     98  1.1  christos        table.  The sorted table is work[], with that space being provided by
     99  1.1  christos        the caller.
    100  1.1  christos 
    101  1.1  christos        The length counts are used for other purposes as well, i.e. finding
    102  1.1  christos        the minimum and maximum length codes, determining if there are any
    103  1.1  christos        codes at all, checking for a valid set of lengths, and looking ahead
    104  1.1  christos        at length counts to determine sub-table sizes when building the
    105  1.1  christos        decoding tables.
    106  1.1  christos      */
    107  1.1  christos 
    108  1.1  christos     /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
    109  1.1  christos     for (len = 0; len <= MAXBITS; len++)
    110  1.1  christos         count[len] = 0;
    111  1.1  christos     for (sym = 0; sym < codes; sym++)
    112  1.1  christos         count[lens[sym]]++;
    113  1.1  christos 
    114  1.1  christos     /* bound code lengths, force root to be within code lengths */
    115  1.1  christos     root = *bits;
    116  1.2  christos     for (mmax = MAXBITS; mmax >= 1; mmax--)
    117  1.2  christos         if (count[mmax] != 0) break;
    118  1.2  christos     if (root > mmax) root = mmax;
    119  1.2  christos     if (mmax == 0) {                     /* no symbols to code at all */
    120  1.1  christos         this.op = (unsigned char)64;    /* invalid code marker */
    121  1.1  christos         this.bits = (unsigned char)1;
    122  1.1  christos         this.val = (unsigned short)0;
    123  1.1  christos         *(*table)++ = this;             /* make a table to force an error */
    124  1.1  christos         *(*table)++ = this;
    125  1.1  christos         *bits = 1;
    126  1.1  christos         return 0;     /* no symbols, but wait for decoding to report error */
    127  1.1  christos     }
    128  1.2  christos     for (mmin = 1; mmin <= MAXBITS; mmin++)
    129  1.2  christos         if (count[mmin] != 0) break;
    130  1.2  christos     if (root < mmin) root = mmin;
    131  1.1  christos 
    132  1.1  christos     /* check for an over-subscribed or incomplete set of lengths */
    133  1.1  christos     left = 1;
    134  1.1  christos     for (len = 1; len <= MAXBITS; len++) {
    135  1.1  christos         left <<= 1;
    136  1.1  christos         left -= count[len];
    137  1.1  christos         if (left < 0) return -1;        /* over-subscribed */
    138  1.1  christos     }
    139  1.2  christos     if (left > 0 && (type == CODES || mmax != 1))
    140  1.1  christos         return -1;                      /* incomplete set */
    141  1.1  christos 
    142  1.1  christos     /* generate offsets into symbol table for each length for sorting */
    143  1.1  christos     offs[1] = 0;
    144  1.1  christos     for (len = 1; len < MAXBITS; len++)
    145  1.1  christos         offs[len + 1] = offs[len] + count[len];
    146  1.1  christos 
    147  1.1  christos     /* sort symbols by length, by symbol order within each length */
    148  1.1  christos     for (sym = 0; sym < codes; sym++)
    149  1.1  christos         if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
    150  1.1  christos 
    151  1.1  christos     /*
    152  1.1  christos        Create and fill in decoding tables.  In this loop, the table being
    153  1.1  christos        filled is at next and has curr index bits.  The code being used is huff
    154  1.1  christos        with length len.  That code is converted to an index by dropping drop
    155  1.1  christos        bits off of the bottom.  For codes where len is less than drop + curr,
    156  1.1  christos        those top drop + curr - len bits are incremented through all values to
    157  1.1  christos        fill the table with replicated entries.
    158  1.1  christos 
    159  1.1  christos        root is the number of index bits for the root table.  When len exceeds
    160  1.1  christos        root, sub-tables are created pointed to by the root entry with an index
    161  1.1  christos        of the low root bits of huff.  This is saved in low to check for when a
    162  1.1  christos        new sub-table should be started.  drop is zero when the root table is
    163  1.1  christos        being filled, and drop is root when sub-tables are being filled.
    164  1.1  christos 
    165  1.1  christos        When a new sub-table is needed, it is necessary to look ahead in the
    166  1.1  christos        code lengths to determine what size sub-table is needed.  The length
    167  1.1  christos        counts are used for this, and so count[] is decremented as codes are
    168  1.1  christos        entered in the tables.
    169  1.1  christos 
    170  1.1  christos        used keeps track of how many table entries have been allocated from the
    171  1.1  christos        provided *table space.  It is checked when a LENS table is being made
    172  1.1  christos        against the space in *table, ENOUGH, minus the maximum space needed by
    173  1.1  christos        the worst case distance code, MAXD.  This should never happen, but the
    174  1.1  christos        sufficiency of ENOUGH has not been proven exhaustively, hence the check.
    175  1.1  christos        This assumes that when type == LENS, bits == 9.
    176  1.1  christos 
    177  1.1  christos        sym increments through all symbols, and the loop terminates when
    178  1.2  christos        all codes of length mmax, i.e. all codes, have been processed.  This
    179  1.1  christos        routine permits incomplete codes, so another loop after this one fills
    180  1.1  christos        in the rest of the decoding tables with invalid code markers.
    181  1.1  christos      */
    182  1.1  christos 
    183  1.1  christos     /* set up for code type */
    184  1.1  christos     switch (type) {
    185  1.1  christos     case CODES:
    186  1.1  christos         base = extra = work;    /* dummy value--not used */
    187  1.1  christos         end = 19;
    188  1.1  christos         break;
    189  1.1  christos     case LENS:
    190  1.1  christos         base = lbase;
    191  1.1  christos         base -= 257;
    192  1.1  christos         extra = lext;
    193  1.1  christos         extra -= 257;
    194  1.1  christos         end = 256;
    195  1.1  christos         break;
    196  1.1  christos     default:            /* DISTS */
    197  1.1  christos         base = dbase;
    198  1.1  christos         extra = dext;
    199  1.1  christos         end = -1;
    200  1.1  christos     }
    201  1.1  christos 
    202  1.1  christos     /* initialize state for loop */
    203  1.1  christos     huff = 0;                   /* starting code */
    204  1.1  christos     sym = 0;                    /* starting code symbol */
    205  1.2  christos     len = mmin;                  /* starting code length */
    206  1.1  christos     next = *table;              /* current table to fill in */
    207  1.1  christos     curr = root;                /* current table index bits */
    208  1.1  christos     drop = 0;                   /* current bits to drop from code for index */
    209  1.1  christos     low = (unsigned)(-1);       /* trigger new sub-table when len > root */
    210  1.1  christos     used = 1U << root;          /* use root table entries */
    211  1.1  christos     mask = used - 1;            /* mask for comparing low */
    212  1.1  christos 
    213  1.1  christos     /* check available table space */
    214  1.1  christos     if (type == LENS && used >= ENOUGH - MAXD)
    215  1.1  christos         return 1;
    216  1.1  christos 
    217  1.1  christos     /* process all codes and make table entries */
    218  1.1  christos     for (;;) {
    219  1.1  christos         /* create table entry */
    220  1.1  christos         this.bits = (unsigned char)(len - drop);
    221  1.1  christos         if ((int)(work[sym]) < end) {
    222  1.1  christos             this.op = (unsigned char)0;
    223  1.1  christos             this.val = work[sym];
    224  1.1  christos         }
    225  1.1  christos         else if ((int)(work[sym]) > end) {
    226  1.1  christos             this.op = (unsigned char)(extra[work[sym]]);
    227  1.1  christos             this.val = base[work[sym]];
    228  1.1  christos         }
    229  1.1  christos         else {
    230  1.1  christos             this.op = (unsigned char)(32 + 64);         /* end of block */
    231  1.1  christos             this.val = 0;
    232  1.1  christos         }
    233  1.1  christos 
    234  1.1  christos         /* replicate for those indices with low len bits equal to huff */
    235  1.1  christos         incr = 1U << (len - drop);
    236  1.1  christos         fill = 1U << curr;
    237  1.2  christos         mmin = fill;                 /* save offset to next table */
    238  1.1  christos         do {
    239  1.1  christos             fill -= incr;
    240  1.1  christos             next[(huff >> drop) + fill] = this;
    241  1.1  christos         } while (fill != 0);
    242  1.1  christos 
    243  1.1  christos         /* backwards increment the len-bit code huff */
    244  1.1  christos         incr = 1U << (len - 1);
    245  1.1  christos         while (huff & incr)
    246  1.1  christos             incr >>= 1;
    247  1.1  christos         if (incr != 0) {
    248  1.1  christos             huff &= incr - 1;
    249  1.1  christos             huff += incr;
    250  1.1  christos         }
    251  1.1  christos         else
    252  1.1  christos             huff = 0;
    253  1.1  christos 
    254  1.1  christos         /* go to next symbol, update count, len */
    255  1.1  christos         sym++;
    256  1.1  christos         if (--(count[len]) == 0) {
    257  1.2  christos             if (len == mmax) break;
    258  1.1  christos             len = lens[work[sym]];
    259  1.1  christos         }
    260  1.1  christos 
    261  1.1  christos         /* create new sub-table if needed */
    262  1.1  christos         if (len > root && (huff & mask) != low) {
    263  1.1  christos             /* if first time, transition to sub-tables */
    264  1.1  christos             if (drop == 0)
    265  1.1  christos                 drop = root;
    266  1.1  christos 
    267  1.1  christos             /* increment past last table */
    268  1.2  christos             next += mmin;            /* here mmin is 1 << curr */
    269  1.1  christos 
    270  1.1  christos             /* determine length of next table */
    271  1.1  christos             curr = len - drop;
    272  1.1  christos             left = (int)(1 << curr);
    273  1.2  christos             while (curr + drop < mmax) {
    274  1.1  christos                 left -= count[curr + drop];
    275  1.1  christos                 if (left <= 0) break;
    276  1.1  christos                 curr++;
    277  1.1  christos                 left <<= 1;
    278  1.1  christos             }
    279  1.1  christos 
    280  1.1  christos             /* check for enough space */
    281  1.1  christos             used += 1U << curr;
    282  1.1  christos             if (type == LENS && used >= ENOUGH - MAXD)
    283  1.1  christos                 return 1;
    284  1.1  christos 
    285  1.1  christos             /* point entry in root table to sub-table */
    286  1.1  christos             low = huff & mask;
    287  1.1  christos             (*table)[low].op = (unsigned char)curr;
    288  1.1  christos             (*table)[low].bits = (unsigned char)root;
    289  1.1  christos             (*table)[low].val = (unsigned short)(next - *table);
    290  1.1  christos         }
    291  1.1  christos     }
    292  1.1  christos 
    293  1.1  christos     /*
    294  1.1  christos        Fill in rest of table for incomplete codes.  This loop is similar to the
    295  1.1  christos        loop above in incrementing huff for table indices.  It is assumed that
    296  1.1  christos        len is equal to curr + drop, so there is no loop needed to increment
    297  1.1  christos        through high index bits.  When the current sub-table is filled, the loop
    298  1.1  christos        drops back to the root table to fill in any remaining entries there.
    299  1.1  christos      */
    300  1.1  christos     this.op = (unsigned char)64;                /* invalid code marker */
    301  1.1  christos     this.bits = (unsigned char)(len - drop);
    302  1.1  christos     this.val = (unsigned short)0;
    303  1.1  christos     while (huff != 0) {
    304  1.1  christos         /* when done with sub-table, drop back to root table */
    305  1.1  christos         if (drop != 0 && (huff & mask) != low) {
    306  1.1  christos             drop = 0;
    307  1.1  christos             len = root;
    308  1.1  christos             next = *table;
    309  1.1  christos             this.bits = (unsigned char)len;
    310  1.1  christos         }
    311  1.1  christos 
    312  1.1  christos         /* put invalid code marker in table */
    313  1.1  christos         next[huff >> drop] = this;
    314  1.1  christos 
    315  1.1  christos         /* backwards increment the len-bit code huff */
    316  1.1  christos         incr = 1U << (len - 1);
    317  1.1  christos         while (huff & incr)
    318  1.1  christos             incr >>= 1;
    319  1.1  christos         if (incr != 0) {
    320  1.1  christos             huff &= incr - 1;
    321  1.1  christos             huff += incr;
    322  1.1  christos         }
    323  1.1  christos         else
    324  1.1  christos             huff = 0;
    325  1.1  christos     }
    326  1.1  christos 
    327  1.1  christos     /* set return parameters */
    328  1.1  christos     *table += used;
    329  1.1  christos     *bits = root;
    330  1.1  christos     return 0;
    331  1.1  christos }
    332