Home | History | Annotate | Line # | Download | only in zlib
      1  1.5  christos /*	$NetBSD: inftrees.c,v 1.5 2024/09/22 19:12:27 christos Exp $	*/
      2  1.1  christos 
      3  1.1  christos /* inftrees.c -- generate Huffman trees for efficient decoding
      4  1.5  christos  * Copyright (C) 1995-2024 Mark Adler
      5  1.1  christos  * For conditions of distribution and use, see copyright notice in zlib.h
      6  1.1  christos  */
      7  1.1  christos 
      8  1.1  christos #include "zutil.h"
      9  1.1  christos #include "inftrees.h"
     10  1.1  christos 
     11  1.1  christos #define MAXBITS 15
     12  1.1  christos 
     13  1.1  christos const char inflate_copyright[] =
     14  1.5  christos    " inflate 1.3.1 Copyright 1995-2024 Mark Adler ";
     15  1.1  christos /*
     16  1.1  christos   If you use the zlib library in a product, an acknowledgment is welcome
     17  1.1  christos   in the documentation of your product. If for some reason you cannot
     18  1.1  christos   include such an acknowledgment, I would appreciate that you keep this
     19  1.1  christos   copyright string in the executable of your product.
     20  1.1  christos  */
     21  1.1  christos 
     22  1.1  christos /*
     23  1.1  christos    Build a set of tables to decode the provided canonical Huffman code.
     24  1.1  christos    The code lengths are lens[0..codes-1].  The result starts at *table,
     25  1.1  christos    whose indices are 0..2^bits-1.  work is a writable array of at least
     26  1.1  christos    lens shorts, which is used as a work area.  type is the type of code
     27  1.1  christos    to be generated, CODES, LENS, or DISTS.  On return, zero is success,
     28  1.1  christos    -1 is an invalid code, and +1 means that ENOUGH isn't enough.  table
     29  1.1  christos    on return points to the next available entry's address.  bits is the
     30  1.1  christos    requested root table index bits, and on return it is the actual root
     31  1.1  christos    table index bits.  It will differ if the request is greater than the
     32  1.1  christos    longest code or if it is less than the shortest code.
     33  1.1  christos  */
     34  1.5  christos int ZLIB_INTERNAL inflate_table(codetype type, unsigned short FAR *lens,
     35  1.5  christos                                 unsigned codes, code FAR * FAR *table,
     36  1.5  christos                                 unsigned FAR *bits, unsigned short FAR *work) {
     37  1.1  christos     unsigned len;               /* a code's length in bits */
     38  1.1  christos     unsigned sym;               /* index of code symbols */
     39  1.2  christos     unsigned mmin, mmax;        /* minimum and maximum code lengths */
     40  1.1  christos     unsigned root;              /* number of index bits for root table */
     41  1.1  christos     unsigned curr;              /* number of index bits for current table */
     42  1.1  christos     unsigned drop;              /* code bits to drop for sub-table */
     43  1.1  christos     int left;                   /* number of prefix codes available */
     44  1.1  christos     unsigned used;              /* code entries in table used */
     45  1.1  christos     unsigned huff;              /* Huffman code */
     46  1.1  christos     unsigned incr;              /* for incrementing code, index */
     47  1.1  christos     unsigned fill;              /* index for replicating entries */
     48  1.1  christos     unsigned low;               /* low bits for current root entry */
     49  1.1  christos     unsigned mask;              /* mask for low root bits */
     50  1.3  christos     code here;                  /* table entry for duplication */
     51  1.1  christos     code FAR *next;             /* next available space in table */
     52  1.1  christos     const unsigned short FAR *base;     /* base value table to use */
     53  1.1  christos     const unsigned short FAR *extra;    /* extra bits table to use */
     54  1.3  christos     unsigned match;             /* use base and extra for symbol >= match */
     55  1.1  christos     unsigned short count[MAXBITS+1];    /* number of codes of each length */
     56  1.1  christos     unsigned short offs[MAXBITS+1];     /* offsets in table for each length */
     57  1.1  christos     static const unsigned short lbase[31] = { /* Length codes 257..285 base */
     58  1.1  christos         3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
     59  1.1  christos         35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
     60  1.1  christos     static const unsigned short lext[31] = { /* Length codes 257..285 extra */
     61  1.1  christos         16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
     62  1.5  christos         19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 203, 77};
     63  1.1  christos     static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
     64  1.1  christos         1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
     65  1.1  christos         257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
     66  1.1  christos         8193, 12289, 16385, 24577, 0, 0};
     67  1.1  christos     static const unsigned short dext[32] = { /* Distance codes 0..29 extra */
     68  1.1  christos         16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
     69  1.1  christos         23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
     70  1.1  christos         28, 28, 29, 29, 64, 64};
     71  1.1  christos 
     72  1.1  christos     /*
     73  1.1  christos        Process a set of code lengths to create a canonical Huffman code.  The
     74  1.1  christos        code lengths are lens[0..codes-1].  Each length corresponds to the
     75  1.1  christos        symbols 0..codes-1.  The Huffman code is generated by first sorting the
     76  1.1  christos        symbols by length from short to long, and retaining the symbol order
     77  1.1  christos        for codes with equal lengths.  Then the code starts with all zero bits
     78  1.1  christos        for the first code of the shortest length, and the codes are integer
     79  1.1  christos        increments for the same length, and zeros are appended as the length
     80  1.1  christos        increases.  For the deflate format, these bits are stored backwards
     81  1.1  christos        from their more natural integer increment ordering, and so when the
     82  1.1  christos        decoding tables are built in the large loop below, the integer codes
     83  1.1  christos        are incremented backwards.
     84  1.1  christos 
     85  1.1  christos        This routine assumes, but does not check, that all of the entries in
     86  1.1  christos        lens[] are in the range 0..MAXBITS.  The caller must assure this.
     87  1.1  christos        1..MAXBITS is interpreted as that code length.  zero means that that
     88  1.1  christos        symbol does not occur in this code.
     89  1.1  christos 
     90  1.1  christos        The codes are sorted by computing a count of codes for each length,
     91  1.1  christos        creating from that a table of starting indices for each length in the
     92  1.1  christos        sorted table, and then entering the symbols in order in the sorted
     93  1.1  christos        table.  The sorted table is work[], with that space being provided by
     94  1.1  christos        the caller.
     95  1.1  christos 
     96  1.1  christos        The length counts are used for other purposes as well, i.e. finding
     97  1.1  christos        the minimum and maximum length codes, determining if there are any
     98  1.1  christos        codes at all, checking for a valid set of lengths, and looking ahead
     99  1.1  christos        at length counts to determine sub-table sizes when building the
    100  1.1  christos        decoding tables.
    101  1.1  christos      */
    102  1.1  christos 
    103  1.1  christos     /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
    104  1.1  christos     for (len = 0; len <= MAXBITS; len++)
    105  1.1  christos         count[len] = 0;
    106  1.1  christos     for (sym = 0; sym < codes; sym++)
    107  1.1  christos         count[lens[sym]]++;
    108  1.1  christos 
    109  1.1  christos     /* bound code lengths, force root to be within code lengths */
    110  1.1  christos     root = *bits;
    111  1.2  christos     for (mmax = MAXBITS; mmax >= 1; mmax--)
    112  1.2  christos         if (count[mmax] != 0) break;
    113  1.2  christos     if (root > mmax) root = mmax;
    114  1.2  christos     if (mmax == 0) {                     /* no symbols to code at all */
    115  1.3  christos         here.op = (unsigned char)64;    /* invalid code marker */
    116  1.3  christos         here.bits = (unsigned char)1;
    117  1.3  christos         here.val = (unsigned short)0;
    118  1.3  christos         *(*table)++ = here;             /* make a table to force an error */
    119  1.3  christos         *(*table)++ = here;
    120  1.1  christos         *bits = 1;
    121  1.1  christos         return 0;     /* no symbols, but wait for decoding to report error */
    122  1.1  christos     }
    123  1.2  christos     for (mmin = 1; mmin <= MAXBITS; mmin++)
    124  1.2  christos         if (count[mmin] != 0) break;
    125  1.2  christos     if (root < mmin) root = mmin;
    126  1.1  christos 
    127  1.1  christos     /* check for an over-subscribed or incomplete set of lengths */
    128  1.1  christos     left = 1;
    129  1.1  christos     for (len = 1; len <= MAXBITS; len++) {
    130  1.1  christos         left <<= 1;
    131  1.1  christos         left -= count[len];
    132  1.1  christos         if (left < 0) return -1;        /* over-subscribed */
    133  1.1  christos     }
    134  1.2  christos     if (left > 0 && (type == CODES || mmax != 1))
    135  1.1  christos         return -1;                      /* incomplete set */
    136  1.1  christos 
    137  1.1  christos     /* generate offsets into symbol table for each length for sorting */
    138  1.1  christos     offs[1] = 0;
    139  1.1  christos     for (len = 1; len < MAXBITS; len++)
    140  1.1  christos         offs[len + 1] = offs[len] + count[len];
    141  1.1  christos 
    142  1.1  christos     /* sort symbols by length, by symbol order within each length */
    143  1.1  christos     for (sym = 0; sym < codes; sym++)
    144  1.1  christos         if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
    145  1.1  christos 
    146  1.1  christos     /*
    147  1.1  christos        Create and fill in decoding tables.  In this loop, the table being
    148  1.1  christos        filled is at next and has curr index bits.  The code being used is huff
    149  1.1  christos        with length len.  That code is converted to an index by dropping drop
    150  1.1  christos        bits off of the bottom.  For codes where len is less than drop + curr,
    151  1.1  christos        those top drop + curr - len bits are incremented through all values to
    152  1.1  christos        fill the table with replicated entries.
    153  1.1  christos 
    154  1.1  christos        root is the number of index bits for the root table.  When len exceeds
    155  1.1  christos        root, sub-tables are created pointed to by the root entry with an index
    156  1.1  christos        of the low root bits of huff.  This is saved in low to check for when a
    157  1.1  christos        new sub-table should be started.  drop is zero when the root table is
    158  1.1  christos        being filled, and drop is root when sub-tables are being filled.
    159  1.1  christos 
    160  1.1  christos        When a new sub-table is needed, it is necessary to look ahead in the
    161  1.1  christos        code lengths to determine what size sub-table is needed.  The length
    162  1.1  christos        counts are used for this, and so count[] is decremented as codes are
    163  1.1  christos        entered in the tables.
    164  1.1  christos 
    165  1.1  christos        used keeps track of how many table entries have been allocated from the
    166  1.3  christos        provided *table space.  It is checked for LENS and DIST tables against
    167  1.3  christos        the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
    168  1.3  christos        the initial root table size constants.  See the comments in inftrees.h
    169  1.3  christos        for more information.
    170  1.1  christos 
    171  1.1  christos        sym increments through all symbols, and the loop terminates when
    172  1.2  christos        all codes of length mmax, i.e. all codes, have been processed.  This
    173  1.1  christos        routine permits incomplete codes, so another loop after this one fills
    174  1.1  christos        in the rest of the decoding tables with invalid code markers.
    175  1.1  christos      */
    176  1.1  christos 
    177  1.1  christos     /* set up for code type */
    178  1.1  christos     switch (type) {
    179  1.1  christos     case CODES:
    180  1.1  christos         base = extra = work;    /* dummy value--not used */
    181  1.3  christos         match = 20;
    182  1.1  christos         break;
    183  1.1  christos     case LENS:
    184  1.1  christos         base = lbase;
    185  1.1  christos         extra = lext;
    186  1.3  christos         match = 257;
    187  1.1  christos         break;
    188  1.3  christos     default:    /* DISTS */
    189  1.1  christos         base = dbase;
    190  1.1  christos         extra = dext;
    191  1.3  christos         match = 0;
    192  1.1  christos     }
    193  1.1  christos 
    194  1.1  christos     /* initialize state for loop */
    195  1.1  christos     huff = 0;                   /* starting code */
    196  1.1  christos     sym = 0;                    /* starting code symbol */
    197  1.2  christos     len = mmin;                  /* starting code length */
    198  1.1  christos     next = *table;              /* current table to fill in */
    199  1.1  christos     curr = root;                /* current table index bits */
    200  1.1  christos     drop = 0;                   /* current bits to drop from code for index */
    201  1.1  christos     low = (unsigned)(-1);       /* trigger new sub-table when len > root */
    202  1.1  christos     used = 1U << root;          /* use root table entries */
    203  1.1  christos     mask = used - 1;            /* mask for comparing low */
    204  1.1  christos 
    205  1.1  christos     /* check available table space */
    206  1.3  christos     if ((type == LENS && used > ENOUGH_LENS) ||
    207  1.3  christos         (type == DISTS && used > ENOUGH_DISTS))
    208  1.1  christos         return 1;
    209  1.1  christos 
    210  1.1  christos     /* process all codes and make table entries */
    211  1.1  christos     for (;;) {
    212  1.1  christos         /* create table entry */
    213  1.3  christos         here.bits = (unsigned char)(len - drop);
    214  1.3  christos         if (work[sym] + 1U < match) {
    215  1.3  christos             here.op = (unsigned char)0;
    216  1.3  christos             here.val = work[sym];
    217  1.1  christos         }
    218  1.3  christos         else if (work[sym] >= match) {
    219  1.3  christos             here.op = (unsigned char)(extra[work[sym] - match]);
    220  1.3  christos             here.val = base[work[sym] - match];
    221  1.1  christos         }
    222  1.1  christos         else {
    223  1.3  christos             here.op = (unsigned char)(32 + 64);         /* end of block */
    224  1.3  christos             here.val = 0;
    225  1.1  christos         }
    226  1.1  christos 
    227  1.1  christos         /* replicate for those indices with low len bits equal to huff */
    228  1.1  christos         incr = 1U << (len - drop);
    229  1.1  christos         fill = 1U << curr;
    230  1.2  christos         mmin = fill;                 /* save offset to next table */
    231  1.1  christos         do {
    232  1.1  christos             fill -= incr;
    233  1.3  christos             next[(huff >> drop) + fill] = here;
    234  1.1  christos         } while (fill != 0);
    235  1.1  christos 
    236  1.1  christos         /* backwards increment the len-bit code huff */
    237  1.1  christos         incr = 1U << (len - 1);
    238  1.1  christos         while (huff & incr)
    239  1.1  christos             incr >>= 1;
    240  1.1  christos         if (incr != 0) {
    241  1.1  christos             huff &= incr - 1;
    242  1.1  christos             huff += incr;
    243  1.1  christos         }
    244  1.1  christos         else
    245  1.1  christos             huff = 0;
    246  1.1  christos 
    247  1.1  christos         /* go to next symbol, update count, len */
    248  1.1  christos         sym++;
    249  1.1  christos         if (--(count[len]) == 0) {
    250  1.2  christos             if (len == mmax) break;
    251  1.1  christos             len = lens[work[sym]];
    252  1.1  christos         }
    253  1.1  christos 
    254  1.1  christos         /* create new sub-table if needed */
    255  1.1  christos         if (len > root && (huff & mask) != low) {
    256  1.1  christos             /* if first time, transition to sub-tables */
    257  1.1  christos             if (drop == 0)
    258  1.1  christos                 drop = root;
    259  1.1  christos 
    260  1.1  christos             /* increment past last table */
    261  1.2  christos             next += mmin;            /* here mmin is 1 << curr */
    262  1.1  christos 
    263  1.1  christos             /* determine length of next table */
    264  1.1  christos             curr = len - drop;
    265  1.1  christos             left = (int)(1 << curr);
    266  1.2  christos             while (curr + drop < mmax) {
    267  1.1  christos                 left -= count[curr + drop];
    268  1.1  christos                 if (left <= 0) break;
    269  1.1  christos                 curr++;
    270  1.1  christos                 left <<= 1;
    271  1.1  christos             }
    272  1.1  christos 
    273  1.1  christos             /* check for enough space */
    274  1.1  christos             used += 1U << curr;
    275  1.3  christos             if ((type == LENS && used > ENOUGH_LENS) ||
    276  1.3  christos                 (type == DISTS && used > ENOUGH_DISTS))
    277  1.1  christos                 return 1;
    278  1.1  christos 
    279  1.1  christos             /* point entry in root table to sub-table */
    280  1.1  christos             low = huff & mask;
    281  1.1  christos             (*table)[low].op = (unsigned char)curr;
    282  1.1  christos             (*table)[low].bits = (unsigned char)root;
    283  1.1  christos             (*table)[low].val = (unsigned short)(next - *table);
    284  1.1  christos         }
    285  1.1  christos     }
    286  1.1  christos 
    287  1.3  christos     /* fill in remaining table entry if code is incomplete (guaranteed to have
    288  1.3  christos        at most one remaining entry, since if the code is incomplete, the
    289  1.3  christos        maximum code length that was allowed to get this far is one bit) */
    290  1.3  christos     if (huff != 0) {
    291  1.3  christos         here.op = (unsigned char)64;            /* invalid code marker */
    292  1.3  christos         here.bits = (unsigned char)(len - drop);
    293  1.3  christos         here.val = (unsigned short)0;
    294  1.3  christos         next[huff] = here;
    295  1.1  christos     }
    296  1.1  christos 
    297  1.1  christos     /* set return parameters */
    298  1.1  christos     *table += used;
    299  1.1  christos     *bits = root;
    300  1.1  christos     return 0;
    301  1.1  christos }
    302