Home | History | Annotate | Line # | Download | only in infback9
      1      1.1  christos /* inftree9.c -- generate Huffman trees for efficient decoding
      2  1.1.1.4  christos  * Copyright (C) 1995-2024 Mark Adler
      3      1.1  christos  * For conditions of distribution and use, see copyright notice in zlib.h
      4      1.1  christos  */
      5      1.1  christos 
      6      1.1  christos #include "zutil.h"
      7      1.1  christos #include "inftree9.h"
      8      1.1  christos 
      9      1.1  christos #define MAXBITS 15
     10      1.1  christos 
     11      1.1  christos const char inflate9_copyright[] =
     12  1.1.1.4  christos    " inflate9 1.3.1 Copyright 1995-2024 Mark Adler ";
     13      1.1  christos /*
     14      1.1  christos   If you use the zlib library in a product, an acknowledgment is welcome
     15      1.1  christos   in the documentation of your product. If for some reason you cannot
     16      1.1  christos   include such an acknowledgment, I would appreciate that you keep this
     17      1.1  christos   copyright string in the executable of your product.
     18      1.1  christos  */
     19      1.1  christos 
     20      1.1  christos /*
     21      1.1  christos    Build a set of tables to decode the provided canonical Huffman code.
     22      1.1  christos    The code lengths are lens[0..codes-1].  The result starts at *table,
     23      1.1  christos    whose indices are 0..2^bits-1.  work is a writable array of at least
     24      1.1  christos    lens shorts, which is used as a work area.  type is the type of code
     25      1.1  christos    to be generated, CODES, LENS, or DISTS.  On return, zero is success,
     26      1.1  christos    -1 is an invalid code, and +1 means that ENOUGH isn't enough.  table
     27      1.1  christos    on return points to the next available entry's address.  bits is the
     28      1.1  christos    requested root table index bits, and on return it is the actual root
     29      1.1  christos    table index bits.  It will differ if the request is greater than the
     30      1.1  christos    longest code or if it is less than the shortest code.
     31      1.1  christos  */
     32  1.1.1.4  christos int inflate_table9(codetype type, unsigned short FAR *lens, unsigned codes,
     33  1.1.1.4  christos                    code FAR * FAR *table, unsigned FAR *bits,
     34  1.1.1.4  christos                    unsigned short FAR *work) {
     35      1.1  christos     unsigned len;               /* a code's length in bits */
     36      1.1  christos     unsigned sym;               /* index of code symbols */
     37      1.1  christos     unsigned min, max;          /* minimum and maximum code lengths */
     38      1.1  christos     unsigned root;              /* number of index bits for root table */
     39      1.1  christos     unsigned curr;              /* number of index bits for current table */
     40      1.1  christos     unsigned drop;              /* code bits to drop for sub-table */
     41      1.1  christos     int left;                   /* number of prefix codes available */
     42      1.1  christos     unsigned used;              /* code entries in table used */
     43      1.1  christos     unsigned huff;              /* Huffman code */
     44      1.1  christos     unsigned incr;              /* for incrementing code, index */
     45      1.1  christos     unsigned fill;              /* index for replicating entries */
     46      1.1  christos     unsigned low;               /* low bits for current root entry */
     47      1.1  christos     unsigned mask;              /* mask for low root bits */
     48      1.1  christos     code this;                  /* table entry for duplication */
     49      1.1  christos     code FAR *next;             /* next available space in table */
     50      1.1  christos     const unsigned short FAR *base;     /* base value table to use */
     51      1.1  christos     const unsigned short FAR *extra;    /* extra bits table to use */
     52      1.1  christos     int end;                    /* use base and extra for symbol > end */
     53      1.1  christos     unsigned short count[MAXBITS+1];    /* number of codes of each length */
     54      1.1  christos     unsigned short offs[MAXBITS+1];     /* offsets in table for each length */
     55      1.1  christos     static const unsigned short lbase[31] = { /* Length codes 257..285 base */
     56      1.1  christos         3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17,
     57      1.1  christos         19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115,
     58      1.1  christos         131, 163, 195, 227, 3, 0, 0};
     59      1.1  christos     static const unsigned short lext[31] = { /* Length codes 257..285 extra */
     60      1.1  christos         128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129,
     61      1.1  christos         130, 130, 130, 130, 131, 131, 131, 131, 132, 132, 132, 132,
     62  1.1.1.4  christos         133, 133, 133, 133, 144, 203, 77};
     63      1.1  christos     static const unsigned short dbase[32] = { /* Distance codes 0..31 base */
     64      1.1  christos         1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49,
     65      1.1  christos         65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073,
     66      1.1  christos         4097, 6145, 8193, 12289, 16385, 24577, 32769, 49153};
     67      1.1  christos     static const unsigned short dext[32] = { /* Distance codes 0..31 extra */
     68      1.1  christos         128, 128, 128, 128, 129, 129, 130, 130, 131, 131, 132, 132,
     69      1.1  christos         133, 133, 134, 134, 135, 135, 136, 136, 137, 137, 138, 138,
     70      1.1  christos         139, 139, 140, 140, 141, 141, 142, 142};
     71      1.1  christos 
     72      1.1  christos     /*
     73      1.1  christos        Process a set of code lengths to create a canonical Huffman code.  The
     74      1.1  christos        code lengths are lens[0..codes-1].  Each length corresponds to the
     75      1.1  christos        symbols 0..codes-1.  The Huffman code is generated by first sorting the
     76      1.1  christos        symbols by length from short to long, and retaining the symbol order
     77      1.1  christos        for codes with equal lengths.  Then the code starts with all zero bits
     78      1.1  christos        for the first code of the shortest length, and the codes are integer
     79      1.1  christos        increments for the same length, and zeros are appended as the length
     80      1.1  christos        increases.  For the deflate format, these bits are stored backwards
     81      1.1  christos        from their more natural integer increment ordering, and so when the
     82      1.1  christos        decoding tables are built in the large loop below, the integer codes
     83      1.1  christos        are incremented backwards.
     84      1.1  christos 
     85      1.1  christos        This routine assumes, but does not check, that all of the entries in
     86      1.1  christos        lens[] are in the range 0..MAXBITS.  The caller must assure this.
     87      1.1  christos        1..MAXBITS is interpreted as that code length.  zero means that that
     88      1.1  christos        symbol does not occur in this code.
     89      1.1  christos 
     90      1.1  christos        The codes are sorted by computing a count of codes for each length,
     91      1.1  christos        creating from that a table of starting indices for each length in the
     92      1.1  christos        sorted table, and then entering the symbols in order in the sorted
     93      1.1  christos        table.  The sorted table is work[], with that space being provided by
     94      1.1  christos        the caller.
     95      1.1  christos 
     96      1.1  christos        The length counts are used for other purposes as well, i.e. finding
     97      1.1  christos        the minimum and maximum length codes, determining if there are any
     98      1.1  christos        codes at all, checking for a valid set of lengths, and looking ahead
     99      1.1  christos        at length counts to determine sub-table sizes when building the
    100      1.1  christos        decoding tables.
    101      1.1  christos      */
    102      1.1  christos 
    103      1.1  christos     /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
    104      1.1  christos     for (len = 0; len <= MAXBITS; len++)
    105      1.1  christos         count[len] = 0;
    106      1.1  christos     for (sym = 0; sym < codes; sym++)
    107      1.1  christos         count[lens[sym]]++;
    108      1.1  christos 
    109      1.1  christos     /* bound code lengths, force root to be within code lengths */
    110      1.1  christos     root = *bits;
    111      1.1  christos     for (max = MAXBITS; max >= 1; max--)
    112      1.1  christos         if (count[max] != 0) break;
    113      1.1  christos     if (root > max) root = max;
    114      1.1  christos     if (max == 0) return -1;            /* no codes! */
    115      1.1  christos     for (min = 1; min <= MAXBITS; min++)
    116      1.1  christos         if (count[min] != 0) break;
    117      1.1  christos     if (root < min) root = min;
    118      1.1  christos 
    119      1.1  christos     /* check for an over-subscribed or incomplete set of lengths */
    120      1.1  christos     left = 1;
    121      1.1  christos     for (len = 1; len <= MAXBITS; len++) {
    122      1.1  christos         left <<= 1;
    123      1.1  christos         left -= count[len];
    124      1.1  christos         if (left < 0) return -1;        /* over-subscribed */
    125      1.1  christos     }
    126      1.1  christos     if (left > 0 && (type == CODES || max != 1))
    127      1.1  christos         return -1;                      /* incomplete set */
    128      1.1  christos 
    129      1.1  christos     /* generate offsets into symbol table for each length for sorting */
    130      1.1  christos     offs[1] = 0;
    131      1.1  christos     for (len = 1; len < MAXBITS; len++)
    132      1.1  christos         offs[len + 1] = offs[len] + count[len];
    133      1.1  christos 
    134      1.1  christos     /* sort symbols by length, by symbol order within each length */
    135      1.1  christos     for (sym = 0; sym < codes; sym++)
    136      1.1  christos         if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
    137      1.1  christos 
    138      1.1  christos     /*
    139      1.1  christos        Create and fill in decoding tables.  In this loop, the table being
    140      1.1  christos        filled is at next and has curr index bits.  The code being used is huff
    141      1.1  christos        with length len.  That code is converted to an index by dropping drop
    142      1.1  christos        bits off of the bottom.  For codes where len is less than drop + curr,
    143      1.1  christos        those top drop + curr - len bits are incremented through all values to
    144      1.1  christos        fill the table with replicated entries.
    145      1.1  christos 
    146      1.1  christos        root is the number of index bits for the root table.  When len exceeds
    147      1.1  christos        root, sub-tables are created pointed to by the root entry with an index
    148      1.1  christos        of the low root bits of huff.  This is saved in low to check for when a
    149      1.1  christos        new sub-table should be started.  drop is zero when the root table is
    150      1.1  christos        being filled, and drop is root when sub-tables are being filled.
    151      1.1  christos 
    152      1.1  christos        When a new sub-table is needed, it is necessary to look ahead in the
    153      1.1  christos        code lengths to determine what size sub-table is needed.  The length
    154      1.1  christos        counts are used for this, and so count[] is decremented as codes are
    155      1.1  christos        entered in the tables.
    156      1.1  christos 
    157      1.1  christos        used keeps track of how many table entries have been allocated from the
    158  1.1.1.2  christos        provided *table space.  It is checked for LENS and DIST tables against
    159  1.1.1.2  christos        the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
    160  1.1.1.2  christos        the initial root table size constants.  See the comments in inftree9.h
    161  1.1.1.2  christos        for more information.
    162      1.1  christos 
    163      1.1  christos        sym increments through all symbols, and the loop terminates when
    164      1.1  christos        all codes of length max, i.e. all codes, have been processed.  This
    165      1.1  christos        routine permits incomplete codes, so another loop after this one fills
    166      1.1  christos        in the rest of the decoding tables with invalid code markers.
    167      1.1  christos      */
    168      1.1  christos 
    169      1.1  christos     /* set up for code type */
    170      1.1  christos     switch (type) {
    171      1.1  christos     case CODES:
    172      1.1  christos         base = extra = work;    /* dummy value--not used */
    173      1.1  christos         end = 19;
    174      1.1  christos         break;
    175      1.1  christos     case LENS:
    176      1.1  christos         base = lbase;
    177      1.1  christos         base -= 257;
    178      1.1  christos         extra = lext;
    179      1.1  christos         extra -= 257;
    180      1.1  christos         end = 256;
    181      1.1  christos         break;
    182      1.1  christos     default:            /* DISTS */
    183      1.1  christos         base = dbase;
    184      1.1  christos         extra = dext;
    185      1.1  christos         end = -1;
    186      1.1  christos     }
    187      1.1  christos 
    188      1.1  christos     /* initialize state for loop */
    189      1.1  christos     huff = 0;                   /* starting code */
    190      1.1  christos     sym = 0;                    /* starting code symbol */
    191      1.1  christos     len = min;                  /* starting code length */
    192      1.1  christos     next = *table;              /* current table to fill in */
    193      1.1  christos     curr = root;                /* current table index bits */
    194      1.1  christos     drop = 0;                   /* current bits to drop from code for index */
    195      1.1  christos     low = (unsigned)(-1);       /* trigger new sub-table when len > root */
    196      1.1  christos     used = 1U << root;          /* use root table entries */
    197      1.1  christos     mask = used - 1;            /* mask for comparing low */
    198      1.1  christos 
    199      1.1  christos     /* check available table space */
    200  1.1.1.2  christos     if ((type == LENS && used >= ENOUGH_LENS) ||
    201  1.1.1.2  christos         (type == DISTS && used >= ENOUGH_DISTS))
    202      1.1  christos         return 1;
    203      1.1  christos 
    204      1.1  christos     /* process all codes and make table entries */
    205      1.1  christos     for (;;) {
    206      1.1  christos         /* create table entry */
    207      1.1  christos         this.bits = (unsigned char)(len - drop);
    208      1.1  christos         if ((int)(work[sym]) < end) {
    209      1.1  christos             this.op = (unsigned char)0;
    210      1.1  christos             this.val = work[sym];
    211      1.1  christos         }
    212      1.1  christos         else if ((int)(work[sym]) > end) {
    213      1.1  christos             this.op = (unsigned char)(extra[work[sym]]);
    214      1.1  christos             this.val = base[work[sym]];
    215      1.1  christos         }
    216      1.1  christos         else {
    217      1.1  christos             this.op = (unsigned char)(32 + 64);         /* end of block */
    218      1.1  christos             this.val = 0;
    219      1.1  christos         }
    220      1.1  christos 
    221      1.1  christos         /* replicate for those indices with low len bits equal to huff */
    222      1.1  christos         incr = 1U << (len - drop);
    223      1.1  christos         fill = 1U << curr;
    224      1.1  christos         do {
    225      1.1  christos             fill -= incr;
    226      1.1  christos             next[(huff >> drop) + fill] = this;
    227      1.1  christos         } while (fill != 0);
    228      1.1  christos 
    229      1.1  christos         /* backwards increment the len-bit code huff */
    230      1.1  christos         incr = 1U << (len - 1);
    231      1.1  christos         while (huff & incr)
    232      1.1  christos             incr >>= 1;
    233      1.1  christos         if (incr != 0) {
    234      1.1  christos             huff &= incr - 1;
    235      1.1  christos             huff += incr;
    236      1.1  christos         }
    237      1.1  christos         else
    238      1.1  christos             huff = 0;
    239      1.1  christos 
    240      1.1  christos         /* go to next symbol, update count, len */
    241      1.1  christos         sym++;
    242      1.1  christos         if (--(count[len]) == 0) {
    243      1.1  christos             if (len == max) break;
    244      1.1  christos             len = lens[work[sym]];
    245      1.1  christos         }
    246      1.1  christos 
    247      1.1  christos         /* create new sub-table if needed */
    248      1.1  christos         if (len > root && (huff & mask) != low) {
    249      1.1  christos             /* if first time, transition to sub-tables */
    250      1.1  christos             if (drop == 0)
    251      1.1  christos                 drop = root;
    252      1.1  christos 
    253      1.1  christos             /* increment past last table */
    254      1.1  christos             next += 1U << curr;
    255      1.1  christos 
    256      1.1  christos             /* determine length of next table */
    257      1.1  christos             curr = len - drop;
    258      1.1  christos             left = (int)(1 << curr);
    259      1.1  christos             while (curr + drop < max) {
    260      1.1  christos                 left -= count[curr + drop];
    261      1.1  christos                 if (left <= 0) break;
    262      1.1  christos                 curr++;
    263      1.1  christos                 left <<= 1;
    264      1.1  christos             }
    265      1.1  christos 
    266      1.1  christos             /* check for enough space */
    267      1.1  christos             used += 1U << curr;
    268  1.1.1.2  christos             if ((type == LENS && used >= ENOUGH_LENS) ||
    269  1.1.1.2  christos                 (type == DISTS && used >= ENOUGH_DISTS))
    270      1.1  christos                 return 1;
    271      1.1  christos 
    272      1.1  christos             /* point entry in root table to sub-table */
    273      1.1  christos             low = huff & mask;
    274      1.1  christos             (*table)[low].op = (unsigned char)curr;
    275      1.1  christos             (*table)[low].bits = (unsigned char)root;
    276      1.1  christos             (*table)[low].val = (unsigned short)(next - *table);
    277      1.1  christos         }
    278      1.1  christos     }
    279      1.1  christos 
    280      1.1  christos     /*
    281      1.1  christos        Fill in rest of table for incomplete codes.  This loop is similar to the
    282      1.1  christos        loop above in incrementing huff for table indices.  It is assumed that
    283      1.1  christos        len is equal to curr + drop, so there is no loop needed to increment
    284      1.1  christos        through high index bits.  When the current sub-table is filled, the loop
    285      1.1  christos        drops back to the root table to fill in any remaining entries there.
    286      1.1  christos      */
    287      1.1  christos     this.op = (unsigned char)64;                /* invalid code marker */
    288      1.1  christos     this.bits = (unsigned char)(len - drop);
    289      1.1  christos     this.val = (unsigned short)0;
    290      1.1  christos     while (huff != 0) {
    291      1.1  christos         /* when done with sub-table, drop back to root table */
    292      1.1  christos         if (drop != 0 && (huff & mask) != low) {
    293      1.1  christos             drop = 0;
    294      1.1  christos             len = root;
    295      1.1  christos             next = *table;
    296      1.1  christos             curr = root;
    297      1.1  christos             this.bits = (unsigned char)len;
    298      1.1  christos         }
    299      1.1  christos 
    300      1.1  christos         /* put invalid code marker in table */
    301      1.1  christos         next[huff >> drop] = this;
    302      1.1  christos 
    303      1.1  christos         /* backwards increment the len-bit code huff */
    304      1.1  christos         incr = 1U << (len - 1);
    305      1.1  christos         while (huff & incr)
    306      1.1  christos             incr >>= 1;
    307      1.1  christos         if (incr != 0) {
    308      1.1  christos             huff &= incr - 1;
    309      1.1  christos             huff += incr;
    310      1.1  christos         }
    311      1.1  christos         else
    312      1.1  christos             huff = 0;
    313      1.1  christos     }
    314      1.1  christos 
    315      1.1  christos     /* set return parameters */
    316      1.1  christos     *table += used;
    317      1.1  christos     *bits = root;
    318      1.1  christos     return 0;
    319      1.1  christos }
    320