126fa459cSmrg/* Copyright 2016 Google Inc. All Rights Reserved.
226fa459cSmrg
326fa459cSmrg   Distributed under MIT license.
426fa459cSmrg   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
526fa459cSmrg*/
626fa459cSmrg
726fa459cSmrg/**
826fa459cSmrg * @file
926fa459cSmrg * Common constants used in decoder and encoder API.
1026fa459cSmrg */
1126fa459cSmrg
1226fa459cSmrg#ifndef BROTLI_COMMON_CONSTANTS_H_
1326fa459cSmrg#define BROTLI_COMMON_CONSTANTS_H_
1426fa459cSmrg
1526fa459cSmrg#include "./platform.h"
1626fa459cSmrg#include <brotli/port.h>
1726fa459cSmrg#include <brotli/types.h>
1826fa459cSmrg
1926fa459cSmrg/* Specification: 7.3. Encoding of the context map */
2026fa459cSmrg#define BROTLI_CONTEXT_MAP_MAX_RLE 16
2126fa459cSmrg
2226fa459cSmrg/* Specification: 2. Compressed representation overview */
2326fa459cSmrg#define BROTLI_MAX_NUMBER_OF_BLOCK_TYPES 256
2426fa459cSmrg
2526fa459cSmrg/* Specification: 3.3. Alphabet sizes: insert-and-copy length */
2626fa459cSmrg#define BROTLI_NUM_LITERAL_SYMBOLS 256
2726fa459cSmrg#define BROTLI_NUM_COMMAND_SYMBOLS 704
2826fa459cSmrg#define BROTLI_NUM_BLOCK_LEN_SYMBOLS 26
2926fa459cSmrg#define BROTLI_MAX_CONTEXT_MAP_SYMBOLS (BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + \
3026fa459cSmrg                                        BROTLI_CONTEXT_MAP_MAX_RLE)
3126fa459cSmrg#define BROTLI_MAX_BLOCK_TYPE_SYMBOLS (BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 2)
3226fa459cSmrg
3326fa459cSmrg/* Specification: 3.5. Complex prefix codes */
3426fa459cSmrg#define BROTLI_REPEAT_PREVIOUS_CODE_LENGTH 16
3526fa459cSmrg#define BROTLI_REPEAT_ZERO_CODE_LENGTH 17
3626fa459cSmrg#define BROTLI_CODE_LENGTH_CODES (BROTLI_REPEAT_ZERO_CODE_LENGTH + 1)
3726fa459cSmrg/* "code length of 8 is repeated" */
3826fa459cSmrg#define BROTLI_INITIAL_REPEATED_CODE_LENGTH 8
3926fa459cSmrg
4026fa459cSmrg/* "Large Window Brotli" */
4126fa459cSmrg
4226fa459cSmrg/**
4326fa459cSmrg * The theoretical maximum number of distance bits specified for large window
4426fa459cSmrg * brotli, for 64-bit encoders and decoders. Even when in practice 32-bit
4526fa459cSmrg * encoders and decoders only support up to 30 max distance bits, the value is
4626fa459cSmrg * set to 62 because it affects the large window brotli file format.
4726fa459cSmrg * Specifically, it affects the encoding of simple huffman tree for distances,
4826fa459cSmrg * see Specification RFC 7932 chapter 3.4.
4926fa459cSmrg */
5026fa459cSmrg#define BROTLI_LARGE_MAX_DISTANCE_BITS 62U
5126fa459cSmrg#define BROTLI_LARGE_MIN_WBITS 10
5226fa459cSmrg/**
5326fa459cSmrg * The maximum supported large brotli window bits by the encoder and decoder.
5426fa459cSmrg * Large window brotli allows up to 62 bits, however the current encoder and
5526fa459cSmrg * decoder, designed for 32-bit integers, only support up to 30 bits maximum.
5626fa459cSmrg */
5726fa459cSmrg#define BROTLI_LARGE_MAX_WBITS 30
5826fa459cSmrg
5926fa459cSmrg/* Specification: 4. Encoding of distances */
6026fa459cSmrg#define BROTLI_NUM_DISTANCE_SHORT_CODES 16
6126fa459cSmrg/**
6226fa459cSmrg * Maximal number of "postfix" bits.
6326fa459cSmrg *
6426fa459cSmrg * Number of "postfix" bits is stored as 2 bits in meta-block header.
6526fa459cSmrg */
6626fa459cSmrg#define BROTLI_MAX_NPOSTFIX 3
6726fa459cSmrg#define BROTLI_MAX_NDIRECT 120
6826fa459cSmrg#define BROTLI_MAX_DISTANCE_BITS 24U
6926fa459cSmrg#define BROTLI_DISTANCE_ALPHABET_SIZE(NPOSTFIX, NDIRECT, MAXNBITS) ( \
7026fa459cSmrg    BROTLI_NUM_DISTANCE_SHORT_CODES + (NDIRECT) +                    \
7126fa459cSmrg    ((MAXNBITS) << ((NPOSTFIX) + 1)))
7226fa459cSmrg/* BROTLI_NUM_DISTANCE_SYMBOLS == 1128 */
7326fa459cSmrg#define BROTLI_NUM_DISTANCE_SYMBOLS \
7426fa459cSmrg    BROTLI_DISTANCE_ALPHABET_SIZE(  \
7526fa459cSmrg        BROTLI_MAX_NDIRECT, BROTLI_MAX_NPOSTFIX, BROTLI_LARGE_MAX_DISTANCE_BITS)
7626fa459cSmrg
7726fa459cSmrg/* ((1 << 26) - 4) is the maximal distance that can be expressed in RFC 7932
7826fa459cSmrg   brotli stream using NPOSTFIX = 0 and NDIRECT = 0. With other NPOSTFIX and
7926fa459cSmrg   NDIRECT values distances up to ((1 << 29) + 88) could be expressed. */
8026fa459cSmrg#define BROTLI_MAX_DISTANCE 0x3FFFFFC
8126fa459cSmrg
8226fa459cSmrg/* ((1 << 31) - 4) is the safe distance limit. Using this number as a limit
8326fa459cSmrg   allows safe distance calculation without overflows, given the distance
8426fa459cSmrg   alphabet size is limited to corresponding size
8526fa459cSmrg   (see kLargeWindowDistanceCodeLimits). */
8626fa459cSmrg#define BROTLI_MAX_ALLOWED_DISTANCE 0x7FFFFFFC
8726fa459cSmrg
8826fa459cSmrg
8926fa459cSmrg/* Specification: 4. Encoding of Literal Insertion Lengths and Copy Lengths */
9026fa459cSmrg#define BROTLI_NUM_INS_COPY_CODES 24
9126fa459cSmrg
9226fa459cSmrg/* 7.1. Context modes and context ID lookup for literals */
9326fa459cSmrg/* "context IDs for literals are in the range of 0..63" */
9426fa459cSmrg#define BROTLI_LITERAL_CONTEXT_BITS 6
9526fa459cSmrg
9626fa459cSmrg/* 7.2. Context ID for distances */
9726fa459cSmrg#define BROTLI_DISTANCE_CONTEXT_BITS 2
9826fa459cSmrg
9926fa459cSmrg/* 9.1. Format of the Stream Header */
10026fa459cSmrg/* Number of slack bytes for window size. Don't confuse
10126fa459cSmrg   with BROTLI_NUM_DISTANCE_SHORT_CODES. */
10226fa459cSmrg#define BROTLI_WINDOW_GAP 16
10326fa459cSmrg#define BROTLI_MAX_BACKWARD_LIMIT(W) (((size_t)1 << (W)) - BROTLI_WINDOW_GAP)
10426fa459cSmrg
10526fa459cSmrgtypedef struct BrotliDistanceCodeLimit {
10626fa459cSmrg  uint32_t max_alphabet_size;
10726fa459cSmrg  uint32_t max_distance;
10826fa459cSmrg} BrotliDistanceCodeLimit;
10926fa459cSmrg
11026fa459cSmrg/* This function calculates maximal size of distance alphabet, such that the
11126fa459cSmrg   distances greater than the given values can not be represented.
11226fa459cSmrg
11326fa459cSmrg   This limits are designed to support fast and safe 32-bit decoders.
11426fa459cSmrg   "32-bit" means that signed integer values up to ((1 << 31) - 1) could be
11526fa459cSmrg   safely expressed.
11626fa459cSmrg
11726fa459cSmrg   Brotli distance alphabet symbols do not represent consecutive distance
11826fa459cSmrg   ranges. Each distance alphabet symbol (excluding direct distances and short
11926fa459cSmrg   codes), represent interleaved (for NPOSTFIX > 0) range of distances.
12026fa459cSmrg   A "group" of consecutive (1 << NPOSTFIX) symbols represent non-interleaved
12126fa459cSmrg   range. Two consecutive groups require the same amount of "extra bits".
12226fa459cSmrg
12326fa459cSmrg   It is important that distance alphabet represents complete "groups".
12426fa459cSmrg   To avoid complex logic on encoder side about interleaved ranges
12526fa459cSmrg   it was decided to restrict both sides to complete distance code "groups".
12626fa459cSmrg */
12726fa459cSmrgBROTLI_UNUSED_FUNCTION BrotliDistanceCodeLimit BrotliCalculateDistanceCodeLimit(
12826fa459cSmrg    uint32_t max_distance, uint32_t npostfix, uint32_t ndirect) {
12926fa459cSmrg  BrotliDistanceCodeLimit result;
13026fa459cSmrg  /* Marking this function as unused, because not all files
13126fa459cSmrg     including "constants.h" use it -> compiler warns about that. */
13226fa459cSmrg  BROTLI_UNUSED(&BrotliCalculateDistanceCodeLimit);
13326fa459cSmrg  if (max_distance <= ndirect) {
13426fa459cSmrg    /* This case never happens / exists only for the sake of completeness. */
13526fa459cSmrg    result.max_alphabet_size = max_distance + BROTLI_NUM_DISTANCE_SHORT_CODES;
13626fa459cSmrg    result.max_distance = max_distance;
13726fa459cSmrg    return result;
13826fa459cSmrg  } else {
13926fa459cSmrg    /* The first prohibited value. */
14026fa459cSmrg    uint32_t forbidden_distance = max_distance + 1;
14126fa459cSmrg    /* Subtract "directly" encoded region. */
14226fa459cSmrg    uint32_t offset = forbidden_distance - ndirect - 1;
14326fa459cSmrg    uint32_t ndistbits = 0;
14426fa459cSmrg    uint32_t tmp;
14526fa459cSmrg    uint32_t half;
14626fa459cSmrg    uint32_t group;
14726fa459cSmrg    /* Postfix for the last dcode in the group. */
14826fa459cSmrg    uint32_t postfix = (1u << npostfix) - 1;
14926fa459cSmrg    uint32_t extra;
15026fa459cSmrg    uint32_t start;
15126fa459cSmrg    /* Remove postfix and "head-start". */
15226fa459cSmrg    offset = (offset >> npostfix) + 4;
15326fa459cSmrg    /* Calculate the number of distance bits. */
15426fa459cSmrg    tmp = offset / 2;
15526fa459cSmrg    /* Poor-man's log2floor, to avoid extra dependencies. */
15626fa459cSmrg    while (tmp != 0) {ndistbits++; tmp = tmp >> 1;}
15726fa459cSmrg    /* One bit is covered with subrange addressing ("half"). */
15826fa459cSmrg    ndistbits--;
15926fa459cSmrg    /* Find subrange. */
16026fa459cSmrg    half = (offset >> ndistbits) & 1;
16126fa459cSmrg    /* Calculate the "group" part of dcode. */
16226fa459cSmrg    group = ((ndistbits - 1) << 1) | half;
16326fa459cSmrg    /* Calculated "group" covers the prohibited distance value. */
16426fa459cSmrg    if (group == 0) {
16526fa459cSmrg      /* This case is added for correctness; does not occur for limit > 128. */
16626fa459cSmrg      result.max_alphabet_size = ndirect + BROTLI_NUM_DISTANCE_SHORT_CODES;
16726fa459cSmrg      result.max_distance = ndirect;
16826fa459cSmrg      return result;
16926fa459cSmrg    }
17026fa459cSmrg    /* Decrement "group", so it is the last permitted "group". */
17126fa459cSmrg    group--;
17226fa459cSmrg    /* After group was decremented, ndistbits and half must be recalculated. */
17326fa459cSmrg    ndistbits = (group >> 1) + 1;
17426fa459cSmrg    /* The last available distance in the subrange has all extra bits set. */
17526fa459cSmrg    extra = (1u << ndistbits) - 1;
17626fa459cSmrg    /* Calculate region start. NB: ndistbits >= 1. */
17726fa459cSmrg    start = (1u << (ndistbits + 1)) - 4;
17826fa459cSmrg    /* Move to subregion. */
17926fa459cSmrg    start += (group & 1) << ndistbits;
18026fa459cSmrg    /* Calculate the alphabet size. */
18126fa459cSmrg    result.max_alphabet_size = ((group << npostfix) | postfix) + ndirect +
18226fa459cSmrg        BROTLI_NUM_DISTANCE_SHORT_CODES + 1;
18326fa459cSmrg    /* Calculate the maximal distance representable by alphabet. */
18426fa459cSmrg    result.max_distance = ((start + extra) << npostfix) + postfix + ndirect + 1;
18526fa459cSmrg    return result;
18626fa459cSmrg  }
18726fa459cSmrg}
18826fa459cSmrg
18926fa459cSmrg/* Represents the range of values belonging to a prefix code:
19026fa459cSmrg   [offset, offset + 2^nbits) */
19126fa459cSmrgtypedef struct {
19226fa459cSmrg  uint16_t offset;
19326fa459cSmrg  uint8_t nbits;
19426fa459cSmrg} BrotliPrefixCodeRange;
19526fa459cSmrg
19626fa459cSmrg/* "Soft-private", it is exported, but not "advertised" as API. */
19726fa459cSmrgBROTLI_COMMON_API extern const BrotliPrefixCodeRange
19826fa459cSmrg    _kBrotliPrefixCodeRanges[BROTLI_NUM_BLOCK_LEN_SYMBOLS];
19926fa459cSmrg
20026fa459cSmrg#endif  /* BROTLI_COMMON_CONSTANTS_H_ */
201