126fa459cSmrg/* NOLINT(build/header_guard) */
226fa459cSmrg/* Copyright 2015 Google Inc. All Rights Reserved.
326fa459cSmrg
426fa459cSmrg   Distributed under MIT license.
526fa459cSmrg   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
626fa459cSmrg*/
726fa459cSmrg
826fa459cSmrg/* template parameters: FN */
926fa459cSmrg
1026fa459cSmrg#define HistogramType FN(Histogram)
1126fa459cSmrg
1226fa459cSmrg/* Greedy block splitter for one block category (literal, command or distance).
1326fa459cSmrg*/
1426fa459cSmrgtypedef struct FN(BlockSplitter) {
1526fa459cSmrg  /* Alphabet size of particular block category. */
1626fa459cSmrg  size_t alphabet_size_;
1726fa459cSmrg  /* We collect at least this many symbols for each block. */
1826fa459cSmrg  size_t min_block_size_;
1926fa459cSmrg  /* We merge histograms A and B if
2026fa459cSmrg       entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
2126fa459cSmrg     where A is the current histogram and B is the histogram of the last or the
2226fa459cSmrg     second last block type. */
2326fa459cSmrg  double split_threshold_;
2426fa459cSmrg
2526fa459cSmrg  size_t num_blocks_;
2626fa459cSmrg  BlockSplit* split_;  /* not owned */
2726fa459cSmrg  HistogramType* histograms_;  /* not owned */
2826fa459cSmrg  size_t* histograms_size_;  /* not owned */
2926fa459cSmrg
3026fa459cSmrg  /* The number of symbols that we want to collect before deciding on whether
3126fa459cSmrg     or not to merge the block with a previous one or emit a new block. */
3226fa459cSmrg  size_t target_block_size_;
3326fa459cSmrg  /* The number of symbols in the current histogram. */
3426fa459cSmrg  size_t block_size_;
3526fa459cSmrg  /* Offset of the current histogram. */
3626fa459cSmrg  size_t curr_histogram_ix_;
3726fa459cSmrg  /* Offset of the histograms of the previous two block types. */
3826fa459cSmrg  size_t last_histogram_ix_[2];
3926fa459cSmrg  /* Entropy of the previous two block types. */
4026fa459cSmrg  double last_entropy_[2];
4126fa459cSmrg  /* The number of times we merged the current block with the last one. */
4226fa459cSmrg  size_t merge_last_count_;
4326fa459cSmrg} FN(BlockSplitter);
4426fa459cSmrg
4526fa459cSmrgstatic void FN(InitBlockSplitter)(
4626fa459cSmrg    MemoryManager* m, FN(BlockSplitter)* self, size_t alphabet_size,
4726fa459cSmrg    size_t min_block_size, double split_threshold, size_t num_symbols,
4826fa459cSmrg    BlockSplit* split, HistogramType** histograms, size_t* histograms_size) {
4926fa459cSmrg  size_t max_num_blocks = num_symbols / min_block_size + 1;
5026fa459cSmrg  /* We have to allocate one more histogram than the maximum number of block
5126fa459cSmrg     types for the current histogram when the meta-block is too big. */
5226fa459cSmrg  size_t max_num_types =
5326fa459cSmrg      BROTLI_MIN(size_t, max_num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 1);
5426fa459cSmrg  self->alphabet_size_ = alphabet_size;
5526fa459cSmrg  self->min_block_size_ = min_block_size;
5626fa459cSmrg  self->split_threshold_ = split_threshold;
5726fa459cSmrg  self->num_blocks_ = 0;
5826fa459cSmrg  self->split_ = split;
5926fa459cSmrg  self->histograms_size_ = histograms_size;
6026fa459cSmrg  self->target_block_size_ = min_block_size;
6126fa459cSmrg  self->block_size_ = 0;
6226fa459cSmrg  self->curr_histogram_ix_ = 0;
6326fa459cSmrg  self->merge_last_count_ = 0;
6426fa459cSmrg  BROTLI_ENSURE_CAPACITY(m, uint8_t,
6526fa459cSmrg      split->types, split->types_alloc_size, max_num_blocks);
6626fa459cSmrg  BROTLI_ENSURE_CAPACITY(m, uint32_t,
6726fa459cSmrg      split->lengths, split->lengths_alloc_size, max_num_blocks);
6826fa459cSmrg  if (BROTLI_IS_OOM(m)) return;
6926fa459cSmrg  self->split_->num_blocks = max_num_blocks;
7026fa459cSmrg  BROTLI_DCHECK(*histograms == 0);
7126fa459cSmrg  *histograms_size = max_num_types;
7226fa459cSmrg  *histograms = BROTLI_ALLOC(m, HistogramType, *histograms_size);
7326fa459cSmrg  self->histograms_ = *histograms;
7426fa459cSmrg  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(*histograms)) return;
7526fa459cSmrg  /* Clear only current histogram. */
7626fa459cSmrg  FN(HistogramClear)(&self->histograms_[0]);
7726fa459cSmrg  self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0;
7826fa459cSmrg}
7926fa459cSmrg
8026fa459cSmrg/* Does either of three things:
8126fa459cSmrg     (1) emits the current block with a new block type;
8226fa459cSmrg     (2) emits the current block with the type of the second last block;
8326fa459cSmrg     (3) merges the current block with the last block. */
8426fa459cSmrgstatic void FN(BlockSplitterFinishBlock)(
8526fa459cSmrg    FN(BlockSplitter)* self, BROTLI_BOOL is_final) {
8626fa459cSmrg  BlockSplit* split = self->split_;
8726fa459cSmrg  double* last_entropy = self->last_entropy_;
8826fa459cSmrg  HistogramType* histograms = self->histograms_;
8926fa459cSmrg  self->block_size_ =
9026fa459cSmrg      BROTLI_MAX(size_t, self->block_size_, self->min_block_size_);
9126fa459cSmrg  if (self->num_blocks_ == 0) {
9226fa459cSmrg    /* Create first block. */
9326fa459cSmrg    split->lengths[0] = (uint32_t)self->block_size_;
9426fa459cSmrg    split->types[0] = 0;
9526fa459cSmrg    last_entropy[0] =
9626fa459cSmrg        BitsEntropy(histograms[0].data_, self->alphabet_size_);
9726fa459cSmrg    last_entropy[1] = last_entropy[0];
9826fa459cSmrg    ++self->num_blocks_;
9926fa459cSmrg    ++split->num_types;
10026fa459cSmrg    ++self->curr_histogram_ix_;
10126fa459cSmrg    if (self->curr_histogram_ix_ < *self->histograms_size_)
10226fa459cSmrg      FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
10326fa459cSmrg    self->block_size_ = 0;
10426fa459cSmrg  } else if (self->block_size_ > 0) {
10526fa459cSmrg    double entropy = BitsEntropy(histograms[self->curr_histogram_ix_].data_,
10626fa459cSmrg                                 self->alphabet_size_);
10726fa459cSmrg    HistogramType combined_histo[2];
10826fa459cSmrg    double combined_entropy[2];
10926fa459cSmrg    double diff[2];
11026fa459cSmrg    size_t j;
11126fa459cSmrg    for (j = 0; j < 2; ++j) {
11226fa459cSmrg      size_t last_histogram_ix = self->last_histogram_ix_[j];
11326fa459cSmrg      combined_histo[j] = histograms[self->curr_histogram_ix_];
11426fa459cSmrg      FN(HistogramAddHistogram)(&combined_histo[j],
11526fa459cSmrg          &histograms[last_histogram_ix]);
11626fa459cSmrg      combined_entropy[j] = BitsEntropy(
11726fa459cSmrg          &combined_histo[j].data_[0], self->alphabet_size_);
11826fa459cSmrg      diff[j] = combined_entropy[j] - entropy - last_entropy[j];
11926fa459cSmrg    }
12026fa459cSmrg
12126fa459cSmrg    if (split->num_types < BROTLI_MAX_NUMBER_OF_BLOCK_TYPES &&
12226fa459cSmrg        diff[0] > self->split_threshold_ &&
12326fa459cSmrg        diff[1] > self->split_threshold_) {
12426fa459cSmrg      /* Create new block. */
12526fa459cSmrg      split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
12626fa459cSmrg      split->types[self->num_blocks_] = (uint8_t)split->num_types;
12726fa459cSmrg      self->last_histogram_ix_[1] = self->last_histogram_ix_[0];
12826fa459cSmrg      self->last_histogram_ix_[0] = (uint8_t)split->num_types;
12926fa459cSmrg      last_entropy[1] = last_entropy[0];
13026fa459cSmrg      last_entropy[0] = entropy;
13126fa459cSmrg      ++self->num_blocks_;
13226fa459cSmrg      ++split->num_types;
13326fa459cSmrg      ++self->curr_histogram_ix_;
13426fa459cSmrg      if (self->curr_histogram_ix_ < *self->histograms_size_)
13526fa459cSmrg        FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
13626fa459cSmrg      self->block_size_ = 0;
13726fa459cSmrg      self->merge_last_count_ = 0;
13826fa459cSmrg      self->target_block_size_ = self->min_block_size_;
13926fa459cSmrg    } else if (diff[1] < diff[0] - 20.0) {
14026fa459cSmrg      /* Combine this block with second last block. */
14126fa459cSmrg      split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
14226fa459cSmrg      split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
14326fa459cSmrg      BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);
14426fa459cSmrg      histograms[self->last_histogram_ix_[0]] = combined_histo[1];
14526fa459cSmrg      last_entropy[1] = last_entropy[0];
14626fa459cSmrg      last_entropy[0] = combined_entropy[1];
14726fa459cSmrg      ++self->num_blocks_;
14826fa459cSmrg      self->block_size_ = 0;
14926fa459cSmrg      FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
15026fa459cSmrg      self->merge_last_count_ = 0;
15126fa459cSmrg      self->target_block_size_ = self->min_block_size_;
15226fa459cSmrg    } else {
15326fa459cSmrg      /* Combine this block with last block. */
15426fa459cSmrg      split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;
15526fa459cSmrg      histograms[self->last_histogram_ix_[0]] = combined_histo[0];
15626fa459cSmrg      last_entropy[0] = combined_entropy[0];
15726fa459cSmrg      if (split->num_types == 1) {
15826fa459cSmrg        last_entropy[1] = last_entropy[0];
15926fa459cSmrg      }
16026fa459cSmrg      self->block_size_ = 0;
16126fa459cSmrg      FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
16226fa459cSmrg      if (++self->merge_last_count_ > 1) {
16326fa459cSmrg        self->target_block_size_ += self->min_block_size_;
16426fa459cSmrg      }
16526fa459cSmrg    }
16626fa459cSmrg  }
16726fa459cSmrg  if (is_final) {
16826fa459cSmrg    *self->histograms_size_ = split->num_types;
16926fa459cSmrg    split->num_blocks = self->num_blocks_;
17026fa459cSmrg  }
17126fa459cSmrg}
17226fa459cSmrg
17326fa459cSmrg/* Adds the next symbol to the current histogram. When the current histogram
17426fa459cSmrg   reaches the target size, decides on merging the block. */
17526fa459cSmrgstatic void FN(BlockSplitterAddSymbol)(FN(BlockSplitter)* self, size_t symbol) {
17626fa459cSmrg  FN(HistogramAdd)(&self->histograms_[self->curr_histogram_ix_], symbol);
17726fa459cSmrg  ++self->block_size_;
17826fa459cSmrg  if (self->block_size_ == self->target_block_size_) {
17926fa459cSmrg    FN(BlockSplitterFinishBlock)(self, /* is_final = */ BROTLI_FALSE);
18026fa459cSmrg  }
18126fa459cSmrg}
18226fa459cSmrg
18326fa459cSmrg#undef HistogramType
184