126fa459cSmrg/* NOLINT(build/header_guard) */ 226fa459cSmrg/* Copyright 2015 Google Inc. All Rights Reserved. 326fa459cSmrg 426fa459cSmrg Distributed under MIT license. 526fa459cSmrg See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 626fa459cSmrg*/ 726fa459cSmrg 826fa459cSmrg/* template parameters: FN */ 926fa459cSmrg 1026fa459cSmrg#define HistogramType FN(Histogram) 1126fa459cSmrg 1226fa459cSmrg/* Greedy block splitter for one block category (literal, command or distance). 1326fa459cSmrg*/ 1426fa459cSmrgtypedef struct FN(BlockSplitter) { 1526fa459cSmrg /* Alphabet size of particular block category. */ 1626fa459cSmrg size_t alphabet_size_; 1726fa459cSmrg /* We collect at least this many symbols for each block. */ 1826fa459cSmrg size_t min_block_size_; 1926fa459cSmrg /* We merge histograms A and B if 2026fa459cSmrg entropy(A+B) < entropy(A) + entropy(B) + split_threshold_, 2126fa459cSmrg where A is the current histogram and B is the histogram of the last or the 2226fa459cSmrg second last block type. */ 2326fa459cSmrg double split_threshold_; 2426fa459cSmrg 2526fa459cSmrg size_t num_blocks_; 2626fa459cSmrg BlockSplit* split_; /* not owned */ 2726fa459cSmrg HistogramType* histograms_; /* not owned */ 2826fa459cSmrg size_t* histograms_size_; /* not owned */ 2926fa459cSmrg 3026fa459cSmrg /* The number of symbols that we want to collect before deciding on whether 3126fa459cSmrg or not to merge the block with a previous one or emit a new block. */ 3226fa459cSmrg size_t target_block_size_; 3326fa459cSmrg /* The number of symbols in the current histogram. */ 3426fa459cSmrg size_t block_size_; 3526fa459cSmrg /* Offset of the current histogram. */ 3626fa459cSmrg size_t curr_histogram_ix_; 3726fa459cSmrg /* Offset of the histograms of the previous two block types. */ 3826fa459cSmrg size_t last_histogram_ix_[2]; 3926fa459cSmrg /* Entropy of the previous two block types. */ 4026fa459cSmrg double last_entropy_[2]; 4126fa459cSmrg /* The number of times we merged the current block with the last one. */ 4226fa459cSmrg size_t merge_last_count_; 4326fa459cSmrg} FN(BlockSplitter); 4426fa459cSmrg 4526fa459cSmrgstatic void FN(InitBlockSplitter)( 4626fa459cSmrg MemoryManager* m, FN(BlockSplitter)* self, size_t alphabet_size, 4726fa459cSmrg size_t min_block_size, double split_threshold, size_t num_symbols, 4826fa459cSmrg BlockSplit* split, HistogramType** histograms, size_t* histograms_size) { 4926fa459cSmrg size_t max_num_blocks = num_symbols / min_block_size + 1; 5026fa459cSmrg /* We have to allocate one more histogram than the maximum number of block 5126fa459cSmrg types for the current histogram when the meta-block is too big. */ 5226fa459cSmrg size_t max_num_types = 5326fa459cSmrg BROTLI_MIN(size_t, max_num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 1); 5426fa459cSmrg self->alphabet_size_ = alphabet_size; 5526fa459cSmrg self->min_block_size_ = min_block_size; 5626fa459cSmrg self->split_threshold_ = split_threshold; 5726fa459cSmrg self->num_blocks_ = 0; 5826fa459cSmrg self->split_ = split; 5926fa459cSmrg self->histograms_size_ = histograms_size; 6026fa459cSmrg self->target_block_size_ = min_block_size; 6126fa459cSmrg self->block_size_ = 0; 6226fa459cSmrg self->curr_histogram_ix_ = 0; 6326fa459cSmrg self->merge_last_count_ = 0; 6426fa459cSmrg BROTLI_ENSURE_CAPACITY(m, uint8_t, 6526fa459cSmrg split->types, split->types_alloc_size, max_num_blocks); 6626fa459cSmrg BROTLI_ENSURE_CAPACITY(m, uint32_t, 6726fa459cSmrg split->lengths, split->lengths_alloc_size, max_num_blocks); 6826fa459cSmrg if (BROTLI_IS_OOM(m)) return; 6926fa459cSmrg self->split_->num_blocks = max_num_blocks; 7026fa459cSmrg BROTLI_DCHECK(*histograms == 0); 7126fa459cSmrg *histograms_size = max_num_types; 7226fa459cSmrg *histograms = BROTLI_ALLOC(m, HistogramType, *histograms_size); 7326fa459cSmrg self->histograms_ = *histograms; 7426fa459cSmrg if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(*histograms)) return; 7526fa459cSmrg /* Clear only current histogram. */ 7626fa459cSmrg FN(HistogramClear)(&self->histograms_[0]); 7726fa459cSmrg self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0; 7826fa459cSmrg} 7926fa459cSmrg 8026fa459cSmrg/* Does either of three things: 8126fa459cSmrg (1) emits the current block with a new block type; 8226fa459cSmrg (2) emits the current block with the type of the second last block; 8326fa459cSmrg (3) merges the current block with the last block. */ 8426fa459cSmrgstatic void FN(BlockSplitterFinishBlock)( 8526fa459cSmrg FN(BlockSplitter)* self, BROTLI_BOOL is_final) { 8626fa459cSmrg BlockSplit* split = self->split_; 8726fa459cSmrg double* last_entropy = self->last_entropy_; 8826fa459cSmrg HistogramType* histograms = self->histograms_; 8926fa459cSmrg self->block_size_ = 9026fa459cSmrg BROTLI_MAX(size_t, self->block_size_, self->min_block_size_); 9126fa459cSmrg if (self->num_blocks_ == 0) { 9226fa459cSmrg /* Create first block. */ 9326fa459cSmrg split->lengths[0] = (uint32_t)self->block_size_; 9426fa459cSmrg split->types[0] = 0; 9526fa459cSmrg last_entropy[0] = 9626fa459cSmrg BitsEntropy(histograms[0].data_, self->alphabet_size_); 9726fa459cSmrg last_entropy[1] = last_entropy[0]; 9826fa459cSmrg ++self->num_blocks_; 9926fa459cSmrg ++split->num_types; 10026fa459cSmrg ++self->curr_histogram_ix_; 10126fa459cSmrg if (self->curr_histogram_ix_ < *self->histograms_size_) 10226fa459cSmrg FN(HistogramClear)(&histograms[self->curr_histogram_ix_]); 10326fa459cSmrg self->block_size_ = 0; 10426fa459cSmrg } else if (self->block_size_ > 0) { 10526fa459cSmrg double entropy = BitsEntropy(histograms[self->curr_histogram_ix_].data_, 10626fa459cSmrg self->alphabet_size_); 10726fa459cSmrg HistogramType combined_histo[2]; 10826fa459cSmrg double combined_entropy[2]; 10926fa459cSmrg double diff[2]; 11026fa459cSmrg size_t j; 11126fa459cSmrg for (j = 0; j < 2; ++j) { 11226fa459cSmrg size_t last_histogram_ix = self->last_histogram_ix_[j]; 11326fa459cSmrg combined_histo[j] = histograms[self->curr_histogram_ix_]; 11426fa459cSmrg FN(HistogramAddHistogram)(&combined_histo[j], 11526fa459cSmrg &histograms[last_histogram_ix]); 11626fa459cSmrg combined_entropy[j] = BitsEntropy( 11726fa459cSmrg &combined_histo[j].data_[0], self->alphabet_size_); 11826fa459cSmrg diff[j] = combined_entropy[j] - entropy - last_entropy[j]; 11926fa459cSmrg } 12026fa459cSmrg 12126fa459cSmrg if (split->num_types < BROTLI_MAX_NUMBER_OF_BLOCK_TYPES && 12226fa459cSmrg diff[0] > self->split_threshold_ && 12326fa459cSmrg diff[1] > self->split_threshold_) { 12426fa459cSmrg /* Create new block. */ 12526fa459cSmrg split->lengths[self->num_blocks_] = (uint32_t)self->block_size_; 12626fa459cSmrg split->types[self->num_blocks_] = (uint8_t)split->num_types; 12726fa459cSmrg self->last_histogram_ix_[1] = self->last_histogram_ix_[0]; 12826fa459cSmrg self->last_histogram_ix_[0] = (uint8_t)split->num_types; 12926fa459cSmrg last_entropy[1] = last_entropy[0]; 13026fa459cSmrg last_entropy[0] = entropy; 13126fa459cSmrg ++self->num_blocks_; 13226fa459cSmrg ++split->num_types; 13326fa459cSmrg ++self->curr_histogram_ix_; 13426fa459cSmrg if (self->curr_histogram_ix_ < *self->histograms_size_) 13526fa459cSmrg FN(HistogramClear)(&histograms[self->curr_histogram_ix_]); 13626fa459cSmrg self->block_size_ = 0; 13726fa459cSmrg self->merge_last_count_ = 0; 13826fa459cSmrg self->target_block_size_ = self->min_block_size_; 13926fa459cSmrg } else if (diff[1] < diff[0] - 20.0) { 14026fa459cSmrg /* Combine this block with second last block. */ 14126fa459cSmrg split->lengths[self->num_blocks_] = (uint32_t)self->block_size_; 14226fa459cSmrg split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2]; 14326fa459cSmrg BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1); 14426fa459cSmrg histograms[self->last_histogram_ix_[0]] = combined_histo[1]; 14526fa459cSmrg last_entropy[1] = last_entropy[0]; 14626fa459cSmrg last_entropy[0] = combined_entropy[1]; 14726fa459cSmrg ++self->num_blocks_; 14826fa459cSmrg self->block_size_ = 0; 14926fa459cSmrg FN(HistogramClear)(&histograms[self->curr_histogram_ix_]); 15026fa459cSmrg self->merge_last_count_ = 0; 15126fa459cSmrg self->target_block_size_ = self->min_block_size_; 15226fa459cSmrg } else { 15326fa459cSmrg /* Combine this block with last block. */ 15426fa459cSmrg split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_; 15526fa459cSmrg histograms[self->last_histogram_ix_[0]] = combined_histo[0]; 15626fa459cSmrg last_entropy[0] = combined_entropy[0]; 15726fa459cSmrg if (split->num_types == 1) { 15826fa459cSmrg last_entropy[1] = last_entropy[0]; 15926fa459cSmrg } 16026fa459cSmrg self->block_size_ = 0; 16126fa459cSmrg FN(HistogramClear)(&histograms[self->curr_histogram_ix_]); 16226fa459cSmrg if (++self->merge_last_count_ > 1) { 16326fa459cSmrg self->target_block_size_ += self->min_block_size_; 16426fa459cSmrg } 16526fa459cSmrg } 16626fa459cSmrg } 16726fa459cSmrg if (is_final) { 16826fa459cSmrg *self->histograms_size_ = split->num_types; 16926fa459cSmrg split->num_blocks = self->num_blocks_; 17026fa459cSmrg } 17126fa459cSmrg} 17226fa459cSmrg 17326fa459cSmrg/* Adds the next symbol to the current histogram. When the current histogram 17426fa459cSmrg reaches the target size, decides on merging the block. */ 17526fa459cSmrgstatic void FN(BlockSplitterAddSymbol)(FN(BlockSplitter)* self, size_t symbol) { 17626fa459cSmrg FN(HistogramAdd)(&self->histograms_[self->curr_histogram_ix_], symbol); 17726fa459cSmrg ++self->block_size_; 17826fa459cSmrg if (self->block_size_ == self->target_block_size_) { 17926fa459cSmrg FN(BlockSplitterFinishBlock)(self, /* is_final = */ BROTLI_FALSE); 18026fa459cSmrg } 18126fa459cSmrg} 18226fa459cSmrg 18326fa459cSmrg#undef HistogramType 184