126fa459cSmrg/* NOLINT(build/header_guard) */
226fa459cSmrg/* Copyright 2013 Google Inc. All Rights Reserved.
326fa459cSmrg
426fa459cSmrg   Distributed under MIT license.
526fa459cSmrg   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
626fa459cSmrg*/
726fa459cSmrg
826fa459cSmrg/* template parameters: FN, DataType */
926fa459cSmrg
1026fa459cSmrg#define HistogramType FN(Histogram)
1126fa459cSmrg
1226fa459cSmrgstatic void FN(InitialEntropyCodes)(const DataType* data, size_t length,
1326fa459cSmrg                                    size_t stride,
1426fa459cSmrg                                    size_t num_histograms,
1526fa459cSmrg                                    HistogramType* histograms) {
1626fa459cSmrg  uint32_t seed = 7;
1726fa459cSmrg  size_t block_length = length / num_histograms;
1826fa459cSmrg  size_t i;
1926fa459cSmrg  FN(ClearHistograms)(histograms, num_histograms);
2026fa459cSmrg  for (i = 0; i < num_histograms; ++i) {
2126fa459cSmrg    size_t pos = length * i / num_histograms;
2226fa459cSmrg    if (i != 0) {
2326fa459cSmrg      pos += MyRand(&seed) % block_length;
2426fa459cSmrg    }
2526fa459cSmrg    if (pos + stride >= length) {
2626fa459cSmrg      pos = length - stride - 1;
2726fa459cSmrg    }
2826fa459cSmrg    FN(HistogramAddVector)(&histograms[i], data + pos, stride);
2926fa459cSmrg  }
3026fa459cSmrg}
3126fa459cSmrg
3226fa459cSmrgstatic void FN(RandomSample)(uint32_t* seed,
3326fa459cSmrg                             const DataType* data,
3426fa459cSmrg                             size_t length,
3526fa459cSmrg                             size_t stride,
3626fa459cSmrg                             HistogramType* sample) {
3726fa459cSmrg  size_t pos = 0;
3826fa459cSmrg  if (stride >= length) {
3926fa459cSmrg    stride = length;
4026fa459cSmrg  } else {
4126fa459cSmrg    pos = MyRand(seed) % (length - stride + 1);
4226fa459cSmrg  }
4326fa459cSmrg  FN(HistogramAddVector)(sample, data + pos, stride);
4426fa459cSmrg}
4526fa459cSmrg
4626fa459cSmrgstatic void FN(RefineEntropyCodes)(const DataType* data, size_t length,
4726fa459cSmrg                                   size_t stride,
4826fa459cSmrg                                   size_t num_histograms,
4926fa459cSmrg                                   HistogramType* histograms) {
5026fa459cSmrg  size_t iters =
5126fa459cSmrg      kIterMulForRefining * length / stride + kMinItersForRefining;
5226fa459cSmrg  uint32_t seed = 7;
5326fa459cSmrg  size_t iter;
5426fa459cSmrg  iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms;
5526fa459cSmrg  for (iter = 0; iter < iters; ++iter) {
5626fa459cSmrg    HistogramType sample;
5726fa459cSmrg    FN(HistogramClear)(&sample);
5826fa459cSmrg    FN(RandomSample)(&seed, data, length, stride, &sample);
5926fa459cSmrg    FN(HistogramAddHistogram)(&histograms[iter % num_histograms], &sample);
6026fa459cSmrg  }
6126fa459cSmrg}
6226fa459cSmrg
6326fa459cSmrg/* Assigns a block id from the range [0, num_histograms) to each data element
6426fa459cSmrg   in data[0..length) and fills in block_id[0..length) with the assigned values.
6526fa459cSmrg   Returns the number of blocks, i.e. one plus the number of block switches. */
6626fa459cSmrgstatic size_t FN(FindBlocks)(const DataType* data, const size_t length,
6726fa459cSmrg                             const double block_switch_bitcost,
6826fa459cSmrg                             const size_t num_histograms,
6926fa459cSmrg                             const HistogramType* histograms,
7026fa459cSmrg                             double* insert_cost,
7126fa459cSmrg                             double* cost,
7226fa459cSmrg                             uint8_t* switch_signal,
7326fa459cSmrg                             uint8_t* block_id) {
7426fa459cSmrg  const size_t data_size = FN(HistogramDataSize)();
7526fa459cSmrg  const size_t bitmaplen = (num_histograms + 7) >> 3;
7626fa459cSmrg  size_t num_blocks = 1;
7726fa459cSmrg  size_t i;
7826fa459cSmrg  size_t j;
7926fa459cSmrg  BROTLI_DCHECK(num_histograms <= 256);
8026fa459cSmrg  if (num_histograms <= 1) {
8126fa459cSmrg    for (i = 0; i < length; ++i) {
8226fa459cSmrg      block_id[i] = 0;
8326fa459cSmrg    }
8426fa459cSmrg    return 1;
8526fa459cSmrg  }
8626fa459cSmrg  memset(insert_cost, 0, sizeof(insert_cost[0]) * data_size * num_histograms);
8726fa459cSmrg  for (i = 0; i < num_histograms; ++i) {
8826fa459cSmrg    insert_cost[i] = FastLog2((uint32_t)histograms[i].total_count_);
8926fa459cSmrg  }
9026fa459cSmrg  for (i = data_size; i != 0;) {
9126fa459cSmrg    --i;
9226fa459cSmrg    for (j = 0; j < num_histograms; ++j) {
9326fa459cSmrg      insert_cost[i * num_histograms + j] =
9426fa459cSmrg          insert_cost[j] - BitCost(histograms[j].data_[i]);
9526fa459cSmrg    }
9626fa459cSmrg  }
9726fa459cSmrg  memset(cost, 0, sizeof(cost[0]) * num_histograms);
9826fa459cSmrg  memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmaplen);
9926fa459cSmrg  /* After each iteration of this loop, cost[k] will contain the difference
10026fa459cSmrg     between the minimum cost of arriving at the current byte position using
10126fa459cSmrg     entropy code k, and the minimum cost of arriving at the current byte
10226fa459cSmrg     position. This difference is capped at the block switch cost, and if it
10326fa459cSmrg     reaches block switch cost, it means that when we trace back from the last
10426fa459cSmrg     position, we need to switch here. */
10526fa459cSmrg  for (i = 0; i < length; ++i) {
10626fa459cSmrg    const size_t byte_ix = i;
10726fa459cSmrg    size_t ix = byte_ix * bitmaplen;
10826fa459cSmrg    size_t insert_cost_ix = data[byte_ix] * num_histograms;
10926fa459cSmrg    double min_cost = 1e99;
11026fa459cSmrg    double block_switch_cost = block_switch_bitcost;
11126fa459cSmrg    size_t k;
11226fa459cSmrg    for (k = 0; k < num_histograms; ++k) {
11326fa459cSmrg      /* We are coding the symbol in data[byte_ix] with entropy code k. */
11426fa459cSmrg      cost[k] += insert_cost[insert_cost_ix + k];
11526fa459cSmrg      if (cost[k] < min_cost) {
11626fa459cSmrg        min_cost = cost[k];
11726fa459cSmrg        block_id[byte_ix] = (uint8_t)k;
11826fa459cSmrg      }
11926fa459cSmrg    }
12026fa459cSmrg    /* More blocks for the beginning. */
12126fa459cSmrg    if (byte_ix < 2000) {
12226fa459cSmrg      block_switch_cost *= 0.77 + 0.07 * (double)byte_ix / 2000;
12326fa459cSmrg    }
12426fa459cSmrg    for (k = 0; k < num_histograms; ++k) {
12526fa459cSmrg      cost[k] -= min_cost;
12626fa459cSmrg      if (cost[k] >= block_switch_cost) {
12726fa459cSmrg        const uint8_t mask = (uint8_t)(1u << (k & 7));
12826fa459cSmrg        cost[k] = block_switch_cost;
12926fa459cSmrg        BROTLI_DCHECK((k >> 3) < bitmaplen);
13026fa459cSmrg        switch_signal[ix + (k >> 3)] |= mask;
13126fa459cSmrg      }
13226fa459cSmrg    }
13326fa459cSmrg  }
13426fa459cSmrg  {  /* Trace back from the last position and switch at the marked places. */
13526fa459cSmrg    size_t byte_ix = length - 1;
13626fa459cSmrg    size_t ix = byte_ix * bitmaplen;
13726fa459cSmrg    uint8_t cur_id = block_id[byte_ix];
13826fa459cSmrg    while (byte_ix > 0) {
13926fa459cSmrg      const uint8_t mask = (uint8_t)(1u << (cur_id & 7));
14026fa459cSmrg      BROTLI_DCHECK(((size_t)cur_id >> 3) < bitmaplen);
14126fa459cSmrg      --byte_ix;
14226fa459cSmrg      ix -= bitmaplen;
14326fa459cSmrg      if (switch_signal[ix + (cur_id >> 3)] & mask) {
14426fa459cSmrg        if (cur_id != block_id[byte_ix]) {
14526fa459cSmrg          cur_id = block_id[byte_ix];
14626fa459cSmrg          ++num_blocks;
14726fa459cSmrg        }
14826fa459cSmrg      }
14926fa459cSmrg      block_id[byte_ix] = cur_id;
15026fa459cSmrg    }
15126fa459cSmrg  }
15226fa459cSmrg  return num_blocks;
15326fa459cSmrg}
15426fa459cSmrg
15526fa459cSmrgstatic size_t FN(RemapBlockIds)(uint8_t* block_ids, const size_t length,
15626fa459cSmrg                                uint16_t* new_id, const size_t num_histograms) {
15726fa459cSmrg  static const uint16_t kInvalidId = 256;
15826fa459cSmrg  uint16_t next_id = 0;
15926fa459cSmrg  size_t i;
16026fa459cSmrg  for (i = 0; i < num_histograms; ++i) {
16126fa459cSmrg    new_id[i] = kInvalidId;
16226fa459cSmrg  }
16326fa459cSmrg  for (i = 0; i < length; ++i) {
16426fa459cSmrg    BROTLI_DCHECK(block_ids[i] < num_histograms);
16526fa459cSmrg    if (new_id[block_ids[i]] == kInvalidId) {
16626fa459cSmrg      new_id[block_ids[i]] = next_id++;
16726fa459cSmrg    }
16826fa459cSmrg  }
16926fa459cSmrg  for (i = 0; i < length; ++i) {
17026fa459cSmrg    block_ids[i] = (uint8_t)new_id[block_ids[i]];
17126fa459cSmrg    BROTLI_DCHECK(block_ids[i] < num_histograms);
17226fa459cSmrg  }
17326fa459cSmrg  BROTLI_DCHECK(next_id <= num_histograms);
17426fa459cSmrg  return next_id;
17526fa459cSmrg}
17626fa459cSmrg
17726fa459cSmrgstatic void FN(BuildBlockHistograms)(const DataType* data, const size_t length,
17826fa459cSmrg                                     const uint8_t* block_ids,
17926fa459cSmrg                                     const size_t num_histograms,
18026fa459cSmrg                                     HistogramType* histograms) {
18126fa459cSmrg  size_t i;
18226fa459cSmrg  FN(ClearHistograms)(histograms, num_histograms);
18326fa459cSmrg  for (i = 0; i < length; ++i) {
18426fa459cSmrg    FN(HistogramAdd)(&histograms[block_ids[i]], data[i]);
18526fa459cSmrg  }
18626fa459cSmrg}
18726fa459cSmrg
18826fa459cSmrgstatic void FN(ClusterBlocks)(MemoryManager* m,
18926fa459cSmrg                              const DataType* data, const size_t length,
19026fa459cSmrg                              const size_t num_blocks,
19126fa459cSmrg                              uint8_t* block_ids,
19226fa459cSmrg                              BlockSplit* split) {
19326fa459cSmrg  uint32_t* histogram_symbols = BROTLI_ALLOC(m, uint32_t, num_blocks);
19426fa459cSmrg  uint32_t* block_lengths = BROTLI_ALLOC(m, uint32_t, num_blocks);
19526fa459cSmrg  const size_t expected_num_clusters = CLUSTERS_PER_BATCH *
19626fa459cSmrg      (num_blocks + HISTOGRAMS_PER_BATCH - 1) / HISTOGRAMS_PER_BATCH;
19726fa459cSmrg  size_t all_histograms_size = 0;
19826fa459cSmrg  size_t all_histograms_capacity = expected_num_clusters;
19926fa459cSmrg  HistogramType* all_histograms =
20026fa459cSmrg      BROTLI_ALLOC(m, HistogramType, all_histograms_capacity);
20126fa459cSmrg  size_t cluster_size_size = 0;
20226fa459cSmrg  size_t cluster_size_capacity = expected_num_clusters;
20326fa459cSmrg  uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, cluster_size_capacity);
20426fa459cSmrg  size_t num_clusters = 0;
20526fa459cSmrg  HistogramType* histograms = BROTLI_ALLOC(m, HistogramType,
20626fa459cSmrg      BROTLI_MIN(size_t, num_blocks, HISTOGRAMS_PER_BATCH));
20726fa459cSmrg  size_t max_num_pairs =
20826fa459cSmrg      HISTOGRAMS_PER_BATCH * HISTOGRAMS_PER_BATCH / 2;
20926fa459cSmrg  size_t pairs_capacity = max_num_pairs + 1;
21026fa459cSmrg  HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity);
21126fa459cSmrg  size_t pos = 0;
21226fa459cSmrg  uint32_t* clusters;
21326fa459cSmrg  size_t num_final_clusters;
21426fa459cSmrg  static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX;
21526fa459cSmrg  uint32_t* new_index;
21626fa459cSmrg  size_t i;
21726fa459cSmrg  uint32_t sizes[HISTOGRAMS_PER_BATCH] = { 0 };
21826fa459cSmrg  uint32_t new_clusters[HISTOGRAMS_PER_BATCH] = { 0 };
21926fa459cSmrg  uint32_t symbols[HISTOGRAMS_PER_BATCH] = { 0 };
22026fa459cSmrg  uint32_t remap[HISTOGRAMS_PER_BATCH] = { 0 };
22126fa459cSmrg
22226fa459cSmrg  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histogram_symbols) ||
22326fa459cSmrg      BROTLI_IS_NULL(block_lengths) || BROTLI_IS_NULL(all_histograms) ||
22426fa459cSmrg      BROTLI_IS_NULL(cluster_size) || BROTLI_IS_NULL(histograms) ||
22526fa459cSmrg      BROTLI_IS_NULL(pairs)) {
22626fa459cSmrg    return;
22726fa459cSmrg  }
22826fa459cSmrg
22926fa459cSmrg  memset(block_lengths, 0, num_blocks * sizeof(uint32_t));
23026fa459cSmrg
23126fa459cSmrg  {
23226fa459cSmrg    size_t block_idx = 0;
23326fa459cSmrg    for (i = 0; i < length; ++i) {
23426fa459cSmrg      BROTLI_DCHECK(block_idx < num_blocks);
23526fa459cSmrg      ++block_lengths[block_idx];
23626fa459cSmrg      if (i + 1 == length || block_ids[i] != block_ids[i + 1]) {
23726fa459cSmrg        ++block_idx;
23826fa459cSmrg      }
23926fa459cSmrg    }
24026fa459cSmrg    BROTLI_DCHECK(block_idx == num_blocks);
24126fa459cSmrg  }
24226fa459cSmrg
24326fa459cSmrg  for (i = 0; i < num_blocks; i += HISTOGRAMS_PER_BATCH) {
24426fa459cSmrg    const size_t num_to_combine =
24526fa459cSmrg        BROTLI_MIN(size_t, num_blocks - i, HISTOGRAMS_PER_BATCH);
24626fa459cSmrg    size_t num_new_clusters;
24726fa459cSmrg    size_t j;
24826fa459cSmrg    for (j = 0; j < num_to_combine; ++j) {
24926fa459cSmrg      size_t k;
25026fa459cSmrg      FN(HistogramClear)(&histograms[j]);
25126fa459cSmrg      for (k = 0; k < block_lengths[i + j]; ++k) {
25226fa459cSmrg        FN(HistogramAdd)(&histograms[j], data[pos++]);
25326fa459cSmrg      }
25426fa459cSmrg      histograms[j].bit_cost_ = FN(BrotliPopulationCost)(&histograms[j]);
25526fa459cSmrg      new_clusters[j] = (uint32_t)j;
25626fa459cSmrg      symbols[j] = (uint32_t)j;
25726fa459cSmrg      sizes[j] = 1;
25826fa459cSmrg    }
25926fa459cSmrg    num_new_clusters = FN(BrotliHistogramCombine)(
26026fa459cSmrg        histograms, sizes, symbols, new_clusters, pairs, num_to_combine,
26126fa459cSmrg        num_to_combine, HISTOGRAMS_PER_BATCH, max_num_pairs);
26226fa459cSmrg    BROTLI_ENSURE_CAPACITY(m, HistogramType, all_histograms,
26326fa459cSmrg        all_histograms_capacity, all_histograms_size + num_new_clusters);
26426fa459cSmrg    BROTLI_ENSURE_CAPACITY(m, uint32_t, cluster_size,
26526fa459cSmrg        cluster_size_capacity, cluster_size_size + num_new_clusters);
26626fa459cSmrg    if (BROTLI_IS_OOM(m)) return;
26726fa459cSmrg    for (j = 0; j < num_new_clusters; ++j) {
26826fa459cSmrg      all_histograms[all_histograms_size++] = histograms[new_clusters[j]];
26926fa459cSmrg      cluster_size[cluster_size_size++] = sizes[new_clusters[j]];
27026fa459cSmrg      remap[new_clusters[j]] = (uint32_t)j;
27126fa459cSmrg    }
27226fa459cSmrg    for (j = 0; j < num_to_combine; ++j) {
27326fa459cSmrg      histogram_symbols[i + j] = (uint32_t)num_clusters + remap[symbols[j]];
27426fa459cSmrg    }
27526fa459cSmrg    num_clusters += num_new_clusters;
27626fa459cSmrg    BROTLI_DCHECK(num_clusters == cluster_size_size);
27726fa459cSmrg    BROTLI_DCHECK(num_clusters == all_histograms_size);
27826fa459cSmrg  }
27926fa459cSmrg  BROTLI_FREE(m, histograms);
28026fa459cSmrg
28126fa459cSmrg  max_num_pairs =
28226fa459cSmrg      BROTLI_MIN(size_t, 64 * num_clusters, (num_clusters / 2) * num_clusters);
28326fa459cSmrg  if (pairs_capacity < max_num_pairs + 1) {
28426fa459cSmrg    BROTLI_FREE(m, pairs);
28526fa459cSmrg    pairs = BROTLI_ALLOC(m, HistogramPair, max_num_pairs + 1);
28626fa459cSmrg    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(pairs)) return;
28726fa459cSmrg  }
28826fa459cSmrg
28926fa459cSmrg  clusters = BROTLI_ALLOC(m, uint32_t, num_clusters);
29026fa459cSmrg  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(clusters)) return;
29126fa459cSmrg  for (i = 0; i < num_clusters; ++i) {
29226fa459cSmrg    clusters[i] = (uint32_t)i;
29326fa459cSmrg  }
29426fa459cSmrg  num_final_clusters = FN(BrotliHistogramCombine)(
29526fa459cSmrg      all_histograms, cluster_size, histogram_symbols, clusters, pairs,
29626fa459cSmrg      num_clusters, num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES,
29726fa459cSmrg      max_num_pairs);
29826fa459cSmrg  BROTLI_FREE(m, pairs);
29926fa459cSmrg  BROTLI_FREE(m, cluster_size);
30026fa459cSmrg
30126fa459cSmrg  new_index = BROTLI_ALLOC(m, uint32_t, num_clusters);
30226fa459cSmrg  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_index)) return;
30326fa459cSmrg  for (i = 0; i < num_clusters; ++i) new_index[i] = kInvalidIndex;
30426fa459cSmrg  pos = 0;
30526fa459cSmrg  {
30626fa459cSmrg    uint32_t next_index = 0;
30726fa459cSmrg    for (i = 0; i < num_blocks; ++i) {
30826fa459cSmrg      HistogramType histo;
30926fa459cSmrg      size_t j;
31026fa459cSmrg      uint32_t best_out;
31126fa459cSmrg      double best_bits;
31226fa459cSmrg      FN(HistogramClear)(&histo);
31326fa459cSmrg      for (j = 0; j < block_lengths[i]; ++j) {
31426fa459cSmrg        FN(HistogramAdd)(&histo, data[pos++]);
31526fa459cSmrg      }
31626fa459cSmrg      best_out = (i == 0) ? histogram_symbols[0] : histogram_symbols[i - 1];
31726fa459cSmrg      best_bits =
31826fa459cSmrg          FN(BrotliHistogramBitCostDistance)(&histo, &all_histograms[best_out]);
31926fa459cSmrg      for (j = 0; j < num_final_clusters; ++j) {
32026fa459cSmrg        const double cur_bits = FN(BrotliHistogramBitCostDistance)(
32126fa459cSmrg            &histo, &all_histograms[clusters[j]]);
32226fa459cSmrg        if (cur_bits < best_bits) {
32326fa459cSmrg          best_bits = cur_bits;
32426fa459cSmrg          best_out = clusters[j];
32526fa459cSmrg        }
32626fa459cSmrg      }
32726fa459cSmrg      histogram_symbols[i] = best_out;
32826fa459cSmrg      if (new_index[best_out] == kInvalidIndex) {
32926fa459cSmrg        new_index[best_out] = next_index++;
33026fa459cSmrg      }
33126fa459cSmrg    }
33226fa459cSmrg  }
33326fa459cSmrg  BROTLI_FREE(m, clusters);
33426fa459cSmrg  BROTLI_FREE(m, all_histograms);
33526fa459cSmrg  BROTLI_ENSURE_CAPACITY(
33626fa459cSmrg      m, uint8_t, split->types, split->types_alloc_size, num_blocks);
33726fa459cSmrg  BROTLI_ENSURE_CAPACITY(
33826fa459cSmrg      m, uint32_t, split->lengths, split->lengths_alloc_size, num_blocks);
33926fa459cSmrg  if (BROTLI_IS_OOM(m)) return;
34026fa459cSmrg  {
34126fa459cSmrg    uint32_t cur_length = 0;
34226fa459cSmrg    size_t block_idx = 0;
34326fa459cSmrg    uint8_t max_type = 0;
34426fa459cSmrg    for (i = 0; i < num_blocks; ++i) {
34526fa459cSmrg      cur_length += block_lengths[i];
34626fa459cSmrg      if (i + 1 == num_blocks ||
34726fa459cSmrg          histogram_symbols[i] != histogram_symbols[i + 1]) {
34826fa459cSmrg        const uint8_t id = (uint8_t)new_index[histogram_symbols[i]];
34926fa459cSmrg        split->types[block_idx] = id;
35026fa459cSmrg        split->lengths[block_idx] = cur_length;
35126fa459cSmrg        max_type = BROTLI_MAX(uint8_t, max_type, id);
35226fa459cSmrg        cur_length = 0;
35326fa459cSmrg        ++block_idx;
35426fa459cSmrg      }
35526fa459cSmrg    }
35626fa459cSmrg    split->num_blocks = block_idx;
35726fa459cSmrg    split->num_types = (size_t)max_type + 1;
35826fa459cSmrg  }
35926fa459cSmrg  BROTLI_FREE(m, new_index);
36026fa459cSmrg  BROTLI_FREE(m, block_lengths);
36126fa459cSmrg  BROTLI_FREE(m, histogram_symbols);
36226fa459cSmrg}
36326fa459cSmrg
36426fa459cSmrgstatic void FN(SplitByteVector)(MemoryManager* m,
36526fa459cSmrg                                const DataType* data, const size_t length,
36626fa459cSmrg                                const size_t literals_per_histogram,
36726fa459cSmrg                                const size_t max_histograms,
36826fa459cSmrg                                const size_t sampling_stride_length,
36926fa459cSmrg                                const double block_switch_cost,
37026fa459cSmrg                                const BrotliEncoderParams* params,
37126fa459cSmrg                                BlockSplit* split) {
37226fa459cSmrg  const size_t data_size = FN(HistogramDataSize)();
37326fa459cSmrg  size_t num_histograms = length / literals_per_histogram + 1;
37426fa459cSmrg  HistogramType* histograms;
37526fa459cSmrg  if (num_histograms > max_histograms) {
37626fa459cSmrg    num_histograms = max_histograms;
37726fa459cSmrg  }
37826fa459cSmrg  if (length == 0) {
37926fa459cSmrg    split->num_types = 1;
38026fa459cSmrg    return;
38126fa459cSmrg  } else if (length < kMinLengthForBlockSplitting) {
38226fa459cSmrg    BROTLI_ENSURE_CAPACITY(m, uint8_t,
38326fa459cSmrg        split->types, split->types_alloc_size, split->num_blocks + 1);
38426fa459cSmrg    BROTLI_ENSURE_CAPACITY(m, uint32_t,
38526fa459cSmrg        split->lengths, split->lengths_alloc_size, split->num_blocks + 1);
38626fa459cSmrg    if (BROTLI_IS_OOM(m)) return;
38726fa459cSmrg    split->num_types = 1;
38826fa459cSmrg    split->types[split->num_blocks] = 0;
38926fa459cSmrg    split->lengths[split->num_blocks] = (uint32_t)length;
39026fa459cSmrg    split->num_blocks++;
39126fa459cSmrg    return;
39226fa459cSmrg  }
39326fa459cSmrg  histograms = BROTLI_ALLOC(m, HistogramType, num_histograms);
39426fa459cSmrg  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histograms)) return;
39526fa459cSmrg  /* Find good entropy codes. */
39626fa459cSmrg  FN(InitialEntropyCodes)(data, length,
39726fa459cSmrg                          sampling_stride_length,
39826fa459cSmrg                          num_histograms, histograms);
39926fa459cSmrg  FN(RefineEntropyCodes)(data, length,
40026fa459cSmrg                         sampling_stride_length,
40126fa459cSmrg                         num_histograms, histograms);
40226fa459cSmrg  {
40326fa459cSmrg    /* Find a good path through literals with the good entropy codes. */
40426fa459cSmrg    uint8_t* block_ids = BROTLI_ALLOC(m, uint8_t, length);
40526fa459cSmrg    size_t num_blocks = 0;
40626fa459cSmrg    const size_t bitmaplen = (num_histograms + 7) >> 3;
40726fa459cSmrg    double* insert_cost = BROTLI_ALLOC(m, double, data_size * num_histograms);
40826fa459cSmrg    double* cost = BROTLI_ALLOC(m, double, num_histograms);
40926fa459cSmrg    uint8_t* switch_signal = BROTLI_ALLOC(m, uint8_t, length * bitmaplen);
41026fa459cSmrg    uint16_t* new_id = BROTLI_ALLOC(m, uint16_t, num_histograms);
41126fa459cSmrg    const size_t iters = params->quality < HQ_ZOPFLIFICATION_QUALITY ? 3 : 10;
41226fa459cSmrg    size_t i;
41326fa459cSmrg    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(block_ids) ||
41426fa459cSmrg        BROTLI_IS_NULL(insert_cost) || BROTLI_IS_NULL(cost) ||
41526fa459cSmrg        BROTLI_IS_NULL(switch_signal) || BROTLI_IS_NULL(new_id)) {
41626fa459cSmrg      return;
41726fa459cSmrg    }
41826fa459cSmrg    for (i = 0; i < iters; ++i) {
41926fa459cSmrg      num_blocks = FN(FindBlocks)(data, length,
42026fa459cSmrg                                  block_switch_cost,
42126fa459cSmrg                                  num_histograms, histograms,
42226fa459cSmrg                                  insert_cost, cost, switch_signal,
42326fa459cSmrg                                  block_ids);
42426fa459cSmrg      num_histograms = FN(RemapBlockIds)(block_ids, length,
42526fa459cSmrg                                         new_id, num_histograms);
42626fa459cSmrg      FN(BuildBlockHistograms)(data, length, block_ids,
42726fa459cSmrg                               num_histograms, histograms);
42826fa459cSmrg    }
42926fa459cSmrg    BROTLI_FREE(m, insert_cost);
43026fa459cSmrg    BROTLI_FREE(m, cost);
43126fa459cSmrg    BROTLI_FREE(m, switch_signal);
43226fa459cSmrg    BROTLI_FREE(m, new_id);
43326fa459cSmrg    BROTLI_FREE(m, histograms);
43426fa459cSmrg    FN(ClusterBlocks)(m, data, length, num_blocks, block_ids, split);
43526fa459cSmrg    if (BROTLI_IS_OOM(m)) return;
43626fa459cSmrg    BROTLI_FREE(m, block_ids);
43726fa459cSmrg  }
43826fa459cSmrg}
43926fa459cSmrg
44026fa459cSmrg#undef HistogramType
441