126fa459cSmrg/* NOLINT(build/header_guard) */
226fa459cSmrg/* Copyright 2013 Google Inc. All Rights Reserved.
326fa459cSmrg
426fa459cSmrg   Distributed under MIT license.
526fa459cSmrg   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
626fa459cSmrg*/
726fa459cSmrg
826fa459cSmrg/* template parameters: FN, CODE */
926fa459cSmrg
1026fa459cSmrg#define HistogramType FN(Histogram)
1126fa459cSmrg
1226fa459cSmrg/* Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
1326fa459cSmrg   it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue. */
1426fa459cSmrgBROTLI_INTERNAL void FN(BrotliCompareAndPushToQueue)(
1526fa459cSmrg    const HistogramType* out, const uint32_t* cluster_size, uint32_t idx1,
1626fa459cSmrg    uint32_t idx2, size_t max_num_pairs, HistogramPair* pairs,
1726fa459cSmrg    size_t* num_pairs) CODE({
1826fa459cSmrg  BROTLI_BOOL is_good_pair = BROTLI_FALSE;
1926fa459cSmrg  HistogramPair p;
2026fa459cSmrg  p.idx1 = p.idx2 = 0;
2126fa459cSmrg  p.cost_diff = p.cost_combo = 0;
2226fa459cSmrg  if (idx1 == idx2) {
2326fa459cSmrg    return;
2426fa459cSmrg  }
2526fa459cSmrg  if (idx2 < idx1) {
2626fa459cSmrg    uint32_t t = idx2;
2726fa459cSmrg    idx2 = idx1;
2826fa459cSmrg    idx1 = t;
2926fa459cSmrg  }
3026fa459cSmrg  p.idx1 = idx1;
3126fa459cSmrg  p.idx2 = idx2;
3226fa459cSmrg  p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
3326fa459cSmrg  p.cost_diff -= out[idx1].bit_cost_;
3426fa459cSmrg  p.cost_diff -= out[idx2].bit_cost_;
3526fa459cSmrg
3626fa459cSmrg  if (out[idx1].total_count_ == 0) {
3726fa459cSmrg    p.cost_combo = out[idx2].bit_cost_;
3826fa459cSmrg    is_good_pair = BROTLI_TRUE;
3926fa459cSmrg  } else if (out[idx2].total_count_ == 0) {
4026fa459cSmrg    p.cost_combo = out[idx1].bit_cost_;
4126fa459cSmrg    is_good_pair = BROTLI_TRUE;
4226fa459cSmrg  } else {
4326fa459cSmrg    double threshold = *num_pairs == 0 ? 1e99 :
4426fa459cSmrg        BROTLI_MAX(double, 0.0, pairs[0].cost_diff);
4526fa459cSmrg    HistogramType combo = out[idx1];
4626fa459cSmrg    double cost_combo;
4726fa459cSmrg    FN(HistogramAddHistogram)(&combo, &out[idx2]);
4826fa459cSmrg    cost_combo = FN(BrotliPopulationCost)(&combo);
4926fa459cSmrg    if (cost_combo < threshold - p.cost_diff) {
5026fa459cSmrg      p.cost_combo = cost_combo;
5126fa459cSmrg      is_good_pair = BROTLI_TRUE;
5226fa459cSmrg    }
5326fa459cSmrg  }
5426fa459cSmrg  if (is_good_pair) {
5526fa459cSmrg    p.cost_diff += p.cost_combo;
5626fa459cSmrg    if (*num_pairs > 0 && HistogramPairIsLess(&pairs[0], &p)) {
5726fa459cSmrg      /* Replace the top of the queue if needed. */
5826fa459cSmrg      if (*num_pairs < max_num_pairs) {
5926fa459cSmrg        pairs[*num_pairs] = pairs[0];
6026fa459cSmrg        ++(*num_pairs);
6126fa459cSmrg      }
6226fa459cSmrg      pairs[0] = p;
6326fa459cSmrg    } else if (*num_pairs < max_num_pairs) {
6426fa459cSmrg      pairs[*num_pairs] = p;
6526fa459cSmrg      ++(*num_pairs);
6626fa459cSmrg    }
6726fa459cSmrg  }
6826fa459cSmrg})
6926fa459cSmrg
7026fa459cSmrgBROTLI_INTERNAL size_t FN(BrotliHistogramCombine)(HistogramType* out,
7126fa459cSmrg                                                  uint32_t* cluster_size,
7226fa459cSmrg                                                  uint32_t* symbols,
7326fa459cSmrg                                                  uint32_t* clusters,
7426fa459cSmrg                                                  HistogramPair* pairs,
7526fa459cSmrg                                                  size_t num_clusters,
7626fa459cSmrg                                                  size_t symbols_size,
7726fa459cSmrg                                                  size_t max_clusters,
7826fa459cSmrg                                                  size_t max_num_pairs) CODE({
7926fa459cSmrg  double cost_diff_threshold = 0.0;
8026fa459cSmrg  size_t min_cluster_size = 1;
8126fa459cSmrg  size_t num_pairs = 0;
8226fa459cSmrg
8326fa459cSmrg  {
8426fa459cSmrg    /* We maintain a vector of histogram pairs, with the property that the pair
8526fa459cSmrg       with the maximum bit cost reduction is the first. */
8626fa459cSmrg    size_t idx1;
8726fa459cSmrg    for (idx1 = 0; idx1 < num_clusters; ++idx1) {
8826fa459cSmrg      size_t idx2;
8926fa459cSmrg      for (idx2 = idx1 + 1; idx2 < num_clusters; ++idx2) {
9026fa459cSmrg        FN(BrotliCompareAndPushToQueue)(out, cluster_size, clusters[idx1],
9126fa459cSmrg            clusters[idx2], max_num_pairs, &pairs[0], &num_pairs);
9226fa459cSmrg      }
9326fa459cSmrg    }
9426fa459cSmrg  }
9526fa459cSmrg
9626fa459cSmrg  while (num_clusters > min_cluster_size) {
9726fa459cSmrg    uint32_t best_idx1;
9826fa459cSmrg    uint32_t best_idx2;
9926fa459cSmrg    size_t i;
10026fa459cSmrg    if (pairs[0].cost_diff >= cost_diff_threshold) {
10126fa459cSmrg      cost_diff_threshold = 1e99;
10226fa459cSmrg      min_cluster_size = max_clusters;
10326fa459cSmrg      continue;
10426fa459cSmrg    }
10526fa459cSmrg    /* Take the best pair from the top of heap. */
10626fa459cSmrg    best_idx1 = pairs[0].idx1;
10726fa459cSmrg    best_idx2 = pairs[0].idx2;
10826fa459cSmrg    FN(HistogramAddHistogram)(&out[best_idx1], &out[best_idx2]);
10926fa459cSmrg    out[best_idx1].bit_cost_ = pairs[0].cost_combo;
11026fa459cSmrg    cluster_size[best_idx1] += cluster_size[best_idx2];
11126fa459cSmrg    for (i = 0; i < symbols_size; ++i) {
11226fa459cSmrg      if (symbols[i] == best_idx2) {
11326fa459cSmrg        symbols[i] = best_idx1;
11426fa459cSmrg      }
11526fa459cSmrg    }
11626fa459cSmrg    for (i = 0; i < num_clusters; ++i) {
11726fa459cSmrg      if (clusters[i] == best_idx2) {
11826fa459cSmrg        memmove(&clusters[i], &clusters[i + 1],
11926fa459cSmrg                (num_clusters - i - 1) * sizeof(clusters[0]));
12026fa459cSmrg        break;
12126fa459cSmrg      }
12226fa459cSmrg    }
12326fa459cSmrg    --num_clusters;
12426fa459cSmrg    {
12526fa459cSmrg      /* Remove pairs intersecting the just combined best pair. */
12626fa459cSmrg      size_t copy_to_idx = 0;
12726fa459cSmrg      for (i = 0; i < num_pairs; ++i) {
12826fa459cSmrg        HistogramPair* p = &pairs[i];
12926fa459cSmrg        if (p->idx1 == best_idx1 || p->idx2 == best_idx1 ||
13026fa459cSmrg            p->idx1 == best_idx2 || p->idx2 == best_idx2) {
13126fa459cSmrg          /* Remove invalid pair from the queue. */
13226fa459cSmrg          continue;
13326fa459cSmrg        }
13426fa459cSmrg        if (HistogramPairIsLess(&pairs[0], p)) {
13526fa459cSmrg          /* Replace the top of the queue if needed. */
13626fa459cSmrg          HistogramPair front = pairs[0];
13726fa459cSmrg          pairs[0] = *p;
13826fa459cSmrg          pairs[copy_to_idx] = front;
13926fa459cSmrg        } else {
14026fa459cSmrg          pairs[copy_to_idx] = *p;
14126fa459cSmrg        }
14226fa459cSmrg        ++copy_to_idx;
14326fa459cSmrg      }
14426fa459cSmrg      num_pairs = copy_to_idx;
14526fa459cSmrg    }
14626fa459cSmrg
14726fa459cSmrg    /* Push new pairs formed with the combined histogram to the heap. */
14826fa459cSmrg    for (i = 0; i < num_clusters; ++i) {
14926fa459cSmrg      FN(BrotliCompareAndPushToQueue)(out, cluster_size, best_idx1, clusters[i],
15026fa459cSmrg                                      max_num_pairs, &pairs[0], &num_pairs);
15126fa459cSmrg    }
15226fa459cSmrg  }
15326fa459cSmrg  return num_clusters;
15426fa459cSmrg})
15526fa459cSmrg
15626fa459cSmrg/* What is the bit cost of moving histogram from cur_symbol to candidate. */
15726fa459cSmrgBROTLI_INTERNAL double FN(BrotliHistogramBitCostDistance)(
15826fa459cSmrg    const HistogramType* histogram, const HistogramType* candidate) CODE({
15926fa459cSmrg  if (histogram->total_count_ == 0) {
16026fa459cSmrg    return 0.0;
16126fa459cSmrg  } else {
16226fa459cSmrg    HistogramType tmp = *histogram;
16326fa459cSmrg    FN(HistogramAddHistogram)(&tmp, candidate);
16426fa459cSmrg    return FN(BrotliPopulationCost)(&tmp) - candidate->bit_cost_;
16526fa459cSmrg  }
16626fa459cSmrg})
16726fa459cSmrg
16826fa459cSmrg/* Find the best 'out' histogram for each of the 'in' histograms.
16926fa459cSmrg   When called, clusters[0..num_clusters) contains the unique values from
17026fa459cSmrg   symbols[0..in_size), but this property is not preserved in this function.
17126fa459cSmrg   Note: we assume that out[]->bit_cost_ is already up-to-date. */
17226fa459cSmrgBROTLI_INTERNAL void FN(BrotliHistogramRemap)(const HistogramType* in,
17326fa459cSmrg    size_t in_size, const uint32_t* clusters, size_t num_clusters,
17426fa459cSmrg    HistogramType* out, uint32_t* symbols) CODE({
17526fa459cSmrg  size_t i;
17626fa459cSmrg  for (i = 0; i < in_size; ++i) {
17726fa459cSmrg    uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1];
17826fa459cSmrg    double best_bits =
17926fa459cSmrg        FN(BrotliHistogramBitCostDistance)(&in[i], &out[best_out]);
18026fa459cSmrg    size_t j;
18126fa459cSmrg    for (j = 0; j < num_clusters; ++j) {
18226fa459cSmrg      const double cur_bits =
18326fa459cSmrg          FN(BrotliHistogramBitCostDistance)(&in[i], &out[clusters[j]]);
18426fa459cSmrg      if (cur_bits < best_bits) {
18526fa459cSmrg        best_bits = cur_bits;
18626fa459cSmrg        best_out = clusters[j];
18726fa459cSmrg      }
18826fa459cSmrg    }
18926fa459cSmrg    symbols[i] = best_out;
19026fa459cSmrg  }
19126fa459cSmrg
19226fa459cSmrg  /* Recompute each out based on raw and symbols. */
19326fa459cSmrg  for (i = 0; i < num_clusters; ++i) {
19426fa459cSmrg    FN(HistogramClear)(&out[clusters[i]]);
19526fa459cSmrg  }
19626fa459cSmrg  for (i = 0; i < in_size; ++i) {
19726fa459cSmrg    FN(HistogramAddHistogram)(&out[symbols[i]], &in[i]);
19826fa459cSmrg  }
19926fa459cSmrg})
20026fa459cSmrg
20126fa459cSmrg/* Reorders elements of the out[0..length) array and changes values in
20226fa459cSmrg   symbols[0..length) array in the following way:
20326fa459cSmrg     * when called, symbols[] contains indexes into out[], and has N unique
20426fa459cSmrg       values (possibly N < length)
20526fa459cSmrg     * on return, symbols'[i] = f(symbols[i]) and
20626fa459cSmrg                  out'[symbols'[i]] = out[symbols[i]], for each 0 <= i < length,
20726fa459cSmrg       where f is a bijection between the range of symbols[] and [0..N), and
20826fa459cSmrg       the first occurrences of values in symbols'[i] come in consecutive
20926fa459cSmrg       increasing order.
21026fa459cSmrg   Returns N, the number of unique values in symbols[]. */
21126fa459cSmrgBROTLI_INTERNAL size_t FN(BrotliHistogramReindex)(MemoryManager* m,
21226fa459cSmrg    HistogramType* out, uint32_t* symbols, size_t length) CODE({
21326fa459cSmrg  static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX;
21426fa459cSmrg  uint32_t* new_index = BROTLI_ALLOC(m, uint32_t, length);
21526fa459cSmrg  uint32_t next_index;
21626fa459cSmrg  HistogramType* tmp;
21726fa459cSmrg  size_t i;
21826fa459cSmrg  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_index)) return 0;
21926fa459cSmrg  for (i = 0; i < length; ++i) {
22026fa459cSmrg      new_index[i] = kInvalidIndex;
22126fa459cSmrg  }
22226fa459cSmrg  next_index = 0;
22326fa459cSmrg  for (i = 0; i < length; ++i) {
22426fa459cSmrg    if (new_index[symbols[i]] == kInvalidIndex) {
22526fa459cSmrg      new_index[symbols[i]] = next_index;
22626fa459cSmrg      ++next_index;
22726fa459cSmrg    }
22826fa459cSmrg  }
22926fa459cSmrg  /* TODO: by using idea of "cycle-sort" we can avoid allocation of
23026fa459cSmrg     tmp and reduce the number of copying by the factor of 2. */
23126fa459cSmrg  tmp = BROTLI_ALLOC(m, HistogramType, next_index);
23226fa459cSmrg  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(tmp)) return 0;
23326fa459cSmrg  next_index = 0;
23426fa459cSmrg  for (i = 0; i < length; ++i) {
23526fa459cSmrg    if (new_index[symbols[i]] == next_index) {
23626fa459cSmrg      tmp[next_index] = out[symbols[i]];
23726fa459cSmrg      ++next_index;
23826fa459cSmrg    }
23926fa459cSmrg    symbols[i] = new_index[symbols[i]];
24026fa459cSmrg  }
24126fa459cSmrg  BROTLI_FREE(m, new_index);
24226fa459cSmrg  for (i = 0; i < next_index; ++i) {
24326fa459cSmrg    out[i] = tmp[i];
24426fa459cSmrg  }
24526fa459cSmrg  BROTLI_FREE(m, tmp);
24626fa459cSmrg  return next_index;
24726fa459cSmrg})
24826fa459cSmrg
24926fa459cSmrgBROTLI_INTERNAL void FN(BrotliClusterHistograms)(
25026fa459cSmrg    MemoryManager* m, const HistogramType* in, const size_t in_size,
25126fa459cSmrg    size_t max_histograms, HistogramType* out, size_t* out_size,
25226fa459cSmrg    uint32_t* histogram_symbols) CODE({
25326fa459cSmrg  uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, in_size);
25426fa459cSmrg  uint32_t* clusters = BROTLI_ALLOC(m, uint32_t, in_size);
25526fa459cSmrg  size_t num_clusters = 0;
25626fa459cSmrg  const size_t max_input_histograms = 64;
25726fa459cSmrg  size_t pairs_capacity = max_input_histograms * max_input_histograms / 2;
25826fa459cSmrg  /* For the first pass of clustering, we allow all pairs. */
25926fa459cSmrg  HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity + 1);
26026fa459cSmrg  size_t i;
26126fa459cSmrg
26226fa459cSmrg  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(cluster_size) ||
26326fa459cSmrg      BROTLI_IS_NULL(clusters) || BROTLI_IS_NULL(pairs)) {
26426fa459cSmrg    return;
26526fa459cSmrg  }
26626fa459cSmrg
26726fa459cSmrg  for (i = 0; i < in_size; ++i) {
26826fa459cSmrg    cluster_size[i] = 1;
26926fa459cSmrg  }
27026fa459cSmrg
27126fa459cSmrg  for (i = 0; i < in_size; ++i) {
27226fa459cSmrg    out[i] = in[i];
27326fa459cSmrg    out[i].bit_cost_ = FN(BrotliPopulationCost)(&in[i]);
27426fa459cSmrg    histogram_symbols[i] = (uint32_t)i;
27526fa459cSmrg  }
27626fa459cSmrg
27726fa459cSmrg  for (i = 0; i < in_size; i += max_input_histograms) {
27826fa459cSmrg    size_t num_to_combine =
27926fa459cSmrg        BROTLI_MIN(size_t, in_size - i, max_input_histograms);
28026fa459cSmrg    size_t num_new_clusters;
28126fa459cSmrg    size_t j;
28226fa459cSmrg    for (j = 0; j < num_to_combine; ++j) {
28326fa459cSmrg      clusters[num_clusters + j] = (uint32_t)(i + j);
28426fa459cSmrg    }
28526fa459cSmrg    num_new_clusters =
28626fa459cSmrg        FN(BrotliHistogramCombine)(out, cluster_size,
28726fa459cSmrg                                   &histogram_symbols[i],
28826fa459cSmrg                                   &clusters[num_clusters], pairs,
28926fa459cSmrg                                   num_to_combine, num_to_combine,
29026fa459cSmrg                                   max_histograms, pairs_capacity);
29126fa459cSmrg    num_clusters += num_new_clusters;
29226fa459cSmrg  }
29326fa459cSmrg
29426fa459cSmrg  {
29526fa459cSmrg    /* For the second pass, we limit the total number of histogram pairs.
29626fa459cSmrg       After this limit is reached, we only keep searching for the best pair. */
29726fa459cSmrg    size_t max_num_pairs = BROTLI_MIN(size_t,
29826fa459cSmrg        64 * num_clusters, (num_clusters / 2) * num_clusters);
29926fa459cSmrg    BROTLI_ENSURE_CAPACITY(
30026fa459cSmrg        m, HistogramPair, pairs, pairs_capacity, max_num_pairs + 1);
30126fa459cSmrg    if (BROTLI_IS_OOM(m)) return;
30226fa459cSmrg
30326fa459cSmrg    /* Collapse similar histograms. */
30426fa459cSmrg    num_clusters = FN(BrotliHistogramCombine)(out, cluster_size,
30526fa459cSmrg                                              histogram_symbols, clusters,
30626fa459cSmrg                                              pairs, num_clusters, in_size,
30726fa459cSmrg                                              max_histograms, max_num_pairs);
30826fa459cSmrg  }
30926fa459cSmrg  BROTLI_FREE(m, pairs);
31026fa459cSmrg  BROTLI_FREE(m, cluster_size);
31126fa459cSmrg  /* Find the optimal map from original histograms to the final ones. */
31226fa459cSmrg  FN(BrotliHistogramRemap)(in, in_size, clusters, num_clusters,
31326fa459cSmrg                           out, histogram_symbols);
31426fa459cSmrg  BROTLI_FREE(m, clusters);
31526fa459cSmrg  /* Convert the context map to a canonical form. */
31626fa459cSmrg  *out_size = FN(BrotliHistogramReindex)(m, out, histogram_symbols, in_size);
31726fa459cSmrg  if (BROTLI_IS_OOM(m)) return;
31826fa459cSmrg})
31926fa459cSmrg
32026fa459cSmrg#undef HistogramType
321