1/* NOLINT(build/header_guard) */
2/* Copyright 2013 Google Inc. All Rights Reserved.
3
4   Distributed under MIT license.
5   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6*/
7
8/* template parameters: FN, DataType */
9
10#define HistogramType FN(Histogram)
11
12static void FN(InitialEntropyCodes)(const DataType* data, size_t length,
13                                    size_t stride,
14                                    size_t num_histograms,
15                                    HistogramType* histograms) {
16  uint32_t seed = 7;
17  size_t block_length = length / num_histograms;
18  size_t i;
19  FN(ClearHistograms)(histograms, num_histograms);
20  for (i = 0; i < num_histograms; ++i) {
21    size_t pos = length * i / num_histograms;
22    if (i != 0) {
23      pos += MyRand(&seed) % block_length;
24    }
25    if (pos + stride >= length) {
26      pos = length - stride - 1;
27    }
28    FN(HistogramAddVector)(&histograms[i], data + pos, stride);
29  }
30}
31
32static void FN(RandomSample)(uint32_t* seed,
33                             const DataType* data,
34                             size_t length,
35                             size_t stride,
36                             HistogramType* sample) {
37  size_t pos = 0;
38  if (stride >= length) {
39    stride = length;
40  } else {
41    pos = MyRand(seed) % (length - stride + 1);
42  }
43  FN(HistogramAddVector)(sample, data + pos, stride);
44}
45
46static void FN(RefineEntropyCodes)(const DataType* data, size_t length,
47                                   size_t stride,
48                                   size_t num_histograms,
49                                   HistogramType* histograms) {
50  size_t iters =
51      kIterMulForRefining * length / stride + kMinItersForRefining;
52  uint32_t seed = 7;
53  size_t iter;
54  iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms;
55  for (iter = 0; iter < iters; ++iter) {
56    HistogramType sample;
57    FN(HistogramClear)(&sample);
58    FN(RandomSample)(&seed, data, length, stride, &sample);
59    FN(HistogramAddHistogram)(&histograms[iter % num_histograms], &sample);
60  }
61}
62
63/* Assigns a block id from the range [0, num_histograms) to each data element
64   in data[0..length) and fills in block_id[0..length) with the assigned values.
65   Returns the number of blocks, i.e. one plus the number of block switches. */
66static size_t FN(FindBlocks)(const DataType* data, const size_t length,
67                             const double block_switch_bitcost,
68                             const size_t num_histograms,
69                             const HistogramType* histograms,
70                             double* insert_cost,
71                             double* cost,
72                             uint8_t* switch_signal,
73                             uint8_t* block_id) {
74  const size_t data_size = FN(HistogramDataSize)();
75  const size_t bitmaplen = (num_histograms + 7) >> 3;
76  size_t num_blocks = 1;
77  size_t i;
78  size_t j;
79  BROTLI_DCHECK(num_histograms <= 256);
80  if (num_histograms <= 1) {
81    for (i = 0; i < length; ++i) {
82      block_id[i] = 0;
83    }
84    return 1;
85  }
86  memset(insert_cost, 0, sizeof(insert_cost[0]) * data_size * num_histograms);
87  for (i = 0; i < num_histograms; ++i) {
88    insert_cost[i] = FastLog2((uint32_t)histograms[i].total_count_);
89  }
90  for (i = data_size; i != 0;) {
91    --i;
92    for (j = 0; j < num_histograms; ++j) {
93      insert_cost[i * num_histograms + j] =
94          insert_cost[j] - BitCost(histograms[j].data_[i]);
95    }
96  }
97  memset(cost, 0, sizeof(cost[0]) * num_histograms);
98  memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmaplen);
99  /* After each iteration of this loop, cost[k] will contain the difference
100     between the minimum cost of arriving at the current byte position using
101     entropy code k, and the minimum cost of arriving at the current byte
102     position. This difference is capped at the block switch cost, and if it
103     reaches block switch cost, it means that when we trace back from the last
104     position, we need to switch here. */
105  for (i = 0; i < length; ++i) {
106    const size_t byte_ix = i;
107    size_t ix = byte_ix * bitmaplen;
108    size_t insert_cost_ix = data[byte_ix] * num_histograms;
109    double min_cost = 1e99;
110    double block_switch_cost = block_switch_bitcost;
111    size_t k;
112    for (k = 0; k < num_histograms; ++k) {
113      /* We are coding the symbol in data[byte_ix] with entropy code k. */
114      cost[k] += insert_cost[insert_cost_ix + k];
115      if (cost[k] < min_cost) {
116        min_cost = cost[k];
117        block_id[byte_ix] = (uint8_t)k;
118      }
119    }
120    /* More blocks for the beginning. */
121    if (byte_ix < 2000) {
122      block_switch_cost *= 0.77 + 0.07 * (double)byte_ix / 2000;
123    }
124    for (k = 0; k < num_histograms; ++k) {
125      cost[k] -= min_cost;
126      if (cost[k] >= block_switch_cost) {
127        const uint8_t mask = (uint8_t)(1u << (k & 7));
128        cost[k] = block_switch_cost;
129        BROTLI_DCHECK((k >> 3) < bitmaplen);
130        switch_signal[ix + (k >> 3)] |= mask;
131      }
132    }
133  }
134  {  /* Trace back from the last position and switch at the marked places. */
135    size_t byte_ix = length - 1;
136    size_t ix = byte_ix * bitmaplen;
137    uint8_t cur_id = block_id[byte_ix];
138    while (byte_ix > 0) {
139      const uint8_t mask = (uint8_t)(1u << (cur_id & 7));
140      BROTLI_DCHECK(((size_t)cur_id >> 3) < bitmaplen);
141      --byte_ix;
142      ix -= bitmaplen;
143      if (switch_signal[ix + (cur_id >> 3)] & mask) {
144        if (cur_id != block_id[byte_ix]) {
145          cur_id = block_id[byte_ix];
146          ++num_blocks;
147        }
148      }
149      block_id[byte_ix] = cur_id;
150    }
151  }
152  return num_blocks;
153}
154
155static size_t FN(RemapBlockIds)(uint8_t* block_ids, const size_t length,
156                                uint16_t* new_id, const size_t num_histograms) {
157  static const uint16_t kInvalidId = 256;
158  uint16_t next_id = 0;
159  size_t i;
160  for (i = 0; i < num_histograms; ++i) {
161    new_id[i] = kInvalidId;
162  }
163  for (i = 0; i < length; ++i) {
164    BROTLI_DCHECK(block_ids[i] < num_histograms);
165    if (new_id[block_ids[i]] == kInvalidId) {
166      new_id[block_ids[i]] = next_id++;
167    }
168  }
169  for (i = 0; i < length; ++i) {
170    block_ids[i] = (uint8_t)new_id[block_ids[i]];
171    BROTLI_DCHECK(block_ids[i] < num_histograms);
172  }
173  BROTLI_DCHECK(next_id <= num_histograms);
174  return next_id;
175}
176
177static void FN(BuildBlockHistograms)(const DataType* data, const size_t length,
178                                     const uint8_t* block_ids,
179                                     const size_t num_histograms,
180                                     HistogramType* histograms) {
181  size_t i;
182  FN(ClearHistograms)(histograms, num_histograms);
183  for (i = 0; i < length; ++i) {
184    FN(HistogramAdd)(&histograms[block_ids[i]], data[i]);
185  }
186}
187
188static void FN(ClusterBlocks)(MemoryManager* m,
189                              const DataType* data, const size_t length,
190                              const size_t num_blocks,
191                              uint8_t* block_ids,
192                              BlockSplit* split) {
193  uint32_t* histogram_symbols = BROTLI_ALLOC(m, uint32_t, num_blocks);
194  uint32_t* block_lengths = BROTLI_ALLOC(m, uint32_t, num_blocks);
195  const size_t expected_num_clusters = CLUSTERS_PER_BATCH *
196      (num_blocks + HISTOGRAMS_PER_BATCH - 1) / HISTOGRAMS_PER_BATCH;
197  size_t all_histograms_size = 0;
198  size_t all_histograms_capacity = expected_num_clusters;
199  HistogramType* all_histograms =
200      BROTLI_ALLOC(m, HistogramType, all_histograms_capacity);
201  size_t cluster_size_size = 0;
202  size_t cluster_size_capacity = expected_num_clusters;
203  uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, cluster_size_capacity);
204  size_t num_clusters = 0;
205  HistogramType* histograms = BROTLI_ALLOC(m, HistogramType,
206      BROTLI_MIN(size_t, num_blocks, HISTOGRAMS_PER_BATCH));
207  size_t max_num_pairs =
208      HISTOGRAMS_PER_BATCH * HISTOGRAMS_PER_BATCH / 2;
209  size_t pairs_capacity = max_num_pairs + 1;
210  HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity);
211  size_t pos = 0;
212  uint32_t* clusters;
213  size_t num_final_clusters;
214  static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX;
215  uint32_t* new_index;
216  size_t i;
217  uint32_t sizes[HISTOGRAMS_PER_BATCH] = { 0 };
218  uint32_t new_clusters[HISTOGRAMS_PER_BATCH] = { 0 };
219  uint32_t symbols[HISTOGRAMS_PER_BATCH] = { 0 };
220  uint32_t remap[HISTOGRAMS_PER_BATCH] = { 0 };
221
222  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histogram_symbols) ||
223      BROTLI_IS_NULL(block_lengths) || BROTLI_IS_NULL(all_histograms) ||
224      BROTLI_IS_NULL(cluster_size) || BROTLI_IS_NULL(histograms) ||
225      BROTLI_IS_NULL(pairs)) {
226    return;
227  }
228
229  memset(block_lengths, 0, num_blocks * sizeof(uint32_t));
230
231  {
232    size_t block_idx = 0;
233    for (i = 0; i < length; ++i) {
234      BROTLI_DCHECK(block_idx < num_blocks);
235      ++block_lengths[block_idx];
236      if (i + 1 == length || block_ids[i] != block_ids[i + 1]) {
237        ++block_idx;
238      }
239    }
240    BROTLI_DCHECK(block_idx == num_blocks);
241  }
242
243  for (i = 0; i < num_blocks; i += HISTOGRAMS_PER_BATCH) {
244    const size_t num_to_combine =
245        BROTLI_MIN(size_t, num_blocks - i, HISTOGRAMS_PER_BATCH);
246    size_t num_new_clusters;
247    size_t j;
248    for (j = 0; j < num_to_combine; ++j) {
249      size_t k;
250      FN(HistogramClear)(&histograms[j]);
251      for (k = 0; k < block_lengths[i + j]; ++k) {
252        FN(HistogramAdd)(&histograms[j], data[pos++]);
253      }
254      histograms[j].bit_cost_ = FN(BrotliPopulationCost)(&histograms[j]);
255      new_clusters[j] = (uint32_t)j;
256      symbols[j] = (uint32_t)j;
257      sizes[j] = 1;
258    }
259    num_new_clusters = FN(BrotliHistogramCombine)(
260        histograms, sizes, symbols, new_clusters, pairs, num_to_combine,
261        num_to_combine, HISTOGRAMS_PER_BATCH, max_num_pairs);
262    BROTLI_ENSURE_CAPACITY(m, HistogramType, all_histograms,
263        all_histograms_capacity, all_histograms_size + num_new_clusters);
264    BROTLI_ENSURE_CAPACITY(m, uint32_t, cluster_size,
265        cluster_size_capacity, cluster_size_size + num_new_clusters);
266    if (BROTLI_IS_OOM(m)) return;
267    for (j = 0; j < num_new_clusters; ++j) {
268      all_histograms[all_histograms_size++] = histograms[new_clusters[j]];
269      cluster_size[cluster_size_size++] = sizes[new_clusters[j]];
270      remap[new_clusters[j]] = (uint32_t)j;
271    }
272    for (j = 0; j < num_to_combine; ++j) {
273      histogram_symbols[i + j] = (uint32_t)num_clusters + remap[symbols[j]];
274    }
275    num_clusters += num_new_clusters;
276    BROTLI_DCHECK(num_clusters == cluster_size_size);
277    BROTLI_DCHECK(num_clusters == all_histograms_size);
278  }
279  BROTLI_FREE(m, histograms);
280
281  max_num_pairs =
282      BROTLI_MIN(size_t, 64 * num_clusters, (num_clusters / 2) * num_clusters);
283  if (pairs_capacity < max_num_pairs + 1) {
284    BROTLI_FREE(m, pairs);
285    pairs = BROTLI_ALLOC(m, HistogramPair, max_num_pairs + 1);
286    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(pairs)) return;
287  }
288
289  clusters = BROTLI_ALLOC(m, uint32_t, num_clusters);
290  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(clusters)) return;
291  for (i = 0; i < num_clusters; ++i) {
292    clusters[i] = (uint32_t)i;
293  }
294  num_final_clusters = FN(BrotliHistogramCombine)(
295      all_histograms, cluster_size, histogram_symbols, clusters, pairs,
296      num_clusters, num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES,
297      max_num_pairs);
298  BROTLI_FREE(m, pairs);
299  BROTLI_FREE(m, cluster_size);
300
301  new_index = BROTLI_ALLOC(m, uint32_t, num_clusters);
302  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_index)) return;
303  for (i = 0; i < num_clusters; ++i) new_index[i] = kInvalidIndex;
304  pos = 0;
305  {
306    uint32_t next_index = 0;
307    for (i = 0; i < num_blocks; ++i) {
308      HistogramType histo;
309      size_t j;
310      uint32_t best_out;
311      double best_bits;
312      FN(HistogramClear)(&histo);
313      for (j = 0; j < block_lengths[i]; ++j) {
314        FN(HistogramAdd)(&histo, data[pos++]);
315      }
316      best_out = (i == 0) ? histogram_symbols[0] : histogram_symbols[i - 1];
317      best_bits =
318          FN(BrotliHistogramBitCostDistance)(&histo, &all_histograms[best_out]);
319      for (j = 0; j < num_final_clusters; ++j) {
320        const double cur_bits = FN(BrotliHistogramBitCostDistance)(
321            &histo, &all_histograms[clusters[j]]);
322        if (cur_bits < best_bits) {
323          best_bits = cur_bits;
324          best_out = clusters[j];
325        }
326      }
327      histogram_symbols[i] = best_out;
328      if (new_index[best_out] == kInvalidIndex) {
329        new_index[best_out] = next_index++;
330      }
331    }
332  }
333  BROTLI_FREE(m, clusters);
334  BROTLI_FREE(m, all_histograms);
335  BROTLI_ENSURE_CAPACITY(
336      m, uint8_t, split->types, split->types_alloc_size, num_blocks);
337  BROTLI_ENSURE_CAPACITY(
338      m, uint32_t, split->lengths, split->lengths_alloc_size, num_blocks);
339  if (BROTLI_IS_OOM(m)) return;
340  {
341    uint32_t cur_length = 0;
342    size_t block_idx = 0;
343    uint8_t max_type = 0;
344    for (i = 0; i < num_blocks; ++i) {
345      cur_length += block_lengths[i];
346      if (i + 1 == num_blocks ||
347          histogram_symbols[i] != histogram_symbols[i + 1]) {
348        const uint8_t id = (uint8_t)new_index[histogram_symbols[i]];
349        split->types[block_idx] = id;
350        split->lengths[block_idx] = cur_length;
351        max_type = BROTLI_MAX(uint8_t, max_type, id);
352        cur_length = 0;
353        ++block_idx;
354      }
355    }
356    split->num_blocks = block_idx;
357    split->num_types = (size_t)max_type + 1;
358  }
359  BROTLI_FREE(m, new_index);
360  BROTLI_FREE(m, block_lengths);
361  BROTLI_FREE(m, histogram_symbols);
362}
363
364static void FN(SplitByteVector)(MemoryManager* m,
365                                const DataType* data, const size_t length,
366                                const size_t literals_per_histogram,
367                                const size_t max_histograms,
368                                const size_t sampling_stride_length,
369                                const double block_switch_cost,
370                                const BrotliEncoderParams* params,
371                                BlockSplit* split) {
372  const size_t data_size = FN(HistogramDataSize)();
373  size_t num_histograms = length / literals_per_histogram + 1;
374  HistogramType* histograms;
375  if (num_histograms > max_histograms) {
376    num_histograms = max_histograms;
377  }
378  if (length == 0) {
379    split->num_types = 1;
380    return;
381  } else if (length < kMinLengthForBlockSplitting) {
382    BROTLI_ENSURE_CAPACITY(m, uint8_t,
383        split->types, split->types_alloc_size, split->num_blocks + 1);
384    BROTLI_ENSURE_CAPACITY(m, uint32_t,
385        split->lengths, split->lengths_alloc_size, split->num_blocks + 1);
386    if (BROTLI_IS_OOM(m)) return;
387    split->num_types = 1;
388    split->types[split->num_blocks] = 0;
389    split->lengths[split->num_blocks] = (uint32_t)length;
390    split->num_blocks++;
391    return;
392  }
393  histograms = BROTLI_ALLOC(m, HistogramType, num_histograms);
394  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histograms)) return;
395  /* Find good entropy codes. */
396  FN(InitialEntropyCodes)(data, length,
397                          sampling_stride_length,
398                          num_histograms, histograms);
399  FN(RefineEntropyCodes)(data, length,
400                         sampling_stride_length,
401                         num_histograms, histograms);
402  {
403    /* Find a good path through literals with the good entropy codes. */
404    uint8_t* block_ids = BROTLI_ALLOC(m, uint8_t, length);
405    size_t num_blocks = 0;
406    const size_t bitmaplen = (num_histograms + 7) >> 3;
407    double* insert_cost = BROTLI_ALLOC(m, double, data_size * num_histograms);
408    double* cost = BROTLI_ALLOC(m, double, num_histograms);
409    uint8_t* switch_signal = BROTLI_ALLOC(m, uint8_t, length * bitmaplen);
410    uint16_t* new_id = BROTLI_ALLOC(m, uint16_t, num_histograms);
411    const size_t iters = params->quality < HQ_ZOPFLIFICATION_QUALITY ? 3 : 10;
412    size_t i;
413    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(block_ids) ||
414        BROTLI_IS_NULL(insert_cost) || BROTLI_IS_NULL(cost) ||
415        BROTLI_IS_NULL(switch_signal) || BROTLI_IS_NULL(new_id)) {
416      return;
417    }
418    for (i = 0; i < iters; ++i) {
419      num_blocks = FN(FindBlocks)(data, length,
420                                  block_switch_cost,
421                                  num_histograms, histograms,
422                                  insert_cost, cost, switch_signal,
423                                  block_ids);
424      num_histograms = FN(RemapBlockIds)(block_ids, length,
425                                         new_id, num_histograms);
426      FN(BuildBlockHistograms)(data, length, block_ids,
427                               num_histograms, histograms);
428    }
429    BROTLI_FREE(m, insert_cost);
430    BROTLI_FREE(m, cost);
431    BROTLI_FREE(m, switch_signal);
432    BROTLI_FREE(m, new_id);
433    BROTLI_FREE(m, histograms);
434    FN(ClusterBlocks)(m, data, length, num_blocks, block_ids, split);
435    if (BROTLI_IS_OOM(m)) return;
436    BROTLI_FREE(m, block_ids);
437  }
438}
439
440#undef HistogramType
441