1/* Copyright 2013 Google Inc. All Rights Reserved.
2
3   Distributed under MIT license.
4   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5*/
6
7/* Block split point selection utilities. */
8
9#include "./block_splitter.h"
10
11#include <string.h>  /* memcpy, memset */
12
13#include "../common/platform.h"
14#include "./bit_cost.h"
15#include "./cluster.h"
16#include "./command.h"
17#include "./fast_log.h"
18#include "./histogram.h"
19#include "./memory.h"
20#include "./quality.h"
21
22#if defined(__cplusplus) || defined(c_plusplus)
23extern "C" {
24#endif
25
26static const size_t kMaxLiteralHistograms = 100;
27static const size_t kMaxCommandHistograms = 50;
28static const double kLiteralBlockSwitchCost = 28.1;
29static const double kCommandBlockSwitchCost = 13.5;
30static const double kDistanceBlockSwitchCost = 14.6;
31static const size_t kLiteralStrideLength = 70;
32static const size_t kCommandStrideLength = 40;
33static const size_t kSymbolsPerLiteralHistogram = 544;
34static const size_t kSymbolsPerCommandHistogram = 530;
35static const size_t kSymbolsPerDistanceHistogram = 544;
36static const size_t kMinLengthForBlockSplitting = 128;
37static const size_t kIterMulForRefining = 2;
38static const size_t kMinItersForRefining = 100;
39
40static size_t CountLiterals(const Command* cmds, const size_t num_commands) {
41  /* Count how many we have. */
42  size_t total_length = 0;
43  size_t i;
44  for (i = 0; i < num_commands; ++i) {
45    total_length += cmds[i].insert_len_;
46  }
47  return total_length;
48}
49
50static void CopyLiteralsToByteArray(const Command* cmds,
51                                    const size_t num_commands,
52                                    const uint8_t* data,
53                                    const size_t offset,
54                                    const size_t mask,
55                                    uint8_t* literals) {
56  size_t pos = 0;
57  size_t from_pos = offset & mask;
58  size_t i;
59  for (i = 0; i < num_commands; ++i) {
60    size_t insert_len = cmds[i].insert_len_;
61    if (from_pos + insert_len > mask) {
62      size_t head_size = mask + 1 - from_pos;
63      memcpy(literals + pos, data + from_pos, head_size);
64      from_pos = 0;
65      pos += head_size;
66      insert_len -= head_size;
67    }
68    if (insert_len > 0) {
69      memcpy(literals + pos, data + from_pos, insert_len);
70      pos += insert_len;
71    }
72    from_pos = (from_pos + insert_len + CommandCopyLen(&cmds[i])) & mask;
73  }
74}
75
76static BROTLI_INLINE uint32_t MyRand(uint32_t* seed) {
77  /* Initial seed should be 7. In this case, loop length is (1 << 29). */
78  *seed *= 16807U;
79  return *seed;
80}
81
82static BROTLI_INLINE double BitCost(size_t count) {
83  return count == 0 ? -2.0 : FastLog2(count);
84}
85
86#define HISTOGRAMS_PER_BATCH 64
87#define CLUSTERS_PER_BATCH 16
88
89#define FN(X) X ## Literal
90#define DataType uint8_t
91/* NOLINTNEXTLINE(build/include) */
92#include "./block_splitter_inc.h"
93#undef DataType
94#undef FN
95
96#define FN(X) X ## Command
97#define DataType uint16_t
98/* NOLINTNEXTLINE(build/include) */
99#include "./block_splitter_inc.h"
100#undef FN
101
102#define FN(X) X ## Distance
103/* NOLINTNEXTLINE(build/include) */
104#include "./block_splitter_inc.h"
105#undef DataType
106#undef FN
107
108void BrotliInitBlockSplit(BlockSplit* self) {
109  self->num_types = 0;
110  self->num_blocks = 0;
111  self->types = 0;
112  self->lengths = 0;
113  self->types_alloc_size = 0;
114  self->lengths_alloc_size = 0;
115}
116
117void BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) {
118  BROTLI_FREE(m, self->types);
119  BROTLI_FREE(m, self->lengths);
120}
121
122void BrotliSplitBlock(MemoryManager* m,
123                      const Command* cmds,
124                      const size_t num_commands,
125                      const uint8_t* data,
126                      const size_t pos,
127                      const size_t mask,
128                      const BrotliEncoderParams* params,
129                      BlockSplit* literal_split,
130                      BlockSplit* insert_and_copy_split,
131                      BlockSplit* dist_split) {
132  {
133    size_t literals_count = CountLiterals(cmds, num_commands);
134    uint8_t* literals = BROTLI_ALLOC(m, uint8_t, literals_count);
135    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(literals)) return;
136    /* Create a continuous array of literals. */
137    CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals);
138    /* Create the block split on the array of literals.
139       Literal histograms have alphabet size 256. */
140    SplitByteVectorLiteral(
141        m, literals, literals_count,
142        kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
143        kLiteralStrideLength, kLiteralBlockSwitchCost, params,
144        literal_split);
145    if (BROTLI_IS_OOM(m)) return;
146    BROTLI_FREE(m, literals);
147  }
148
149  {
150    /* Compute prefix codes for commands. */
151    uint16_t* insert_and_copy_codes = BROTLI_ALLOC(m, uint16_t, num_commands);
152    size_t i;
153    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(insert_and_copy_codes)) return;
154    for (i = 0; i < num_commands; ++i) {
155      insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
156    }
157    /* Create the block split on the array of command prefixes. */
158    SplitByteVectorCommand(
159        m, insert_and_copy_codes, num_commands,
160        kSymbolsPerCommandHistogram, kMaxCommandHistograms,
161        kCommandStrideLength, kCommandBlockSwitchCost, params,
162        insert_and_copy_split);
163    if (BROTLI_IS_OOM(m)) return;
164    /* TODO: reuse for distances? */
165    BROTLI_FREE(m, insert_and_copy_codes);
166  }
167
168  {
169    /* Create a continuous array of distance prefixes. */
170    uint16_t* distance_prefixes = BROTLI_ALLOC(m, uint16_t, num_commands);
171    size_t j = 0;
172    size_t i;
173    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(distance_prefixes)) return;
174    for (i = 0; i < num_commands; ++i) {
175      const Command* cmd = &cmds[i];
176      if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
177        distance_prefixes[j++] = cmd->dist_prefix_ & 0x3FF;
178      }
179    }
180    /* Create the block split on the array of distance prefixes. */
181    SplitByteVectorDistance(
182        m, distance_prefixes, j,
183        kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
184        kCommandStrideLength, kDistanceBlockSwitchCost, params,
185        dist_split);
186    if (BROTLI_IS_OOM(m)) return;
187    BROTLI_FREE(m, distance_prefixes);
188  }
189}
190
191
192#if defined(__cplusplus) || defined(c_plusplus)
193}  /* extern "C" */
194#endif
195