126fa459cSmrg/* Copyright 2013 Google Inc. All Rights Reserved.
226fa459cSmrg
326fa459cSmrg   Distributed under MIT license.
426fa459cSmrg   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
526fa459cSmrg*/
626fa459cSmrg
726fa459cSmrg/* Block split point selection utilities. */
826fa459cSmrg
926fa459cSmrg#include "./block_splitter.h"
1026fa459cSmrg
1126fa459cSmrg#include <string.h>  /* memcpy, memset */
1226fa459cSmrg
1326fa459cSmrg#include "../common/platform.h"
1426fa459cSmrg#include "./bit_cost.h"
1526fa459cSmrg#include "./cluster.h"
1626fa459cSmrg#include "./command.h"
1726fa459cSmrg#include "./fast_log.h"
1826fa459cSmrg#include "./histogram.h"
1926fa459cSmrg#include "./memory.h"
2026fa459cSmrg#include "./quality.h"
2126fa459cSmrg
2226fa459cSmrg#if defined(__cplusplus) || defined(c_plusplus)
2326fa459cSmrgextern "C" {
2426fa459cSmrg#endif
2526fa459cSmrg
2626fa459cSmrgstatic const size_t kMaxLiteralHistograms = 100;
2726fa459cSmrgstatic const size_t kMaxCommandHistograms = 50;
2826fa459cSmrgstatic const double kLiteralBlockSwitchCost = 28.1;
2926fa459cSmrgstatic const double kCommandBlockSwitchCost = 13.5;
3026fa459cSmrgstatic const double kDistanceBlockSwitchCost = 14.6;
3126fa459cSmrgstatic const size_t kLiteralStrideLength = 70;
3226fa459cSmrgstatic const size_t kCommandStrideLength = 40;
3326fa459cSmrgstatic const size_t kSymbolsPerLiteralHistogram = 544;
3426fa459cSmrgstatic const size_t kSymbolsPerCommandHistogram = 530;
3526fa459cSmrgstatic const size_t kSymbolsPerDistanceHistogram = 544;
3626fa459cSmrgstatic const size_t kMinLengthForBlockSplitting = 128;
3726fa459cSmrgstatic const size_t kIterMulForRefining = 2;
3826fa459cSmrgstatic const size_t kMinItersForRefining = 100;
3926fa459cSmrg
4026fa459cSmrgstatic size_t CountLiterals(const Command* cmds, const size_t num_commands) {
4126fa459cSmrg  /* Count how many we have. */
4226fa459cSmrg  size_t total_length = 0;
4326fa459cSmrg  size_t i;
4426fa459cSmrg  for (i = 0; i < num_commands; ++i) {
4526fa459cSmrg    total_length += cmds[i].insert_len_;
4626fa459cSmrg  }
4726fa459cSmrg  return total_length;
4826fa459cSmrg}
4926fa459cSmrg
5026fa459cSmrgstatic void CopyLiteralsToByteArray(const Command* cmds,
5126fa459cSmrg                                    const size_t num_commands,
5226fa459cSmrg                                    const uint8_t* data,
5326fa459cSmrg                                    const size_t offset,
5426fa459cSmrg                                    const size_t mask,
5526fa459cSmrg                                    uint8_t* literals) {
5626fa459cSmrg  size_t pos = 0;
5726fa459cSmrg  size_t from_pos = offset & mask;
5826fa459cSmrg  size_t i;
5926fa459cSmrg  for (i = 0; i < num_commands; ++i) {
6026fa459cSmrg    size_t insert_len = cmds[i].insert_len_;
6126fa459cSmrg    if (from_pos + insert_len > mask) {
6226fa459cSmrg      size_t head_size = mask + 1 - from_pos;
6326fa459cSmrg      memcpy(literals + pos, data + from_pos, head_size);
6426fa459cSmrg      from_pos = 0;
6526fa459cSmrg      pos += head_size;
6626fa459cSmrg      insert_len -= head_size;
6726fa459cSmrg    }
6826fa459cSmrg    if (insert_len > 0) {
6926fa459cSmrg      memcpy(literals + pos, data + from_pos, insert_len);
7026fa459cSmrg      pos += insert_len;
7126fa459cSmrg    }
7226fa459cSmrg    from_pos = (from_pos + insert_len + CommandCopyLen(&cmds[i])) & mask;
7326fa459cSmrg  }
7426fa459cSmrg}
7526fa459cSmrg
7626fa459cSmrgstatic BROTLI_INLINE uint32_t MyRand(uint32_t* seed) {
7726fa459cSmrg  /* Initial seed should be 7. In this case, loop length is (1 << 29). */
7826fa459cSmrg  *seed *= 16807U;
7926fa459cSmrg  return *seed;
8026fa459cSmrg}
8126fa459cSmrg
8226fa459cSmrgstatic BROTLI_INLINE double BitCost(size_t count) {
8326fa459cSmrg  return count == 0 ? -2.0 : FastLog2(count);
8426fa459cSmrg}
8526fa459cSmrg
8626fa459cSmrg#define HISTOGRAMS_PER_BATCH 64
8726fa459cSmrg#define CLUSTERS_PER_BATCH 16
8826fa459cSmrg
8926fa459cSmrg#define FN(X) X ## Literal
9026fa459cSmrg#define DataType uint8_t
9126fa459cSmrg/* NOLINTNEXTLINE(build/include) */
9226fa459cSmrg#include "./block_splitter_inc.h"
9326fa459cSmrg#undef DataType
9426fa459cSmrg#undef FN
9526fa459cSmrg
9626fa459cSmrg#define FN(X) X ## Command
9726fa459cSmrg#define DataType uint16_t
9826fa459cSmrg/* NOLINTNEXTLINE(build/include) */
9926fa459cSmrg#include "./block_splitter_inc.h"
10026fa459cSmrg#undef FN
10126fa459cSmrg
10226fa459cSmrg#define FN(X) X ## Distance
10326fa459cSmrg/* NOLINTNEXTLINE(build/include) */
10426fa459cSmrg#include "./block_splitter_inc.h"
10526fa459cSmrg#undef DataType
10626fa459cSmrg#undef FN
10726fa459cSmrg
10826fa459cSmrgvoid BrotliInitBlockSplit(BlockSplit* self) {
10926fa459cSmrg  self->num_types = 0;
11026fa459cSmrg  self->num_blocks = 0;
11126fa459cSmrg  self->types = 0;
11226fa459cSmrg  self->lengths = 0;
11326fa459cSmrg  self->types_alloc_size = 0;
11426fa459cSmrg  self->lengths_alloc_size = 0;
11526fa459cSmrg}
11626fa459cSmrg
11726fa459cSmrgvoid BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) {
11826fa459cSmrg  BROTLI_FREE(m, self->types);
11926fa459cSmrg  BROTLI_FREE(m, self->lengths);
12026fa459cSmrg}
12126fa459cSmrg
12226fa459cSmrgvoid BrotliSplitBlock(MemoryManager* m,
12326fa459cSmrg                      const Command* cmds,
12426fa459cSmrg                      const size_t num_commands,
12526fa459cSmrg                      const uint8_t* data,
12626fa459cSmrg                      const size_t pos,
12726fa459cSmrg                      const size_t mask,
12826fa459cSmrg                      const BrotliEncoderParams* params,
12926fa459cSmrg                      BlockSplit* literal_split,
13026fa459cSmrg                      BlockSplit* insert_and_copy_split,
13126fa459cSmrg                      BlockSplit* dist_split) {
13226fa459cSmrg  {
13326fa459cSmrg    size_t literals_count = CountLiterals(cmds, num_commands);
13426fa459cSmrg    uint8_t* literals = BROTLI_ALLOC(m, uint8_t, literals_count);
13526fa459cSmrg    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(literals)) return;
13626fa459cSmrg    /* Create a continuous array of literals. */
13726fa459cSmrg    CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals);
13826fa459cSmrg    /* Create the block split on the array of literals.
13926fa459cSmrg       Literal histograms have alphabet size 256. */
14026fa459cSmrg    SplitByteVectorLiteral(
14126fa459cSmrg        m, literals, literals_count,
14226fa459cSmrg        kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
14326fa459cSmrg        kLiteralStrideLength, kLiteralBlockSwitchCost, params,
14426fa459cSmrg        literal_split);
14526fa459cSmrg    if (BROTLI_IS_OOM(m)) return;
14626fa459cSmrg    BROTLI_FREE(m, literals);
14726fa459cSmrg  }
14826fa459cSmrg
14926fa459cSmrg  {
15026fa459cSmrg    /* Compute prefix codes for commands. */
15126fa459cSmrg    uint16_t* insert_and_copy_codes = BROTLI_ALLOC(m, uint16_t, num_commands);
15226fa459cSmrg    size_t i;
15326fa459cSmrg    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(insert_and_copy_codes)) return;
15426fa459cSmrg    for (i = 0; i < num_commands; ++i) {
15526fa459cSmrg      insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
15626fa459cSmrg    }
15726fa459cSmrg    /* Create the block split on the array of command prefixes. */
15826fa459cSmrg    SplitByteVectorCommand(
15926fa459cSmrg        m, insert_and_copy_codes, num_commands,
16026fa459cSmrg        kSymbolsPerCommandHistogram, kMaxCommandHistograms,
16126fa459cSmrg        kCommandStrideLength, kCommandBlockSwitchCost, params,
16226fa459cSmrg        insert_and_copy_split);
16326fa459cSmrg    if (BROTLI_IS_OOM(m)) return;
16426fa459cSmrg    /* TODO: reuse for distances? */
16526fa459cSmrg    BROTLI_FREE(m, insert_and_copy_codes);
16626fa459cSmrg  }
16726fa459cSmrg
16826fa459cSmrg  {
16926fa459cSmrg    /* Create a continuous array of distance prefixes. */
17026fa459cSmrg    uint16_t* distance_prefixes = BROTLI_ALLOC(m, uint16_t, num_commands);
17126fa459cSmrg    size_t j = 0;
17226fa459cSmrg    size_t i;
17326fa459cSmrg    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(distance_prefixes)) return;
17426fa459cSmrg    for (i = 0; i < num_commands; ++i) {
17526fa459cSmrg      const Command* cmd = &cmds[i];
17626fa459cSmrg      if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
17726fa459cSmrg        distance_prefixes[j++] = cmd->dist_prefix_ & 0x3FF;
17826fa459cSmrg      }
17926fa459cSmrg    }
18026fa459cSmrg    /* Create the block split on the array of distance prefixes. */
18126fa459cSmrg    SplitByteVectorDistance(
18226fa459cSmrg        m, distance_prefixes, j,
18326fa459cSmrg        kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
18426fa459cSmrg        kCommandStrideLength, kDistanceBlockSwitchCost, params,
18526fa459cSmrg        dist_split);
18626fa459cSmrg    if (BROTLI_IS_OOM(m)) return;
18726fa459cSmrg    BROTLI_FREE(m, distance_prefixes);
18826fa459cSmrg  }
18926fa459cSmrg}
19026fa459cSmrg
19126fa459cSmrg
19226fa459cSmrg#if defined(__cplusplus) || defined(c_plusplus)
19326fa459cSmrg}  /* extern "C" */
19426fa459cSmrg#endif
195